~iliaplatone/spacedrone.eu/inova-sis-pack

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ext_list.hpp"
#include "ext_base.hpp"

#include <cmath>
#include <string>
#include <vector>
#include <cassert>
#include "ie_parallel.hpp"

namespace InferenceEngine {
namespace Extensions {
namespace Cpu {

class SpaceToDepthImpl: public ExtLayerBase {
#define CNTR_SIZE 5

public:
    explicit SpaceToDepthImpl(const CNNLayer* layer) {
        try {
            if (layer->insData.empty() || layer->outData.empty())
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";

            SizeVector src_dims = layer->insData[0].lock()->getTensorDesc().getDims();
            if (src_dims.size() < 2)
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input dimensions!";
            if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32)
                THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only F32 is supported!";

            SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
            if (dst_dims.size() < 3)
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of output dimensions!";
            if (layer->outData[0]->getTensorDesc().getPrecision() != Precision::FP32)
                THROW_IE_EXCEPTION << layer->name << " Incorrect output precision. Only F32 is supported!";

            size_t block_size = layer->GetParamAsUInt("block_size", 1);
            if (block_size == 0)
                THROW_IE_EXCEPTION << layer->name << " Incorrect block_size parameter is zero!";

            if (dst_dims[dst_dims.size() - 3] % (block_size * block_size))
                THROW_IE_EXCEPTION << layer->name << " block_size parameter is incompatible with input tensor Color dimension size!";

            if (src_dims.size() > 2 && dst_dims[dst_dims.size() - 3] != (src_dims[src_dims.size() - 3] * block_size * block_size))
                THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Color dimension is incompatible with block_size!";

            if (src_dims[src_dims.size() - 2] != (dst_dims[dst_dims.size() - 2] * block_size))
                THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Height dimension is incompatible with block_size!";

            if (src_dims[src_dims.size() - 1] != (dst_dims[dst_dims.size() - 1] * block_size))
                THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Width dimension is incompatible with block_size!";

            own_dims[0] = 1;
            for (size_t i = 0; i < (dst_dims.size() - 3); i++)
                own_dims[0] *= dst_dims[i];
            own_dims[1] = dst_dims[dst_dims.size() - 2];
            own_dims[2] = dst_dims[dst_dims.size() - 3] / block_size;
            own_dims[3] = dst_dims[dst_dims.size() - 1];
            own_dims[4] = block_size;

            size_t C = dst_dims[dst_dims.size() - 2] * dst_dims[dst_dims.size() - 1];
            ownStrides[0] = dst_dims[dst_dims.size() - 3] * C;
            ownStrides[1] = dst_dims[dst_dims.size() - 1];
            ownStrides[2] = block_size * C;
            ownStrides[3] = 1;
            ownStrides[4] = C;
            work_amount_dst = ownStrides[0] * own_dims[0];

            addConfig(layer, { DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            errorMsg = ex.what();
        }
    }

    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
        const float *src_data = inputs[0]->cbuffer().as<const float *>() +
            inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
        float* dst_data = outputs[0]->cbuffer().as<float *>() +
            outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();

        //  Parallel
        parallel_nt(0, [&](const int ithr, const int nthr) {
            size_t i, start = 0, end = 0, dst_idx = 0;
            size_t counters[CNTR_SIZE] = { 0 };
            splitter(work_amount_dst, nthr, ithr, start, end);
            i = start;
            for (int j = CNTR_SIZE - 1; j >= 0; j--) {
                counters[j] = i % own_dims[j];
                dst_idx += counters[j] * ownStrides[j];
                i /= own_dims[j];
            }

            for (size_t iwork = start, i = 1; iwork < end; ++iwork) {
                dst_data[dst_idx] = src_data[iwork];
                for (int j = CNTR_SIZE - 1; j >= 0; j--) {
                    counters[j]++;
                    if (counters[j] < own_dims[j]) {
                        dst_idx += ownStrides[j];
                        break;
                    } else {
                        counters[j] = i = 0;
                    }
                }
                if (!i) {
                    for (dst_idx = 0; i < CNTR_SIZE; ++i)
                        dst_idx += counters[i] * ownStrides[i];
                }
            }
        });

        return OK;
    }

private:
    size_t work_amount_dst;
    size_t own_dims[CNTR_SIZE];
    size_t ownStrides[CNTR_SIZE];
};

REG_FACTORY_FOR(ImplFactory<SpaceToDepthImpl>, SpaceToDepth);

}  // namespace Cpu
}  // namespace Extensions
}  // namespace InferenceEngine