1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ext_list.hpp"
#include "ext_base.hpp"
#include <cmath>
#include <string>
#include <vector>
#include <cassert>
#include "ie_parallel.hpp"
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class RangeImpl: public ExtLayerBase {
public:
explicit RangeImpl(const CNNLayer* layer) {
try {
if (layer->insData.empty() || layer->outData.empty())
THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
if (layer->insData.size() != 3)
THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!";
SizeVector start_dims = layer->insData[RANGE_START].lock()->getTensorDesc().getDims();
if (start_dims.size() > 1)
THROW_IE_EXCEPTION << layer->name << " Start scalar should have 1 dimension";
SizeVector limit_dims = layer->insData[RANGE_LIMIT].lock()->getTensorDesc().getDims();
if (limit_dims.size() > 1)
THROW_IE_EXCEPTION << layer->name << " Limit scalar should have 1 dimension";
SizeVector delta_dims = layer->insData[RANGE_DELTA].lock()->getTensorDesc().getDims();
if (delta_dims.size() > 1)
THROW_IE_EXCEPTION << layer->name << " Delta scalar should have 1 dimension";
SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
if (dst_dims.size() > 1)
THROW_IE_EXCEPTION << layer->name << " Output vector should have 1 dimension";
if (!(layer->insData[RANGE_START].lock()->getTensorDesc().getPrecision() == Precision::I32 &&
layer->insData[RANGE_LIMIT].lock()->getTensorDesc().getPrecision() == Precision::I32 &&
layer->insData[RANGE_DELTA].lock()->getTensorDesc().getPrecision() == Precision::I32 &&
layer->outData[0]->getTensorDesc().getPrecision() == Precision::I32) &&
!(layer->insData[RANGE_START].lock()->getTensorDesc().getPrecision() == Precision::FP32 &&
layer->insData[RANGE_LIMIT].lock()->getTensorDesc().getPrecision() == Precision::FP32 &&
layer->insData[RANGE_DELTA].lock()->getTensorDesc().getPrecision() == Precision::FP32 &&
layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) {
THROW_IE_EXCEPTION << layer->name <<
" 'Start', 'Limit', 'Delta' input scalars and output tensor should have same precision" <<
"and only FP32 and I32 are supported!";
}
addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) },
{ DataConfigurator(ConfLayout::PLN) });
} catch (InferenceEngine::details::InferenceEngineException &ex) {
errorMsg = ex.what();
}
}
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
StatusCode retcode = OK;
switch (outputs[0]->getTensorDesc().getPrecision()) {
case Precision::FP32: {
retcode = range((inputs[RANGE_START]->cbuffer().as<float *>() +
inputs[RANGE_START]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0],
(inputs[RANGE_LIMIT]->cbuffer().as<float *>() +
inputs[RANGE_LIMIT]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0],
(inputs[RANGE_DELTA]->cbuffer().as<float *>() +
inputs[RANGE_DELTA]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0], outputs[0]);
}
break;
case Precision::I32: {
retcode = range((inputs[RANGE_START]->cbuffer().as<int32_t *>() +
inputs[RANGE_START]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0],
(inputs[RANGE_LIMIT]->cbuffer().as<int32_t *>() +
inputs[RANGE_LIMIT]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0],
(inputs[RANGE_DELTA]->cbuffer().as<int32_t *>() +
inputs[RANGE_DELTA]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0], outputs[0]);
}
break;
default:
if (resp) {
std::string errorMsg = "Incorrect output precision. Only FP32 and I32 are supported!";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
}
retcode = GENERAL_ERROR;
}
if (resp && retcode == PARAMETER_MISMATCH) {
std::string errorMsg = "Range indexes exceeds data tensor dimension";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
}
return retcode;
}
private:
const size_t RANGE_START = 0;
const size_t RANGE_LIMIT = 1;
const size_t RANGE_DELTA = 2;
template <typename data_t>
StatusCode range(data_t start, data_t limit, data_t delta, Blob::Ptr output);
};
/**
 * @brief Fills the output blob with the arithmetic sequence
 *        start, start + delta, start + 2 * delta, ...
 *
 * The number of elements is floor(|(limit - start) / delta|) and must equal
 * the first dimension of @p output.
 *
 * @param start  First value of the sequence.
 * @param limit  Bound used only to compute the number of elements.
 * @param delta  Step between consecutive elements; must be non-zero.
 * @param output Destination blob; written starting at its offset padding.
 * @return OK on success; PARAMETER_MISMATCH when delta is zero, the output has
 *         no dimensions, or the computed count differs from the output size.
 */
template <typename data_t>
StatusCode RangeImpl::range(data_t start, data_t limit, data_t delta, Blob::Ptr output) {
    // A zero step would divide by zero below: undefined behaviour for integers
    // and an inf/NaN -> size_t conversion for floats.
    if (delta == static_cast<data_t>(0))
        return PARAMETER_MISMATCH;

    // The constructor accepts outputs with zero dimensions, so guard the [0] access.
    const SizeVector dst_dims = output->getTensorDesc().getDims();
    if (dst_dims.empty())
        return PARAMETER_MISMATCH;
    const size_t dst_size = dst_dims[0];

    data_t* dst_data = output->cbuffer().as<data_t *>() +
                       output->getTensorDesc().getBlockingDesc().getOffsetPadding();

    // NOTE(review): a fractional (limit - start) / delta is rounded down (and the
    // integer division already truncates), so a partial final step yields no
    // element. Confirm this matches the shape inference that sizes the output.
    const size_t work_amount_dst = static_cast<size_t>(std::floor(std::abs((limit - start) / delta)));
    if (work_amount_dst != dst_size)
        return PARAMETER_MISMATCH;

    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t iwork = 0, end = 0;
        splitter(work_amount_dst, nthr, ithr, iwork, end);
        // Convert the index to data_t first so a negative integer delta is not
        // pushed through unsigned size_t arithmetic.
        data_t dst_value = start + static_cast<data_t>(iwork) * delta;
        for (; iwork < end; ++iwork, dst_value += delta) {
            dst_data[iwork] = dst_value;
        }
    });
    return OK;
}
// Associates ImplFactory<RangeImpl> with the identifier Range.
// NOTE(review): REG_FACTORY_FOR is defined outside this file (presumably in
// ext_list.hpp / ext_base.hpp); confirm the exact registration mechanism there.
REG_FACTORY_FOR(ImplFactory<RangeImpl>, Range);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
|