// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ext_list.hpp"
#include "ext_base.hpp"

#include <cmath>
#include <string>
#include <vector>
#include <cassert>
#include "ie_parallel.hpp"

namespace InferenceEngine {
namespace Extensions {
namespace Cpu {

class MathImpl: public ExtLayerBase {
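    // error_function evaluates a rational polynomial approximation of erf(x),
    // of the form erf(x) ~= x * P(x^2) / Q(x^2), with P and Q computed below
    // by Horner's scheme.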
    static float error_function(float x) {
        const float clip_bound = 2.86f;
        // The points clip_bound and -clip_bound are extrema of this polynomial,
        // so to stay accurate relative to std::erf we clip the input range.
        if (x > clip_bound)
            return 1;
        if (x < -clip_bound)
            return -1;

        // A polynomial approximation of the error function
        const float erfNumerator[4] = { 90.0260162353515625f, 2232.00537109375f,
                                        7003.3251953125f, 55592.30078125f };
        const float erfDenominator[5] = { 33.56171417236328125f, 521.35797119140625f,
                                          4594.32373046875f, 22629.0f, 49267.39453125f };
        float polynom = 9.60497379302978515625f;
        float x2 = x * x;
        for (float c : erfNumerator) {
            polynom = polynom * x2 + c;
        }
        x *= polynom;
        polynom = 1.0f;
        for (float c : erfDenominator) {
            polynom = polynom * x2 + c;
        }
        return x / polynom;
    }

public:
    explicit MathImpl(const CNNLayer* layer) {
        try {
            if (layer->insData.empty() || layer->outData.empty())
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";

            if (layer->insData.size() != 1)
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!";

            if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32)
                THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only FP32 is supported!";

            if (layer->insData[0].lock()->getTensorDesc().getDims() != layer->outData[0]->getTensorDesc().getDims())
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!";

            alpha = layer->GetParamAsFloat("alpha", 0.0f);
            beta = layer->GetParamAsFloat("beta", 0.0f);
            gamma = layer->GetParamAsFloat("gamma", 0.0f);

            std::string math_func = layer->type;
            if (math_func == "Erf") mathFunction = Math::Erf;
            else if (math_func == "Abs") mathFunction = Math::Abs;
            else if (math_func == "Acos") mathFunction = Math::Acos;
            else if (math_func == "Acosh") mathFunction = Math::Acosh;
            else if (math_func == "Asin") mathFunction = Math::Asin;
            else if (math_func == "Asinh") mathFunction = Math::Asinh;
            else if (math_func == "Atan") mathFunction = Math::Atan;
            else if (math_func == "Atanh") mathFunction = Math::Atanh;
            else if (math_func == "Ceil") mathFunction = Math::Ceil;
            else if (math_func == "Cos") mathFunction = Math::Cos;
            else if (math_func == "Cosh") mathFunction = Math::Cosh;
            else if (math_func == "Floor") mathFunction = Math::Floor;
            else if (math_func == "HardSigmoid") mathFunction = Math::HardSigmoid;
            else if (math_func == "Log") mathFunction = Math::Log;
            else if (math_func == "Neg") mathFunction = Math::Neg;
            else if (math_func == "Reciprocal") mathFunction = Math::Reciprocal;
            else if (math_func == "Selu") mathFunction = Math::Selu;
            else if (math_func == "Sign") mathFunction = Math::Sign;
            else if (math_func == "Sin") mathFunction = Math::Sin;
            else if (math_func == "Sinh") mathFunction = Math::Sinh;
            else if (math_func == "Softplus") mathFunction = Math::Softplus;
            else if (math_func == "Softsign") mathFunction = Math::Softsign;
            else if (math_func == "Tan") mathFunction = Math::Tan;
            else
                THROW_IE_EXCEPTION << layer->name << " Incorrect Math layer type!";

            addConfig(layer, { { ConfLayout::PLN, false, 0 } }, { { ConfLayout::PLN, false, 0 } });
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            errorMsg = ex.what();
        }
    }

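    // execute() applies the unary function selected in the constructor to every
    // element of the input blob; iterations are distributed across threads by
    // parallel_for from ie_parallel.hpp.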
    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
        size_t dataSize = outputs[0]->size();
        const float *src_data = inputs[0]->cbuffer().as<const float *>() +
            inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
        float* dst_data = outputs[0]->cbuffer().as<float *>() +
            outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();

        switch (mathFunction) {
        case Math::Erf:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = error_function(src_data[i]);
            });
            break;
        case Math::Abs:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = (std::abs)(src_data[i]);
            });
            break;
        case Math::Acos:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = acosf(src_data[i]);
            });
            break;
        case Math::Acosh:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = acoshf(src_data[i]);
            });
            break;
        case Math::Asin:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = asinf(src_data[i]);
            });
            break;
        case Math::Asinh:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = asinhf(src_data[i]);
            });
            break;
        case Math::Atan:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = atanf(src_data[i]);
            });
            break;
        case Math::Atanh:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = atanhf(src_data[i]);
            });
            break;
        case Math::Ceil:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = ceilf(src_data[i]);
            });
            break;
        case Math::Cos:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = cosf(src_data[i]);
            });
            break;
        case Math::Cosh:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = coshf(src_data[i]);
            });
            break;
        case Math::Floor:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = floorf(src_data[i]);
            });
            break;
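        // HardSigmoid: y = max(0, min(1, alpha * x + beta)). When the attributes
        // are absent (parsed as 0.0f above), the conventional ONNX defaults
        // alpha = 0.2 and beta = 0.5 are substituted.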
        case Math::HardSigmoid:
            alpha = (alpha == 0.0f) ? 0.2f : alpha;
            beta = (beta == 0.0f) ? 0.5f : beta;
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = (std::max)(0.f, (std::min)(1.f, alpha * src_data[i] + beta));
            });
            break;
        case Math::Log:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = logf(src_data[i]);
            });
            break;
        case Math::Neg:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = -src_data[i];
            });
            break;
        case Math::Reciprocal:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = 1.0f / src_data[i];
            });
            break;
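        // SELU: y = gamma * x for x > 0, and gamma * alpha * (e^x - 1) otherwise.
        // The constants alpha ~= 1.67326 and gamma ~= 1.0507 come from the SELU
        // paper (Klambauer et al., 2017) and are substituted when the attributes
        // are absent (parsed as 0.0f).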
        case Math::Selu:
            alpha = (alpha == 0.0f) ? 1.67326f : alpha;
            gamma = (gamma == 0.0f) ? 1.0507f : gamma;
            parallel_for(dataSize, [&](size_t i) {
                float x = src_data[i];
                dst_data[i] = (x > 0.0f) ? (gamma * x) : (gamma * alpha * (expf(x) - 1.0f));
            });
            break;
        case Math::Sign:
            parallel_for(dataSize, [&](size_t i) {
                if (src_data[i] > 0.0f)
                    dst_data[i] = 1.0f;
                else if (src_data[i] < 0.0f)
                    dst_data[i] = -1.0f;
                else
                    dst_data[i] = 0.0f;
            });
            break;
        case Math::Sin:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = sinf(src_data[i]);
            });
            break;
        case Math::Sinh:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = sinhf(src_data[i]);
            });
            break;
        case Math::Softplus:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = logf(expf(src_data[i]) + 1);
            });
            break;
        case Math::Softsign:
            parallel_for(dataSize, [&](size_t i) {
                float x = src_data[i];
                dst_data[i] = x / (1.f + (std::abs)(x));
            });
            break;
        case Math::Tan:
            parallel_for(dataSize, [&](size_t i) {
                dst_data[i] = tanf(src_data[i]);
            });
            break;
        default:
            if (resp) {
                std::string errorMsg = "Incorrect Math layer type";
                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
            }
            return GENERAL_ERROR;
        }
        return OK;
    }

private:
    enum class Math {
        Abs,
        Acos,
        Acosh,
        Asin,
        Asinh,
        Atan,
        Atanh,
        Ceil,
        Cos,
        Cosh,
        Erf,
        Floor,
        HardSigmoid,
        Log,
        Neg,
        Reciprocal,
        Selu,
        Sign,
        Sin,
        Sinh,
        Softplus,
        Softsign,
        Tan
    };

    Math mathFunction = Math::Erf;
    float alpha = 0.0f;
    float beta = 0.0f;
    float gamma = 0.0f;
};

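// One factory is registered per supported layer type; each resolves to the
// shared MathImpl, whose constructor dispatches on layer->type.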
REG_FACTORY_FOR(ImplFactory<MathImpl>, Abs);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Acos);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Acosh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Asin);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Asinh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Atan);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Atanh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Ceil);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Cos);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Cosh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Erf);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Floor);
REG_FACTORY_FOR(ImplFactory<MathImpl>, HardSigmoid);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Log);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Neg);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Reciprocal);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Selu);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Sign);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Sin);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Sinh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Softplus);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Softsign);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Tan);

}  // namespace Cpu
}  // namespace Extensions
}  // namespace InferenceEngine