4
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
5
* Licensed under the Apache License, Version 2.0 (the "License"); you may
6
* not use this file except in compliance with the License. You may obtain
7
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
8
* Unless required by applicable law or agreed to in writing, software
9
* distributed under the License is distributed on an "AS IS" BASIS,
10
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11
* or implied. See the License for the specific language governing
12
* permissions and limitations under the License. Algorithms used by
13
* this code may be covered by patents by HP, Microsoft, or other parties.
20
#ifndef __PRIMTEST_H_INCLUDED__
21
#define __PRIMTEST_H_INCLUDED__
25
#include <winpr/wtypes.h>
31
#include <freerdp/primitives.h>
32
#include <winpr/platform.h>
39
#define BLOCK_ALIGNMENT 16
41
/* Append the GNU "aligned" attribute to declaration x so the declared
 * object is aligned to BLOCK_ALIGNMENT.  GNU C syntax only.
 */
#define ALIGN(x) x __attribute__((__aligned__(BLOCK_ALIGNMENT)))
42
/* Append the GNU "unused" attribute to declaration x, so the compiler does
 * not warn when the declared entity is never referenced.  GNU C syntax only.
 */
#define POSSIBLY_UNUSED(x) x __attribute__((__unused__))
44
/* TODO: Someone needs to finish this for non-GNU C */
46
/* Attribute-free form: expands to the declaration x unchanged.
 * NOTE(review): presumably the non-GNU alternative; the preprocessor
 * conditional that selects it is not visible here - confirm.
 */
#define POSSIBLY_UNUSED(x) x
48
/* Absolute value of _x_: yields -(_x_) when _x_ is below zero, otherwise
 * _x_ itself.  The argument is evaluated twice, so do not pass an
 * expression with side effects.
 */
#define ABS(_x_) (((_x_) < 0) ? -(_x_) : (_x_))
49
/* NOTE(review): presumably the largest size any test buffer needs to hold;
 * the unit (bytes vs. elements) is not visible in this header - confirm.
 */
#define MAX_TEST_SIZE 4096
51
/* Array of int sizes, defined in another translation unit.  Its length is
 * not part of this declaration.
 * NOTE(review): NUM_TEST_SIZES presumably is the element count of
 * test_sizes - confirm that it matches the definition.
 */
extern int test_sizes[];
52
#define NUM_TEST_SIZES 10
54
/* Defined elsewhere.  Takes a writable buffer pointer and a size.
 * NOTE(review): by its name this presumably fills `buffer` with `size`
 * bytes of random data - confirm against the definition.
 */
extern void get_random_data(void *buffer, size_t size);
63
/* copy8u test entry points, defined elsewhere; no arguments, int result.
 * NOTE(review): naming suggests *_func checks correctness and *_speed
 * measures performance; the meaning of the return value is not visible
 * here - confirm.
 */
extern int test_copy8u_func(void);
64
extern int test_copy8u_speed(void);
66
/* set8u / set32s / set32u test entry points, defined elsewhere; no
 * arguments, int result.
 * NOTE(review): naming suggests *_func checks correctness and *_speed
 * measures performance - confirm.
 */
extern int test_set8u_func(void);
67
extern int test_set32s_func(void);
68
extern int test_set32u_func(void);
69
extern int test_set8u_speed(void);
70
extern int test_set32s_speed(void);
71
extern int test_set32u_speed(void);
73
/* sign16s test entry points, defined elsewhere; no arguments, int result.
 * NOTE(review): naming suggests *_func checks correctness and *_speed
 * measures performance - confirm.
 */
extern int test_sign16s_func(void);
74
extern int test_sign16s_speed(void);
76
/* add16s test entry points, defined elsewhere; no arguments, int result.
 * NOTE(review): naming suggests *_func checks correctness and *_speed
 * measures performance - confirm.
 */
extern int test_add16s_func(void);
77
extern int test_add16s_speed(void);
79
/* Left/right shift test entry points for the 16s and 16u variants, defined
 * elsewhere; no arguments, int result.
 * NOTE(review): naming suggests *_func checks correctness and *_speed
 * measures performance - confirm.
 */
extern int test_lShift_16s_func(void);
80
extern int test_lShift_16u_func(void);
81
extern int test_rShift_16s_func(void);
82
extern int test_rShift_16u_func(void);
83
extern int test_lShift_16s_speed(void);
84
extern int test_lShift_16u_speed(void);
85
extern int test_rShift_16s_speed(void);
86
extern int test_rShift_16u_speed(void);
88
/* RGBToRGB_16s8u_P3AC4R and yCbCrToRGB_16s16s_P3P3 test entry points,
 * defined elsewhere; no arguments, int result.
 * NOTE(review): naming suggests *_func checks correctness and *_speed
 * measures performance - confirm.
 */
extern int test_RGBToRGB_16s8u_P3AC4R_func(void);
89
extern int test_RGBToRGB_16s8u_P3AC4R_speed(void);
90
extern int test_yCbCrToRGB_16s16s_P3P3_func(void);
91
extern int test_yCbCrToRGB_16s16s_P3P3_speed(void);
93
/* alphaComp test entry points, defined elsewhere; no arguments, int result.
 * NOTE(review): naming suggests *_func checks correctness and *_speed
 * measures performance - confirm.
 */
extern int test_alphaComp_func(void);
94
extern int test_alphaComp_speed(void);
96
/* and_32u / or_32u test entry points, defined elsewhere; no arguments,
 * int result.
 * NOTE(review): naming suggests *_func checks correctness and *_speed
 * measures performance - confirm.
 */
extern int test_and_32u_func(void);
97
extern int test_and_32u_speed(void);
98
extern int test_or_32u_func(void);
99
extern int test_or_32u_speed(void);
101
/* Since so much of this code is repeated, define a macro to build
102
* functions to do speed tests.
105
/* String naming the optimized implementation.  It is used as a label
 * prefix by DO_OPT_MEASUREMENTS and as a column heading in the
 * STD_SPEED_TEST summary.
 * NOTE(review): two alternative definitions follow; the preprocessor
 * conditionals that select exactly one of them are not visible here.
 */
#define SIMD_TYPE "Neon"
107
#define SIMD_TYPE "SSE"
110
/* Statement macro: time the general implementation once per test size.
 *
 * For each index s below num_sizes it reads size_array[s], sets
 * iter = iterations/size, formats label as "<oplabel>-<size>", and calls
 * MEASURE_TIMED with label, iter, test_time and resultNormal[s].
 *
 * It relies on these names existing at the expansion site: s, num_sizes,
 * size_array, iter, iterations, label, oplabel, test_time, resultNormal.
 *
 * NOTE(review): the remaining MEASURE_TIMED arguments and the loop braces
 * are not visible here, so how _funcNormal_ and _prework_ are used cannot
 * be confirmed.  A zero entry in size_array would divide by zero.  sprintf
 * is unbounded and the size of label is not visible.
 */
#define DO_NORMAL_MEASUREMENTS(_funcNormal_, _prework_) \
112
for (s=0; s<num_sizes; ++s) \
116
int size = size_array[s]; \
118
iter = iterations/size; \
119
sprintf(label, "%s-%-4d", oplabel, size); \
120
MEASURE_TIMED(label, iter, test_time, resultNormal[s], \
125
/* Statement macro: time the optimized implementation once per test size.
 * Compiled only for x86/AMD64 builds with WITH_SSE2 or ARM builds with
 * WITH_NEON.
 *
 * Same loop shape as DO_NORMAL_MEASUREMENTS, except that the label is
 * "<SIMD_TYPE>-<oplabel>-<size>" and the measurement goes to resultOpt[s].
 *
 * NOTE(review): the remaining MEASURE_TIMED arguments and the loop braces
 * are not visible here, so how _funcOpt_ and _prework_ are used cannot be
 * confirmed.  A zero entry in size_array would divide by zero.
 */
#if (defined(_M_IX86_AMD64) && defined(WITH_SSE2)) || (defined(_M_ARM) && defined(WITH_NEON))
126
#define DO_OPT_MEASUREMENTS(_funcOpt_, _prework_) \
128
for (s=0; s<num_sizes; ++s) \
132
int size = size_array[s]; \
134
iter = iterations/size; \
135
sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
136
MEASURE_TIMED(label, iter, test_time, resultOpt[s], \
141
/* Empty form of DO_OPT_MEASUREMENTS: expands to nothing and discards both
 * arguments.  Parameter names match the measuring form of the macro.
 * NOTE(review): presumably the fallback for builds without SSE2/NEON; the
 * directive that selects it is not visible here - confirm.
 */
#define DO_OPT_MEASUREMENTS(_funcOpt_, _prework_)
144
/* Statement macro: time the IPP implementation once per test size.
 * The measuring form is compiled only for x86/AMD64 builds with WITH_IPP.
 *
 * Same loop shape as DO_NORMAL_MEASUREMENTS, except that the label is
 * "IPP-<oplabel>-<size>" and the measurement goes to resultIPP[s].
 *
 * The last line of this block is an empty definition that expands to
 * nothing.
 *
 * NOTE(review): the remaining MEASURE_TIMED arguments, the loop braces and
 * the directive that separates the two definitions are not visible here.
 * A zero entry in size_array would divide by zero.
 */
#if defined(_M_IX86_AMD64) && defined(WITH_IPP)
145
#define DO_IPP_MEASUREMENTS(_funcIPP_, _prework_) \
147
for (s=0; s<num_sizes; ++s) \
151
int size = size_array[s]; \
153
iter = iterations/size; \
154
sprintf(label, "IPP-%s-%-4d", oplabel, size); \
155
MEASURE_TIMED(label, iter, test_time, resultIPP[s], \
160
#define DO_IPP_MEASUREMENTS(_funcIPP_, _prework_)
163
/* A statement that does nothing.  The do/while(0) form consumes the
 * trailing semicolon, so it is safe as the body of an unbraced if/else.
 */
#define PRIM_NOP do { /* nothing */ } while (0)
164
/* ------------------------------------------------------------------------- */
165
/* Generate a static speed-test function named _name_.
 *
 * Macro parameters:
 *   _name_                  name of the generated function
 *   _srctype_               element type of src1, src2 and constant
 *   _dsttype_               not referenced in the lines visible here
 *   _prework_               forwarded to each DO_*_MEASUREMENTS macro
 *   _doNormal_/_funcNormal_ enable flag and function for the general run
 *   _doOpt_/_funcOpt_       enable flag and function for the optimized run
 *   _flagOpt_               processor feature passed to
 *                           IsProcessorFeaturePresentEx() or
 *                           IsProcessorFeaturePresent()
 *   _flagExt_               presumably selects which of those two checks is
 *                           used; the selecting lines are not visible
 *   _doIPP_/_funcIPP_       enable flag and function for the IPP run
 *
 * The generated function:
 *   1. allocates three zero-filled float arrays of num_sizes entries
 *      (resultNormal, resultOpt, resultIPP) with calloc;
 *   2. prints a banner, then runs the general, optimized and IPP
 *      measurements subject to the checks above;
 *   3. prints a summary table with one row per size.  Each cell defaults
 *      to "N/A".  A result greater than 0.0 is formatted with _floatprint.
 *      The optimized and IPP results are also printed as a percentage of
 *      the general result, rounded to the nearest integer, when the
 *      general result is greater than 0.0;
 *   4. frees the three arrays.
 *
 * NOTE(review): sSNp and sIPPp hold 8 bytes and are written with an
 * unbounded sprintf("%d%%"); a percentage of seven or more digits would
 * overflow them.
 * NOTE(review): no NULL check on the calloc results is visible.
 * NOTE(review): s, iter and label are used here and in the measurement
 * macros; their declarations and the function braces are not visible.
 */
#define STD_SPEED_TEST( \
166
_name_, _srctype_, _dsttype_, _prework_, \
167
_doNormal_, _funcNormal_, \
168
_doOpt_, _funcOpt_, _flagOpt_, _flagExt_, \
169
_doIPP_, _funcIPP_) \
170
static void _name_( \
171
const char *oplabel, const char *type, \
172
const _srctype_ *src1, const _srctype_ *src2, _srctype_ constant, \
174
const int *size_array, int num_sizes, \
175
int iterations, float test_time) \
178
float *resultNormal, *resultOpt, *resultIPP; \
179
resultNormal = (float *) calloc(num_sizes, sizeof(float)); \
180
resultOpt = (float *) calloc(num_sizes, sizeof(float)); \
181
resultIPP = (float *) calloc(num_sizes, sizeof(float)); \
182
printf("******************** %s %s ******************\n", \
184
if (_doNormal_) { DO_NORMAL_MEASUREMENTS(_funcNormal_, _prework_); } \
189
if (IsProcessorFeaturePresentEx(_flagOpt_)) \
191
DO_OPT_MEASUREMENTS(_funcOpt_, _prework_); \
196
if (IsProcessorFeaturePresent(_flagOpt_)) \
198
DO_OPT_MEASUREMENTS(_funcOpt_, _prework_); \
202
if (_doIPP_) { DO_IPP_MEASUREMENTS(_funcIPP_, _prework_); } \
203
printf("----------------------- SUMMARY ----------------------------\n"); \
204
printf("%8s: %15s %15s %5s %15s %5s\n", \
205
"size", "general", SIMD_TYPE, "%", "IPP", "%"); \
206
for (s=0; s<num_sizes; ++s) \
208
char sN[32], sSN[32], sSNp[8], sIPP[32], sIPPp[8]; \
209
strcpy(sN, "N/A"); strcpy(sSN, "N/A"); strcpy(sSNp, "N/A"); \
210
strcpy(sIPP, "N/A"); strcpy(sIPPp, "N/A"); \
211
if (resultNormal[s] > 0.0) _floatprint(resultNormal[s], sN); \
212
if (resultOpt[s] > 0.0) \
214
_floatprint(resultOpt[s], sSN); \
215
if (resultNormal[s] > 0.0) \
217
sprintf(sSNp, "%d%%", \
218
(int) (resultOpt[s] / resultNormal[s] * 100.0 + 0.5)); \
221
if (resultIPP[s] > 0.0) \
223
_floatprint(resultIPP[s], sIPP); \
224
if (resultNormal[s] > 0.0) \
226
sprintf(sIPPp, "%d%%", \
227
(int) (resultIPP[s] / resultNormal[s] * 100.0 + 0.5)); \
230
printf("%8d: %15s %15s %5s %15s %5s\n", \
231
size_array[s], sN, sSN, sSNp, sIPP, sIPPp); \
233
free(resultNormal); free(resultOpt); free(resultIPP); \
236
#endif // !__PRIMTEST_H_INCLUDED__