31
by Ricardo Salveti de Araujo
* dont_always_use_sse2_ssse3_instructions_all_archs.patch: |
1 |
//
|
2 |
// ZoneMinder General Utility Functions, $Date$, $Revision$
|
|
3 |
// Copyright (C) 2001-2008 Philip Coombes
|
|
4 |
//
|
|
5 |
// This program is free software; you can redistribute it and/or
|
|
6 |
// modify it under the terms of the GNU General Public License
|
|
7 |
// as published by the Free Software Foundation; either version 2
|
|
8 |
// of the License, or (at your option) any later version.
|
|
9 |
//
|
|
10 |
// This program is distributed in the hope that it will be useful,
|
|
11 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 |
// GNU General Public License for more details.
|
|
14 |
//
|
|
15 |
// You should have received a copy of the GNU General Public License
|
|
16 |
// along with this program; if not, write to the Free Software
|
|
17 |
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
18 |
//
|
|
19 |
||
20 |
//#include "zm_logger.h"
|
|
21 |
#include "zm.h" |
|
22 |
#include "zm_utils.h" |
|
23 |
||
24 |
#include <string.h> |
|
25 |
#include <stdio.h> |
|
26 |
#include <stdarg.h> |
|
27 |
||
28 |
/* SSE capability level of the running CPU. Starts at 0 and is assigned by */
/* ssedetect(): 35 = SSSE3, 30 = SSE3, 20 = SSE2, 10 = SSE, 0 = none.       */
unsigned int sseversion = 0U;
|
29 |
||
30 |
/**
 * Build a std::string from a printf-style format and arguments.
 *
 * A stack buffer handles the common case; when the formatted text does not
 * fit, the exact size reported by vsnprintf() is used to format again into
 * a heap buffer, so long messages are no longer silently truncated.
 *
 * @param format printf-style format string (must not be NULL)
 * @param ...    arguments consumed by the format
 * @return the formatted text, or an empty string if vsnprintf() reports an
 *         error
 */
const std::string stringtf( const char *format, ... )
{
  va_list ap;
  char stackBuffer[8192];

  va_start( ap, format );
  const int needed = vsnprintf( stackBuffer, sizeof(stackBuffer), format, ap );
  va_end( ap );

  // A negative return means a formatting/encoding error; the buffer content
  // is unspecified in that case, so do not use it.
  if ( needed < 0 )
    return( std::string() );

  // Fast path: everything (including the terminator) fitted on the stack.
  if ( static_cast<size_t>(needed) < sizeof(stackBuffer) )
    return( std::string( stackBuffer ) );

  // Slow path: the first pass consumed the va_list, so restart it and format
  // into a buffer of exactly the required size (+1 for the terminator).
  std::string heapBuffer( static_cast<size_t>(needed) + 1, '\0' );
  va_start( ap, format );
  vsnprintf( &heapBuffer[0], heapBuffer.size(), format, ap );
  va_end( ap );

  // Keep C-string semantics (stop at the first NUL), as the stack path does.
  return( std::string( heapBuffer.c_str() ) );
}
|
|
44 |
||
45 |
/**
 * Build a std::string from a printf-style format held in a std::string.
 *
 * A stack buffer handles the common case; when the formatted text does not
 * fit, the exact size reported by vsnprintf() is used to format again into
 * a heap buffer, so long messages are no longer silently truncated.
 *
 * NOTE(review): the C++ standard leaves va_start() undefined when the last
 * named parameter has reference type. The signature is kept for callers;
 * prefer the `const char *` overload where possible.
 *
 * @param format printf-style format string
 * @param ...    arguments consumed by the format
 * @return the formatted text, or an empty string if vsnprintf() reports an
 *         error
 */
const std::string stringtf( const std::string &format, ... )
{
  va_list ap;
  char stackBuffer[8192];

  va_start( ap, format );
  const int needed = vsnprintf( stackBuffer, sizeof(stackBuffer), format.c_str(), ap );
  va_end( ap );

  // A negative return means a formatting/encoding error; the buffer content
  // is unspecified in that case, so do not use it.
  if ( needed < 0 )
    return( std::string() );

  // Fast path: everything (including the terminator) fitted on the stack.
  if ( static_cast<size_t>(needed) < sizeof(stackBuffer) )
    return( std::string( stackBuffer ) );

  // Slow path: the first pass consumed the va_list, so restart it and format
  // into a buffer of exactly the required size (+1 for the terminator).
  std::string heapBuffer( static_cast<size_t>(needed) + 1, '\0' );
  va_start( ap, format );
  vsnprintf( &heapBuffer[0], heapBuffer.size(), format.c_str(), ap );
  va_end( ap );

  // Keep C-string semantics (stop at the first NUL), as the stack path does.
  return( std::string( heapBuffer.c_str() ) );
}
|
|
59 |
||
60 |
/**
 * Report whether `haystack` begins with `needle`.
 *
 * An empty needle matches every haystack; a needle longer than the haystack
 * never matches.
 */
bool startsWith( const std::string &haystack, const std::string &needle )
{
  const std::string::size_type prefixLength = needle.length();
  if ( prefixLength > haystack.length() )
    return( false );

  // Compare in place rather than materialising the prefix as a temporary.
  return( haystack.compare( 0, prefixLength, needle ) == 0 );
}
|
|
64 |
||
65 |
/**
 * Split `string` into tokens separated by any run of characters in `chars`.
 *
 * Consecutive delimiters are treated as one separator. A delimiter at the
 * very start of the input does not produce an empty first token. An input
 * containing no delimiter (including the empty string) yields a single
 * token holding the whole input.
 *
 * @param string text to split
 * @param chars  set of delimiter characters
 * @param limit  when non-zero, once (limit - 1) tokens have been collected
 *               the rest of the input (after skipping delimiters) is
 *               appended as one final token; 0 means no limit
 * @return the tokens in order of appearance
 */
StringVector split( const std::string &string, const std::string chars, int limit )
{
  StringVector stringVector;
  std::string::size_type startIndex = 0;

  while ( true )
  {
    // Find the end of the current token
    const std::string::size_type endIndex = string.find_first_of( chars, startIndex );
    if ( endIndex > 0 )
    {
      // npos - startIndex is clamped by substr(), so the last token is
      // taken through to the end of the input.
      stringVector.push_back( string.substr( startIndex, endIndex-startIndex ) );
    }
    if ( endIndex == std::string::npos )
      break;

    // Skip the run of delimiters to find the start of the next token
    startIndex = string.find_first_not_of( chars, endIndex );

    // Only delimiters remained: there is nothing more to add. Checking this
    // before the limit branch avoids calling substr(npos), which throws
    // std::out_of_range.
    if ( startIndex == std::string::npos )
      break;

    if ( limit && (stringVector.size() == static_cast<unsigned int>(limit-1)) )
    {
      stringVector.push_back( string.substr( startIndex ) );
      break;
    }
  }

  return( stringVector );
}
|
|
98 |
||
99 |
/**
 * Encode `inString` as Base64 using the standard alphabet with '=' padding.
 *
 * Bytes are handled as unsigned values, so input bytes >= 0x80 index the
 * alphabet correctly, and the whole string is encoded by length, so
 * embedded NUL bytes are encoded rather than ending the input.
 *
 * @param inString raw bytes to encode
 * @return the Base64 text; empty when the input is empty
 */
const std::string base64Encode( const std::string &inString )
{
  // 64 alphabet characters (plus the literal's terminating NUL, never indexed)
  static const char base64_table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

  const std::string::size_type length = inString.size();

  std::string outString;
  // Every group of up to 3 input bytes becomes exactly 4 output characters
  outString.reserve( 4 * ((length + 2) / 3) );

  for ( std::string::size_type i = 0; i < length; i += 3 )
  {
    const unsigned char byte0 = static_cast<unsigned char>( inString[i] );
    outString += base64_table[byte0 >> 2];
    // Low bits carried over into the next 6-bit group
    unsigned char remainder = static_cast<unsigned char>( (byte0 & 0x03) << 4 );

    if ( i + 1 < length )
    {
      const unsigned char byte1 = static_cast<unsigned char>( inString[i + 1] );
      outString += base64_table[remainder | (byte1 >> 4)];
      remainder = static_cast<unsigned char>( (byte1 & 0x0f) << 2 );

      if ( i + 2 < length )
      {
        const unsigned char byte2 = static_cast<unsigned char>( inString[i + 2] );
        outString += base64_table[remainder | (byte2 >> 6)];
        outString += base64_table[byte2 & 0x3f];
      }
      else
      {
        // Two input bytes in the final group: one padding character
        outString += base64_table[remainder];
        outString += '=';
      }
    }
    else
    {
      // One input byte in the final group: two padding characters
      outString += base64_table[remainder];
      outString += '=';
      outString += '=';
    }
  }
  return( outString );
}
|
|
154 |
||
155 |
/* Sets sse_version */
|
|
156 |
void ssedetect() { |
|
157 |
#if (defined(__i386__) || defined(__x86_64__))
|
|
158 |
/* x86 or x86-64 processor */
|
|
159 |
uint32_t r_edx, r_ecx; |
|
160 |
||
161 |
__asm__ __volatile__( |
|
162 |
"mov $0x1,%%eax\n\t" |
|
163 |
"cpuid\n\t" |
|
164 |
: "=d" (r_edx), "=c" (r_ecx) |
|
165 |
:
|
|
166 |
: "%eax", "%ebx" |
|
167 |
);
|
|
168 |
||
169 |
if (r_ecx & 0x00000200) { |
|
170 |
sseversion = 35; /* SSSE3 */ |
|
171 |
Debug(1,"Detected a x86\\x86-64 processor with SSSE3"); |
|
172 |
} else if (r_ecx & 0x00000001) { |
|
173 |
sseversion = 30; /* SSE3 */ |
|
174 |
Debug(1,"Detected a x86\\x86-64 processor with SSE3"); |
|
175 |
} else if (r_edx & 0x04000000) { |
|
176 |
sseversion = 20; /* SSE2 */ |
|
177 |
Debug(1,"Detected a x86\\x86-64 processor with SSE2"); |
|
178 |
} else if (r_edx & 0x02000000) { |
|
179 |
sseversion = 10; /* SSE */ |
|
180 |
Debug(1,"Detected a x86\\x86-64 processor with SSE"); |
|
181 |
} else { |
|
182 |
sseversion = 0; |
|
183 |
Debug(1,"Detected a x86\\x86-64 processor"); |
|
184 |
}
|
|
185 |
||
186 |
#else
|
|
187 |
/* Non x86 or x86-64 processor, SSE2 is not available */
|
|
188 |
Debug(1,"Detected a non x86\\x86-64 processor"); |
|
189 |
sseversion = 0; |
|
190 |
#endif
|
|
191 |
}
|
|
192 |
||
193 |
/* SSE2 aligned memory copy. Useful for big copying of aligned memory like image buffers in ZM */
/* For platforms without SSE2 we will use standard x86 asm memcpy or glibc's memcpy() */
/*                                                                                      */
/* Copies `bytes` bytes from `src` to `dest` and returns `dest`.                        */
/* For copies larger than 128 bytes the data is moved in 128-byte chunks with           */
/* movdqa loads and movntdq stores, so both `src` and `dest` must be 16-byte aligned;   */
/* any tail (bytes % 128) and all copies of 128 bytes or fewer use `rep movsb`.         */
/* The regions must not overlap.                                                        */
__attribute__((noinline,__target__("sse2"))) void* sse2_aligned_memcpy(void* dest, const void* src, size_t bytes) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  /* The asm advances the source/destination pointers and consumes the count, */
  /* so they are passed as read-write operands on local copies. Declaring     */
  /* them input-only would let the compiler assume `dest` is unchanged        */
  /* afterwards and return the advanced pointer.                              */
  const void* src_cursor = src;
  void* dest_cursor = dest;

  if(bytes > 128) {
    size_t remainder = bytes % 128;
    const uint8_t* lastsrc = static_cast<const uint8_t*>(src) + (bytes - remainder);

    __asm__ __volatile__(
    /* Numeric local labels: safe even if this body is ever emitted twice */
    "1:\n\t"
    "movdqa (%[src]),%%xmm0\n\t"
    "movdqa 0x10(%[src]),%%xmm1\n\t"
    "movdqa 0x20(%[src]),%%xmm2\n\t"
    "movdqa 0x30(%[src]),%%xmm3\n\t"
    "movdqa 0x40(%[src]),%%xmm4\n\t"
    "movdqa 0x50(%[src]),%%xmm5\n\t"
    "movdqa 0x60(%[src]),%%xmm6\n\t"
    "movdqa 0x70(%[src]),%%xmm7\n\t"
    "movntdq %%xmm0,(%[dst])\n\t"
    "movntdq %%xmm1,0x10(%[dst])\n\t"
    "movntdq %%xmm2,0x20(%[dst])\n\t"
    "movntdq %%xmm3,0x30(%[dst])\n\t"
    "movntdq %%xmm4,0x40(%[dst])\n\t"
    "movntdq %%xmm5,0x50(%[dst])\n\t"
    "movntdq %%xmm6,0x60(%[dst])\n\t"
    "movntdq %%xmm7,0x70(%[dst])\n\t"
    "add $0x80, %[src]\n\t"
    "add $0x80, %[dst]\n\t"
    "cmp %[last], %[src]\n\t"
    "jb 1b\n\t"
    /* Copy the sub-128-byte tail, if any, byte by byte */
    "test %[rem], %[rem]\n\t"
    "jz 2f\n\t"
    "cld\n\t"
    "rep movsb\n\t"
    "2:\n\t"
    : [src] "+S" (src_cursor), [dst] "+D" (dest_cursor), [rem] "+c" (remainder)
    : [last] "r" (lastsrc)
    : "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
    );

  } else {
    /* Standard memcpy */
    __asm__ __volatile__("cld; rep movsb" : "+S"(src_cursor), "+D"(dest_cursor), "+c"(bytes) : : "cc", "memory");
  }
#else
  /* Non x86\x86-64 platform, use memcpy */
  memcpy(dest,src,bytes);
#endif
  return dest;
}
|
|
243 |
||
244 |
/* Stores (end - start) in *diff, keeping tv_nsec within [0, 1000000000) */
/* by borrowing one second whenever the nanosecond difference is negative. */
/* No check is made that end is not earlier than start.                    */
void timespec_diff(struct timespec *start, struct timespec *end, struct timespec *diff) {
  /* Work on locals so the result is correct even if diff aliases an input */
  time_t delta_sec = end->tv_sec - start->tv_sec;
  long delta_nsec = end->tv_nsec - start->tv_nsec;

  if (delta_nsec < 0) {
    delta_sec -= 1;
    delta_nsec += 1000000000;
  }

  diff->tv_sec = delta_sec;
  diff->tv_nsec = delta_nsec;
}
|
|
253 |