1
/*------------------------------------------------------------------------------
2
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
4
* Distributable under the terms of either the Apache License (Version 2.0) or
5
* the GNU Lesser General Public License, as specified in the COPYING file.
6
------------------------------------------------------------------------------*/
7
#include "CLucene/_ApiHeader.h"
13
#if defined(_CL_HAVE_SYS_TIME_H)
14
# include <sys/time.h>
15
#elif defined(_CL_HAVE_TIME_H)
18
#ifdef _CL_HAVE_SYS_TIMEB_H
19
#include <sys/timeb.h>
22
#if defined(_CL_HAVE_SYS_STAT_H)
25
#ifdef _CL_HAVE_STRINGS_H
28
#ifdef _CL_HAVE_UNISTD_H
34
#include "CLucene/util/dirent.h" //if we have dirent, then the native one will be used
38
// SET_BINARY_MODE(file): on DOS, OS/2, Windows and Cygwin builds this switches the
// given stdio stream to binary mode via setmode(fileno(file), O_BINARY).
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
41
# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
43
// Empty variant: the macro expands to nothing.
// NOTE(review): presumably this is the #else branch for all other platforms; the
// #else/#endif lines are not visible in this excerpt.
# define SET_BINARY_MODE(file)
45
// Size in bytes of the zlib output buffer that Misc::deflate / Misc::inflate allocate
// when the caller passes CHUNK == -1.
#define CL_Z_DEFAULT_CHUNK 1024
48
// When the build configuration defines _CL_HAVE_FUNCTION_SLEEP, the Sleep() entry
// point is declared by hand below (imported, __stdcall, C linkage).
#ifdef _CL_HAVE_FUNCTION_SLEEP
49
//don't ignore windows.h... breaks mingw32 in some cases. Define Sleep instead
// NOTE(review): "ignore" above presumably means "include", i.e. windows.h is
// deliberately not pulled in here - confirm.
50
extern "C" __declspec(dllimport) void __stdcall Sleep(_cl_dword_t);
55
// Computes a hash code for a narrow string. No length is passed, so str is
// presumably expected to be NUL-terminated; the loop header, the declaration of
// hashCode and the return statement are not visible in this excerpt.
size_t Misc::ahashCode(const char* str){
56
// Compute the hash code using a local variable to be reentrant.
59
// Fold the current character into the running hash (hash * 31 + c) and advance.
// NOTE(review): *str is a plain char, so where char is signed, bytes >= 0x80 are
// sign-extended before being added - confirm this is intended.
hashCode = hashCode * 31 + *str++;
62
// Computes a hash code over exactly len characters starting at str.
// The loop is bounded by len only, so embedded NUL characters are hashed like any
// other character and str must provide at least len readable characters.
// The declaration of hashCode and the return statement are not visible in this excerpt.
size_t Misc::ahashCode(const char* str, size_t len){
63
// Compute the hash code using a local variable to be reentrant.
65
for (size_t i = 0; i<len; i++)
66
// Fold the current character into the running hash (hash * 31 + c) and advance.
hashCode = hashCode * 31 + *str++;
70
// Queries an open file handle with fileHandleStat() and, judging by the name,
// reports the file's length.
// NOTE(review): what is returned on success and on failure is outside this excerpt.
int64_t Misc::filelength(int filehandle)
72
struct cl_stat_t info;
73
// fileHandleStat() signals failure with -1.
if (fileHandleStat(filehandle, &info) == -1)
79
// Suspends the caller for ms milliseconds using whichever sleep primitive the
// build configuration provides; compilation fails if none is available.
void Misc::sleep(const int ms){
80
#if defined(_CL_HAVE_FUNCTION_USLEEP)
81
// NOTE(review): ms*1000 is evaluated in int and overflows for ms above roughly
// 2.1 million; POSIX also allows usleep() to reject arguments >= 1,000,000
// microseconds - confirm callers only pass short delays.
usleep(ms*1000);//expects microseconds
82
// Alternative branch for a configured SLEEPFUNCTION; its call is not visible here.
#elif defined(SLEEPFUNCTION)
85
#error no sleep function???
91
// Wide-character counterpart of ahashCode(const char*): computes a hash code for a
// wide string. No length is passed, so str is presumably expected to be
// NUL-terminated; the loop header, the declaration of hashCode and the return
// statement are not visible in this excerpt.
size_t Misc::whashCode(const wchar_t* str){
92
// Compute the hash code using a local variable to be reentrant.
95
// Fold the current character into the running hash (hash * 31 + c) and advance.
hashCode = hashCode * 31 + *str++;
98
// Computes a hash code over exactly len wide characters starting at str.
// The loop is bounded by len only, so embedded NUL characters are hashed too and
// str must provide at least len readable characters.
// The declaration of hashCode and the return statement are not visible in this excerpt.
size_t Misc::whashCode(const wchar_t* str, size_t len){
99
// Compute the hash code using a local variable to be reentrant.
101
for (size_t i = 0; i<len; i++)
102
// Fold the current character into the running hash (hash * 31 + c) and advance.
hashCode = hashCode * 31 + *str++;
106
// Converts a TCHAR string to a std::string.
//   s   - source string; NULL yields an empty result
//   len - number of characters to convert; 0 yields an empty result, a negative
//         value means "use the whole NUL-terminated string"
// The conversion goes through a temporary char buffer that is released before the
// function ends; the construction of the returned string from that buffer is not
// visible in this excerpt.
std::string Misc::toString(const TCHAR* s, int32_t len){
107
if ( s == NULL || len == 0 ) return "";
108
// Negative length: measure the string.
if ( len < 0 ) len = _tcslen(s);
109
// One extra element for the terminating NUL.
char* buf = _CL_NEWARRAY(char,len+1);
110
STRCPY_WtoA(buf,s,len+1);
112
_CLDELETE_CaARRAY(buf);
116
// Creates a narrow-character copy of the wide string s in a buffer allocated with
// _CL_NEWARRAY, sized to the string length plus the terminator.
// NOTE(review): the return statement is not visible; presumably msg is returned
// and the caller is responsible for releasing it - confirm.
char* Misc::_wideToChar(const wchar_t* s){
117
// NOTE(review): _tcslen is the TCHAR length function but s is wchar_t*; this only
// matches when TCHAR is wchar_t (the sibling _cpywideToChar uses wcslen) - confirm.
size_t len = _tcslen(s);
118
char* msg=_CL_NEWARRAY(char,len+1);
119
// len+1 so the terminating NUL is copied as well.
_cpywideToChar( s,msg,len+1 );
122
// Creates a wide-character copy of the narrow string s in a buffer allocated with
// _CL_NEWARRAY, sized to the string length plus the terminator.
// NOTE(review): the return statement is not visible; presumably msg is returned
// and the caller is responsible for releasing it - confirm.
wchar_t* Misc::_charToWide(const char* s){
123
size_t len = strlen(s);
124
wchar_t* msg = _CL_NEWARRAY(wchar_t,len+1);
125
// len+1 so the terminating NUL is copied as well.
_cpycharToWide(s,msg,len+1);
129
// Copies the wide string s into the narrow buffer d, one character at a time.
//   s   - NUL-terminated source string
//   d   - destination buffer with room for at least len characters
//   len - maximum number of characters (terminator included) to write
// At most min(len, wcslen(s)+1) characters are written.
void Misc::_cpywideToChar(const wchar_t* s, char* d, size_t len){
130
size_t sLen = wcslen(s);
131
// The bound sLen+1 includes the terminating NUL when it fits.
// NOTE(review): in the visible code nothing terminates d when len <= sLen, and
// the uint32_t index is compared against size_t bounds - confirm both are acceptable.
for ( uint32_t i=0;i<len&&i<sLen+1;i++ )
132
// LUCENE_OOR_CHAR is defined elsewhere; presumably it maps characters that do not
// fit in a char to a substitute - TODO confirm.
d[i] = LUCENE_OOR_CHAR(s[i]);
134
// Copies the narrow string s into the wide buffer d, one character at a time.
//   s   - NUL-terminated source string
//   d   - destination buffer with room for at least len characters
//   len - maximum number of characters (terminator included) to write
// The loop body is not visible in this excerpt.
void Misc::_cpycharToWide(const char* s, wchar_t* d, size_t len){
135
size_t sLen = strlen(s);
136
// Iterates over at most min(len, strlen(s)+1) positions; the bound sLen+1 includes
// the terminating NUL when it fits.
for ( uint32_t i=0;i<len&&i<sLen+1;i++ )
142
// Returns the current time in milliseconds, built from a seconds value plus a
// sub-second part. Two implementations are selected at compile time: a _timeb
// based one when gettimeofday() is unavailable, otherwise gettimeofday().
uint64_t Misc::currentTimeMillis() {
143
#ifndef _CL_HAVE_FUNCTION_GETTIMEOFDAY
144
// The call that fills tstruct is not visible in this excerpt.
struct _timeb tstruct;
147
// Whole seconds scaled to milliseconds, plus the millisecond field.
return (((uint64_t) tstruct.time) * 1000) + tstruct.millitm;
149
struct timeval tstruct;
150
// gettimeofday() failed; the handling inside this branch is not visible.
if (gettimeofday(&tstruct, NULL) < 0) {
154
// Whole seconds scaled to milliseconds, plus microseconds reduced to milliseconds.
return (((uint64_t) tstruct.tv_sec) * 1000) + tstruct.tv_usec / 1000;
159
// Builds a copy of val in which occurrences of srch are replaced by repl.
// The work is done in two passes: the first counts occurrences to size the result
// buffer, the second copies the text between matches and inserts repl.
// The result buffer is allocated with _CL_NEWARRAY; the return statement and any
// early exit when nothing matches are not visible in this excerpt.
// NOTE(review): both passes begin searching at pos+1, so an occurrence starting at
// index 0 of val is never found, and an empty val makes the first search start one
// past the terminator - confirm whether this is intended.
const TCHAR* Misc::replace_all( const TCHAR* val, const TCHAR* srch, const TCHAR* repl )
162
size_t repLen = _tcslen(repl);
163
size_t srchLen = _tcslen(srch);
164
size_t srcLen = _tcslen(val);
166
const TCHAR* pos = val;
167
// Pass 1: count occurrences (the counter update is not visible here).
// NOTE(review): the search resumes one character after the previous match rather
// than after its end, so overlapping occurrences are counted too; the size
// computed below can then be too small for what pass 2 writes - confirm.
while( (pos = _tcsstr(pos+1, srch)) != NULL ) {
171
// Result length: the source minus every matched span, plus one repl per match.
size_t lenNew = (srcLen - (srchLen * cnt)) + (repLen * cnt);
172
TCHAR* ret = _CL_NEWARRAY(TCHAR,lenNew+1);
179
TCHAR* cur = ret; //position of return buffer
180
const TCHAR* lst = val; //position of value buffer
181
pos = val; //searched position of value buffer
182
// Pass 2: assemble the result.
while( (pos = _tcsstr(pos+1,srch)) != NULL ) {
183
_tcsncpy(cur,lst,pos-lst); //copy till current
185
lst = pos; //move val position
187
_tcscpy( cur,repl); //copy replace
188
cur += repLen; //move return buffer position
189
lst += srchLen; //move last value buffer position
191
_tcscpy(cur, lst ); //copy rest of buffer
197
// Checks a path by calling fileStat() on it.
// A NULL or empty path is handled by the guard below; the result returned for that
// case and the way ret is turned into the return value are not visible here.
// NOTE(review): despite the name, Misc::file_Unlink calls this on the path it has
// just unlinked, so it is presumably not limited to directories - confirm.
bool Misc::dir_Exists(const char* path){
198
if ( !path || !*path )
200
struct cl_stat_t buf;
201
int32_t ret = fileStat(path,&buf);
205
// Looks up path with fileStat() and, judging by the name, reports the file's size.
// NOTE(review): the value returned on success and the fallback returned when
// fileStat() fails are not visible in this excerpt.
int64_t Misc::file_Size(const char* path){
206
struct cl_stat_t buf;
207
// fileStat() returning 0 means the lookup succeeded.
if ( fileStat(path,&buf) == 0 )
213
// Removes the file at path, repeating up to maxAttempts times, and checks via
// Misc::dir_Exists() that the path has actually disappeared.
//   path        - file to remove; NULL or empty is handled by the first guard
//   maxAttempts - attempt budget; 0 is handled by its own guard, and the loop runs
//                 while the value is non-zero, so a negative value presumably means
//                 "retry without limit" (only positive values reach the final
//                 check below) - TODO confirm
// NOTE(review): the return values and the bodies of the branches are not visible
// in this excerpt.
int Misc::file_Unlink(const char* path, int32_t maxAttempts )
217
if( ! path || ! * path )
220
if( maxAttempts == 0 )
223
while( maxAttempts != 0 )
225
// _unlink() reported failure.
if( _unlink( path ) != 0 )
231
// The path no longer exists.
if( ! Misc::dir_Exists( path ) )
234
if( ++i > 50 ) // if it still doesn't show up, then we do some sleeping for the last 50ms
238
// Only a positive budget is tested here (presumably decremented inside).
if( maxAttempts > 0 )
247
// Concatenates up to six TCHAR strings, in argument order, into a buffer allocated
// with _CL_NEWARRAY. NULL arguments are skipped.
// NOTE(review): the return statement is not visible; presumably buf is returned
// and the caller must release it - confirm.
TCHAR* Misc::join ( const TCHAR* a, const TCHAR* b, const TCHAR* c, const TCHAR* d,const TCHAR* e,const TCHAR* f ) {
248
// Length of x, treating NULL as an empty string.
#define LEN(x) (x == NULL ? 0 : _tcslen(x))
249
const size_t totalLen =
250
LEN(a) + LEN(b) + LEN(c) + LEN(d) + LEN(e) + LEN(f)
251
// NOTE(review): totalLen is an element count, yet sizeof(TCHAR) (a byte size) is
// added for the terminator; this reserves sizeof(TCHAR) elements instead of one,
// which over-allocates but is never too small.
+ sizeof(TCHAR); /* Space for terminator. */
253
TCHAR* buf = _CL_NEWARRAY(TCHAR,totalLen);
255
// _tcscat appends at the existing terminator, so buf must already hold an empty
// string at this point; that initialisation is not visible in this excerpt.
if ( a != NULL) _tcscat(buf,a);
256
if ( b != NULL) _tcscat(buf,b);
257
if ( c != NULL) _tcscat(buf,c);
258
if ( d != NULL) _tcscat(buf,d);
259
if ( e != NULL) _tcscat(buf,e);
260
if ( f != NULL) _tcscat(buf,f);
264
// Narrow-character counterpart of join(): concatenates up to six char strings, in
// argument order, into a buffer allocated with _CL_NEWARRAY. NULL arguments are skipped.
// NOTE(review): the return statement is not visible; presumably buf is returned
// and the caller must release it - confirm.
char* Misc::ajoin ( const char* a, const char* b, const char* c, const char* d,const char* e,const char* f ) {
265
// Length of x, treating NULL as an empty string.
#define aLEN(x) (x == NULL ? 0 : strlen(x))
266
const size_t totalLen =
267
aLEN(a) + aLEN(b) + aLEN(c) + aLEN(d) + aLEN(e) + aLEN(f)
268
+ sizeof(char); /* Space for terminator. */
270
char* buf = _CL_NEWARRAY(char,totalLen);
272
// strcat appends at the existing terminator, so buf must already hold an empty
// string at this point; that initialisation is not visible in this excerpt.
if ( a != NULL) strcat(buf,a);
273
if ( b != NULL) strcat(buf,b);
274
if ( c != NULL) strcat(buf,c);
275
if ( d != NULL) strcat(buf,d);
276
if ( e != NULL) strcat(buf,e);
277
if ( f != NULL) strcat(buf,f);
282
// Classifies a directory entry name by testing it against the empty string, "."
// and "..". The characters are checked one position at a time so that no index
// beyond the terminator is read.
// NOTE(review): the value returned from each branch is not visible in this excerpt.
bool Misc::priv_isDotDir( const TCHAR* name )
284
// Empty name.
if( name[0] == '\0' ) {
287
// Exactly ".".
if( name[0] == '.' && name[1] == '\0' ) {
290
// Any other one-character name.
if( name[1] == '\0' ) {
293
// Exactly "..".
if( name[0] == '.' && name[1] == '.' && name[2] == '\0' ) {
300
//internal static function shared for clucene
301
string Misc::segmentname( const char* segment, const char* ext, const int32_t x ){
302
//Func - Returns a string holding a filename created by
303
// concatenating segment with ext and x
304
//Pre ext != NULL and holds the extension
305
// x contains a number
306
//Post - When x = -1 the returned string contains the concatenation of
307
// segment and ext, otherwise it contains the concatenation of segment, ext and x
// (the test on x itself is outside this excerpt)
309
CND_PRECONDITION(ext != NULL, "ext is NULL");
314
// Render x as decimal text.
// NOTE(review): only 10 characters are allowed here, while an int32_t can need up
// to 11 characters plus the terminator; the declaration of buf is not visible -
// confirm the size and that x stays small.
_snprintf(buf,10,"%d",x);
315
return string(segment) + ext + buf;
317
return string(segment) + ext;
320
void Misc::segmentname(char* buffer,int32_t bufferLen, const char* Segment, const char* ext, const int32_t x){
321
//Func - Static Method
322
// Creates a filename in buffer by concatenating Segment with ext and x,
// writing at most bufferLen characters
323
//Pre - buffer != NULL
324
// Segment != NULL and holds the name of the segment
325
// ext != NULL and holds the extension
326
// x contains a number
327
//Post - When x = -1 buffer contains the concatenation of Segment and ext otherwise
328
// buffer contains the concatenation of Segment, ext and x
// (the test on x itself is outside this excerpt)
330
CND_PRECONDITION(buffer != NULL, "buffer is NULL");
331
CND_PRECONDITION(Segment != NULL, "Segment is NULL");
332
CND_PRECONDITION(ext != NULL, "ext is NULL");
335
// NOTE(review): some _snprintf implementations leave the buffer unterminated when
// the output is truncated - confirm that bufferLen is always large enough.
_snprintf(buffer,bufferLen,"%s%s", Segment,ext );
337
_snprintf(buffer,bufferLen,"%s%s%d", Segment,ext,x );
342
// Walks s1 and s2 in parallel over their common length (the smaller of len1 and len2).
// NOTE(review): the loop body and the return statements are not visible;
// presumably the index of the first differing character is returned, or the
// common length when there is none - confirm.
int32_t Misc::stringDifference(const TCHAR* s1, const int32_t len1, const TCHAR* s2, const int32_t len2) {
343
// Never look past the end of the shorter string.
int32_t len = len1 < len2 ? len1 : len2;
344
for (int32_t i = 0; i < len; i++)
350
// Trims whitespace from both ends of text in place by locating the first and last
// non-space characters and shifting the kept part to the front of the buffer.
// The declarations of i and j, the branch bodies, the final termination of the
// string and the return statement are not visible in this excerpt.
TCHAR* Misc::stringTrim(TCHAR* text) {
352
size_t len = _tcslen(text);
354
for ( i=0;i<len;i++ ){ // find the first non-space character and store it as i
355
if ( ! _istspace(text[i]) )
358
// NOTE(review): len is a size_t, so len-1 wraps around for an empty string unless
// an earlier guard (not visible here) returns first - confirm.
for ( j=len-1; j > i; --j ){ // find the last non-space character and store it as j
359
if ( ! _istspace(text[j]) ) {
364
if (i==0 && j==len-1) // prevent unnecessary copy
371
// Move the kept characters to the start of the buffer.
// NOTE(review): source and destination overlap whenever i > 0, and the strncpy
// family leaves overlapping copies undefined - confirm or consider a memmove-style copy.
_tcsncpy(text, text+i, j-i);
378
// Reduces text in place to a single word: skips leading whitespace, then scans
// forward for the next whitespace character and moves the characters in between
// to the start of the buffer.
// The declarations of i and j, the branch bodies, the final termination of the
// string and the return statement are not visible in this excerpt.
TCHAR* Misc::wordTrim(TCHAR* text) {
380
size_t len = _tcslen(text);
382
for ( i=0;i<len;i++ ){ // find the first non-space character and store it as i
383
if ( ! _istspace(text[i]) )
386
for ( j=i; j < len; j++ ){ // scan forward from i for the first whitespace character; j marks the end of the word
387
// Whitespace found (presumably the scan stops here; the branch body is not visible).
if ( _istspace(text[j]) ) {
392
// The word already spans the whole string.
if (i == 0 && j==len)
395
if (i==j) // empty string
402
// Move the word to the start of the buffer.
// NOTE(review): source and destination overlap whenever i > 0, and the strncpy
// family leaves overlapping copies undefined - confirm or consider a memmove-style copy.
_tcsncpy(text, text+i, j-i);
409
// Writes value, expressed in the given base, into retval using the digit alphabet
// 0-9a-z. Digits are produced from least to most significant into the tail of a
// local buffer and then copied out in one piece.
//   value  - number to convert
//   base   - radix; the 36-character alphabet implies a range of 2..36
//   retval - destination; must hold every produced digit
// NOTE(review): no validation of base or of the sign of value is visible; a
// negative value makes value % base negative and a base above 36 yields an index
// past the alphabet - confirm callers never pass such values.
// The declarations of ptr/end, the division step, the termination of retval and
// the return statement are not visible in this excerpt.
size_t Misc::longToBase( int64_t value, int32_t base, char* retval ) {
410
static char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
411
// One char per bit of an unsigned long, plus one.
// NOTE(review): value is an int64_t; where unsigned long has 32 bits this buffer
// cannot hold every digit of a large value in a small base. The ptr > buf test
// below prevents an overrun but the output would be truncated - confirm.
char buf[(sizeof(unsigned long) << 3) + 1];
414
// Start at the last element and fill towards the front.
ptr = end = buf + sizeof(buf) - 1;
418
*--ptr = digits[ value % base ];
420
// Stop when the buffer is full or no value remains.
} while ( ptr > buf && value );
422
// end - ptr is the number of digits produced.
memcpy( retval, ptr, end - ptr );
428
// Parses a NUL-terminated base-36 string into a 64-bit integer, accumulating one
// digit per character from left to right.
// NOTE(review): every non-digit character is treated as a lowercase letter
// ('a' => 10); uppercase letters or other characters produce wrong values and no
// validation is visible. Passing a plain char to isdigit() is also undefined for
// negative values - confirm the input is always [0-9a-z].
// The declaration of lval, the pointer advance and the return statement are not
// visible in this excerpt.
int64_t Misc::base36ToLong( const char* value ) {
429
// The cast drops const; the visible code only reads through ptr.
char* ptr = (char*)value;
432
while ( *ptr != '\0' ) {
433
// Shift the accumulated value by one base-36 position and add the new digit.
lval = isdigit(*ptr) ? ( 36 * lval ) + ( *ptr - '0' ) : ( 36 * lval ) + ( *ptr - 'a' + 10 );
440
// Appends the non-directory entries of a directory to files.
//   directory - path of the directory to scan
//   files     - output vector; entries are appended to it
//   fullPath  - presumably selects between pushing "directory/name" and the bare
//               entry name (both variants appear below; the selecting condition is
//               not visible) - TODO confirm
// Returns false when the directory cannot be opened. The advance to the next
// entry, the closing of the directory handle and the success return are not
// visible in this excerpt.
bool Misc::listFiles(const char* directory, std::vector<std::string>& files, bool fullPath){
442
DIR* dir = opendir(directory);
443
if ( dir == NULL ) return false;
444
struct dirent* fl = readdir(dir);
445
struct cl_stat_t buf;
447
while ( fl != NULL ){
448
// Build the entry's path so that it can be stat-ed.
path = string(directory) + "/" + fl->d_name;
449
int32_t ret = fileStat(path.c_str(),&buf);
450
// Keep only entries that could be stat-ed and are not directories.
if ( ret==0 && !(buf.st_mode & S_IFDIR) ) {
451
// Skip the "." and ".." entries.
if ( (strcmp(fl->d_name, ".")) && (strcmp(fl->d_name, "..")) ) {
453
files.push_back(path);
455
files.push_back(fl->d_name);
466
// Returns the text "true" or "false" for a boolean value.
std::string Misc::toString(const bool value){
467
return value ? "true" : "false";
469
// Returns a short decimal label for a thread id. Ids are numbered 0, 1, 2, ... in
// the order in which they are first passed to this function, and the same id
// always yields the same label afterwards.
// NOTE(review): the counter and the map are function-local statics and no locking
// is visible around them; concurrent calls from several threads would race -
// confirm whether callers serialise access.
std::string Misc::toString(_LUCENE_THREADID_TYPE value){
470
static int32_t nextindex = 0;
471
static std::map<_LUCENE_THREADID_TYPE, int32_t> ids;
472
// First time this id is seen: assign the next free index.
if (ids.find(value) == ids.end()) {
473
ids[value] = nextindex++;
475
return toString(ids[value]);
477
// Converts a 32-bit integer to its decimal text: the number is formatted into a
// TCHAR buffer and then narrowed into a char buffer.
// The declarations of buf/tbuf and the return statement are not visible in this excerpt.
std::string Misc::toString(const int32_t value){
480
// Format in base 10.
_i64tot(value, tbuf, 10);
481
// Narrow at most 20 characters.
STRCPY_TtoA(buf,tbuf,20);
484
// Converts a 64-bit integer to its decimal text: the number is formatted into a
// TCHAR buffer and then narrowed into a char buffer.
// The declarations of buf/tbuf and the return statement are not visible in this excerpt.
// NOTE(review): the most negative int64_t needs 20 characters plus a terminator;
// confirm the buffers (declared outside this excerpt) and the limit of 20 used
// below leave room for it.
std::string Misc::toString(const int64_t value){
487
// Format in base 10.
_i64tot(value, tbuf, 10);
488
// Narrow at most 20 characters.
STRCPY_TtoA(buf,tbuf,20);
491
// Converts a floating point value to text with two digits after the decimal point.
// The declaration of buf and the return statement are not visible in this excerpt.
std::string Misc::toString(const float_t value){
493
// NOTE(review): output is limited to 20 characters, so very large magnitudes
// would be truncated - confirm this is acceptable.
_snprintf(buf,20,"%0.2f",(double)value);
497
// Translates a zlib return code into a human readable message stored in err.
// NOTE(review): only the Z_VERSION_ERROR label is visible in this excerpt; the
// other messages presumably belong to Z_ERRNO, Z_STREAM_ERROR, Z_DATA_ERROR and
// Z_MEM_ERROR respectively - confirm against the full switch.
void Misc::zerr(int ret, string& err)
501
err = "error occurred while reading or writing from the zlib streams";
504
err = "invalid compression level";
507
err = "invalid or incomplete deflate data";
510
err = "out of memory";
512
case Z_VERSION_ERROR:
513
err ="zlib version mismatch";
519
// Compresses the inlen bytes at `in` with zlib and writes the compressed data to
// dest, one output buffer (at most CHUNK bytes) at a time.
//   in, inlen - input buffer and its size in bytes
//   dest      - stream receiving the compressed bytes
//   err       - error message output; its use is outside this excerpt
//   CHUNK     - output buffer size in bytes; -1 selects CL_Z_DEFAULT_CHUNK
//   level     - zlib compression level; -1 selects Z_BEST_COMPRESSION
// NOTE(review): the return statements, the error checks after deflateInit() and
// dest.write(), the assignment of flush and the release of `out` are not visible
// in this excerpt - confirm that `out` is freed on every exit path.
bool Misc::deflate(const uint8_t* in, size_t inlen, std::ostream& dest, string& err, int CHUNK, int level)
530
if ( level == -1 ) level = Z_BEST_COMPRESSION;
531
if ( CHUNK == -1 ) CHUNK = CL_Z_DEFAULT_CHUNK;
532
// Output buffer handed to zlib; no check of the malloc result is visible.
uint8_t* out = (uint8_t*)malloc(CHUNK);
534
/* allocate deflate state */
535
strm.zalloc = Z_NULL;
537
strm.opaque = Z_NULL;
538
ret = deflateInit(&strm, level);
545
/* compress until end of file */
547
// The whole input is handed to zlib in one piece.
// NOTE(review): inlen is a size_t while zlib's avail_in is an unsigned int, so
// very large inputs would be truncated - confirm callers stay within that range.
strm.avail_in = inlen;
548
strm.next_in = (uint8_t*)in;
551
/* run deflate() on input until output buffer not full, finish compression if all of source has been read in */
554
strm.avail_out = CHUNK;
556
ret = ::deflate(&strm, flush); /* no bad return value */
557
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
558
// Bytes produced in this round.
have = CHUNK - strm.avail_out;
559
dest.write( (char*)out,have);
561
// Cleanup on a failure path (presumably a failed write; the condition is not visible).
(void)deflateEnd(&strm);
566
// A completely filled output buffer means zlib may have more output pending.
} while (strm.avail_out == 0);
567
assert(strm.avail_in == 0); /* all input will be used */
569
/* done when last data in file processed */
570
} while (flush != Z_FINISH);
571
assert(ret == Z_STREAM_END); /* stream will be complete */
573
/* clean up and return */
574
(void)deflateEnd(&strm);
579
// Decompresses the inlen bytes of zlib data at `in` and writes the decompressed
// bytes to dest, one output buffer (at most CHUNK bytes) at a time.
//   in, inlen - compressed input buffer and its size in bytes
//   dest      - stream receiving the decompressed bytes
//   err       - receives a message via zerr() when the stream did not end properly
//   CHUNK     - output buffer size in bytes; -1 selects CL_Z_DEFAULT_CHUNK
// NOTE(review): the return statements, the error handling after inflateInit() and
// inside the loop, and the release of `out` are not visible in this excerpt -
// confirm that `out` is freed on every exit path.
bool Misc::inflate(const uint8_t* in, size_t inlen, std::ostream& dest, string& err, int CHUNK)
590
if ( CHUNK == -1 ) CHUNK = CL_Z_DEFAULT_CHUNK;
591
// Output buffer handed to zlib; no check of the malloc result is visible.
uint8_t* out = (uint8_t*)malloc(CHUNK);
593
/* allocate inflate state */
594
strm.zalloc = Z_NULL;
596
strm.opaque = Z_NULL;
598
strm.next_in = Z_NULL;
599
ret = inflateInit(&strm);
606
/* decompress until deflate stream ends or end of file */
608
// The whole input is handed to zlib in one piece.
// NOTE(review): inlen is a size_t while zlib's avail_in is an unsigned int, so
// very large inputs would be truncated - confirm callers stay within that range.
strm.avail_in = inlen;
609
// Nothing to decompress (the action taken is not visible here).
if (strm.avail_in == 0)
611
strm.next_in = (uint8_t*)in;
613
/* run inflate() on input until output buffer not full */
615
strm.avail_out = CHUNK;
617
ret = ::inflate(&strm, Z_NO_FLUSH);
618
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
621
ret = Z_DATA_ERROR; /* and fall through */
624
// Cleanup on an error path (the enclosing switch labels are not visible here).
(void)inflateEnd(&strm);
629
// Bytes produced in this round.
have = CHUNK - strm.avail_out;
630
dest.write( (char*)out,have);
632
// Cleanup on a failure path (presumably a failed write; the condition is not visible).
(void)inflateEnd(&strm);
637
// A completely filled output buffer means zlib may have more output pending.
} while (strm.avail_out == 0);
639
/* done when inflate() says it's done */
640
} while (ret != Z_STREAM_END);
642
/* clean up and return */
643
(void)inflateEnd(&strm);
645
// Z_STREAM_END means the stream was decoded to its end.
if ( ret == Z_STREAM_END )
647
// Otherwise the data is reported as invalid or incomplete.
zerr(Z_DATA_ERROR, err);