4
* Copyright 2007 Houng Jen Yee (PCMan) <pcman.tw@gmail.com>
6
* This program is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 2 of the License, or
9
* (at your option) any later version.
11
* This program is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU General Public License for more details.
16
* You should have received a copy of the GNU General Public License
17
* along with this program; if not, write to the Free Software
18
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
22
/* Currently this library is NOT MT-safe */
28
#include "mime-type.h"
29
#include "mime-cache.h"
36
#include <sys/types.h>
42
* Currently, mmap cannot be used because of the limitation of mmap.
43
* When a file is mapped for mime-type sniffing (checking file magic),
44
* they could be deleted during the check and hence result in Bus error.
45
* (Refer to the man page of mmap for detail)
46
* So here I undef HAVE_MMAP to disable the implementation using mmap.
54
/* Upper bound on the number of leading bytes handed to the plain-text check */
55
#define TEXT_MAX_EXTENT 512
57
/* Well-known MIME type name strings. */
const char xdg_mime_type_unknown[] = "application/octet-stream";
58
const char xdg_mime_type_directory[] = "inode/directory";
59
const char xdg_mime_type_executable[] = "application/x-executable";
60
const char xdg_mime_type_plain_text[] = "text/plain";
62
/* Array of loaded caches and its element count; filled in by mime_cache_load_all(). */
static MimeCache** caches = NULL;
63
static guint n_caches = 0;
64
/* Largest magic_max_extent among the loaded caches; also the allocation size of mime_magic_buf. */
guint32 mime_cache_max_extent = 0;
66
/* allocated buffer used for mime magic checking to
67
prevent frequent memory allocation */
68
static char* mime_magic_buf = NULL;
69
/* for MT safety, the buffer should be locked */
70
G_LOCK_DEFINE_STATIC(mime_magic_buf);
72
/* load all mime.cache files on the system,
73
* including /usr/share/mime/mime.cache,
74
* /usr/local/share/mime/mime.cache,
75
* and $HOME/.local/share/mime/mime.cache. */
76
static void mime_cache_load_all();
78
/* free all mime.cache files on the system */
79
static void mime_cache_free_all();
81
/* Plain-text heuristic over the first len bytes of data (it looks for NUL bytes). */
static gboolean mime_type_is_data_plain_text( const char* data, int len );
84
* Get mime-type of the specified file (quick, but less accurate):
85
* Mime-type of the file is determined by checking the filename only.
86
* If statbuf != NULL, it will be used to determine if the file is a directory.
88
/*
 * Look up a MIME type from the file name alone.
 *
 * filename: name handed to the literal, suffix and glob lookups of each cache.
 * statbuf:  optional; when non-NULL and describing a directory,
 *           XDG_MIME_TYPE_DIRECTORY is returned without consulting the caches.
 *
 * Each cache is asked for a literal match first and then for a suffix match.
 * Only when nothing matched are the glob patterns tried, keeping track of the
 * longest glob length seen so far.
 * A NULL or empty result is reported as XDG_MIME_TYPE_UNKNOWN.
 */
const char* mime_type_get_by_filename( const char* filename, struct stat* statbuf )
90
const char* type = NULL, *suffix_pos = NULL, *prev_suffix_pos = (const char*)-1;
94
if( G_UNLIKELY( statbuf && S_ISDIR( statbuf->st_mode ) ) )
95
return XDG_MIME_TYPE_DIRECTORY;
97
for( i = 0; ! type && i < n_caches; ++i )
100
type = mime_cache_lookup_literal( cache, filename );
101
if( G_LIKELY( ! type ) )
103
const char* _type = mime_cache_lookup_suffix( cache, filename, &suffix_pos );
104
if( _type && suffix_pos < prev_suffix_pos )
107
prev_suffix_pos = suffix_pos;
112
if( G_UNLIKELY( ! type ) ) /* glob matching */
114
int max_glob_len = 0, glob_len = 0;
115
for( i = 0; ! type && i < n_caches; ++i )
118
const char* matched_type;
119
matched_type = mime_cache_lookup_glob( cache, filename, &glob_len );
120
/* according to the mime.cache 1.0 spec, we should use the longest glob matched. */
121
if( matched_type && glob_len > max_glob_len )
124
max_glob_len = glob_len;
130
return type && *type ? type : XDG_MIME_TYPE_UNKNOWN;
134
* Get mime-type info of the specified file (slow, but more accurate):
135
* To determine the mime-type of the file, mime_type_get_by_filename() is
136
* tried first. If the mime-type couldn't be determined, the content of
137
* the file will be checked, which is much more time-consuming.
138
* If statbuf is not NULL, it will be used to determine if the file is a directory,
139
* or if the file is an executable file; otherwise, the function will call stat()
140
* to gather this info itself. So if you already have stat info of the file,
141
* pass it to the function to prevent checking the file stat again.
142
* If you have basename of the file, pass it to the function can improve the
143
* efficiency, too. Otherwise, the function will try to get the basename of
144
* the specified file again.
146
/*
 * Determine the MIME type of a file, falling back to its content.
 *
 * filepath: path passed to stat(), open() and the executable test.
 * statbuf:  optional stat info; stat() is invoked when it is NULL or
 *           describes a symbolic link.
 * basename: optional; when NULL it is derived from the last slash in filepath.
 *
 * A failed stat() yields XDG_MIME_TYPE_UNKNOWN and a directory yields
 * XDG_MIME_TYPE_DIRECTORY. The name-based lookup runs first. For non-empty
 * files the leading bytes (at most mime_cache_max_extent) are then matched
 * against the magic data of each cache, followed by the executable test and
 * the plain-text fallback. Empty files are reported as XDG_MIME_TYPE_PLAIN_TEXT.
 * A NULL or empty result is reported as XDG_MIME_TYPE_UNKNOWN.
 *
 * The read() path borrows the shared mime_magic_buf when G_TRYLOCK succeeds
 * and otherwise allocates a private buffer with g_malloc().
 *
 * NOTE(review): both an mmap() path and a read() path appear in this body;
 * presumably they are alternatives selected by HAVE_MMAP - confirm against
 * the complete file.
 */
const char* mime_type_get_by_file( const char* filepath, struct stat* statbuf, const char* basename )
149
struct stat _statbuf;
151
if( statbuf == NULL || G_UNLIKELY( S_ISLNK(statbuf->st_mode) ) )
154
if( stat ( filepath, statbuf ) == -1 )
155
return XDG_MIME_TYPE_UNKNOWN;
158
if( S_ISDIR( statbuf->st_mode ) )
159
return XDG_MIME_TYPE_DIRECTORY;
161
if( basename == NULL )
163
basename = g_utf8_strrchr( filepath, -1, '/' );
164
if( G_LIKELY( basename ) )
170
if( G_LIKELY(basename) )
172
type = mime_type_get_by_filename( basename, statbuf );
173
if( G_LIKELY( strcmp( type, XDG_MIME_TYPE_UNKNOWN ) ) )
178
if( G_LIKELY(statbuf->st_size > 0) )
183
/* Open the file and map it into memory */
184
fd = open ( filepath, O_RDONLY, 0 );
187
int len = mime_cache_max_extent < statbuf->st_size ? mime_cache_max_extent : statbuf->st_size;
189
data = (char*) mmap( NULL, len, PROT_READ, MAP_SHARED, fd, 0 );
192
* FIXME: Can g_alloca() be used here? It's very fast, but is it safe?
193
* Actually, we can allocate a block of memory with the size of mime_cache_max_extent,
194
* then we don't need to do dynamic allocation/free every time, but multi-threading
195
* will be a nightmare, so...
197
/* try to lock the common buffer */
198
if( G_LIKELY( G_TRYLOCK( mime_magic_buf ) ) )
199
data = mime_magic_buf;
200
else /* the buffer is in use, allocate new one */
201
data = g_malloc( len );
203
len = read( fd, data, len );
205
if( G_UNLIKELY( len == -1 ) )
207
if( G_LIKELY( data == mime_magic_buf ) )
208
G_UNLOCK( mime_magic_buf );
214
if( data != (void*)-1 )
217
for( i = 0; ! type && i < n_caches; ++i )
218
type = mime_cache_lookup_magic( caches[i], data, len );
220
/* Check for executable file */
221
if( ! type && g_file_test( filepath, G_FILE_TEST_IS_EXECUTABLE ) )
222
type = XDG_MIME_TYPE_EXECUTABLE;
224
/* fallback: check for plain text */
227
if( mime_type_is_data_plain_text( data, len > TEXT_MAX_EXTENT ? TEXT_MAX_EXTENT : len ) )
228
type = XDG_MIME_TYPE_PLAIN_TEXT;
232
munmap ( (char*)data, len );
234
if( G_LIKELY( data == mime_magic_buf ) )
235
G_UNLOCK( mime_magic_buf ); /* unlock the common buffer */
236
else /* we use our own buffer */
245
/* empty file can be viewed as text file */
246
type = XDG_MIME_TYPE_PLAIN_TEXT;
248
return type && *type ? type : XDG_MIME_TYPE_UNKNOWN;
251
/* Get the name of mime-type */
252
/*const char* mime_type_get_type( MimeInfo* info )
254
return info->type_name;
258
/*
 * Extract a description from the XML text of a MIME type definition.
 *
 * buf, len: the XML text and its length in bytes.
 * locale:   when non-NULL, a <comment xml:lang="LOCALE"> element is searched
 *           for in the text that follows the first (English) <comment>.
 *
 * Returns a newly allocated copy (g_strndup) of the localized comment when one
 * was found, otherwise of the default English comment.
 * The literal 10 added to comment_end is the length of the closing tag held in
 * end_comment_tag.
 *
 * NOTE(review): what is returned when <comment> or its closing tag cannot be
 * found is not visible in this view - confirm against the complete file.
 */
static char* parse_xml_desc( const char* buf, size_t len, const char* locale )
260
const char *buf_end = buf + len;
261
const char *comment = NULL, *comment_end, *eng_comment;
262
size_t eng_comment_len = 0, comment_len = 0;
264
static const char end_comment_tag[]="</comment>";
266
eng_comment = g_strstr_len( buf, len, "<comment>" ); /* default English comment */
267
if( G_UNLIKELY( ! eng_comment ) ) /* This xml file is invalid */
271
comment_end = g_strstr_len( eng_comment, len, end_comment_tag ); /* find </comment> */
272
if( G_UNLIKELY( ! comment_end ) )
274
eng_comment_len = comment_end - eng_comment;
276
if( G_LIKELY( locale ) )
278
int target_len = g_snprintf( target, 64, "<comment xml:lang=\"%s\">", locale);
279
buf = comment_end + 10;
280
len = (buf_end - buf);
281
if( G_LIKELY( ( comment = g_strstr_len( buf, len, target ) ) ) )
284
comment += target_len;
285
comment_end = g_strstr_len( comment, len, end_comment_tag ); /* find </comment> */
286
if( G_LIKELY( comment_end ) )
287
comment_len = (comment_end - comment);
292
if( G_LIKELY( comment ) )
293
return g_strndup( comment, comment_len );
294
return g_strndup( eng_comment, eng_comment_len );
297
/*
 * Load DATA_DIR/mime/TYPE.xml and extract its description with parse_xml_desc().
 *
 * type:     MIME type name used to build the file path.
 * data_dir: base data directory the path is built from.
 * locale:   locale handed to parse_xml_desc(). The body can derive one from the
 *           first entry of g_get_language_names(), cut at the first dot -
 *           presumably only when locale is NULL; confirm against the full file.
 *
 * The path is formatted into a fixed 256-byte buffer with g_snprintf().
 *
 * NOTE(review): both an mmap() path and a g_malloc()/read() path appear here;
 * presumably they are build-time alternatives - confirm.
 */
static char* _mime_type_get_desc( const char* type, const char* data_dir, const char* locale )
301
char *buffer, *_locale, *desc;
302
char file_path[ 256 ];
304
/* FIXME: This path shouldn't be hard-coded. */
305
g_snprintf( file_path, 256, "%s/mime/%s.xml", data_dir, type );
307
fd = open ( file_path, O_RDONLY, 0 );
308
if ( G_UNLIKELY( fd == -1 ) )
310
if( G_UNLIKELY( fstat ( fd, &statbuf ) == -1 ) )
316
buffer = (char*)mmap( NULL, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0 );
318
buffer = (char*)g_malloc( statbuf.st_size );
319
if( read( fd, buffer, statbuf.st_size ) == -1 )
326
if ( G_UNLIKELY( buffer == (void*)-1 ) )
332
const char* const * langs = g_get_language_names();
333
char* dot = strchr( langs[0], '.' );
335
locale = _locale = g_strndup( langs[0], (size_t)(dot - langs[0]) );
339
desc = parse_xml_desc( buffer, statbuf.st_size, locale );
343
munmap ( buffer, statbuf.st_size );
351
* Get human-readable description of the mime-type
352
* If locale is NULL, current locale will be used.
353
* The returned string should be freed when no longer used.
355
/*
 * Public entry point for the description of a MIME type.
 *
 * type:   MIME type name.
 * locale: forwarded unchanged to _mime_type_get_desc().
 *
 * _mime_type_get_desc() is called for entries of g_get_system_data_dirs() and
 * for g_get_user_data_dir().
 *
 * NOTE(review): the loop and early-return structure is not visible in this
 * view; the FIXME in the body indicates the system directories are consulted
 * before the user directory - confirm against the complete file.
 */
char* mime_type_get_desc( const char* type, const char* locale )
358
const gchar* const * dir;
360
dir = g_get_system_data_dirs();
363
desc = _mime_type_get_desc( type, *dir, locale );
369
* FIXME: According to specs on freedesktop.org, user_data_dir has
370
* higher priority than system_data_dirs, but in most cases, there was
371
* no file, or very few files in user_data_dir, so checking it first will
372
* result in many unnecessary open() system calls, yealding bad performance.
373
* Since the spec really sucks, we don't follow it here.
375
desc = _mime_type_get_desc( type, g_get_user_data_dir(), locale );
379
/*
 * Tear down library state by freeing every loaded cache.
 *
 * NOTE(review): the hash-table teardown shown here may be disabled in the
 * complete file, since the matching table creation in mime_type_init() is
 * commented out - confirm.
 */
void mime_type_finalize()
382
if( G_LIKELY( table ) )
384
g_hash_table_destroy( table );
388
mime_cache_free_all();
392
/*
 * Debug helper: looks up the parents of type in the loaded caches and logs
 * each one with g_debug(); a "no parent found" message is logged otherwise.
 */
void test_parents(const char* type)
395
const char** parents = NULL;
398
for( i = 0; i < n_caches; ++i )
400
parents = mime_cache_lookup_parents( caches[i], type );
405
for( p = parents; *p; ++p )
407
g_debug( "%s is parent of %s", *p, type );
410
g_debug( "no parent found" );
413
/*
 * Debug helper: looks up the alias of type in the loaded caches and logs the
 * result with g_debug().
 */
void test_alias( const char* type )
416
const char* alias = NULL;
417
for( i = 0; i < n_caches; ++i )
419
alias = mime_cache_lookup_alias( caches[i], type );
423
g_debug("test:\nalias of %s is %s", type, alias );
428
/* Initialize the library by loading every mime.cache file (see mime_cache_load_all()). */
void mime_type_init()
430
mime_cache_load_all();
431
// table = g_hash_table_new_full( g_str_hash, g_str_equal, g_free, (GDestroyNotify)mime_type_unref );
434
/* load all mime.cache files on the system,
435
* including /usr/share/mime/mime.cache,
436
* /usr/local/share/mime/mime.cache,
437
* and $HOME/.local/share/mime/mime.cache. */
438
/*
 * Build the caches array.
 *
 * Slot 0 holds the cache found under g_get_user_data_dir(); slots 1 and up
 * hold the caches under each entry of g_get_system_data_dirs(), in order.
 * While loading, mime_cache_max_extent is raised to the largest
 * magic_max_extent seen, and mime_magic_buf is finally allocated with that
 * size.
 */
void mime_cache_load_all()
440
const char* const * dirs;
442
const char filename[] = "/mime/mime.cache";
445
dirs = g_get_system_data_dirs();
446
n_caches = g_strv_length( (char**)dirs ) + 1;
447
caches = (MimeCache**)g_slice_alloc( n_caches * sizeof(MimeCache*) );
449
path = g_build_filename( g_get_user_data_dir(), filename, NULL );
450
caches[0] = mime_cache_new( path );
452
if( caches[0]->magic_max_extent > mime_cache_max_extent )
453
mime_cache_max_extent = caches[0]->magic_max_extent;
455
for( i = 1; i < n_caches; ++i )
457
path = g_build_filename( dirs[i - 1], filename, NULL );
458
caches[ i ] = mime_cache_new( path );
460
if( caches[i]->magic_max_extent > mime_cache_max_extent )
461
mime_cache_max_extent = caches[i]->magic_max_extent;
463
mime_magic_buf = g_malloc( mime_cache_max_extent );
467
/* free all mime.cache files on the system */
468
/*
 * Release everything set up by mime_cache_load_all(): each cache is freed via
 * mime_cache_foreach(), the caches array is returned to the slice allocator,
 * mime_cache_max_extent is reset to 0 and mime_magic_buf is freed and cleared.
 */
void mime_cache_free_all()
470
mime_cache_foreach( (GFunc)mime_cache_free, NULL );
471
g_slice_free1( n_caches * sizeof(MimeCache*), caches );
474
mime_cache_max_extent = 0;
476
g_free( mime_magic_buf );
477
mime_magic_buf = NULL;
480
/* Iterate through all mime caches */
481
/*
 * Invoke func( cache, user_data ) once for every loaded cache, in array order.
 */
void mime_cache_foreach( GFunc func, gpointer user_data )
484
for( i = 0; i < n_caches; ++i )
485
func( caches[i], user_data );
488
/*
 * Reload one cache from its own file_path and refresh the shared state.
 *
 * After mime_cache_load(), mime_cache_max_extent is raised to the largest
 * magic_max_extent among all caches (the visible code does not reset it
 * first, so it never shrinks), and mime_magic_buf is resized to that value
 * while its lock is held.
 *
 * NOTE(review): the return statement is not visible in this view; presumably
 * the result of mime_cache_load() stored in ret is returned - confirm.
 */
gboolean mime_cache_reload( MimeCache* cache )
491
gboolean ret = mime_cache_load( cache, cache->file_path );
492
/* recalculate max magic extent */
493
for( i = 0; i < n_caches; ++i )
495
if( caches[i]->magic_max_extent > mime_cache_max_extent )
496
mime_cache_max_extent = caches[i]->magic_max_extent;
499
G_LOCK( mime_magic_buf );
501
mime_magic_buf = g_realloc( mime_magic_buf, mime_cache_max_extent );
503
G_UNLOCK( mime_magic_buf );
508
/*
 * Plain-text heuristic: when len is non-negative and data is non-NULL, the
 * first len bytes of data are scanned for a NUL byte.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably finding a NUL byte means the data is not plain text - confirm.
 */
gboolean mime_type_is_data_plain_text( const char* data, int len )
511
if ( G_LIKELY( len >= 0 && data ) )
513
for ( i = 0; i < len; ++i )
515
if ( data[ i ] == '\0' )
523
/*
 * Decide whether a file should be treated as text.
 *
 * file_path: file opened read-only for content inspection.
 * mime_type: MIME type of the file; it is tested for being a subclass of
 *            XDG_MIME_TYPE_PLAIN_TEXT and for a text/ or application/ prefix
 *            before the content is examined.
 *
 * For regular files, at most TEXT_MAX_EXTENT leading bytes are passed to
 * mime_type_is_data_plain_text().
 *
 * NOTE(review): the outcome of the two mime_type tests is not visible here,
 * and both an mmap() path and a read() path appear; presumably they are
 * build-time alternatives - confirm against the complete file.
 */
gboolean mime_type_is_text_file( const char *file_path, const char* mime_type )
527
gboolean ret = FALSE;
531
if( mime_type_is_subclass( mime_type, XDG_MIME_TYPE_PLAIN_TEXT ) )
533
if( ! g_str_has_prefix( mime_type, "text/" ) && ! g_str_has_prefix( mime_type, "application/" ) )
540
file = open ( file_path, O_RDONLY );
544
if( fstat( file, &statbuf ) != -1 )
546
if( S_ISREG( statbuf.st_mode ) )
550
rlen = statbuf.st_size < TEXT_MAX_EXTENT ? statbuf.st_size : TEXT_MAX_EXTENT;
551
data = (char*) mmap( NULL, rlen, PROT_READ, MAP_SHARED, file, 0 );
552
ret = mime_type_is_data_plain_text( data, rlen );
553
munmap ( (char*)data, rlen );
555
unsigned char data[ TEXT_MAX_EXTENT ];
556
rlen = read ( file, data, sizeof( data ) );
557
ret = mime_type_is_data_plain_text( (char*) data, rlen );
566
/*
 * Decide whether a file may be run as a program.
 *
 * file_path: file whose executable permission is tested with g_file_test().
 * mime_type: type of the file; the body can obtain it through
 *            mime_type_get_by_file() (presumably when the caller passes
 *            NULL - confirm against the complete file).
 *
 * The type must differ from XDG_MIME_TYPE_UNKNOWN and be a subclass of
 * XDG_MIME_TYPE_EXECUTABLE or of application/x-shellscript.
 *
 * NOTE(review): the comparison with XDG_MIME_TYPE_UNKNOWN is a pointer
 * comparison, not a string comparison - confirm this is intended for
 * caller-supplied strings.
 */
gboolean mime_type_is_executable_file( const char *file_path, const char* mime_type )
570
mime_type = mime_type_get_by_file( file_path, NULL, NULL );
574
* Only executable types can be executale.
575
* Since some common types, such as application/x-shellscript,
576
* are not in mime database, we have to add them ourselves.
578
if ( mime_type != XDG_MIME_TYPE_UNKNOWN &&
579
(mime_type_is_subclass( mime_type, XDG_MIME_TYPE_EXECUTABLE ) ||
580
mime_type_is_subclass( mime_type, "application/x-shellscript" ) ) )
584
if ( ! g_file_test( file_path, G_FILE_TEST_IS_EXECUTABLE ) )
592
/* Check if the specified mime_type is the subclass of the specified parent type */
593
/*
 * Compare type against parent: first for string equality, then against every
 * entry returned by mime_cache_lookup_parents() for each loaded cache.
 *
 * NOTE(review): the return statements are not visible in this view, and no
 * recursion into grandparents is visible either - confirm against the
 * complete file.
 */
gboolean mime_type_is_subclass( const char* type, const char* parent )
596
const char** parents = NULL;
599
/* special case, the type specified is identical to the parent type. */
600
if( G_UNLIKELY( 0 == strcmp(type, parent) ) )
603
for( i = 0; i < n_caches; ++i )
605
parents = mime_cache_lookup_parents( caches[i], type );
608
for( p = parents; *p; ++p )
610
if( 0 == strcmp( parent, *p ) )
619
* Get all parent type of this mime_type
620
* The returned string array should be freed with g_strfreev().
622
/*
 * Collect g_strdup() copies of the parents reported for type by the loaded
 * caches into a zero-terminated GArray of strings.
 *
 * Returns the array data as a char**. When nothing was collected,
 * g_array_free() is asked to release the storage as well, so the result is
 * NULL in that case.
 */
char** mime_type_get_parents( const char* type )
625
const char** parents = NULL;
627
GArray* ret = g_array_sized_new( TRUE, TRUE, sizeof(char*), 5 );
629
for( i = 0; i < n_caches; ++i )
631
parents = mime_cache_lookup_parents( caches[i], type );
634
for( p = parents; *p; ++p )
636
char* parent = g_strdup( *p );
637
g_array_append_val( ret, parent );
641
return (char**)g_array_free( ret, (0 == ret->len) );
645
* Get all alias types of this mime_type
646
* The returned string array should be freed with g_strfreev().
648
/*
 * Collect g_strdup() copies of the aliases reported for type by the loaded
 * caches into a zero-terminated GArray of strings.
 *
 * Returns the array data as a char**. When nothing was collected,
 * g_array_free() is asked to release the storage as well, so the result is
 * NULL in that case.
 *
 * NOTE(review): test_alias() in this file stores the result of
 * mime_cache_lookup_alias() in a plain const char*, whereas here it is cast to
 * const char** and iterated as an array - confirm the real return type.
 * NOTE(review): the inner variable named type shadows the parameter.
 */
char** mime_type_get_alias( const char* type )
651
const char** alias = NULL;
653
GArray* ret = g_array_sized_new( TRUE, TRUE, sizeof(char*), 5 );
655
for( i = 0; i < n_caches; ++i )
657
alias = (const char **) mime_cache_lookup_alias( caches[i], type );
660
for( p = alias; *p; ++p )
662
char* type = g_strdup( *p );
663
g_array_append_val( ret, type );
667
return (char**)g_array_free( ret, (0 == ret->len) );
673
MimeCache** mime_type_get_caches( int* n )