2
This file is part of libextractor.
3
(C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff
5
libextractor is free software; you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published
7
by the Free Software Foundation; either version 2, or (at your
8
option) any later version.
10
libextractor is distributed in the hope that it will be useful, but
11
WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
General Public License for more details.
15
You should have received a copy of the GNU General Public License
16
along with libextractor; see the file COPYING. If not, write to the
17
Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18
Boston, MA 02111-1307, USA.
22
#include "extractor.h"
27
* The .deb is an ar-chive file. It contains a tar.gz file
28
* named "control.tar.gz" which then contains a file 'control'
29
* that has the meta-data. And which variant of the various
30
* ar file formats is used is also not quite certain. Yuck.
33
* http://www.mkssoftware.com/docs/man4/tar.4.asp
34
* http://lists.debian.org/debian-policy/2003/12/msg00000.html
35
* http://www.opengroup.org/onlinepubs/009695399/utilities/ar.html
38
static EXTRACTOR_KeywordList * addKeyword(EXTRACTOR_KeywordType type,
40
EXTRACTOR_KeywordList * next) {
41
EXTRACTOR_KeywordList * result;
45
result = malloc(sizeof(EXTRACTOR_KeywordList));
47
result->keyword = keyword;
48
result->keywordType = type;
52
static char * stndup(const char * str,
65
EXTRACTOR_KeywordType type;
68
/* see also: "man 5 deb-control" */
69
static Matches tmap[] = {
70
{ "Package: ", EXTRACTOR_SOFTWARE },
71
{ "Version: ", EXTRACTOR_VERSIONNUMBER },
72
{ "Section: ", EXTRACTOR_GENRE },
73
{ "Priority: ", EXTRACTOR_PRIORITY },
74
{ "Architecture: ", EXTRACTOR_CREATED_FOR },
75
{ "Depends: ", EXTRACTOR_DEPENDENCY },
76
{ "Recommends: ", EXTRACTOR_RELATION },
77
{ "Suggests: ", EXTRACTOR_RELATION },
78
{ "Installed-Size: ", EXTRACTOR_SIZE },
79
{ "Maintainer: ", EXTRACTOR_PACKAGER },
80
{ "Description: ", EXTRACTOR_DESCRIPTION },
81
{ "Source: ", EXTRACTOR_SOURCE },
82
{ "Pre-Depends: ", EXTRACTOR_DEPENDENCY },
83
{ "Conflicts: ", EXTRACTOR_CONFLICTS },
84
{ "Replaces: ", EXTRACTOR_REPLACES },
85
{ "Provides: ", EXTRACTOR_PROVIDES },
87
{ "Essential: ", EXTRACTOR_UNKNOWN }
92
* Process the control file.
94
static struct EXTRACTOR_Keywords * processControl(const char * data,
96
struct EXTRACTOR_Keywords * prev) {
107
while (data[colon] != ':') {
108
if ( (colon > size) || (data[colon] == '\n') )
113
while ( (colon < size) &&
114
(isspace(data[colon]) ) )
117
while ( (eol < size) &&
118
(data[eol] != '\n') ||
120
(data[eol+1] == ' ') ) )
122
if ( (eol == colon) || (eol > size) )
124
key = stndup(&data[pos], colon-pos);
126
while (tmap[i].text != NULL) {
127
if (0 == strcmp(key, tmap[i].text)) {
130
val = stndup(&data[colon], eol-colon);
131
prev = addKeyword(tmap[i].type,
151
char lastModTime [12];
169
* Process the control.tar file.
171
static struct EXTRACTOR_Keywords *
172
processControlTar(const char * data,
174
struct EXTRACTOR_Keywords * prev) {
180
while (pos + sizeof(TarHeader) < size) {
181
unsigned long long fsize;
184
tar = (TarHeader*) &data[pos];
185
if (pos + sizeof(USTarHeader) < size) {
186
ustar = (USTarHeader*) &data[pos];
187
if (0 == strncmp("ustar",
190
pos += 512; /* sizeof(USTarHeader); */
192
pos += 257; /* sizeof(TarHeader); minus gcc alignment... */
194
pos += 257; /* sizeof(TarHeader); minus gcc alignment... */
197
memcpy(buf, &tar->filesize[0], 12);
199
if (1 != sscanf(buf, "%12llo", &fsize)) /* octal! Yuck yuck! */
201
if ( (pos + fsize > size) ||
203
(pos + fsize < pos) )
206
if (0 == strncmp(&tar->name[0],
208
strlen("./control"))) {
209
return processControl(&data[pos],
213
if ( (fsize & 511) != 0)
214
fsize = (fsize |= 511)+1; /* round up! */
215
if (pos + fsize < pos)
223
#define MAX_CONTROL_SIZE (1024 * 1024)
231
void * writeThread(void * arg) {
234
WTC * wtc = (WTC*) arg;
237
while (pos < wtc->size) {
243
return strerror(errno);
252
* Process the control.tar.gz file.
254
static struct EXTRACTOR_Keywords *
255
processControlTGZ(const unsigned char * data,
257
struct EXTRACTOR_Keywords * prev) {
268
bufSize = data[size-4] + 256 * data[size-3] + 65536 * data[size-2] + 256*65536 * data[size-1];
269
if (bufSize > MAX_CONTROL_SIZE)
277
if (0 != pthread_create(&pt, NULL, &writeThread, &wtc)) {
282
gzf = gzdopen(fdes[0], "rb");
286
pthread_join(pt, &error);
289
buf = malloc(bufSize);
293
pthread_join(pt, &error);
296
if (bufSize != gzread(gzf, buf, bufSize)) {
300
pthread_join(pt, &error);
304
pthread_join(pt, &error);
306
prev = processControlTar(buf,
315
char lastModTime [12];
323
struct EXTRACTOR_Keywords *
324
libextractor_deb_extract(const char * filename,
327
struct EXTRACTOR_Keywords * prev) {
333
if (0 != strncmp("!<arch>\n",
335
strlen("!<arch>\n")))
337
pos = strlen("!<arch>\n");
338
while (pos + sizeof(ObjectHeader) < size) {
340
unsigned long long fsize;
343
hdr = (ObjectHeader*) &data[pos];
344
if (0 != strncmp(&hdr->trailer[0],
349
memcpy(buf, &hdr->filesize[0], 10);
351
if (1 != sscanf(buf, "%10llu", &fsize))
353
pos += sizeof(ObjectHeader);
354
if ( (pos + fsize > size) ||
356
(pos + fsize < pos) )
358
if (0 == strncmp(&hdr->name[0],
360
strlen("control.tar.gz"))) {
361
prev = processControlTGZ(&data[pos],
366
if (0 == strncmp(&hdr->name[0],
368
strlen("debian-binary"))) {
369
prev = addKeyword(EXTRACTOR_MIMETYPE,
370
strdup("application/x-debian-package"),
376
break; /* no need to process the rest of the archive */
383
int main (int argc, char **argv) {
386
struct stat fstatbuf;
391
"Call with filename as argument\n");
394
file = OPEN(argv[1],O_RDONLY);
397
if (-1 == FSTAT(file, &fstatbuf)) {
401
size = fstatbuf.st_size;
402
buffer = mmap(NULL, size, PROT_READ, MAP_SHARED, file, 0);
405
EXTRACTOR_printKeywords(stdout,
406
libextractor_deb_extract(argv[1],
410
munmap(buffer, size);