1
.\" Copyright (c) 2003-2005 Tim Kientzle
2
.\" All rights reserved.
4
.\" Redistribution and use in source and binary forms, with or without
5
.\" modification, are permitted provided that the following conditions
7
.\" 1. Redistributions of source code must retain the above copyright
8
.\" notice, this list of conditions and the following disclaimer.
9
.\" 2. Redistributions in binary form must reproduce the above copyright
10
.\" notice, this list of conditions and the following disclaimer in the
11
.\" documentation and/or other materials provided with the distribution.
13
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25
.\" $FreeBSD: src/lib/libarchive/archive_read.3,v 1.22 2005/09/20 17:48:57 kientzle Exp $
31
.Nm archive_read_new ,
32
.Nm archive_read_set_bytes_per_block ,
33
.Nm archive_read_support_compression_all ,
34
.Nm archive_read_support_compression_bzip2 ,
35
.Nm archive_read_support_compression_compress ,
36
.Nm archive_read_support_compression_gzip ,
37
.Nm archive_read_support_compression_none ,
38
.Nm archive_read_support_format_all ,
39
.Nm archive_read_support_format_cpio ,
40
.Nm archive_read_support_format_iso9660 ,
41
.Nm archive_read_support_format_tar ,
42
.Nm archive_read_support_format_zip ,
43
.Nm archive_read_open ,
44
.Nm archive_read_open_fd ,
45
.Nm archive_read_open_file ,
46
.Nm archive_read_next_header ,
47
.Nm archive_read_data ,
48
.Nm archive_read_data_block ,
49
.Nm archive_read_data_skip ,
50
.Nm archive_read_data_into_buffer ,
51
.Nm archive_read_data_into_fd ,
52
.Nm archive_read_extract ,
53
.Nm archive_read_extract_set_progress_callback ,
54
.Nm archive_read_close ,
55
.Nm archive_read_finish
56
.Nd functions for reading streaming archives
60
.Fn archive_read_new "void"
62
.Fn archive_read_set_bytes_per_block "struct archive *" "int"
64
.Fn archive_read_support_compression_all "struct archive *"
66
.Fn archive_read_support_compression_bzip2 "struct archive *"
68
.Fn archive_read_support_compression_compress "struct archive *"
70
.Fn archive_read_support_compression_gzip "struct archive *"
72
.Fn archive_read_support_compression_none "struct archive *"
74
.Fn archive_read_support_format_all "struct archive *"
76
.Fn archive_read_support_format_cpio "struct archive *"
78
.Fn archive_read_support_format_iso9660 "struct archive *"
80
.Fn archive_read_support_format_tar "struct archive *"
82
.Fn archive_read_support_format_zip "struct archive *"
84
.Fn archive_read_open "struct archive *" "void *client_data" "archive_open_callback *" "archive_read_callback *" "archive_close_callback *"
86
.Fn archive_read_open_fd "struct archive *" "int fd" "size_t block_size"
88
.Fn archive_read_open_file "struct archive *" "const char *filename" "size_t block_size"
90
.Fn archive_read_next_header "struct archive *" "struct archive_entry **"
92
.Fn archive_read_data "struct archive *" "void *buff" "size_t len"
94
.Fn archive_read_data_block "struct archive *" "const void **buff" "size_t *len" "off_t *offset"
96
.Fn archive_read_data_skip "struct archive *"
98
.Fn archive_read_data_into_buffer "struct archive *" "void *" "size_t len"
100
.Fn archive_read_data_into_fd "struct archive *" "int fd"
102
.Fn archive_read_extract "struct archive *" "struct archive_entry *" "int flags"
104
.Fn archive_read_extract_set_progress_callback "struct archive *" "void (*func)(void *)" "void *user_data"
106
.Fn archive_read_close "struct archive *"
108
.Fn archive_read_finish "struct archive *"
110
These functions provide a complete API for reading streaming archives.
111
The general process is to first create the
113
object, set options, initialize the reader, iterate over the archive
114
headers and associated data, then close the archive and release all
116
The following summary describes the functions in approximately the
117
order they would be used:
118
.Bl -tag -compact -width indent
119
.It Fn archive_read_new
120
Allocates and initializes a
122
object suitable for reading from an archive.
123
.It Fn archive_read_set_bytes_per_block
124
Sets the block size used for reading the archive data.
125
This controls the size that will be used when invoking the read
127
The default is 20 records or 10240 bytes for tar formats.
128
.It Fn archive_read_support_compression_all , Fn archive_read_support_compression_bzip2 , Fn archive_read_support_compression_compress , Fn archive_read_support_compression_gzip , Fn archive_read_support_compression_none
129
Enables auto-detection code and decompression support for the
130
specified compression.
133
is always enabled by default.
135
.Fn archive_read_support_compression_all
136
enables all available decompression code.
137
.It Fn archive_read_support_format_all , Fn archive_read_support_format_cpio , Fn archive_read_support_format_iso9660 , Fn archive_read_support_format_tar , Fn archive_read_support_format_zip
138
Enables support\(emincluding auto-detection code\(emfor the
139
specified archive format.
141
.Fn archive_read_support_format_tar
142
enables support for a variety of standard tar formats, old-style tar,
143
ustar, pax interchange format, and many common variants.
145
.Fn archive_read_support_format_all
146
enables support for all available formats.
147
Note that there is no default.
148
.It Fn archive_read_open
149
Freeze the settings, open the archive, and prepare for reading entries.
150
This is the most generic version of this call, which accepts
151
three callback functions.
152
Most clients will want to use
153
.Fn archive_read_open_file
155
.Fn archive_read_open_fd
157
The library invokes the client-provided functions to obtain
158
raw bytes from the archive.
159
Note: The API permits a decompression method to fork and invoke the
160
callbacks from another process.
161
Although none of the current decompression methods use this technique,
162
future decompression methods may do so.
163
If the decompressor forks, it will ensure that the open and close
164
callbacks are invoked within the same process as the read callback.
165
In particular, clients should not attempt to use shared variables to
166
communicate between the open/read/close callbacks and the mainline code.
167
.It Fn archive_read_open_fd
169
.Fn archive_read_open ,
170
except that it accepts a file descriptor and block size rather than
171
a trio of function pointers.
172
Note that the file descriptor will not be automatically closed at
174
.It Fn archive_read_open_file
176
.Fn archive_read_open ,
177
except that it accepts a simple filename and a block size.
178
A NULL filename represents standard input.
179
.It Fn archive_read_next_header
180
Read the header for the next entry and return a pointer to
182
.Tn struct archive_entry .
183
.It Fn archive_read_data
184
Read data associated with the header just read.
185
Internally, this is a convenience function that calls
186
.Fn archive_read_data_block
187
and fills any gaps with nulls so that callers see a single
188
continuous stream of data.
189
.It Fn archive_read_data_block
190
Return the next available block of data for this entry.
192
.Fn archive_read_data ,
194
.Fn archive_read_data_block
195
function avoids copying data and allows you to correctly handle
196
sparse files, as supported by some archive formats.
197
The library guarantees that offsets will increase and that blocks
199
Note that the blocks returned from this function can be much larger
200
than the block size read from disk, due to compression
201
and internal buffer optimizations.
202
.It Fn archive_read_data_skip
203
A convenience function that repeatedly calls
204
.Fn archive_read_data_block
205
to skip all of the data for this archive entry.
206
.It Fn archive_read_data_into_buffer
207
A convenience function that repeatedly calls
208
.Fn archive_read_data_block
209
to copy the entire entry into the client-supplied buffer.
210
Note that the client is responsible for sizing the buffer appropriately.
211
.It Fn archive_read_data_into_fd
212
A convenience function that repeatedly calls
213
.Fn archive_read_data_block
214
to copy the entire entry to the provided file descriptor.
215
.It Fn archive_read_extract
216
A convenience function that recreates the specified object on
217
disk and reads the entry data into that object.
218
The filename, permissions, and other critical information
219
are taken from the provided
224
argument modifies how the object is recreated.
225
It consists of a bitwise OR of one or more of the following values:
226
.Bl -tag -compact -width "indent"
227
.It Cm ARCHIVE_EXTRACT_OWNER
228
The user and group IDs should be set on the restored file.
229
By default, the user and group IDs are not restored.
230
.It Cm ARCHIVE_EXTRACT_PERM
231
The permissions (mode bits) should be restored for all objects.
232
By default, permissions are only restored for regular files.
233
.It Cm ARCHIVE_EXTRACT_TIME
234
The timestamps (mtime, ctime, and atime) should be restored.
235
By default, they are ignored.
236
Note that restoring of atime is not currently supported.
237
.It Cm ARCHIVE_EXTRACT_NO_OVERWRITE
238
Existing files on disk will not be overwritten.
239
By default, existing regular files are truncated and overwritten;
240
existing directories will have their permissions updated;
241
other pre-existing objects are unlinked and recreated from scratch.
242
.It Cm ARCHIVE_EXTRACT_UNLINK
243
Existing files on disk will be unlinked and recreated from scratch.
244
By default, existing files are truncated and rewritten, but
245
the file is not recreated.
246
In particular, the default behavior does not break existing hard links.
247
.It Cm ARCHIVE_EXTRACT_ACL
248
Attempt to restore ACLs.
249
By default, extended ACLs are ignored.
250
.It Cm ARCHIVE_EXTRACT_FFLAGS
251
Attempt to restore extended file flags.
252
By default, file flags are ignored.
254
Note that not all attributes are set immediately;
255
some attributes are cached in memory and written to disk only
256
when the archive is closed.
257
(For example, read-only directories are initially created
258
writable so that files within those directories can be
260
The final permissions are set when the archive is closed.)
261
.It Fn archive_read_extract_set_progress_callback
262
Sets a pointer to a user-defined callback that can be used
263
for updating progress displays during extraction.
264
The progress function will be invoked during the extraction of large
266
The progress function will be invoked with the pointer provided to this call.
267
Generally, the data pointed to should include a reference to the archive
268
object and the archive_entry object so that various statistics
269
can be retrieved for the progress display.
270
.It Fn archive_read_close
271
Complete the archive and invoke the close callback.
272
.It Fn archive_read_finish
274
.Fn archive_read_close
275
if it was not invoked manually, then release all resources.
278
Note that the library determines most of the relevant information about
279
the archive by inspection.
280
In particular, it automatically detects
284
compression and transparently performs the appropriate decompression.
285
It also automatically detects the archive format.
287
A complete description of the
290
.Tn struct archive_entry
291
objects can be found in the overview manual page for
294
The callback functions must match the following prototypes:
295
.Bl -item -offset indent
298
.Fn archive_read_callback "struct archive *" "void *client_data" "const void **buffer"
301
.Fn archive_open_callback "struct archive *" "void *client_data"
304
.Fn archive_close_callback "struct archive *" "void *client_data"
307
The open callback is invoked by
311
if the underlying file or data source is successfully
313
If the open fails, it should call
314
.Fn archive_set_error
315
to register an error code and message and return
318
The read callback is invoked whenever the library
319
requires raw bytes from the archive.
320
The read callback should read data into a buffer,
322
.Li const void **buffer
323
argument to point to the available data, and
324
return a count of the number of bytes available.
325
The library will invoke the read callback again
326
only after it has consumed this data.
327
The library imposes no constraints on the size
328
of the data blocks returned.
329
On end-of-file, the read callback should
331
On error, the read callback should invoke
332
.Fn archive_set_error
333
to register an error code and message and
336
The close callback is invoked by
.Fn archive_read_close
when
337
the archive processing is complete.
338
The callback should return
341
On failure, the callback should invoke
342
.Fn archive_set_error
343
to register an error code and message and
347
The following illustrates basic usage of the library.
349
the callback functions are simply wrappers around the standard
355
.Bd -literal -offset indent
357
list_archive(const char *name)
359
struct mydata *mydata;
361
struct archive_entry *entry;
363
mydata = malloc(sizeof(struct mydata));
364
a = archive_read_new();
366
archive_read_support_compression_all(a);
367
archive_read_support_format_all(a);
368
archive_read_open(a, mydata, myopen, myread, myclose);
369
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
370
printf("%s\\n",archive_entry_pathname(entry));
371
archive_read_data_skip(a);
373
archive_read_finish(a);
378
myread(struct archive *a, void *client_data, const void **buff)
380
struct mydata *mydata = client_data;
382
*buff = mydata->buff;
383
return (read(mydata->fd, mydata->buff, 10240));
387
myopen(struct archive *a, void *client_data)
389
struct mydata *mydata = client_data;
391
mydata->fd = open(mydata->name, O_RDONLY);
392
return (mydata->fd >= 0 ? ARCHIVE_OK : ARCHIVE_FATAL);
396
myclose(struct archive *a, void *client_data)
398
struct mydata *mydata = client_data;
406
Most functions return zero on success, non-zero on error.
407
The possible return codes include:
409
(the operation succeeded),
411
(the operation succeeded but a non-critical error was encountered),
413
(end-of-archive was encountered),
415
(the operation failed but can be retried),
418
(there was a fatal error; the archive should be closed immediately).
419
Detailed error codes and textual descriptions are available from the
422
.Fn archive_error_string
426
returns a pointer to a freshly allocated
433
.Fn archive_read_data
434
returns a count of bytes actually read or zero at the end of the entry.
440
is returned and an error code and textual description can be retrieved from the
443
.Fn archive_error_string
446
The library expects the client callbacks to behave similarly.
447
If there is an error, you can use
448
.Fn archive_set_error
449
to set an appropriate error code and description,
450
then return one of the non-zero values above.
451
(Note that the value eventually returned to the client may
452
not be the same; many errors that are not critical at the level
453
of basic I/O can prevent the archive from being properly read,
454
thus most I/O errors eventually cause
466
library first appeared in
472
library was written by
473
.An Tim Kientzle Aq kientzle@acm.org .
475
Directories are actually extracted in two distinct phases.
476
Directories are created during
477
.Fn archive_read_extract ,
478
but final permissions are not set until
479
.Fn archive_read_close .
480
This separation is necessary to correctly handle borderline
481
cases such as a non-writable directory containing
482
files, but can cause unexpected results.
483
In particular, directory permissions are not fully
484
restored until the archive is closed.
487
to change the current directory between calls to
488
.Fn archive_read_extract
490
.Fn archive_read_close ,
491
you may confuse the permission-setting logic with
492
the result that directory permissions are restored