2
* compress.c - Compressed attribute handling code. Part of the Linux-NTFS
5
* Copyright (c) 2004-2005 Anton Altaparmakov
6
* Copyright (c) 2004-2006 Szabolcs Szakacsits
7
* Copyright (c) 2005 Yura Pakhuchiy
9
* This program/include file is free software; you can redistribute it and/or
10
* modify it under the terms of the GNU General Public License as published
11
* by the Free Software Foundation; either version 2 of the License, or
12
* (at your option) any later version.
14
* This program/include file is distributed in the hope that it will be
15
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty
16
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
* GNU General Public License for more details.
19
* You should have received a copy of the GNU General Public License
20
* along with this program (in the main directory of the Linux-NTFS
21
* distribution in the file COPYING); if not, write to the Free Software
22
* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
53
* enum ntfs_compression_constants - constants used in the compression code
56
/* Token types and access mask. */
57
NTFS_SYMBOL_TOKEN = 0,
58
NTFS_PHRASE_TOKEN = 1,
61
/* Compression sub-block constants. */
62
NTFS_SB_SIZE_MASK = 0x0fff,
63
NTFS_SB_SIZE = 0x1000,
64
NTFS_SB_IS_COMPRESSED = 0x8000,
65
} ntfs_compression_constants;
68
* ntfs_decompress - decompress a compression block into an array of pages
69
* @dest: buffer to which to write the decompressed data
70
* @dest_size: size of buffer @dest in bytes
71
* @cb_start: compression block to decompress
72
* @cb_size: size of compression block @cb_start in bytes
74
* This decompresses the compression block @cb_start into the destination
77
* @cb_start is a pointer to the compression block which needs decompressing
78
* and @cb_size is the size of @cb_start in bytes (8-64kiB).
80
* Return 0 if success or -EOVERFLOW on error in the compressed stream.
82
static int ntfs_decompress(u8 *dest, const u32 dest_size,
83
u8 *const cb_start, const u32 cb_size)
86
* Pointers into the compressed data, i.e. the compression block (cb),
87
* and the therein contained sub-blocks (sb).
89
u8 *cb_end = cb_start + cb_size; /* End of cb. */
90
u8 *cb = cb_start; /* Current position in cb. */
91
u8 *cb_sb_start = cb; /* Beginning of the current sb in the cb. */
92
u8 *cb_sb_end; /* End of current sb / beginning of next sb. */
93
/* Variables for uncompressed data / destination. */
94
u8 *dest_end = dest + dest_size; /* End of dest buffer. */
95
u8 *dest_sb_start; /* Start of current sub-block in dest. */
96
u8 *dest_sb_end; /* End of current sb in dest. */
97
/* Variables for tag and token parsing. */
98
u8 tag; /* Current tag. */
99
int token; /* Loop counter for the eight tokens in tag. */
101
ntfs_log_trace("Entering, cb_size = 0x%x.\n", (unsigned)cb_size);
103
ntfs_log_debug("Beginning sub-block at offset = 0x%x in the cb.\n",
106
* Have we reached the end of the compression block or the end of the
107
* decompressed data? The latter can happen for example if the current
108
* position in the compression block is one byte before its end so the
109
* first two checks do not detect it.
111
if (cb == cb_end || !le16_to_cpup((u16*)cb) || dest == dest_end) {
112
ntfs_log_debug("Completed. Returning success (0).\n");
115
/* Setup offset for the current sub-block destination. */
116
dest_sb_start = dest;
117
dest_sb_end = dest + NTFS_SB_SIZE;
118
/* Check that we are still within allowed boundaries. */
119
if (dest_sb_end > dest_end)
120
goto return_overflow;
121
/* Does the minimum size of a compressed sb overflow valid range? */
123
goto return_overflow;
124
/* Setup the current sub-block source pointers and validate range. */
126
cb_sb_end = cb_sb_start + (le16_to_cpup((u16*)cb) & NTFS_SB_SIZE_MASK)
128
if (cb_sb_end > cb_end)
129
goto return_overflow;
130
/* Now, we are ready to process the current sub-block (sb). */
131
if (!(le16_to_cpup((u16*)cb) & NTFS_SB_IS_COMPRESSED)) {
132
ntfs_log_debug("Found uncompressed sub-block.\n");
133
/* This sb is not compressed, just copy it into destination. */
134
/* Advance source position to first data byte. */
136
/* An uncompressed sb must be full size. */
137
if (cb_sb_end - cb != NTFS_SB_SIZE)
138
goto return_overflow;
139
/* Copy the block and advance the source position. */
140
memcpy(dest, cb, NTFS_SB_SIZE);
142
/* Advance destination position to next sub-block. */
143
dest += NTFS_SB_SIZE;
146
ntfs_log_debug("Found compressed sub-block.\n");
147
/* This sb is compressed, decompress it into destination. */
148
/* Forward to the first tag in the sub-block. */
151
if (cb == cb_sb_end) {
152
/* Check if the decompressed sub-block was not full-length. */
153
if (dest < dest_sb_end) {
154
int nr_bytes = dest_sb_end - dest;
156
ntfs_log_debug("Filling incomplete sub-block with zeroes.\n");
157
/* Zero remainder and update destination position. */
158
memset(dest, 0, nr_bytes);
161
/* We have finished the current sub-block. */
164
/* Check we are still in range. */
165
if (cb > cb_sb_end || dest > dest_sb_end)
166
goto return_overflow;
167
/* Get the next tag and advance to first token. */
169
/* Parse the eight tokens described by the tag. */
170
for (token = 0; token < 8; token++, tag >>= 1) {
171
u16 lg, pt, length, max_non_overlap;
175
/* Check if we are done / still in range. */
176
if (cb >= cb_sb_end || dest > dest_sb_end)
178
/* Determine token type and parse appropriately.*/
179
if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
181
* We have a symbol token, copy the symbol across, and
182
* advance the source and destination positions.
185
/* Continue with the next token. */
189
* We have a phrase token. Make sure it is not the first tag in
190
* the sb as this is illegal and would confuse the code below.
192
if (dest == dest_sb_start)
193
goto return_overflow;
195
* Determine the number of bytes to go back (p) and the number
196
* of bytes to copy (l). We use an optimized algorithm in which
197
* we first calculate log2(current destination position in sb),
198
* which allows determination of l and p in O(1) rather than
199
* O(n). We just need an arch-optimized log2() function now.
202
for (i = dest - dest_sb_start - 1; i >= 0x10; i >>= 1)
204
/* Get the phrase token into i. */
205
pt = le16_to_cpup((u16*)cb);
207
* Calculate starting position of the byte sequence in
208
* the destination using the fact that p = (pt >> (12 - lg)) + 1
209
* and make sure we don't go too far back.
211
dest_back_addr = dest - (pt >> (12 - lg)) - 1;
212
if (dest_back_addr < dest_sb_start)
213
goto return_overflow;
214
/* Now calculate the length of the byte sequence. */
215
length = (pt & (0xfff >> lg)) + 3;
216
/* Verify destination is in range. */
217
if (dest + length > dest_sb_end)
218
goto return_overflow;
219
/* The number of non-overlapping bytes. */
220
max_non_overlap = dest - dest_back_addr;
221
if (length <= max_non_overlap) {
222
/* The byte sequence doesn't overlap, just copy it. */
223
memcpy(dest, dest_back_addr, length);
224
/* Advance destination pointer. */
228
* The byte sequence does overlap, copy non-overlapping
229
* part and then do a slow byte by byte copy for the
230
* overlapping part. Also, advance the destination
233
memcpy(dest, dest_back_addr, max_non_overlap);
234
dest += max_non_overlap;
235
dest_back_addr += max_non_overlap;
236
length -= max_non_overlap;
238
*dest++ = *dest_back_addr++;
240
/* Advance source position and continue with the next token. */
243
/* No tokens left in the current tag. Continue with the next tag. */
247
ntfs_log_perror("Failed to decompress file");
252
* ntfs_is_cb_compressed - internal function, do not use
254
* This is a very specialised function determining if a cb is compressed or
255
* uncompressed. It is assumed that checking for a sparse cb has already been
256
* performed and that the cb is not sparse. It makes all sorts of other
257
* assumptions as well and hence it is not useful anywhere other than where it
258
* is used at the moment. Please, do not make this function available for use
259
* outside of compress.c as it is bound to confuse people and not do what they
262
* Return TRUE on errors so that the error will be detected later on in the
263
* code. Might be a bit confusing to debug but there really should never be
264
* errors coming from here.
266
static BOOL ntfs_is_cb_compressed(ntfs_attr *na, runlist_element *rl,
267
VCN cb_start_vcn, int cb_clusters)
270
* The simplest case: the run starting at @cb_start_vcn contains
271
* @cb_clusters clusters which are all not sparse, thus the cb is not
275
cb_clusters -= rl->length - (cb_start_vcn - rl->vcn);
276
while (cb_clusters > 0) {
277
/* Go to the next run. */
279
/* Map the next runlist fragment if it is not mapped. */
280
if (rl->lcn < LCN_HOLE || !rl->length) {
281
cb_start_vcn = rl->vcn;
282
rl = ntfs_attr_find_vcn(na, rl->vcn);
283
if (!rl || rl->lcn < LCN_HOLE || !rl->length)
286
* If the runs were merged need to deal with the
287
* resulting partial run so simply restart.
289
if (rl->vcn < cb_start_vcn)
292
/* If the current run is sparse, the cb is compressed. */
293
if (rl->lcn == LCN_HOLE)
295
/* If the whole cb is not sparse, it is not compressed. */
296
if (rl->length >= cb_clusters)
298
cb_clusters -= rl->length;
300
/* All cb_clusters were not sparse thus the cb is not compressed. */
305
* ntfs_compressed_attr_pread - read from a compressed attribute
306
* @na: ntfs attribute to read from
307
* @pos: byte position in the attribute to begin reading from
308
* @count: number of bytes to read
309
* @b: output data buffer
311
* NOTE: You probably want to be using attrib.c::ntfs_attr_pread() instead.
313
* This function will read @count bytes starting at offset @pos from the
314
* compressed ntfs attribute @na into the data buffer @b.
316
* On success, return the number of successfully read bytes. If this number
317
* is lower than @count this means that the read reached end of file or that
318
* an error was encountered during the read so that the read is partial.
319
* 0 means end of file or nothing was read (also return 0 when @count is 0).
321
* On error and nothing has been read, return -1 with errno set appropriately
322
* to the return code of ntfs_pread(), or to EINVAL in case of invalid
325
s64 ntfs_compressed_attr_pread(ntfs_attr *na, s64 pos, s64 count, void *b)
327
s64 br, to_read, ofs, total, total2;
329
VCN start_vcn, vcn, end_vcn;
332
u8 *dest, *cb, *cb_pos, *cb_end;
335
unsigned int nr_cbs, cb_clusters;
337
ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, pos 0x%llx, count 0x%llx.\n",
338
(unsigned long long)na->ni->mft_no, na->type,
339
(long long)pos, (long long)count);
340
if (!na || !NAttrCompressed(na) || !na->ni || !na->ni->vol || !b ||
341
pos < 0 || count < 0) {
346
* Encrypted attributes are not supported. We return access denied,
347
* which is what Windows NT4 does, too.
349
if (NAttrEncrypted(na)) {
355
/* Truncate reads beyond end of attribute. */
356
if (pos + count > na->data_size) {
357
if (pos >= na->data_size) {
360
count = na->data_size - pos;
362
/* If it is a resident attribute, simply use ntfs_attr_pread(). */
363
if (!NAttrNonResident(na))
364
return ntfs_attr_pread(na, pos, count, b);
366
/* Zero out reads beyond initialized size. */
367
if (pos + count > na->initialized_size) {
368
if (pos >= na->initialized_size) {
372
total2 = pos + count - na->initialized_size;
374
memset((u8*)b + count, 0, total2);
377
cb_size = na->compression_block_size;
378
cb_size_mask = cb_size - 1UL;
379
cb_clusters = na->compression_block_clusters;
381
/* Need a temporary buffer for each loaded compression block. */
382
cb = ntfs_malloc(cb_size);
386
/* Need a temporary buffer for each uncompressed block. */
387
dest = ntfs_malloc(cb_size);
393
* The first vcn in the first compression block (cb) which we need to
396
start_vcn = (pos & ~cb_size_mask) >> vol->cluster_size_bits;
397
/* Offset in the uncompressed cb at which to start reading data. */
398
ofs = pos & cb_size_mask;
400
* The first vcn in the cb after the last cb which we need to
403
end_vcn = ((pos + count + cb_size - 1) & ~cb_size_mask) >>
404
vol->cluster_size_bits;
405
/* Number of compression blocks (cbs) in the wanted vcn range. */
406
nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits >>
407
na->compression_block_size_bits;
408
cb_end = cb + cb_size;
413
start_vcn += cb_clusters;
415
/* Check whether the compression block is sparse. */
416
rl = ntfs_attr_find_vcn(na, vcn);
417
if (!rl || rl->lcn < LCN_HOLE) {
422
/* FIXME: Do we want EIO or the error code? (AIA) */
426
if (rl->lcn == LCN_HOLE) {
427
/* Sparse cb, zero out destination range overlapping the cb. */
428
ntfs_log_debug("Found sparse compression block.\n");
429
to_read = min(count, cb_size - ofs);
430
memset(b, 0, to_read);
434
b = (u8*)b + to_read;
435
} else if (!ntfs_is_cb_compressed(na, rl, vcn, cb_clusters)) {
436
s64 tdata_size, tinitialized_size;
438
* Uncompressed cb, read it straight into the destination range
439
* overlapping the cb.
441
ntfs_log_debug("Found uncompressed compression block.\n");
443
* Read the uncompressed data into the destination buffer.
444
* NOTE: We cheat a little bit here by marking the attribute as
445
* not compressed in the ntfs_attr structure so that we can
446
* read the data by simply using ntfs_attr_pread(). (-8
447
* NOTE: we have to modify data_size and initialized_size
448
* temporarily as well...
450
to_read = min(count, cb_size - ofs);
451
ofs += vcn << vol->cluster_size_bits;
452
NAttrClearCompressed(na);
453
tdata_size = na->data_size;
454
tinitialized_size = na->initialized_size;
455
na->data_size = na->initialized_size = na->allocated_size;
457
br = ntfs_attr_pread(na, ofs, to_read, b);
460
na->data_size = tdata_size;
461
na->initialized_size = tinitialized_size;
462
NAttrSetCompressed(na);
475
} while (to_read > 0);
476
na->data_size = tdata_size;
477
na->initialized_size = tinitialized_size;
478
NAttrSetCompressed(na);
481
s64 tdata_size, tinitialized_size;
484
* Compressed cb, decompress it into the temporary buffer, then
485
* copy the data to the destination range overlapping the cb.
487
ntfs_log_debug("Found compressed compression block.\n");
489
* Read the compressed data into the temporary buffer.
490
* NOTE: We cheat a little bit here by marking the attribute as
491
* not compressed in the ntfs_attr structure so that we can
492
* read the raw, compressed data by simply using
493
* ntfs_attr_pread(). (-8
494
* NOTE: We have to modify data_size and initialized_size
495
* temporarily as well...
498
NAttrClearCompressed(na);
499
tdata_size = na->data_size;
500
tinitialized_size = na->initialized_size;
501
na->data_size = na->initialized_size = na->allocated_size;
503
br = ntfs_attr_pread(na,
504
(vcn << vol->cluster_size_bits) +
505
(cb_pos - cb), to_read, cb_pos);
508
na->data_size = tdata_size;
509
na->initialized_size = tinitialized_size;
510
NAttrSetCompressed(na);
520
} while (to_read > 0);
521
na->data_size = tdata_size;
522
na->initialized_size = tinitialized_size;
523
NAttrSetCompressed(na);
524
/* Just a precaution. */
525
if (cb_pos + 2 <= cb_end)
527
ntfs_log_debug("Successfully read the compression block.\n");
528
if (ntfs_decompress(dest, cb_size, cb, cb_size) < 0) {
537
to_read = min(count, cb_size - ofs);
538
memcpy(b, dest + ofs, to_read);
541
b = (u8*)b + to_read;
544
/* Do we have more work to do? */
547
/* We no longer need the buffers. */
550
/* Return number of bytes read. */
551
return total + total2;