1
/*****************************************************************************
3
Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15
Place, Suite 330, Boston, MA 02111-1307 USA
17
*****************************************************************************/
19
/**************************************************//**
21
Compressed page interface
23
Created June 2005 by Marko Makela
24
*******************************************************/
29
# include "page0zip.ic"
32
#include "page0page.h"
35
#include "dict0dict.h"
37
#include "page0types.h"
40
#ifndef UNIV_HOTBACKUP
43
# include "dict0boot.h"
44
# include "lock0lock.h"
45
#else /* !UNIV_HOTBACKUP */
46
# define lock_move_reorganize_page(block, temp_block) ((void) 0)
47
# define buf_LRU_stat_inc_unzip() ((void) 0)
48
#endif /* !UNIV_HOTBACKUP */
50
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
51
UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
53
/* Please refer to ../include/page0zip.ic for a description of the
54
compressed page format. */
56
/* The infimum and supremum records are omitted from the compressed page.
57
On compress, we compare that the records are there, and on uncompress we
58
restore the records. */
59
/** Extra bytes of an infimum record */
60
static const byte infimum_extra[] = {
61
0x01, /* info_bits=0, n_owned=1 */
62
0x00, 0x02 /* heap_no=0, status=2 */
63
/* ?, ? */ /* next=(first user rec, or supremum) */
65
/** Data bytes of an infimum record */
66
static const byte infimum_data[] = {
67
0x69, 0x6e, 0x66, 0x69,
68
0x6d, 0x75, 0x6d, 0x00 /* "infimum\0" */
70
/** Extra bytes and data bytes of a supremum record */
71
static const byte supremum_extra_data[] = {
72
/* 0x0?, */ /* info_bits=0, n_owned=1..8 */
73
0x00, 0x0b, /* heap_no=1, status=3 */
74
0x00, 0x00, /* next=0 */
75
0x73, 0x75, 0x70, 0x72,
76
0x65, 0x6d, 0x75, 0x6d /* "supremum" */
79
/** Assert that a block of memory is filled with zero bytes.
80
Compare at most sizeof(field_ref_zero) bytes.
81
@param b in: memory block
82
@param s in: size of the memory block, in bytes */
83
#define ASSERT_ZERO(b, s) \
84
ut_ad(!memcmp(b, field_ref_zero, ut_min(s, sizeof field_ref_zero)))
85
/** Assert that a BLOB pointer is filled with zero bytes.
86
@param b in: BLOB pointer */
87
#define ASSERT_ZERO_BLOB(b) \
88
ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))
90
/* Enable some extra debugging output. This code can be enabled
91
independently of any UNIV_ debugging conditions. */
92
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
94
__attribute__((format (printf, 1, 2)))
95
/**********************************************************************//**
96
Report a failure to decompress or compress.
97
@return number of characters printed */
102
const char* fmt, /*!< in: printf(3) format string */
103
...) /*!< in: arguments corresponding to fmt */
108
ut_print_timestamp(stderr);
109
fputs(" InnoDB: ", stderr);
111
res = vfprintf(stderr, fmt, ap);
116
/** Wrapper for page_zip_fail_func()
117
@param fmt_args in: printf(3) format string and arguments */
118
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
119
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
120
/** Dummy wrapper for page_zip_fail_func()
121
@param fmt_args ignored: printf(3) format string and arguments */
122
# define page_zip_fail(fmt_args) /* empty */
123
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
125
#ifndef UNIV_HOTBACKUP
126
/**********************************************************************//**
127
Determine the guaranteed free space on an empty page.
128
@return minimum payload size on the page */
133
ulint n_fields, /*!< in: number of columns in the index */
134
ulint zip_size) /*!< in: compressed page size in bytes */
137
/* subtract the page header and the longest
138
uncompressed data needed for one record */
140
+ PAGE_ZIP_DIR_SLOT_SIZE
141
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
142
+ 1/* encoded heap_no==2 in page_zip_write_rec() */
143
+ 1/* end of modification log */
144
- REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
145
/* subtract the space for page_zip_fields_encode() */
146
- compressBound(2 * (n_fields + 1));
147
return(size > 0 ? (ulint) size : 0);
149
#endif /* !UNIV_HOTBACKUP */
151
/*************************************************************//**
152
Gets the size of the compressed page trailer (the dense page directory),
153
including deleted records (the free list).
154
@return length of dense page directory, in bytes */
159
const page_zip_des_t* page_zip) /*!< in: compressed page */
161
/* Exclude the page infimum and supremum from the record count. */
162
ulint size = PAGE_ZIP_DIR_SLOT_SIZE
163
* (page_dir_get_n_heap(page_zip->data)
164
- PAGE_HEAP_NO_USER_LOW);
168
/*************************************************************//**
169
Gets the size of the compressed page trailer (the dense page directory),
170
only including user records (excluding the free list).
171
@return length of dense page directory comprising existing records, in bytes */
174
page_zip_dir_user_size(
175
/*===================*/
176
const page_zip_des_t* page_zip) /*!< in: compressed page */
178
ulint size = PAGE_ZIP_DIR_SLOT_SIZE
179
* page_get_n_recs(page_zip->data);
180
ut_ad(size <= page_zip_dir_size(page_zip));
184
/*************************************************************//**
185
Find the slot of the given record in the dense page directory.
186
@return dense directory slot, or NULL if record not found */
189
page_zip_dir_find_low(
190
/*==================*/
191
byte* slot, /*!< in: start of records */
192
byte* end, /*!< in: end of records */
193
ulint offset) /*!< in: offset of user record */
197
for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
198
if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
207
/*************************************************************//**
208
Find the slot of the given non-free record in the dense page directory.
209
@return dense directory slot, or NULL if record not found */
214
page_zip_des_t* page_zip, /*!< in: compressed page */
215
ulint offset) /*!< in: offset of user record */
217
byte* end = page_zip->data + page_zip_get_size(page_zip);
219
ut_ad(page_zip_simple_validate(page_zip));
221
return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
226
/*************************************************************//**
227
Find the slot of the given free record in the dense page directory.
228
@return dense directory slot, or NULL if record not found */
231
page_zip_dir_find_free(
232
/*===================*/
233
page_zip_des_t* page_zip, /*!< in: compressed page */
234
ulint offset) /*!< in: offset of user record */
236
byte* end = page_zip->data + page_zip_get_size(page_zip);
238
ut_ad(page_zip_simple_validate(page_zip));
240
return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
241
end - page_zip_dir_user_size(page_zip),
245
/*************************************************************//**
246
Read a given slot in the dense page directory.
247
@return record offset on the uncompressed page, possibly ORed with
248
PAGE_ZIP_DIR_SLOT_DEL or PAGE_ZIP_DIR_SLOT_OWNED */
253
const page_zip_des_t* page_zip, /*!< in: compressed page */
254
ulint slot) /*!< in: slot
255
(0=first user record) */
257
ut_ad(page_zip_simple_validate(page_zip));
258
ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
259
return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
260
- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
263
#ifndef UNIV_HOTBACKUP
264
/**********************************************************************//**
265
Write a log record of compressing an index page. */
268
page_zip_compress_write_log(
269
/*========================*/
270
const page_zip_des_t* page_zip,/*!< in: compressed page */
271
const page_t* page, /*!< in: uncompressed page */
272
dict_index_t* index, /*!< in: index of the B-tree node */
273
mtr_t* mtr) /*!< in: mini-transaction */
278
ut_ad(!dict_index_is_ibuf(index));
280
log_ptr = mlog_open(mtr, 11 + 2 + 2);
287
/* Read the number of user records. */
288
trailer_size = page_dir_get_n_heap(page_zip->data)
289
- PAGE_HEAP_NO_USER_LOW;
290
/* Multiply by the number of uncompressed bytes stored per record */
291
if (!page_is_leaf(page)) {
292
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
293
} else if (dict_index_is_clust(index)) {
294
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
295
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
297
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
299
/* Add the space occupied by BLOB pointers. */
300
trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
301
ut_a(page_zip->m_end > PAGE_DATA);
302
#if FIL_PAGE_DATA > PAGE_DATA
303
# error "FIL_PAGE_DATA > PAGE_DATA"
305
ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
307
log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
308
MLOG_ZIP_PAGE_COMPRESS,
310
mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
312
mach_write_to_2(log_ptr, trailer_size);
314
mlog_close(mtr, log_ptr);
316
/* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
317
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
318
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
319
/* Write most of the page header, the compressed stream and
320
the modification log. */
321
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
322
page_zip->m_end - FIL_PAGE_TYPE);
323
/* Write the uncompressed trailer of the compressed page. */
324
mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
325
- trailer_size, trailer_size);
327
#endif /* !UNIV_HOTBACKUP */
329
/******************************************************//**
330
Determine how many externally stored columns are contained
331
in existing records with smaller heap_no than rec. */
334
page_zip_get_n_prev_extern(
335
/*=======================*/
336
const page_zip_des_t* page_zip,/*!< in: dense page directory on
338
const rec_t* rec, /*!< in: compact physical record
339
on a B-tree leaf page */
340
dict_index_t* index) /*!< in: record descriptor */
342
const page_t* page = page_align(rec);
347
ulint n_recs = page_get_n_recs(page_zip->data);
349
ut_ad(page_is_leaf(page));
350
ut_ad(page_is_comp(page));
351
ut_ad(dict_table_is_comp(index->table));
352
ut_ad(dict_index_is_clust(index));
353
ut_ad(!dict_index_is_ibuf(index));
355
heap_no = rec_get_heap_no_new(rec);
356
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
357
left = heap_no - PAGE_HEAP_NO_USER_LOW;
358
if (UNIV_UNLIKELY(!left)) {
362
for (i = 0; i < n_recs; i++) {
363
const rec_t* r = page + (page_zip_dir_get(page_zip, i)
364
& PAGE_ZIP_DIR_SLOT_MASK);
366
if (rec_get_heap_no_new(r) < heap_no) {
367
n_ext += rec_get_n_extern_new(r, index,
378
/**********************************************************************//**
379
Encode the length of a fixed-length column.
380
@return buf + length of encoded val */
383
page_zip_fixed_field_encode(
384
/*========================*/
385
byte* buf, /*!< in: pointer to buffer where to write */
386
ulint val) /*!< in: value to write */
390
if (UNIV_LIKELY(val < 126)) {
392
0 = nullable variable field of at most 255 bytes length;
393
1 = not null variable field of at most 255 bytes length;
394
126 = nullable variable field with maximum length >255;
395
127 = not null variable field with maximum length >255
399
*buf++ = (byte) (0x80 | val >> 8);
406
/**********************************************************************//**
407
Write the index information for the compressed page.
408
@return used size of buf */
411
page_zip_fields_encode(
412
/*===================*/
413
ulint n, /*!< in: number of fields to compress */
414
dict_index_t* index, /*!< in: index comprising at least n fields */
415
ulint trx_id_pos,/*!< in: position of the trx_id column
416
in the index, or ULINT_UNDEFINED if
417
this is a non-leaf page */
418
byte* buf) /*!< out: buffer of (n + 1) * 2 bytes */
420
const byte* buf_start = buf;
423
ulint trx_id_col = 0;
424
/* sum of lengths of preceding non-nullable fixed fields, or 0 */
427
ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);
429
for (i = col = 0; i < n; i++) {
430
dict_field_t* field = dict_index_get_nth_field(index, i);
433
if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
434
val = 1; /* set the "not nullable" flag */
436
val = 0; /* nullable field */
439
if (!field->fixed_len) {
440
/* variable-length field */
441
const dict_col_t* column
442
= dict_field_get_col(field);
444
if (UNIV_UNLIKELY(column->len > 255)
445
|| UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
446
val |= 0x7e; /* max > 255 bytes */
450
/* write out the length of any
451
preceding non-nullable fields */
452
buf = page_zip_fixed_field_encode(
453
buf, fixed_sum << 1 | 1);
461
/* fixed-length non-nullable field */
463
if (fixed_sum && UNIV_UNLIKELY
464
(fixed_sum + field->fixed_len
465
> DICT_MAX_INDEX_COL_LEN)) {
466
/* Write out the length of the
467
preceding non-nullable fields,
468
to avoid exceeding the maximum
469
length of a fixed-length column. */
470
buf = page_zip_fixed_field_encode(
471
buf, fixed_sum << 1 | 1);
476
if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
478
/* Write out the length of any
479
preceding non-nullable fields,
480
and start a new trx_id column. */
481
buf = page_zip_fixed_field_encode(
482
buf, fixed_sum << 1 | 1);
487
fixed_sum = field->fixed_len;
490
fixed_sum += field->fixed_len;
493
/* fixed-length nullable field */
496
/* write out the length of any
497
preceding non-nullable fields */
498
buf = page_zip_fixed_field_encode(
499
buf, fixed_sum << 1 | 1);
504
buf = page_zip_fixed_field_encode(
505
buf, field->fixed_len << 1);
511
/* Write out the lengths of last fixed-length columns. */
512
buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
515
if (trx_id_pos != ULINT_UNDEFINED) {
516
/* Write out the position of the trx_id column */
519
/* Write out the number of nullable fields */
520
i = index->n_nullable;
526
*buf++ = (byte) (0x80 | i >> 8);
530
ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
531
return((ulint) (buf - buf_start));
534
/**********************************************************************//**
535
Populate the dense page directory from the sparse directory. */
540
const page_t* page, /*!< in: compact page */
541
byte* buf, /*!< in: pointer to dense page directory[-1];
542
out: dense directory on compressed page */
543
const rec_t** recs) /*!< in: pointer to a zero-filled array, or NULL;
544
out: dense page directory sorted by ascending
545
address (and heap_no) */
557
if (page_is_leaf(page)) {
558
status = REC_STATUS_ORDINARY;
560
status = REC_STATUS_NODE_PTR;
562
(mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
563
min_mark = REC_INFO_MIN_REC_FLAG;
567
n_heap = page_dir_get_n_heap(page);
569
/* Traverse the list of stored records in the collation order,
570
starting from the first user record. */
572
rec = page + PAGE_NEW_INFIMUM, TRUE;
578
offs = rec_get_next_offs(rec, TRUE);
579
if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
583
heap_no = rec_get_heap_no_new(rec);
584
ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
585
ut_a(heap_no < n_heap);
586
ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
587
ut_a(offs >= PAGE_ZIP_START);
588
#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
589
# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
591
#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1
592
# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1"
594
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
595
offs |= PAGE_ZIP_DIR_SLOT_OWNED;
598
info_bits = rec_get_info_bits(rec, TRUE);
599
if (UNIV_UNLIKELY(info_bits & REC_INFO_DELETED_FLAG)) {
600
info_bits &= ~REC_INFO_DELETED_FLAG;
601
offs |= PAGE_ZIP_DIR_SLOT_DEL;
603
ut_a(info_bits == min_mark);
604
/* Only the smallest user record can have
605
REC_INFO_MIN_REC_FLAG set. */
608
mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
610
if (UNIV_LIKELY_NULL(recs)) {
611
/* Ensure that each heap_no occurs at most once. */
612
ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
613
/* exclude infimum and supremum */
614
recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
617
ut_a(rec_get_status(rec) == status);
620
offs = page_header_get_field(page, PAGE_FREE);
622
/* Traverse the free list (of deleted records). */
624
ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
627
heap_no = rec_get_heap_no_new(rec);
628
ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
629
ut_a(heap_no < n_heap);
631
ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
632
ut_a(rec_get_status(rec) == status);
634
mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
636
if (UNIV_LIKELY_NULL(recs)) {
637
/* Ensure that each heap_no occurs at most once. */
638
ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
639
/* exclude infimum and supremum */
640
recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
643
offs = rec_get_next_offs(rec, TRUE);
646
/* Ensure that each heap_no occurs at least once. */
647
ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
650
/**********************************************************************//**
651
Allocate memory for zlib. */
656
void* opaque, /*!< in/out: memory heap */
657
uInt items, /*!< in: number of items to allocate */
658
uInt size) /*!< in: size of an item in bytes */
660
return(mem_heap_alloc(opaque, items * size));
663
/**********************************************************************//**
664
Deallocate memory for zlib. */
669
void* opaque __attribute__((unused)), /*!< in: memory heap */
670
void* address __attribute__((unused)))/*!< in: object to free */
674
/**********************************************************************//**
675
Configure the zlib allocator to use the given memory heap. */
680
void* stream, /*!< in/out: zlib stream */
681
mem_heap_t* heap) /*!< in: memory heap to use */
683
z_stream* strm = stream;
685
strm->zalloc = page_zip_malloc;
686
strm->zfree = page_zip_free;
690
#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
691
/** Symbol for enabling compression and decompression diagnostics */
692
# define PAGE_ZIP_COMPRESS_DBG
695
#ifdef PAGE_ZIP_COMPRESS_DBG
696
/** Set this variable in a debugger to enable
697
excessive logging in page_zip_compress(). */
698
UNIV_INTERN ibool page_zip_compress_dbg;
699
/** Set this variable in a debugger to enable
700
binary logging of the data passed to deflate().
701
When this variable is nonzero, it will act
702
as a log file name generator. */
703
UNIV_INTERN unsigned page_zip_compress_log;
705
/**********************************************************************//**
706
Wrapper for deflate(). Log the operation if page_zip_compress_dbg is set.
707
@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
710
page_zip_compress_deflate(
711
/*======================*/
712
FILE* logfile,/*!< in: log file, or NULL */
713
z_streamp strm, /*!< in/out: compressed stream for deflate() */
714
int flush) /*!< in: deflate() flushing method */
717
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
718
ut_print_buf(stderr, strm->next_in, strm->avail_in);
720
if (UNIV_LIKELY_NULL(logfile)) {
721
fwrite(strm->next_in, 1, strm->avail_in, logfile);
723
status = deflate(strm, flush);
724
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
725
fprintf(stderr, " -> %d\n", status);
730
/* Redefine deflate(). */
732
/** Debug wrapper for the zlib compression routine deflate().
733
Log the operation if page_zip_compress_dbg is set.
734
@param strm in/out: compressed stream
735
@param flush in: flushing method
736
@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
737
# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
738
/** Declaration of the logfile parameter */
739
# define FILE_LOGFILE FILE* logfile,
740
/** The logfile parameter */
741
# define LOGFILE logfile,
742
#else /* PAGE_ZIP_COMPRESS_DBG */
743
/** Empty declaration of the logfile parameter */
744
# define FILE_LOGFILE
745
/** Missing logfile parameter */
747
#endif /* PAGE_ZIP_COMPRESS_DBG */
749
/**********************************************************************//**
750
Compress the records of a node pointer page.
751
@return Z_OK, or a zlib error code */
754
page_zip_compress_node_ptrs(
755
/*========================*/
757
z_stream* c_stream, /*!< in/out: compressed page stream */
758
const rec_t** recs, /*!< in: dense page directory
760
ulint n_dense, /*!< in: size of recs[] */
761
dict_index_t* index, /*!< in: the index of the page */
762
byte* storage, /*!< in: end of dense page directory */
763
mem_heap_t* heap) /*!< in: temporary memory heap */
766
ulint* offsets = NULL;
769
const rec_t* rec = *recs++;
771
offsets = rec_get_offsets(rec, index, offsets,
772
ULINT_UNDEFINED, &heap);
773
/* Only leaf nodes may contain externally stored columns. */
774
ut_ad(!rec_offs_any_extern(offsets));
776
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
777
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
778
rec_offs_extra_size(offsets));
780
/* Compress the extra bytes. */
781
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
784
if (c_stream->avail_in) {
785
err = deflate(c_stream, Z_NO_FLUSH);
786
if (UNIV_UNLIKELY(err != Z_OK)) {
790
ut_ad(!c_stream->avail_in);
792
/* Compress the data bytes, except node_ptr. */
793
c_stream->next_in = (byte*) rec;
794
c_stream->avail_in = rec_offs_data_size(offsets)
796
ut_ad(c_stream->avail_in);
798
err = deflate(c_stream, Z_NO_FLUSH);
799
if (UNIV_UNLIKELY(err != Z_OK)) {
803
ut_ad(!c_stream->avail_in);
805
memcpy(storage - REC_NODE_PTR_SIZE
806
* (rec_get_heap_no_new(rec) - 1),
807
c_stream->next_in, REC_NODE_PTR_SIZE);
808
c_stream->next_in += REC_NODE_PTR_SIZE;
814
/**********************************************************************//**
815
Compress the records of a leaf node of a secondary index.
816
@return Z_OK, or a zlib error code */
819
page_zip_compress_sec(
820
/*==================*/
822
z_stream* c_stream, /*!< in/out: compressed page stream */
823
const rec_t** recs, /*!< in: dense page directory
825
ulint n_dense) /*!< in: size of recs[] */
832
const rec_t* rec = *recs++;
834
/* Compress everything up to this record. */
835
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
838
if (UNIV_LIKELY(c_stream->avail_in)) {
839
UNIV_MEM_ASSERT_RW(c_stream->next_in,
841
err = deflate(c_stream, Z_NO_FLUSH);
842
if (UNIV_UNLIKELY(err != Z_OK)) {
847
ut_ad(!c_stream->avail_in);
848
ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
850
/* Skip the REC_N_NEW_EXTRA_BYTES. */
852
c_stream->next_in = (byte*) rec;
858
/**********************************************************************//**
859
Compress a record of a leaf node of a clustered index that contains
860
externally stored columns.
861
@return Z_OK, or a zlib error code */
864
page_zip_compress_clust_ext(
865
/*========================*/
867
z_stream* c_stream, /*!< in/out: compressed page stream */
868
const rec_t* rec, /*!< in: record */
869
const ulint* offsets, /*!< in: rec_get_offsets(rec) */
870
ulint trx_id_col, /*!< in: position of DB_TRX_ID */
871
byte* deleted, /*!< in: dense directory entry pointing
872
to the head of the free list */
873
byte* storage, /*!< in: end of dense page directory */
874
byte** externs, /*!< in/out: pointer to the next
875
available BLOB pointer */
876
ulint* n_blobs) /*!< in/out: number of
877
externally stored columns */
882
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
883
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
884
rec_offs_extra_size(offsets));
886
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
890
if (UNIV_UNLIKELY(i == trx_id_col)) {
891
ut_ad(!rec_offs_nth_extern(offsets, i));
892
/* Store trx_id and roll_ptr
893
in uncompressed form. */
894
src = rec_get_nth_field(rec, offsets, i, &len);
895
ut_ad(src + DATA_TRX_ID_LEN
896
== rec_get_nth_field(rec, offsets,
898
ut_ad(len == DATA_ROLL_PTR_LEN);
900
/* Compress any preceding bytes. */
902
= src - c_stream->next_in;
904
if (c_stream->avail_in) {
905
err = deflate(c_stream, Z_NO_FLUSH);
906
if (UNIV_UNLIKELY(err != Z_OK)) {
912
ut_ad(!c_stream->avail_in);
913
ut_ad(c_stream->next_in == src);
916
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
917
* (rec_get_heap_no_new(rec) - 1),
919
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
922
+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
924
/* Skip also roll_ptr */
926
} else if (rec_offs_nth_extern(offsets, i)) {
927
src = rec_get_nth_field(rec, offsets, i, &len);
928
ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
929
src += len - BTR_EXTERN_FIELD_REF_SIZE;
931
c_stream->avail_in = src
933
if (UNIV_LIKELY(c_stream->avail_in)) {
934
err = deflate(c_stream, Z_NO_FLUSH);
935
if (UNIV_UNLIKELY(err != Z_OK)) {
941
ut_ad(!c_stream->avail_in);
942
ut_ad(c_stream->next_in == src);
944
/* Reserve space for the data at
945
the end of the space reserved for
946
the compressed data and the page
951
<= BTR_EXTERN_FIELD_REF_SIZE)) {
956
ut_ad(*externs == c_stream->next_out
957
+ c_stream->avail_out
958
+ 1/* end of modif. log */);
961
+= BTR_EXTERN_FIELD_REF_SIZE;
963
/* Skip deleted records. */
965
(page_zip_dir_find_low(
967
page_offset(rec)))) {
973
-= BTR_EXTERN_FIELD_REF_SIZE;
974
*externs -= BTR_EXTERN_FIELD_REF_SIZE;
976
/* Copy the BLOB pointer */
977
memcpy(*externs, c_stream->next_in
978
- BTR_EXTERN_FIELD_REF_SIZE,
979
BTR_EXTERN_FIELD_REF_SIZE);
986
/**********************************************************************//**
987
Compress the records of a leaf node of a clustered index.
988
@return Z_OK, or a zlib error code */
991
page_zip_compress_clust(
992
/*====================*/
994
z_stream* c_stream, /*!< in/out: compressed page stream */
995
const rec_t** recs, /*!< in: dense page directory
997
ulint n_dense, /*!< in: size of recs[] */
998
dict_index_t* index, /*!< in: the index of the page */
999
ulint* n_blobs, /*!< in: 0; out: number of
1000
externally stored columns */
1001
ulint trx_id_col, /*!< in: index of the trx_id column */
1002
byte* deleted, /*!< in: dense directory entry pointing
1003
to the head of the free list */
1004
byte* storage, /*!< in: end of dense page directory */
1005
mem_heap_t* heap) /*!< in: temporary memory heap */
1008
ulint* offsets = NULL;
1009
/* BTR_EXTERN_FIELD_REF storage */
1010
byte* externs = storage - n_dense
1011
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1013
ut_ad(*n_blobs == 0);
1016
const rec_t* rec = *recs++;
1018
offsets = rec_get_offsets(rec, index, offsets,
1019
ULINT_UNDEFINED, &heap);
1020
ut_ad(rec_offs_n_fields(offsets)
1021
== dict_index_get_n_fields(index));
1022
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1023
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1024
rec_offs_extra_size(offsets));
1026
/* Compress the extra bytes. */
1027
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
1028
- c_stream->next_in;
1030
if (c_stream->avail_in) {
1031
err = deflate(c_stream, Z_NO_FLUSH);
1032
if (UNIV_UNLIKELY(err != Z_OK)) {
1037
ut_ad(!c_stream->avail_in);
1038
ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
1040
/* Compress the data bytes. */
1042
c_stream->next_in = (byte*) rec;
1044
/* Check if there are any externally stored columns.
1045
For each externally stored column, store the
1046
BTR_EXTERN_FIELD_REF separately. */
1047
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1048
ut_ad(dict_index_is_clust(index));
1050
err = page_zip_compress_clust_ext(
1052
c_stream, rec, offsets, trx_id_col,
1053
deleted, storage, &externs, n_blobs);
1055
if (UNIV_UNLIKELY(err != Z_OK)) {
1063
/* Store trx_id and roll_ptr in uncompressed form. */
1064
src = rec_get_nth_field(rec, offsets,
1066
ut_ad(src + DATA_TRX_ID_LEN
1067
== rec_get_nth_field(rec, offsets,
1068
trx_id_col + 1, &len));
1069
ut_ad(len == DATA_ROLL_PTR_LEN);
1070
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1071
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1072
rec_offs_extra_size(offsets));
1074
/* Compress any preceding bytes. */
1075
c_stream->avail_in = src - c_stream->next_in;
1077
if (c_stream->avail_in) {
1078
err = deflate(c_stream, Z_NO_FLUSH);
1079
if (UNIV_UNLIKELY(err != Z_OK)) {
1085
ut_ad(!c_stream->avail_in);
1086
ut_ad(c_stream->next_in == src);
1089
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
1090
* (rec_get_heap_no_new(rec) - 1),
1092
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1095
+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1097
/* Skip also roll_ptr */
1098
ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
1101
/* Compress the last bytes of the record. */
1102
c_stream->avail_in = rec + rec_offs_data_size(offsets)
1103
- c_stream->next_in;
1105
if (c_stream->avail_in) {
1106
err = deflate(c_stream, Z_NO_FLUSH);
1107
if (UNIV_UNLIKELY(err != Z_OK)) {
1112
ut_ad(!c_stream->avail_in);
1113
} while (--n_dense);
1119
/**********************************************************************//**
1121
@return TRUE on success, FALSE on failure; page_zip will be left
1122
intact on failure. */
1127
page_zip_des_t* page_zip,/*!< in: size; out: data, n_blobs,
1128
m_start, m_end, m_nonempty */
1129
const page_t* page, /*!< in: uncompressed page */
1130
dict_index_t* index, /*!< in: index of the B-tree node */
1131
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
1135
ulint n_fields;/* number of index fields needed */
1136
byte* fields; /*!< index field information */
1137
byte* buf; /*!< compressed payload of the page */
1138
byte* buf_end;/* end of buf */
1140
ulint slot_size;/* amount of uncompressed bytes per record */
1141
const rec_t** recs; /*!< dense page directory, sorted by address */
1144
ulint* offsets = NULL;
1146
byte* storage;/* storage of uncompressed columns */
1147
ullint usec = ut_time_us(NULL);
1148
#ifdef PAGE_ZIP_COMPRESS_DBG
1149
FILE* logfile = NULL;
1152
ut_a(page_is_comp(page));
1153
ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
1154
ut_ad(page_simple_validate_new((page_t*) page));
1155
ut_ad(page_zip_simple_validate(page_zip));
1156
ut_ad(dict_table_is_comp(index->table));
1157
ut_ad(!dict_index_is_ibuf(index));
1159
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
1161
/* Check the data that will be omitted. */
1162
ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
1163
infimum_extra, sizeof infimum_extra));
1164
ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
1165
infimum_data, sizeof infimum_data));
1166
ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
1167
/* info_bits == 0, n_owned <= max */
1168
<= PAGE_DIR_SLOT_MAX_N_OWNED);
1169
ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
1170
supremum_extra_data, sizeof supremum_extra_data));
1172
if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
1173
ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
1174
== PAGE_NEW_SUPREMUM);
1177
if (page_is_leaf(page)) {
1178
n_fields = dict_index_get_n_fields(index);
1180
n_fields = dict_index_get_n_unique_in_tree(index);
1183
/* The dense directory excludes the infimum and supremum records. */
1184
n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1185
#ifdef PAGE_ZIP_COMPRESS_DBG
1186
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
1187
fprintf(stderr, "compress %p %p %lu %lu %lu\n",
1188
(void*) page_zip, (void*) page,
1192
if (UNIV_UNLIKELY(page_zip_compress_log)) {
1193
/* Create a log file for every compression attempt. */
1194
char logfilename[9];
1195
ut_snprintf(logfilename, sizeof logfilename,
1196
"%08x", page_zip_compress_log++);
1197
logfile = fopen(logfilename, "wb");
1200
/* Write the uncompressed page to the log. */
1201
fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
1202
/* Record the compressed size as zero.
1203
This will be overwritten at successful exit. */
1210
#endif /* PAGE_ZIP_COMPRESS_DBG */
1211
page_zip_stat[page_zip->ssize - 1].compressed++;
1213
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
1214
>= page_zip_get_size(page_zip))) {
1219
heap = mem_heap_create(page_zip_get_size(page_zip)
1220
+ n_fields * (2 + sizeof *offsets)
1221
+ n_dense * ((sizeof *recs)
1222
- PAGE_ZIP_DIR_SLOT_SIZE)
1223
+ UNIV_PAGE_SIZE * 4
1224
+ (512 << MAX_MEM_LEVEL));
1226
recs = mem_heap_zalloc(heap, n_dense * sizeof *recs);
1228
fields = mem_heap_alloc(heap, (n_fields + 1) * 2);
1230
buf = mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA);
1231
buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
1233
/* Compress the data payload. */
1234
page_zip_set_alloc(&c_stream, heap);
1236
err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
1237
Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
1238
MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
1241
c_stream.next_out = buf;
1242
/* Subtract the space reserved for uncompressed data. */
1243
/* Page header and the end marker of the modification log */
1244
c_stream.avail_out = buf_end - buf - 1;
1245
/* Dense page directory and uncompressed columns, if any */
1246
if (page_is_leaf(page)) {
1247
if (dict_index_is_clust(index)) {
1248
trx_id_col = dict_index_get_sys_col_pos(
1249
index, DATA_TRX_ID);
1250
ut_ad(trx_id_col > 0);
1251
ut_ad(trx_id_col != ULINT_UNDEFINED);
1253
slot_size = PAGE_ZIP_DIR_SLOT_SIZE
1254
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1256
/* Signal the absence of trx_id
1257
in page_zip_fields_encode() */
1258
ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
1259
== ULINT_UNDEFINED);
1261
slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
1264
slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
1265
trx_id_col = ULINT_UNDEFINED;
1268
if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
1269
+ 6/* sizeof(zlib header and footer) */)) {
1273
c_stream.avail_out -= n_dense * slot_size;
1274
c_stream.avail_in = page_zip_fields_encode(n_fields, index,
1275
trx_id_col, fields);
1276
c_stream.next_in = fields;
1277
if (UNIV_LIKELY(!trx_id_col)) {
1278
trx_id_col = ULINT_UNDEFINED;
1281
UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1282
err = deflate(&c_stream, Z_FULL_FLUSH);
1287
ut_ad(!c_stream.avail_in);
1289
page_zip_dir_encode(page, buf_end, recs);
1291
c_stream.next_in = (byte*) page + PAGE_ZIP_START;
1293
storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
1295
/* Compress the records in heap_no order. */
1296
if (UNIV_UNLIKELY(!n_dense)) {
1297
} else if (!page_is_leaf(page)) {
1298
/* This is a node pointer page. */
1299
err = page_zip_compress_node_ptrs(LOGFILE
1300
&c_stream, recs, n_dense,
1301
index, storage, heap);
1302
if (UNIV_UNLIKELY(err != Z_OK)) {
1305
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
1306
/* This is a leaf page in a secondary index. */
1307
err = page_zip_compress_sec(LOGFILE
1308
&c_stream, recs, n_dense);
1309
if (UNIV_UNLIKELY(err != Z_OK)) {
1313
/* This is a leaf page in a clustered index. */
1314
err = page_zip_compress_clust(LOGFILE
1315
&c_stream, recs, n_dense,
1316
index, &n_blobs, trx_id_col,
1317
buf_end - PAGE_ZIP_DIR_SLOT_SIZE
1318
* page_get_n_recs(page),
1320
if (UNIV_UNLIKELY(err != Z_OK)) {
1325
/* Finish the compression. */
1326
ut_ad(!c_stream.avail_in);
1327
/* Compress any trailing garbage, in case the last record was
1328
allocated from an originally longer space on the free list,
1329
or the data of the last record from page_zip_compress_sec(). */
1331
= page_header_get_field(page, PAGE_HEAP_TOP)
1332
- (c_stream.next_in - page);
1333
ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
1335
UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1336
err = deflate(&c_stream, Z_FINISH);
1338
if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
1340
deflateEnd(&c_stream);
1341
mem_heap_free(heap);
1343
#ifdef PAGE_ZIP_COMPRESS_DBG
1347
#endif /* PAGE_ZIP_COMPRESS_DBG */
1348
page_zip_stat[page_zip->ssize - 1].compressed_usec
1349
+= ut_time_us(NULL) - usec;
1353
err = deflateEnd(&c_stream);
1356
ut_ad(buf + c_stream.total_out == c_stream.next_out);
1357
ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
1359
/* Valgrind believes that zlib does not initialize some bits
1360
in the last 7 or 8 bytes of the stream. Make Valgrind happy. */
1361
UNIV_MEM_VALID(buf, c_stream.total_out);
1363
/* Zero out the area reserved for the modification log.
1364
Space for the end marker of the modification log is not
1365
included in avail_out. */
1366
memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
1370
#endif /* UNIV_DEBUG */
1371
page_zip->m_end = PAGE_DATA + c_stream.total_out;
1372
page_zip->m_nonempty = FALSE;
1373
page_zip->n_blobs = n_blobs;
1374
/* Copy those header fields that will not be written
1375
in buf_flush_init_for_writing() */
1376
memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
1377
FIL_PAGE_LSN - FIL_PAGE_PREV);
1378
memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
1379
memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
1380
PAGE_DATA - FIL_PAGE_DATA);
1381
/* Copy the rest of the compressed page */
1382
memcpy(page_zip->data + PAGE_DATA, buf,
1383
page_zip_get_size(page_zip) - PAGE_DATA);
1384
mem_heap_free(heap);
1385
#ifdef UNIV_ZIP_DEBUG
1386
ut_a(page_zip_validate(page_zip, page));
1387
#endif /* UNIV_ZIP_DEBUG */
1390
#ifndef UNIV_HOTBACKUP
1391
page_zip_compress_write_log(page_zip, page, index, mtr);
1392
#endif /* !UNIV_HOTBACKUP */
1395
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
1397
#ifdef PAGE_ZIP_COMPRESS_DBG
1399
/* Record the compressed size of the block. */
1401
mach_write_to_4(sz, c_stream.total_out);
1402
fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
1403
fwrite(sz, 1, sizeof sz, logfile);
1406
#endif /* PAGE_ZIP_COMPRESS_DBG */
1408
page_zip_stat_t* zip_stat
1409
= &page_zip_stat[page_zip->ssize - 1];
1410
zip_stat->compressed_ok++;
1411
zip_stat->compressed_usec += ut_time_us(NULL) - usec;
1417
/**********************************************************************//**
1418
Compare two page directory entries by the address of the record that
each entry points to.
1419
@return nonzero if rec1 > rec2 (compared as addresses), zero otherwise */
1424
const rec_t* rec1, /*!< in: first record pointer to compare */
1425
const rec_t* rec2) /*!< in: second record pointer to compare */
1427
return(rec1 > rec2);
1430
/**********************************************************************//**
1431
Sort the dense page directory by address (heap_no).
The sorting code itself is supplied by the UT_SORT_FUNCTION_BODY macro. */
1436
rec_t** arr, /*!< in/out: dense page directory */
1437
rec_t** aux_arr,/*!< in/out: work area */
1438
ulint low, /*!< in: lower bound of the sorting area, inclusive */
1439
ulint high) /*!< in: upper bound of the sorting area, exclusive */
1441
/* The macro receives this function's own name together with the
array, the work area and the bounds. */
UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
1445
/**********************************************************************//**
1446
Deallocate the index information initialized by page_zip_fields_decode().
Releases index->heap, the autoinc_mutex of index->table, and table->heap. */
1449
page_zip_fields_free(
1450
/*=================*/
1451
dict_index_t* index) /*!< in: dummy index to be freed */
1454
/* Read the table pointer before index->heap is released
(presumably the index object itself lives in that heap; confirm). */
dict_table_t* table = index->table;
1455
mem_heap_free(index->heap);
1456
mutex_free(&(table->autoinc_mutex));
1457
mem_heap_free(table->heap);
1461
/**********************************************************************//**
1462
Read the index information for the compressed page and build a dummy
table and a dummy index (both created under the name "ZIP_DUMMY") that
describe its columns.
1463
@return own: dummy index describing the page, or NULL on error */
1466
page_zip_fields_decode(
1467
/*===================*/
1468
const byte* buf, /*!< in: index information */
1469
const byte* end, /*!< in: end of buf */
1470
ulint* trx_id_col)/*!< in: NULL for non-leaf pages;
1471
for leaf pages, pointer to where to store
1472
the position of the trx_id column */
1478
dict_table_t* table;
1479
dict_index_t* index;
1481
/* Determine the number of fields. */
1482
for (b = buf, n = 0; b < end; n++) {
1484
b++; /* skip the second byte */
1488
n--; /* n_nullable or trx_id */
1490
if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
1492
page_zip_fail(("page_zip_fields_decode: n = %lu\n",
1497
/* The scan above must not have stepped past the end of the buffer. */
if (UNIV_UNLIKELY(b > end)) {
1499
page_zip_fail(("page_zip_fields_decode: %p > %p\n",
1500
(const void*) b, (const void*) end));
1504
table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
1506
index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
1507
DICT_HDR_SPACE, 0, n);
1508
index->table = table;
1510
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1511
index->cached = TRUE;
1513
/* Initialize the fields. */
1514
for (b = buf, i = 0; i < n; i++) {
1520
if (UNIV_UNLIKELY(val & 0x80)) {
1521
/* fixed length > 62 bytes */
1522
/* Two-byte value: the low 7 bits of the first byte are the
high-order bits and the following byte supplies the low 8 bits. */
val = (val & 0x7f) << 8 | *b++;
1524
mtype = DATA_FIXBINARY;
1525
} else if (UNIV_UNLIKELY(val >= 126)) {
1526
/* variable length with max > 255 bytes */
1528
mtype = DATA_BINARY;
1529
} else if (val <= 1) {
1530
/* variable length with max <= 255 bytes */
1532
mtype = DATA_BINARY;
1534
/* fixed length < 62 bytes */
1536
mtype = DATA_FIXBINARY;
1539
/* Add the column to the dummy table; bit 0 of val selects
DATA_NOT_NULL. */
dict_mem_table_add_col(table, NULL, NULL, mtype,
1540
val & 1 ? DATA_NOT_NULL : 0, len);
1541
/* Append that same column (position i of the table) to the
dummy index. */
dict_index_add_col(index, table,
1542
dict_table_get_nth_col(table, i), 0);
1546
/* Same two-byte decoding as used for the field descriptors above. */
if (UNIV_UNLIKELY(val & 0x80)) {
1547
val = (val & 0x7f) << 8 | *b++;
1550
/* Decode the position of the trx_id column. */
1553
val = ULINT_UNDEFINED;
1554
/* A position at or beyond the number of fields is invalid;
the dummy index is released. */
} else if (UNIV_UNLIKELY(val >= n)) {
1555
page_zip_fields_free(index);
1558
/* NOTE(review): presumably reached only when a valid trx_id
position was decoded -- confirm against the full source. */
index->type = DICT_CLUSTERED;
1563
/* Decode the number of nullable fields. */
/* The decoded count may not be smaller than the n_nullable value
already recorded in the dummy index; otherwise it is released. */
1564
if (UNIV_UNLIKELY(index->n_nullable > val)) {
1565
page_zip_fields_free(index);
1568
index->n_nullable = val;
1577
/**********************************************************************//**
1578
Populate the sparse page directory from the dense directory.
Also fills recs[] with pointers to all n_dense records and, when there
is more than one, sorts them with page_zip_dir_sort().
1579
@return TRUE on success, FALSE on failure */
1582
page_zip_dir_decode(
1583
/*================*/
1584
const page_zip_des_t* page_zip,/*!< in: dense page directory on
1586
page_t* page, /*!< in: compact page with valid header;
1587
out: trailer and sparse page directory
1589
rec_t** recs, /*!< out: dense page directory sorted by
1590
ascending address (and heap_no) */
1591
rec_t** recs_aux,/*!< in/out: scratch area */
1592
ulint n_dense)/*!< in: number of user records, and
1593
size of recs[] and recs_aux[] */
1599
n_recs = page_get_n_recs(page);
1601
/* The page header may not claim more records than the dense
directory holds. */
if (UNIV_UNLIKELY(n_recs > n_dense)) {
1602
page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
1603
(ulong) n_recs, (ulong) n_dense));
1607
/* Traverse the list of stored records in the sorting order,
1608
starting from the first user record. */
1610
/* slot starts at the directory slot next to the page trailer and
is moved toward lower addresses each time a slot is written. */
slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
1611
UNIV_PREFETCH_RW(slot);
1613
/* Zero out the page trailer. */
1614
memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);
1616
/* The first sparse slot points to the infimum record. */
mach_write_to_2(slot, PAGE_NEW_INFIMUM);
1617
slot -= PAGE_DIR_SLOT_SIZE;
1618
UNIV_PREFETCH_RW(slot);
1620
/* Initialize the sparse directory and copy the dense directory. */
1621
for (i = 0; i < n_recs; i++) {
1622
ulint offs = page_zip_dir_get(page_zip, i);
1624
/* Only entries flagged PAGE_ZIP_DIR_SLOT_OWNED get a slot in
the sparse directory. */
if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
1625
mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
1626
slot -= PAGE_DIR_SLOT_SIZE;
1627
UNIV_PREFETCH_RW(slot);
1630
/* Reject offsets that lie below
PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES. */
if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
1631
< PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
1632
page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
1633
(unsigned) i, (unsigned) n_recs,
1638
recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
1641
/* The slot written last points to the supremum record. */
mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
1643
const page_dir_slot_t* last_slot = page_dir_get_nth_slot(
1644
page, page_dir_get_n_slots(page) - 1);
1646
/* The slot position reached must agree with the slot count
stored in the page header. */
if (UNIV_UNLIKELY(slot != last_slot)) {
1647
page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
1649
(const void*) last_slot));
1654
/* Copy the rest of the dense directory. */
1655
for (; i < n_dense; i++) {
1656
ulint offs = page_zip_dir_get(page_zip, i);
1658
/* The remaining entries must not carry any flag bits. */
if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1659
page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
1660
(unsigned) i, (unsigned) n_dense,
1665
recs[i] = page + offs;
1668
if (UNIV_LIKELY(n_dense > 1)) {
1669
page_zip_dir_sort(recs, recs_aux, 0, n_dense);
1674
/**********************************************************************//**
1675
Initialize the REC_N_NEW_EXTRA_BYTES of each record.
The next-record offsets and info bits are derived from the entries of
the dense directory of the compressed page.
1676
@return TRUE on success, FALSE on failure */
1679
page_zip_set_extra_bytes(
1680
/*=====================*/
1681
const page_zip_des_t* page_zip,/*!< in: compressed page */
1682
page_t* page, /*!< in/out: uncompressed page */
1683
ulint info_bits)/*!< in: REC_INFO_MIN_REC_FLAG or 0 */
1691
n = page_get_n_recs(page);
1692
/* Start the chain at the infimum record. */
rec = page + PAGE_NEW_INFIMUM;
1694
for (i = 0; i < n; i++) {
1695
offs = page_zip_dir_get(page_zip, i);
1697
if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_DEL)) {
1698
info_bits |= REC_INFO_DELETED_FLAG;
1700
if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
1701
info_bits |= n_owned;
1706
/* Strip the flag bits so that only the record offset remains. */
offs &= PAGE_ZIP_DIR_SLOT_MASK;
1707
if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
1708
+ REC_N_NEW_EXTRA_BYTES)) {
1709
page_zip_fail(("page_zip_set_extra_bytes 1:"
1711
(unsigned) i, (unsigned) n,
1716
/* Link the current record to the record at offs. */
rec_set_next_offs_new(rec, offs);
1718
rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
1722
/* Set the next pointer of the last user record. */
1723
rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
1725
/* Set n_owned of the supremum record. */
1726
page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
1728
/* The dense directory excludes the infimum and supremum records. */
1729
n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1732
if (UNIV_LIKELY(i == n)) {
1736
page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
1737
(unsigned) i, (unsigned) n));
1741
offs = page_zip_dir_get(page_zip, i);
1743
/* Set the extra bytes of deleted records on the free list. */
1745
/* A free-list entry must be a nonzero offset without flag bits. */
if (UNIV_UNLIKELY(!offs)
1746
|| UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1748
page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
1754
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1760
offs = page_zip_dir_get(page_zip, i);
1761
rec_set_next_offs_new(rec, offs);
1764
/* Terminate the free list. */
1765
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1766
rec_set_next_offs_new(rec, 0);
1771
/**********************************************************************//**
1772
Apply the modification log to a record containing externally stored
1773
columns. Do not copy the fields that are stored separately.
Bytes are copied from the log into the record piecewise, stepping over
DB_TRX_ID, DB_ROLL_PTR and each BTR_EXTERN_FIELD_REF in the record.
1774
@return pointer to modification log, or NULL on failure */
1777
page_zip_apply_log_ext(
1778
/*===================*/
1779
rec_t* rec, /*!< in/out: record */
1780
const ulint* offsets, /*!< in: rec_get_offsets(rec) */
1781
ulint trx_id_col, /*!< in: position of DB_TRX_ID */
1782
const byte* data, /*!< in: modification log */
1783
const byte* end) /*!< in: end of modification log */
1787
/* next_out is the position in the record that receives the next
bytes copied from the log. */
byte* next_out = rec;
1789
/* Check if there are any externally stored columns.
1790
For each externally stored column, skip the
1791
BTR_EXTERN_FIELD_REF. */
1793
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
1796
if (UNIV_UNLIKELY(i == trx_id_col)) {
1797
/* Skip trx_id and roll_ptr */
1798
dst = rec_get_nth_field(rec, offsets,
1800
if (UNIV_UNLIKELY(dst - next_out >= end - data)
1802
(len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
1803
|| rec_offs_nth_extern(offsets, i)) {
1804
page_zip_fail(("page_zip_apply_log_ext:"
1806
" %p - %p >= %p - %p\n",
1809
(const void*) next_out,
1811
(const void*) data));
1815
/* Copy the bytes that precede the trx_id field, then advance the
output position past DB_TRX_ID and DB_ROLL_PTR; no log bytes are
consumed for those two columns. */
memcpy(next_out, data, dst - next_out);
1816
data += dst - next_out;
1817
next_out = dst + (DATA_TRX_ID_LEN
1818
+ DATA_ROLL_PTR_LEN);
1819
} else if (rec_offs_nth_extern(offsets, i)) {
1820
dst = rec_get_nth_field(rec, offsets,
1823
>= BTR_EXTERN_FIELD_REF_SIZE);
1825
len += dst - next_out
1826
- BTR_EXTERN_FIELD_REF_SIZE;
1828
if (UNIV_UNLIKELY(data + len >= end)) {
1829
page_zip_fail(("page_zip_apply_log_ext: "
1830
"ext %p+%lu >= %p\n",
1833
(const void*) end));
1837
memcpy(next_out, data, len);
1840
+ BTR_EXTERN_FIELD_REF_SIZE;
1844
/* Copy the last bytes of the record. */
1845
len = rec_get_end(rec, offsets) - next_out;
1846
if (UNIV_UNLIKELY(data + len >= end)) {
1847
page_zip_fail(("page_zip_apply_log_ext: "
1848
"last %p+%lu >= %p\n",
1851
(const void*) end));
1854
memcpy(next_out, data, len);
1860
/**********************************************************************//**
1861
Apply the modification log to an uncompressed page.
1862
Do not copy the fields that are stored separately.
Each log entry starts with a one- or two-byte value that selects the
record in recs[]; the low bit of that value is a flag.
1863
@return pointer to end of modification log, or NULL on failure */
1868
const byte* data, /*!< in: modification log */
1869
ulint size, /*!< in: maximum length of the log, in bytes */
1870
rec_t** recs, /*!< in: dense page directory,
1871
sorted by address (indexed by
1872
heap_no - PAGE_HEAP_NO_USER_LOW) */
1873
ulint n_dense,/*!< in: size of recs[] */
1874
ulint trx_id_col,/*!< in: column number of trx_id in the index,
1875
or ULINT_UNDEFINED if none */
1877
/*!< in: heap_no and status bits for
1878
the next record to uncompress */
1879
dict_index_t* index, /*!< in: index of the page */
1880
ulint* offsets)/*!< in/out: work area for
1881
rec_get_offsets_reverse() */
1883
const byte* const end = data + size;
1892
if (UNIV_UNLIKELY(!val)) {
1896
/* Two-byte value: the low 7 bits of the first byte are the
high-order bits and the following byte supplies the low 8 bits. */
val = (val & 0x7f) << 8 | *data++;
1897
if (UNIV_UNLIKELY(!val)) {
1898
page_zip_fail(("page_zip_apply_log:"
1899
" invalid val %x%x\n",
1900
data[-2], data[-1]));
1904
if (UNIV_UNLIKELY(data >= end)) {
1905
page_zip_fail(("page_zip_apply_log: %p >= %p\n",
1907
(const void*) end));
1910
/* val >> 1 is used below as a 1-based index into recs[], so it
may not exceed n_dense. */
if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
1911
page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
1912
(ulong) val, (ulong) n_dense));
1916
/* Determine the heap number and status bits of the record. */
1917
rec = recs[(val >> 1) - 1];
1919
/* hs combines the heap number (val >> 1) + 1 with the status
bits taken from the low bits of heap_status. */
hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
1920
hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);
1922
/* This may either be an old record that is being
1923
overwritten (updated in place, or allocated from
1924
the free list), or a new record, with the next
1925
available_heap_no. */
1926
if (UNIV_UNLIKELY(hs > heap_status)) {
1927
page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
1928
(ulong) hs, (ulong) heap_status));
1930
} else if (hs == heap_status) {
1931
/* A new record was allocated from the heap. */
1932
if (UNIV_UNLIKELY(val & 1)) {
1933
/* Only existing records may be cleared. */
1934
page_zip_fail(("page_zip_apply_log:"
1935
" attempting to create"
1936
" deleted rec %lu\n",
1940
heap_status += 1 << REC_HEAP_NO_SHIFT;
1943
mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);
1946
/* Clear the data bytes of the record. */
1947
mem_heap_t* heap = NULL;
1949
offs = rec_get_offsets(rec, index, offsets,
1950
ULINT_UNDEFINED, &heap);
1951
memset(rec, 0, rec_offs_data_size(offs));
1953
/* rec_get_offsets() was given &heap; free the heap if one was
created. */
if (UNIV_LIKELY_NULL(heap)) {
1954
mem_heap_free(heap);
1959
#if REC_STATUS_NODE_PTR != TRUE
1960
# error "REC_STATUS_NODE_PTR != TRUE"
1962
rec_get_offsets_reverse(data, index,
1963
hs & REC_STATUS_NODE_PTR,
1965
rec_offs_make_valid(rec, index, offsets);
1967
/* Copy the extra bytes (backwards). */
1969
byte* start = rec_get_start(rec, offsets);
1970
byte* b = rec - REC_N_NEW_EXTRA_BYTES;
1971
while (b != start) {
1976
/* Copy the data bytes. */
1977
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1978
/* Non-leaf nodes should not contain any
1979
externally stored columns. */
1980
if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
1981
page_zip_fail(("page_zip_apply_log: "
1982
"%lu&REC_STATUS_NODE_PTR\n",
1987
/* Records with externally stored columns are handled by
page_zip_apply_log_ext(), which returns the advanced log pointer. */
data = page_zip_apply_log_ext(
1988
rec, offsets, trx_id_col, data, end);
1990
if (UNIV_UNLIKELY(!data)) {
1993
} else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
1994
len = rec_offs_data_size(offsets)
1995
- REC_NODE_PTR_SIZE;
1996
/* Copy the data bytes, except node_ptr. */
1997
if (UNIV_UNLIKELY(data + len >= end)) {
1998
page_zip_fail(("page_zip_apply_log: "
1999
"node_ptr %p+%lu >= %p\n",
2002
(const void*) end));
2005
memcpy(rec, data, len);
2007
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
2008
len = rec_offs_data_size(offsets);
2010
/* Copy all data bytes of
2011
a record in a secondary index. */
2012
if (UNIV_UNLIKELY(data + len >= end)) {
2013
page_zip_fail(("page_zip_apply_log: "
2014
"sec %p+%lu >= %p\n",
2017
(const void*) end));
2021
memcpy(rec, data, len);
2024
/* Skip DB_TRX_ID and DB_ROLL_PTR. */
2025
ulint l = rec_get_nth_field_offs(offsets,
2029
if (UNIV_UNLIKELY(data + l >= end)
2030
|| UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
2031
+ DATA_ROLL_PTR_LEN))) {
2032
page_zip_fail(("page_zip_apply_log: "
2033
"trx_id %p+%lu >= %p\n",
2036
(const void*) end));
2040
/* Copy any preceding data bytes. */
2041
memcpy(rec, data, l);
2044
/* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
2045
b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2046
len = rec_get_end(rec, offsets) - b;
2047
if (UNIV_UNLIKELY(data + len >= end)) {
2048
page_zip_fail(("page_zip_apply_log: "
2049
"clust %p+%lu >= %p\n",
2052
(const void*) end));
2055
memcpy(b, data, len);
2061
/**********************************************************************//**
2062
Decompress the records of a node pointer page.
The node pointers are not produced by inflate(): they are zero-filled
while the records are decompressed and copied back from the uncompressed
area of the compressed page after the modification log has been applied.
2063
@return TRUE on success, FALSE on failure */
2066
page_zip_decompress_node_ptrs(
2067
/*==========================*/
2068
page_zip_des_t* page_zip, /*!< in/out: compressed page */
2069
z_stream* d_stream, /*!< in/out: compressed page stream */
2070
rec_t** recs, /*!< in: dense page directory
2071
sorted by address */
2072
ulint n_dense, /*!< in: size of recs[] */
2073
dict_index_t* index, /*!< in: the index of the page */
2074
ulint* offsets, /*!< in/out: temporary offsets */
2075
mem_heap_t* heap) /*!< in: temporary memory heap */
2077
/* Status bits and heap number for the first user record; the heap
number part is incremented once per decompressed record. */
ulint heap_status = REC_STATUS_NODE_PTR
2078
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2080
const byte* storage;
2082
/* Subtract the space reserved for uncompressed data. */
2083
d_stream->avail_in -= n_dense
2084
* (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE);
2086
/* Decompress the records in heap_no order. */
2087
for (slot = 0; slot < n_dense; slot++) {
2088
rec_t* rec = recs[slot];
2090
/* Let inflate() fill everything between the current output
position and the start of the extra bytes of this record. */
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2091
- d_stream->next_out;
2093
ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2094
- PAGE_ZIP_START - PAGE_DIR);
2095
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2097
/* Apparently, n_dense has grown
2098
since the time the page was last compressed. */
2102
if (!d_stream->avail_out) {
2107
page_zip_fail(("page_zip_decompress_node_ptrs:"
2108
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2113
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2114
/* Prepare to decompress the data bytes. */
2115
d_stream->next_out = rec;
2116
/* Set heap_no and the status bits. */
2117
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2118
heap_status += 1 << REC_HEAP_NO_SHIFT;
2120
/* Read the offsets. The status bits are needed here. */
2121
offsets = rec_get_offsets(rec, index, offsets,
2122
ULINT_UNDEFINED, &heap);
2124
/* Non-leaf nodes should not have any externally
2126
ut_ad(!rec_offs_any_extern(offsets));
2128
/* Decompress the data bytes, except node_ptr. */
2129
d_stream->avail_out = rec_offs_data_size(offsets)
2130
- REC_NODE_PTR_SIZE;
2132
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2137
if (!d_stream->avail_out) {
2142
page_zip_fail(("page_zip_decompress_node_ptrs:"
2143
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2148
/* Clear the node pointer in case the record
2149
will be deleted and the space will be reallocated
2150
to a smaller record. */
2151
memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
2152
d_stream->next_out += REC_NODE_PTR_SIZE;
2154
ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
2157
/* Decompress any trailing garbage, in case the last record was
2158
allocated from an originally longer space on the free list. */
2159
d_stream->avail_out = page_header_get_field(page_zip->data,
2161
- page_offset(d_stream->next_out);
2162
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2163
- PAGE_ZIP_START - PAGE_DIR)) {
2165
page_zip_fail(("page_zip_decompress_node_ptrs:"
2166
" avail_out = %u\n",
2167
d_stream->avail_out));
2171
/* The remaining input must end the compressed stream. */
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2172
page_zip_fail(("page_zip_decompress_node_ptrs:"
2173
" inflate(Z_FINISH)=%s\n",
2176
inflateEnd(d_stream);
2180
/* Note that d_stream->avail_out > 0 may hold here
2181
if the modification log is nonempty. */
2184
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2189
page_t* page = page_align(d_stream->next_out);
2191
/* Clear the unused heap space on the uncompressed page. */
2192
memset(d_stream->next_out, 0,
2193
page_dir_get_nth_slot(page,
2194
page_dir_get_n_slots(page) - 1)
2195
- d_stream->next_out);
2199
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2200
#endif /* UNIV_DEBUG */
2202
/* Apply the modification log. */
2204
const byte* mod_log_ptr;
2205
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2206
d_stream->avail_in + 1,
2208
ULINT_UNDEFINED, heap_status,
2211
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2214
/* m_end is the offset of the end of the modification log within
the compressed page; the log is nonempty if the pointer moved. */
page_zip->m_end = mod_log_ptr - page_zip->data;
2215
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2219
(page_zip_get_trailer_len(page_zip,
2220
dict_index_is_clust(index), NULL)
2221
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2222
page_zip_fail(("page_zip_decompress_node_ptrs:"
2223
" %lu + %lu >= %lu, %lu\n",
2224
(ulong) page_zip_get_trailer_len(
2225
page_zip, dict_index_is_clust(index),
2227
(ulong) page_zip->m_end,
2228
(ulong) page_zip_get_size(page_zip),
2229
(ulong) dict_index_is_clust(index)));
2233
/* Restore the uncompressed columns in heap_no order. */
2234
/* storage initially points n_dense * PAGE_ZIP_DIR_SLOT_SIZE bytes
before the end of the compressed page and is stepped down by
REC_NODE_PTR_SIZE for each record in the loop below. */
storage = page_zip->data + page_zip_get_size(page_zip)
2235
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2237
for (slot = 0; slot < n_dense; slot++) {
2238
rec_t* rec = recs[slot];
2240
offsets = rec_get_offsets(rec, index, offsets,
2241
ULINT_UNDEFINED, &heap);
2242
/* Non-leaf nodes should not have any externally
2244
ut_ad(!rec_offs_any_extern(offsets));
2245
storage -= REC_NODE_PTR_SIZE;
2247
memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
2248
storage, REC_NODE_PTR_SIZE);
2254
/**********************************************************************//**
2255
Decompress the records of a leaf node of a secondary index.
The index must not be clustered (checked with ut_a). After the stream
has been inflated, the modification log is applied.
2256
@return TRUE on success, FALSE on failure */
2259
page_zip_decompress_sec(
2260
/*====================*/
2261
page_zip_des_t* page_zip, /*!< in/out: compressed page */
2262
z_stream* d_stream, /*!< in/out: compressed page stream */
2263
rec_t** recs, /*!< in: dense page directory
2264
sorted by address */
2265
ulint n_dense, /*!< in: size of recs[] */
2266
dict_index_t* index, /*!< in: the index of the page */
2267
ulint* offsets) /*!< in/out: temporary offsets */
2269
/* Status bits and heap number for the first user record; the heap
number part is incremented once per record in the loop below. */
ulint heap_status = REC_STATUS_ORDINARY
2270
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2273
ut_a(!dict_index_is_clust(index));
2275
/* Subtract the space reserved for uncompressed data. */
2276
d_stream->avail_in -= n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2278
for (slot = 0; slot < n_dense; slot++) {
2279
rec_t* rec = recs[slot];
2281
/* Decompress everything up to this record. */
2282
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2283
- d_stream->next_out;
2285
/* inflate() is called only when there is a gap to fill before
the extra bytes of this record. */
if (UNIV_LIKELY(d_stream->avail_out)) {
2286
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2288
/* Apparently, n_dense has grown
2289
since the time the page was last compressed. */
2293
if (!d_stream->avail_out) {
2298
page_zip_fail(("page_zip_decompress_sec:"
2299
" inflate(Z_SYNC_FLUSH)=%s\n",
2305
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2307
/* Skip the REC_N_NEW_EXTRA_BYTES. */
2309
d_stream->next_out = rec;
2311
/* Set heap_no and the status bits. */
2312
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2313
heap_status += 1 << REC_HEAP_NO_SHIFT;
2316
/* Decompress the data of the last record and any trailing garbage,
2317
in case the last record was allocated from an originally longer space
2318
on the free list. */
2319
d_stream->avail_out = page_header_get_field(page_zip->data,
2321
- page_offset(d_stream->next_out);
2322
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2323
- PAGE_ZIP_START - PAGE_DIR)) {
2325
page_zip_fail(("page_zip_decompress_sec:"
2326
" avail_out = %u\n",
2327
d_stream->avail_out));
2331
/* The remaining input must end the compressed stream. */
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2332
page_zip_fail(("page_zip_decompress_sec:"
2333
" inflate(Z_FINISH)=%s\n",
2336
inflateEnd(d_stream);
2340
/* Note that d_stream->avail_out > 0 may hold here
2341
if the modification log is nonempty. */
2344
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2349
page_t* page = page_align(d_stream->next_out);
2351
/* Clear the unused heap space on the uncompressed page. */
2352
memset(d_stream->next_out, 0,
2353
page_dir_get_nth_slot(page,
2354
page_dir_get_n_slots(page) - 1)
2355
- d_stream->next_out);
2359
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2360
#endif /* UNIV_DEBUG */
2362
/* Apply the modification log. */
2364
const byte* mod_log_ptr;
2365
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2366
d_stream->avail_in + 1,
2368
ULINT_UNDEFINED, heap_status,
2371
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2374
/* m_end is the offset of the end of the modification log within
the compressed page; the log is nonempty if the pointer moved. */
page_zip->m_end = mod_log_ptr - page_zip->data;
2375
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2378
/* The trailer and the data up to m_end must fit inside the
compressed page. */
if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE, NULL)
2379
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2381
page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
2382
(ulong) page_zip_get_trailer_len(
2383
page_zip, FALSE, NULL),
2384
(ulong) page_zip->m_end,
2385
(ulong) page_zip_get_size(page_zip)));
2389
/* There are no uncompressed columns on leaf pages of
2390
secondary indexes. */
2395
/**********************************************************************//**
2396
Decompress a record of a leaf node of a clustered index that contains
2397
externally stored columns.
The stream is inflated piecewise up to DB_TRX_ID and up to each
BTR_EXTERN_FIELD_REF; those byte ranges are zero-filled instead of
being read from the stream.
2398
@return TRUE on success */
2401
page_zip_decompress_clust_ext(
2402
/*==========================*/
2403
z_stream* d_stream, /*!< in/out: compressed page stream */
2404
rec_t* rec, /*!< in/out: record */
2405
const ulint* offsets, /*!< in: rec_get_offsets(rec) */
2406
ulint trx_id_col) /*!< in: position of DB_TRX_ID */
2410
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2414
if (UNIV_UNLIKELY(i == trx_id_col)) {
2415
/* Skip trx_id and roll_ptr */
2416
dst = rec_get_nth_field(rec, offsets, i, &len);
2417
if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2418
+ DATA_ROLL_PTR_LEN)) {
2420
page_zip_fail(("page_zip_decompress_clust_ext:"
2421
" len[%lu] = %lu\n",
2422
(ulong) i, (ulong) len));
2426
if (rec_offs_nth_extern(offsets, i)) {
2428
page_zip_fail(("page_zip_decompress_clust_ext:"
2429
" DB_TRX_ID at %lu is ext\n",
2434
/* Inflate up to the start of the trx_id field. */
d_stream->avail_out = dst - d_stream->next_out;
2436
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2440
if (!d_stream->avail_out) {
2445
page_zip_fail(("page_zip_decompress_clust_ext:"
2446
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2451
ut_ad(d_stream->next_out == dst);
2453
/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2454
avoid uninitialized bytes in case the record
2455
is affected by page_zip_apply_log(). */
2456
memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2458
d_stream->next_out += DATA_TRX_ID_LEN
2459
+ DATA_ROLL_PTR_LEN;
2460
} else if (rec_offs_nth_extern(offsets, i)) {
2461
dst = rec_get_nth_field(rec, offsets, i, &len);
2462
ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
2463
/* Point dst at the last BTR_EXTERN_FIELD_REF_SIZE bytes of
the field. */
dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2465
d_stream->avail_out = dst - d_stream->next_out;
2466
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2470
if (!d_stream->avail_out) {
2475
page_zip_fail(("page_zip_decompress_clust_ext:"
2476
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2481
ut_ad(d_stream->next_out == dst);
2483
/* Clear the BLOB pointer in case
2484
the record will be deleted and the
2485
space will not be reused. Note that
2486
the final initialization of the BLOB
2487
pointers (copying from "externs"
2488
or clearing) will have to take place
2489
only after the page modification log
2490
has been applied. Otherwise, we
2491
could end up with an uninitialized
2492
BLOB pointer when a record is deleted,
2493
reallocated and deleted. */
2494
memset(d_stream->next_out, 0,
2495
BTR_EXTERN_FIELD_REF_SIZE);
2497
+= BTR_EXTERN_FIELD_REF_SIZE;
2504
/**********************************************************************//**
2505
Decompress the records of a leaf node of a clustered index.
2506
@return TRUE on success, FALSE on failure */
2509
page_zip_decompress_clust(
2510
/*======================*/
2511
page_zip_des_t* page_zip, /*!< in/out: compressed page */
2512
z_stream* d_stream, /*!< in/out: compressed page stream */
2513
rec_t** recs, /*!< in: dense page directory
2514
sorted by address */
2515
ulint n_dense, /*!< in: size of recs[] */
2516
dict_index_t* index, /*!< in: the index of the page */
2517
ulint trx_id_col, /*!< index of the trx_id column */
2518
ulint* offsets, /*!< in/out: temporary offsets */
2519
mem_heap_t* heap) /*!< in: temporary memory heap */
2523
ulint heap_status = REC_STATUS_ORDINARY
2524
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2525
const byte* storage;
2526
const byte* externs;
2528
ut_a(dict_index_is_clust(index));
2530
/* Subtract the space reserved for uncompressed data. */
2531
d_stream->avail_in -= n_dense * (PAGE_ZIP_DIR_SLOT_SIZE
2533
+ DATA_ROLL_PTR_LEN);
2535
/* Decompress the records in heap_no order. */
2536
for (slot = 0; slot < n_dense; slot++) {
2537
rec_t* rec = recs[slot];
2539
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2540
- d_stream->next_out;
2542
ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2543
- PAGE_ZIP_START - PAGE_DIR);
2544
err = inflate(d_stream, Z_SYNC_FLUSH);
2547
/* Apparently, n_dense has grown
2548
since the time the page was last compressed. */
2552
if (UNIV_LIKELY(!d_stream->avail_out)) {
2557
page_zip_fail(("page_zip_decompress_clust:"
2558
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2563
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2564
/* Prepare to decompress the data bytes. */
2565
d_stream->next_out = rec;
2566
/* Set heap_no and the status bits. */
2567
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2568
heap_status += 1 << REC_HEAP_NO_SHIFT;
2570
/* Read the offsets. The status bits are needed here. */
2571
offsets = rec_get_offsets(rec, index, offsets,
2572
ULINT_UNDEFINED, &heap);
2574
/* This is a leaf page in a clustered index. */
2576
/* Check if there are any externally stored columns.
2577
For each externally stored column, restore the
2578
BTR_EXTERN_FIELD_REF separately. */
2580
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
2582
(!page_zip_decompress_clust_ext(
2583
d_stream, rec, offsets, trx_id_col))) {
2588
/* Skip trx_id and roll_ptr */
2590
byte* dst = rec_get_nth_field(rec, offsets,
2592
if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2593
+ DATA_ROLL_PTR_LEN)) {
2595
page_zip_fail(("page_zip_decompress_clust:"
2596
" len = %lu\n", (ulong) len));
2600
d_stream->avail_out = dst - d_stream->next_out;
2602
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2606
if (!d_stream->avail_out) {
2611
page_zip_fail(("page_zip_decompress_clust:"
2612
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2617
ut_ad(d_stream->next_out == dst);
2619
/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2620
avoid uninitialized bytes in case the record
2621
is affected by page_zip_apply_log(). */
2622
memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2624
d_stream->next_out += DATA_TRX_ID_LEN
2625
+ DATA_ROLL_PTR_LEN;
2628
/* Decompress the last bytes of the record. */
2629
d_stream->avail_out = rec_get_end(rec, offsets)
2630
- d_stream->next_out;
2632
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2636
if (!d_stream->avail_out) {
2641
page_zip_fail(("page_zip_decompress_clust:"
2642
" 3 inflate(Z_SYNC_FLUSH)=%s\n",
2648
/* Decompress any trailing garbage, in case the last record was
2649
allocated from an originally longer space on the free list. */
2650
d_stream->avail_out = page_header_get_field(page_zip->data,
2652
- page_offset(d_stream->next_out);
2653
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2654
- PAGE_ZIP_START - PAGE_DIR)) {
2656
page_zip_fail(("page_zip_decompress_clust:"
2657
" avail_out = %u\n",
2658
d_stream->avail_out));
2662
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2663
page_zip_fail(("page_zip_decompress_clust:"
2664
" inflate(Z_FINISH)=%s\n",
2667
inflateEnd(d_stream);
2671
/* Note that d_stream->avail_out > 0 may hold here
2672
if the modification log is nonempty. */
2675
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2680
page_t* page = page_align(d_stream->next_out);
2682
/* Clear the unused heap space on the uncompressed page. */
2683
memset(d_stream->next_out, 0,
2684
page_dir_get_nth_slot(page,
2685
page_dir_get_n_slots(page) - 1)
2686
- d_stream->next_out);
2690
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2691
#endif /* UNIV_DEBUG */
2693
/* Apply the modification log. */
2695
const byte* mod_log_ptr;
2696
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2697
d_stream->avail_in + 1,
2699
trx_id_col, heap_status,
2702
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2705
page_zip->m_end = mod_log_ptr - page_zip->data;
2706
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2709
if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE, NULL)
2710
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2712
page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
2713
(ulong) page_zip_get_trailer_len(
2714
page_zip, TRUE, NULL),
2715
(ulong) page_zip->m_end,
2716
(ulong) page_zip_get_size(page_zip)));
2720
storage = page_zip->data + page_zip_get_size(page_zip)
2721
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2723
externs = storage - n_dense
2724
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2726
/* Restore the uncompressed columns in heap_no order. */
2728
for (slot = 0; slot < n_dense; slot++) {
2732
rec_t* rec = recs[slot];
2733
ibool exists = !page_zip_dir_find_free(
2734
page_zip, page_offset(rec));
2735
offsets = rec_get_offsets(rec, index, offsets,
2736
ULINT_UNDEFINED, &heap);
2738
dst = rec_get_nth_field(rec, offsets,
2740
ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2741
storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
2742
memcpy(dst, storage,
2743
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2745
/* Check if there are any externally stored
2746
columns in this record. For each externally
2747
stored column, restore or clear the
2748
BTR_EXTERN_FIELD_REF. */
2749
if (!rec_offs_any_extern(offsets)) {
2753
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2754
if (!rec_offs_nth_extern(offsets, i)) {
2757
dst = rec_get_nth_field(rec, offsets, i, &len);
2759
if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
2760
page_zip_fail(("page_zip_decompress_clust:"
2766
dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2768
if (UNIV_LIKELY(exists)) {
2770
restore the BLOB pointer */
2771
externs -= BTR_EXTERN_FIELD_REF_SIZE;
2774
(externs < page_zip->data
2775
+ page_zip->m_end)) {
2776
page_zip_fail(("page_zip_"
2777
"decompress_clust: "
2779
(const void*) externs,
2787
memcpy(dst, externs,
2788
BTR_EXTERN_FIELD_REF_SIZE);
2790
page_zip->n_blobs++;
2793
clear the BLOB pointer */
2795
BTR_EXTERN_FIELD_REF_SIZE);
2803
/**********************************************************************//**
2804
Decompress a page. This function should tolerate errors on the compressed
2805
page. Instead of letting assertions fail, it will return FALSE if an
2806
inconsistency is detected.
The page header and the page directory are copied from page_zip->data, the
infimum and supremum records are restored, the index field description is
decoded from the start of the compressed stream, and the user records are
then decompressed by page_zip_decompress_node_ptrs(),
page_zip_decompress_sec() or page_zip_decompress_clust(), depending on
whether the page is a non-leaf page and on whether a trx_id column was
decoded. On completion the per-ssize entry of page_zip_stat[] is updated.
2807
@return TRUE on success, FALSE on failure */
2810
page_zip_decompress(
2811
/*================*/
2812
page_zip_des_t* page_zip,/*!< in: data, ssize;
2813
out: m_start, m_end, m_nonempty, n_blobs */
2814
page_t* page) /*!< out: uncompressed page, may be trashed */
2817
dict_index_t* index = NULL;
2818
rec_t** recs; /*!< dense page directory, sorted by address */
2819
ulint n_dense;/* number of user records on the page */
2820
ulint trx_id_col = ULINT_UNDEFINED;
2823
ullint usec = ut_time_us(NULL);
2825
ut_ad(page_zip_simple_validate(page_zip));
2826
UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
2827
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
2829
/* The dense directory excludes the infimum and supremum records. */
2830
n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
2831
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
2832
>= page_zip_get_size(page_zip))) {
2833
page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
2835
(ulong) page_zip_get_size(page_zip)));
/* recs[] is allocated with 2 * n_dense entries; the upper half
(recs + n_dense) is handed to page_zip_dir_decode() below as a
separate argument. */
2839
heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
2840
recs = mem_heap_alloc(heap, n_dense * (2 * sizeof *recs));
2842
#ifdef UNIV_ZIP_DEBUG
2843
/* Clear the page. */
2844
memset(page, 0x55, UNIV_PAGE_SIZE);
2845
#endif /* UNIV_ZIP_DEBUG */
2846
UNIV_MEM_INVALID(page, UNIV_PAGE_SIZE);
2847
/* Copy the page header. */
2848
memcpy(page, page_zip->data, PAGE_DATA);
2850
/* Copy the page directory. */
2851
if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
2852
recs + n_dense, n_dense))) {
2854
mem_heap_free(heap);
2858
/* Copy the infimum and supremum records. */
2859
memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
2860
infimum_extra, sizeof infimum_extra);
2861
if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
2862
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2865
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2866
page_zip_dir_get(page_zip, 0)
2867
& PAGE_ZIP_DIR_SLOT_MASK);
2869
memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
2870
memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
2871
supremum_extra_data, sizeof supremum_extra_data);
2873
page_zip_set_alloc(&d_stream, heap);
2875
if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
2880
d_stream.next_in = page_zip->data + PAGE_DATA;
2881
/* Subtract the space reserved for
2882
the page header and the end marker of the modification log. */
2883
d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1);
2885
d_stream.next_out = page + PAGE_ZIP_START;
2886
d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
2888
/* Decode the zlib header and the index information. */
2889
if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2891
page_zip_fail(("page_zip_decompress:"
2892
" 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
/* NOTE(review): inflate(Z_BLOCK) is called a second time; presumably
the first call consumes the zlib header and this one the block that
holds the index information -- confirm. */
2896
if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2898
page_zip_fail(("page_zip_decompress:"
2899
" 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2903
index = page_zip_fields_decode(
2904
page + PAGE_ZIP_START, d_stream.next_out,
2905
page_is_leaf(page) ? &trx_id_col : NULL);
2907
if (UNIV_UNLIKELY(!index)) {
2912
/* Decompress the user records. */
2913
page_zip->n_blobs = 0;
2914
d_stream.next_out = page + PAGE_ZIP_START;
2917
/* Pre-allocate the offsets for rec_get_offsets_reverse(). */
2918
ulint n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
2919
+ dict_index_get_n_fields(index);
2920
offsets = mem_heap_alloc(heap, n * sizeof(ulint));
2924
/* Decompress the records in heap_no order. */
2925
if (!page_is_leaf(page)) {
2926
/* This is a node pointer page. */
2930
(!page_zip_decompress_node_ptrs(page_zip, &d_stream,
2931
recs, n_dense, index,
/* info_bits is REC_INFO_MIN_REC_FLAG when the FIL_PAGE_PREV field of
the page equals FIL_NULL, and 0 otherwise. */
2936
info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
2937
? REC_INFO_MIN_REC_FLAG : 0;
2939
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
2943
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
2944
/* This is a leaf page in a secondary index. */
2945
if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
2951
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
2954
page_zip_fields_free(index);
2955
mem_heap_free(heap);
2959
/* This is a leaf page in a clustered index. */
2960
if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
2968
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
2974
ut_a(page_is_comp(page));
2975
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
2977
page_zip_fields_free(index);
2978
mem_heap_free(heap);
/* Count this decompression, and the time elapsed since usec was
sampled at the top of the function, in the page_zip_stat[] entry for
this page_zip->ssize. */
2980
page_zip_stat_t* zip_stat
2981
= &page_zip_stat[page_zip->ssize - 1];
2982
zip_stat->decompressed++;
2983
zip_stat->decompressed_usec += ut_time_us(NULL) - usec;
2986
/* Update the stat counter for LRU policy. */
2987
buf_LRU_stat_inc_unzip();
2992
#ifdef UNIV_ZIP_DEBUG
2993
/**********************************************************************//**
2994
Dump a block of memory on the standard error stream.
Prints name followed by a colon, then the data in rows of up to 32 bytes;
each row starts with the offset of its first byte in hexadecimal (at least
four digits) and each byte is printed as two hexadecimal digits. */
2997
page_zip_hexdump_func(
2998
/*==================*/
2999
const char* name, /*!< in: name of the data structure */
3000
const void* buf, /*!< in: data */
3001
ulint size) /*!< in: length of the data, in bytes */
3003
const byte* s = buf;
3005
const ulint width = 32; /* bytes per line */
3007
fprintf(stderr, "%s:\n", name);
3009
for (addr = 0; addr < size; addr += width) {
3012
fprintf(stderr, "%04lx ", (ulong) addr);
/* The last row may be shorter than width. */
3014
i = ut_min(width, size - addr);
3017
fprintf(stderr, "%02x", *s++);
3024
/** Dump a block of memory on the standard error stream.
The buf argument is also stringified and passed to page_zip_hexdump_func()
as the name that heads the dump.
@param buf in: data
3026
@param size in: length of the data, in bytes */
3027
#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
3029
/** Flag: make page_zip_validate() compare page headers only.
It is tested in page_zip_validate_low() after the page header comparison
and before the page is decompressed for the content comparison. */
3030
UNIV_INTERN ibool page_zip_validate_header_only = FALSE;
3032
/**********************************************************************//**
3033
Check that the compressed and decompressed pages match.
The page header fields are compared first. Unless
page_zip_validate_header_only is set, a copy of the descriptor is then
decompressed into a temporary UNIV_PAGE_SIZE-aligned buffer, and the
resulting n_blobs, m_start, m_end, m_nonempty and page contents are
compared against page_zip and page. Mismatches are reported through
page_zip_fail(), and the structures are dumped with page_zip_hexdump().
3034
@return TRUE if valid, FALSE if not */
3037
page_zip_validate_low(
3038
/*==================*/
3039
const page_zip_des_t* page_zip,/*!< in: compressed page */
3040
const page_t* page, /*!< in: uncompressed page */
3041
ibool sloppy) /*!< in: FALSE=strict,
3042
TRUE=ignore the MIN_REC_FLAG */
3044
page_zip_des_t temp_page_zip;
3045
byte* temp_page_buf;
/* Compare the header ranges that are stored verbatim on both pages. */
3049
if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3050
FIL_PAGE_LSN - FIL_PAGE_PREV)
3051
|| memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
3052
|| memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3053
PAGE_DATA - FIL_PAGE_DATA)) {
3054
page_zip_fail(("page_zip_validate: page header\n"));
3055
page_zip_hexdump(page_zip, sizeof *page_zip);
3056
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3057
page_zip_hexdump(page, UNIV_PAGE_SIZE);
3061
ut_a(page_is_comp(page));
3063
if (page_zip_validate_header_only) {
3067
/* page_zip_decompress() expects the uncompressed page to be
3068
UNIV_PAGE_SIZE aligned. */
3069
temp_page_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
3070
temp_page = ut_align(temp_page_buf, UNIV_PAGE_SIZE);
3072
#ifdef UNIV_DEBUG_VALGRIND
3073
/* Get detailed information on the valid bits in case the
3074
UNIV_MEM_ASSERT_RW() checks fail. The v-bits of page[],
3075
page_zip->data[] or page_zip could be viewed at temp_page[] or
3076
temp_page_zip in a debugger when running valgrind --db-attach. */
3077
VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
3078
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
3079
VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip);
3080
UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip);
3081
VALGRIND_GET_VBITS(page_zip->data, temp_page,
3082
page_zip_get_size(page_zip));
3083
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3084
#endif /* UNIV_DEBUG_VALGRIND */
/* Decompress through a copy of the descriptor, so that the fields
written by page_zip_decompress() can be compared with the caller's. */
3086
temp_page_zip = *page_zip;
3087
valid = page_zip_decompress(&temp_page_zip, temp_page);
3089
fputs("page_zip_validate(): failed to decompress\n", stderr);
3092
if (page_zip->n_blobs != temp_page_zip.n_blobs) {
3093
page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
3094
page_zip->n_blobs, temp_page_zip.n_blobs));
3098
if (page_zip->m_start != temp_page_zip.m_start) {
3099
page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
3100
page_zip->m_start, temp_page_zip.m_start));
3103
#endif /* UNIV_DEBUG */
3104
if (page_zip->m_end != temp_page_zip.m_end) {
3105
page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
3106
page_zip->m_end, temp_page_zip.m_end));
3109
if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
3110
page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
3111
page_zip->m_nonempty,
3112
temp_page_zip.m_nonempty));
3115
if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
3116
UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) {
3118
/* In crash recovery, the "minimum record" flag may be
3119
set incorrectly until the mini-transaction is
3120
committed. Let us tolerate that difference when we
3121
are performing a sloppy validation. */
3124
byte info_bits_diff;
3126
= rec_get_next_offs(page + PAGE_NEW_INFIMUM,
3128
ut_a(offset >= PAGE_NEW_SUPREMUM);
3129
offset -= 5 /* REC_NEW_INFO_BITS */;
/* If the two pages differ at this byte in exactly the minimum record
flag, copy the byte over and repeat the content comparison. */
3131
info_bits_diff = page[offset] ^ temp_page[offset];
3133
if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
3134
temp_page[offset] = page[offset];
3136
if (!memcmp(page + PAGE_HEADER,
3137
temp_page + PAGE_HEADER,
3138
UNIV_PAGE_SIZE - PAGE_HEADER
3139
- FIL_PAGE_DATA_END)) {
3141
/* Only the minimum record flag
3142
differed. Let us ignore it. */
3143
page_zip_fail(("page_zip_validate: "
3146
"%lu,%lu,0x%02lx)\n",
3147
page_get_space_id(page),
3148
page_get_page_no(page),
3149
(ulong) page[offset]));
3154
page_zip_fail(("page_zip_validate: content\n"));
3160
page_zip_hexdump(page_zip, sizeof *page_zip);
3161
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3162
page_zip_hexdump(page, UNIV_PAGE_SIZE);
3163
page_zip_hexdump(temp_page, UNIV_PAGE_SIZE);
/* Release the unaligned allocation that temp_page was aligned from. */
3165
ut_free(temp_page_buf);
3169
/**********************************************************************//**
3170
Check that the compressed and decompressed pages match.
This is a wrapper that calls page_zip_validate_low() with sloppy set to the
value of recv_recovery_is_on().
NOTE(review): the line carrying the function name is not visible in this
listing; the two-parameter signature matches the page_zip_validate() calls
elsewhere in this file -- confirm.
3171
@return TRUE if valid, FALSE if not */
3176
const page_zip_des_t* page_zip,/*!< in: compressed page */
3177
const page_t* page) /*!< in: uncompressed page */
3179
return(page_zip_validate_low(page_zip, page,
3180
recv_recovery_is_on()));
3182
#endif /* UNIV_ZIP_DEBUG */
3185
/**********************************************************************//**
3186
Assert that the compressed and decompressed page headers match.
The FIL_PAGE_PREV to FIL_PAGE_LSN range, the FIL_PAGE_TYPE field and the
FIL_PAGE_DATA to PAGE_DATA range of page_zip->data and of page are compared
with memcmp() inside ut_ad() assertions.
NOTE(review): the line that closes this header comment is not visible in
this listing.
3190
page_zip_header_cmp(
3191
/*================*/
3192
const page_zip_des_t* page_zip,/*!< in: compressed page */
3193
const byte* page) /*!< in: uncompressed page */
3195
ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3196
FIL_PAGE_LSN - FIL_PAGE_PREV));
/* NOTE(review): the length argument of the FIL_PAGE_TYPE comparison is
on a line that is not visible in this listing. */
3197
ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
3199
ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3200
PAGE_DATA - FIL_PAGE_DATA));
3204
#endif /* UNIV_DEBUG */
3206
/**********************************************************************//**
3207
Write a record on the compressed page that contains externally stored
3208
columns. The data must already have been written to the uncompressed page.
The data bytes of the record are appended at data, except that DB_TRX_ID
and DB_ROLL_PTR are copied to the area below storage and each BLOB pointer
(the last BTR_EXTERN_FIELD_REF_SIZE bytes of an externally stored column)
is copied to the area below that.
3209
@return end of modification log */
3212
page_zip_write_rec_ext(
3213
/*===================*/
3214
page_zip_des_t* page_zip, /*!< in/out: compressed page */
3215
const page_t* page, /*!< in: page containing rec */
3216
const byte* rec, /*!< in: record being written */
3217
dict_index_t* index, /*!< in: record descriptor */
3218
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
3219
ulint create, /*!< in: nonzero=insert, zero=update */
3220
ulint trx_id_col, /*!< in: position of DB_TRX_ID */
3221
ulint heap_no, /*!< in: heap number of rec */
3222
byte* storage, /*!< in: end of dense page directory */
3223
byte* data) /*!< in: end of modification log */
/* start tracks the first byte of rec that has not been logged yet. */
3225
const byte* start = rec;
3228
byte* externs = storage;
3229
ulint n_ext = rec_offs_n_extern(offsets);
3231
ut_ad(rec_offs_validate(rec, index, offsets));
3232
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3233
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3234
rec_offs_extra_size(offsets));
/* Step externs below the DB_TRX_ID/DB_ROLL_PTR area, which holds one
entry per user heap record. */
3236
externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3237
* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);
3239
/* Note that this will not take into account
3240
the BLOB columns of rec if create==TRUE. */
3241
ut_ad(data + rec_offs_data_size(offsets)
3242
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3243
- n_ext * BTR_EXTERN_FIELD_REF_SIZE
3244
< externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);
/* blob_no is the value of page_zip_get_n_prev_extern() for rec;
externs is moved down by that many BLOB pointers, so that the
pointers of rec are stored below them. ext_end is the lower end of
the n_blobs BLOB pointers stored so far. */
3247
ulint blob_no = page_zip_get_n_prev_extern(
3248
page_zip, rec, index);
3249
byte* ext_end = externs - page_zip->n_blobs
3250
* BTR_EXTERN_FIELD_REF_SIZE;
3251
ut_ad(blob_no <= page_zip->n_blobs);
3252
externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
/* NOTE(review): the condition guarding the following statements and
the remaining memmove() arguments are not visible in this listing;
presumably this is the insert (create) case, which makes room for
n_ext new BLOB pointers -- confirm. */
3255
page_zip->n_blobs += n_ext;
3256
ASSERT_ZERO_BLOB(ext_end - n_ext
3257
* BTR_EXTERN_FIELD_REF_SIZE);
3258
memmove(ext_end - n_ext
3259
* BTR_EXTERN_FIELD_REF_SIZE,
3264
ut_a(blob_no + n_ext <= page_zip->n_blobs);
3267
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
3270
if (UNIV_UNLIKELY(i == trx_id_col)) {
3271
ut_ad(!rec_offs_nth_extern(offsets,
3273
ut_ad(!rec_offs_nth_extern(offsets,
3275
/* Locate trx_id and roll_ptr. */
3276
src = rec_get_nth_field(rec, offsets,
3278
ut_ad(len == DATA_TRX_ID_LEN);
3279
ut_ad(src + DATA_TRX_ID_LEN
3280
== rec_get_nth_field(
3283
ut_ad(len == DATA_ROLL_PTR_LEN);
3285
/* Log the preceding fields. */
3286
ASSERT_ZERO(data, src - start);
3287
memcpy(data, start, src - start);
3288
data += src - start;
3289
start = src + (DATA_TRX_ID_LEN
3290
+ DATA_ROLL_PTR_LEN);
3292
/* Store trx_id and roll_ptr. */
3293
memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3295
src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3296
i++; /* skip also roll_ptr */
3297
} else if (rec_offs_nth_extern(offsets, i)) {
3298
src = rec_get_nth_field(rec, offsets,
3301
ut_ad(dict_index_is_clust(index));
3303
>= BTR_EXTERN_FIELD_REF_SIZE);
/* The BLOB pointer occupies the last BTR_EXTERN_FIELD_REF_SIZE bytes
of the column; everything before it is logged as ordinary data. */
3304
src += len - BTR_EXTERN_FIELD_REF_SIZE;
3306
ASSERT_ZERO(data, src - start);
3307
memcpy(data, start, src - start);
3308
data += src - start;
3309
start = src + BTR_EXTERN_FIELD_REF_SIZE;
3311
/* Store the BLOB pointer. */
3312
externs -= BTR_EXTERN_FIELD_REF_SIZE;
3313
ut_ad(data < externs);
3314
memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
3318
/* Log the last bytes of the record. */
3319
len = rec_offs_data_size(offsets) - (start - rec);
3321
ASSERT_ZERO(data, len);
3322
memcpy(data, start, len);
3328
/**********************************************************************//**
3329
Write an entire record on the compressed page. The data must already
3330
have been written to the uncompressed page.
The delete mark of the record is first copied to its dense directory slot.
The record is then appended to the modification log at page_zip->m_end:
the heap number, the extra bytes in reverse order, and the data bytes. On a
clustered index leaf page DB_TRX_ID and DB_ROLL_PTR are stored in the
uncompressed area below the dense directory instead (records with
externally stored columns go through page_zip_write_rec_ext()); on a node
pointer page the node pointer is stored there. Finally m_end and m_nonempty
are updated.
NOTE(review): the line carrying the function name is not visible in this
listing; page_zip_write_rec is inferred from this description -- confirm. */
3335
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3336
const byte* rec, /*!< in: record being written */
3337
dict_index_t* index, /*!< in: the index the record belongs to */
3338
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
3339
ulint create) /*!< in: nonzero=insert, zero=update */
3347
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3348
ut_ad(page_zip_simple_validate(page_zip));
3349
ut_ad(page_zip_get_size(page_zip)
3350
> PAGE_DATA + page_zip_dir_size(page_zip));
3351
ut_ad(rec_offs_comp(offsets));
3352
ut_ad(rec_offs_validate(rec, index, offsets));
3354
ut_ad(page_zip->m_start >= PAGE_DATA);
3356
page = page_align(rec);
3358
ut_ad(page_zip_header_cmp(page_zip, page));
3359
ut_ad(page_simple_validate_new((page_t*) page));
3361
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3362
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3363
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3364
rec_offs_extra_size(offsets));
3366
slot = page_zip_dir_find(page_zip, page_offset(rec));
3368
/* Copy the delete mark. */
3369
if (rec_get_deleted_flag(rec, TRUE)) {
3370
*slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
3372
*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
3375
ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
3376
ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
3377
- PAGE_DIR - PAGE_DIR_SLOT_SIZE
3378
* page_dir_get_n_slots(page));
3380
heap_no = rec_get_heap_no_new(rec);
3381
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
3382
ut_ad(heap_no < page_dir_get_n_heap(page));
3384
/* Append to the modification log. */
3385
data = page_zip->data + page_zip->m_end;
3388
/* Identify the record by writing its heap number - 1.
3389
0 is reserved to indicate the end of the modification log. */
3391
if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3392
*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3395
*data++ = (byte) ((heap_no - 1) << 1);
3399
const byte* start = rec - rec_offs_extra_size(offsets);
3400
const byte* b = rec - REC_N_NEW_EXTRA_BYTES;
3402
/* Write the extra bytes backwards, so that
3403
rec_offs_extra_size() can be easily computed in
3404
page_zip_apply_log() by invoking
3405
rec_get_offsets_reverse(). */
3407
while (b != start) {
3413
/* Write the data bytes. Store the uncompressed bytes separately. */
3414
storage = page_zip->data + page_zip_get_size(page_zip)
3415
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3416
* PAGE_ZIP_DIR_SLOT_SIZE;
3418
if (page_is_leaf(page)) {
3421
if (dict_index_is_clust(index)) {
3424
trx_id_col = dict_index_get_sys_col_pos(index,
3426
ut_ad(trx_id_col != ULINT_UNDEFINED);
3428
/* Store separately trx_id, roll_ptr and
3429
the BTR_EXTERN_FIELD_REF of each BLOB column. */
3430
if (rec_offs_any_extern(offsets)) {
3431
data = page_zip_write_rec_ext(
3433
rec, index, offsets, create,
3434
trx_id_col, heap_no, storage, data);
3436
/* Locate trx_id and roll_ptr. */
3438
= rec_get_nth_field(rec, offsets,
3440
ut_ad(len == DATA_TRX_ID_LEN);
3441
ut_ad(src + DATA_TRX_ID_LEN
3442
== rec_get_nth_field(
3444
trx_id_col + 1, &len));
3445
ut_ad(len == DATA_ROLL_PTR_LEN);
3447
/* Log the preceding fields. */
3448
ASSERT_ZERO(data, src - rec);
3449
memcpy(data, rec, src - rec);
3452
/* Store trx_id and roll_ptr. */
3454
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3457
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3459
src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
3461
/* Log the last bytes of the record. */
3462
len = rec_offs_data_size(offsets)
3465
ASSERT_ZERO(data, len);
3466
memcpy(data, src, len);
3470
/* Leaf page of a secondary index:
3471
no externally stored columns */
3472
ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
3473
== ULINT_UNDEFINED);
3474
ut_ad(!rec_offs_any_extern(offsets));
3476
/* Log the entire record. */
3477
len = rec_offs_data_size(offsets);
3479
ASSERT_ZERO(data, len);
3480
memcpy(data, rec, len);
3484
/* This is a node pointer page. */
3487
/* Non-leaf nodes should not have any externally
3489
ut_ad(!rec_offs_any_extern(offsets));
3491
/* Copy the data bytes, except node_ptr. */
3492
len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
3493
ut_ad(data + len < storage - REC_NODE_PTR_SIZE
3494
* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
3495
ASSERT_ZERO(data, len);
3496
memcpy(data, rec, len);
3499
/* Copy the node pointer to the uncompressed area. */
3500
memcpy(storage - REC_NODE_PTR_SIZE
/* Record the new end of the modification log, as an offset from
page_zip->data, and mark the log as nonempty. */
3507
ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
3508
page_zip->m_end = data - page_zip->data;
3509
page_zip->m_nonempty = TRUE;
3511
#ifdef UNIV_ZIP_DEBUG
3512
ut_a(page_zip_validate(page_zip, page_align(rec)));
3513
#endif /* UNIV_ZIP_DEBUG */
3516
/***********************************************************//**
3517
Parses a log record of writing a BLOB pointer of a record.
The record body is a 2-byte offset into the uncompressed page, a 2-byte
offset into the compressed page, and BTR_EXTERN_FIELD_REF_SIZE bytes of
BLOB pointer, which are copied to both locations.
3518
@return end of log record or NULL */
3521
page_zip_parse_write_blob_ptr(
3522
/*==========================*/
3523
byte* ptr, /*!< in: redo log buffer */
3524
byte* end_ptr,/*!< in: redo log buffer end */
3525
page_t* page, /*!< in/out: uncompressed page */
3526
page_zip_des_t* page_zip)/*!< in/out: compressed page */
3531
ut_ad(!page == !page_zip);
/* The whole record body must be available in the buffer. */
3534
(end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
3539
offset = mach_read_from_2(ptr);
3540
z_offset = mach_read_from_2(ptr + 2);
/* NOTE(review): both offsets are only compared against
UNIV_PAGE_SIZE. No check that accounts for the
BTR_EXTERN_FIELD_REF_SIZE bytes copied below, or that compares
z_offset with page_zip_get_size(), is visible in this listing --
worth confirming. */
3542
if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3543
|| UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3544
|| UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3546
recv_sys->found_corrupt_log = TRUE;
3552
if (UNIV_UNLIKELY(!page_zip)
3553
|| UNIV_UNLIKELY(!page_is_leaf(page))) {
3558
#ifdef UNIV_ZIP_DEBUG
3559
ut_a(page_zip_validate(page_zip, page));
3560
#endif /* UNIV_ZIP_DEBUG */
/* Apply the logged BLOB pointer to both copies of the page. */
3562
memcpy(page + offset,
3563
ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3564
memcpy(page_zip->data + z_offset,
3565
ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3567
#ifdef UNIV_ZIP_DEBUG
3568
ut_a(page_zip_validate(page_zip, page));
3569
#endif /* UNIV_ZIP_DEBUG */
3572
return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
3575
/**********************************************************************//**
3576
Write a BLOB pointer of a record on the leaf page of a clustered index.
3577
The information must already have been updated on the uncompressed page.
The last BTR_EXTERN_FIELD_REF_SIZE bytes of column n are copied to the
BLOB pointer area of the compressed page. A MLOG_ZIP_WRITE_BLOB_PTR redo
log record is then written, unless mlog_open() returns NULL. */
3580
page_zip_write_blob_ptr(
3581
/*====================*/
3582
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3583
const byte* rec, /*!< in/out: record whose data is being
3585
dict_index_t* index, /*!< in: index of the page */
3586
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
3587
ulint n, /*!< in: column index */
3588
mtr_t* mtr) /*!< in: mini-transaction handle,
3589
or NULL if no logging is needed */
3593
const page_t* page = page_align(rec);
3597
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3598
ut_ad(page_simple_validate_new((page_t*) page));
3599
ut_ad(page_zip_simple_validate(page_zip));
3600
ut_ad(page_zip_get_size(page_zip)
3601
> PAGE_DATA + page_zip_dir_size(page_zip));
3602
ut_ad(rec_offs_comp(offsets));
3603
ut_ad(rec_offs_validate(rec, NULL, offsets));
3604
ut_ad(rec_offs_any_extern(offsets));
3605
ut_ad(rec_offs_nth_extern(offsets, n));
3607
ut_ad(page_zip->m_start >= PAGE_DATA);
3608
ut_ad(page_zip_header_cmp(page_zip, page));
3610
ut_ad(page_is_leaf(page));
3611
ut_ad(dict_index_is_clust(index));
3613
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3614
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3615
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3616
rec_offs_extra_size(offsets));
/* blob_no is the position of this BLOB pointer in the BLOB pointer
area: the value of page_zip_get_n_prev_extern() for rec plus the
value of rec_get_n_extern_new() for column n. */
3618
blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
3619
+ rec_get_n_extern_new(rec, index, n);
3620
ut_a(blob_no < page_zip->n_blobs);
/* externs starts below the dense directory and the
DB_TRX_ID/DB_ROLL_PTR pairs, one of each per user heap record. */
3622
externs = page_zip->data + page_zip_get_size(page_zip)
3623
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3624
* (PAGE_ZIP_DIR_SLOT_SIZE
3625
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3627
field = rec_get_nth_field(rec, offsets, n, &len);
3629
externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
3630
field += len - BTR_EXTERN_FIELD_REF_SIZE;
3632
memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
3634
#ifdef UNIV_ZIP_DEBUG
3635
ut_a(page_zip_validate(page_zip, page));
3636
#endif /* UNIV_ZIP_DEBUG */
3639
#ifndef UNIV_HOTBACKUP
/* 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE is the size of the record body
that page_zip_parse_write_blob_ptr() expects. NOTE(review): the
leading 11 is presumably the space for the initial log record
header -- confirm. */
3640
byte* log_ptr = mlog_open(
3641
mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
3642
if (UNIV_UNLIKELY(!log_ptr)) {
3646
log_ptr = mlog_write_initial_log_record_fast(
3647
(byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
3648
mach_write_to_2(log_ptr, page_offset(field));
3650
mach_write_to_2(log_ptr, externs - page_zip->data);
3652
memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
3653
log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
3654
mlog_close(mtr, log_ptr);
3655
#endif /* !UNIV_HOTBACKUP */
3659
/***********************************************************//**
3660
Parses a log record of writing the node pointer of a record.
The record body is a 2-byte offset into the uncompressed page, a 2-byte
offset into the compressed page, and REC_NODE_PTR_SIZE bytes of node
pointer, which are copied to both locations once the compressed-page
offset has been checked to correspond to a valid user heap number.
3661
@return end of log record or NULL */
3664
page_zip_parse_write_node_ptr(
3665
/*==========================*/
3666
byte* ptr, /*!< in: redo log buffer */
3667
byte* end_ptr,/*!< in: redo log buffer end */
3668
page_t* page, /*!< in/out: uncompressed page */
3669
page_zip_des_t* page_zip)/*!< in/out: compressed page */
3674
ut_ad(!page == !page_zip);
/* The whole record body must be available in the buffer. */
3676
if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
3681
offset = mach_read_from_2(ptr);
3682
z_offset = mach_read_from_2(ptr + 2);
3684
if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3685
|| UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3686
|| UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3688
recv_sys->found_corrupt_log = TRUE;
3699
if (UNIV_UNLIKELY(!page_zip)
3700
|| UNIV_UNLIKELY(page_is_leaf(page))) {
3705
#ifdef UNIV_ZIP_DEBUG
3706
ut_a(page_zip_validate(page_zip, page));
3707
#endif /* UNIV_ZIP_DEBUG */
3709
field = page + offset;
3710
storage = page_zip->data + z_offset;
/* storage_end is the position just below the dense directory, from
which the node pointers are laid out downwards. */
3712
storage_end = page_zip->data + page_zip_get_size(page_zip)
3713
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3714
* PAGE_ZIP_DIR_SLOT_SIZE;
/* Derive the heap number from the distance between storage_end and
the logged position. The distance must be a whole number of node
pointers and the heap number must be a valid user heap number of the
page. */
3716
heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
3718
if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
3719
|| UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
3720
|| UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
/* Apply the logged node pointer to both copies of the page. */
3725
memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
3726
memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
3728
#ifdef UNIV_ZIP_DEBUG
3729
ut_a(page_zip_validate(page_zip, page));
3730
#endif /* UNIV_ZIP_DEBUG */
3733
return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
3736
/**********************************************************************//**
3737
Write the node pointer of a record on a non-leaf compressed page. */
3740
page_zip_write_node_ptr(
3741
/*====================*/
3742
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3743
byte* rec, /*!< in/out: record */
3744
ulint size, /*!< in: data size of rec */
3745
ulint ptr, /*!< in: node pointer */
3746
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
3750
page_t* page = page_align(rec);
3752
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3753
ut_ad(page_simple_validate_new(page));
3754
ut_ad(page_zip_simple_validate(page_zip));
3755
ut_ad(page_zip_get_size(page_zip)
3756
> PAGE_DATA + page_zip_dir_size(page_zip));
3757
ut_ad(page_rec_is_comp(rec));
3759
ut_ad(page_zip->m_start >= PAGE_DATA);
3760
ut_ad(page_zip_header_cmp(page_zip, page));
3762
ut_ad(!page_is_leaf(page));
3764
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3765
UNIV_MEM_ASSERT_RW(rec, size);
3767
storage = page_zip->data + page_zip_get_size(page_zip)
3768
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3769
* PAGE_ZIP_DIR_SLOT_SIZE
3770
- (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
3771
field = rec + size - REC_NODE_PTR_SIZE;
3773
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3774
ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
3775
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3776
#if REC_NODE_PTR_SIZE != 4
3777
# error "REC_NODE_PTR_SIZE != 4"
3779
mach_write_to_4(field, ptr);
3780
memcpy(storage, field, REC_NODE_PTR_SIZE);
3783
#ifndef UNIV_HOTBACKUP
3784
byte* log_ptr = mlog_open(mtr,
3785
11 + 2 + 2 + REC_NODE_PTR_SIZE);
3786
if (UNIV_UNLIKELY(!log_ptr)) {
3790
log_ptr = mlog_write_initial_log_record_fast(
3791
field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
3792
mach_write_to_2(log_ptr, page_offset(field));
3794
mach_write_to_2(log_ptr, storage - page_zip->data);
3796
memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
3797
log_ptr += REC_NODE_PTR_SIZE;
3798
mlog_close(mtr, log_ptr);
3799
#endif /* !UNIV_HOTBACKUP */
3803
/**********************************************************************//**
3804
Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
3807
page_zip_write_trx_id_and_roll_ptr(
3808
/*===============================*/
3809
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3810
byte* rec, /*!< in/out: record */
3811
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
3812
ulint trx_id_col,/*!< in: column number of TRX_ID in rec */
3813
trx_id_t trx_id, /*!< in: transaction identifier */
3814
roll_ptr_t roll_ptr)/*!< in: roll_ptr */
3818
page_t* page = page_align(rec);
3821
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3822
ut_ad(page_simple_validate_new(page));
3823
ut_ad(page_zip_simple_validate(page_zip));
3824
ut_ad(page_zip_get_size(page_zip)
3825
> PAGE_DATA + page_zip_dir_size(page_zip));
3826
ut_ad(rec_offs_validate(rec, NULL, offsets));
3827
ut_ad(rec_offs_comp(offsets));
3829
ut_ad(page_zip->m_start >= PAGE_DATA);
3830
ut_ad(page_zip_header_cmp(page_zip, page));
3832
ut_ad(page_is_leaf(page));
3834
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3836
storage = page_zip->data + page_zip_get_size(page_zip)
3837
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3838
* PAGE_ZIP_DIR_SLOT_SIZE
3839
- (rec_get_heap_no_new(rec) - 1)
3840
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3842
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
3843
# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
3845
field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
3846
ut_ad(len == DATA_TRX_ID_LEN);
3847
ut_ad(field + DATA_TRX_ID_LEN
3848
== rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
3849
ut_ad(len == DATA_ROLL_PTR_LEN);
3850
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3851
ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
3852
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3853
#if DATA_TRX_ID_LEN != 6
3854
# error "DATA_TRX_ID_LEN != 6"
3856
mach_write_to_6(field, trx_id);
3857
#if DATA_ROLL_PTR_LEN != 7
3858
# error "DATA_ROLL_PTR_LEN != 7"
3860
mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
3861
memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3863
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3864
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3865
rec_offs_extra_size(offsets));
3866
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3869
#ifdef UNIV_ZIP_DEBUG
/** Set this variable in a debugger to disable page_zip_clear_rec().
The only observable effect should be the compression ratio due to
deleted records not being zeroed out.  In rare cases, there can be
page_zip_validate() failures on the node_ptr, trx_id and roll_ptr
columns if the space is reallocated for a smaller record. */
UNIV_INTERN ibool	page_zip_clear_rec_disable;
#endif /* UNIV_ZIP_DEBUG */

/**********************************************************************//**
3879
Clear an area on the uncompressed and compressed page, if possible. */
3884
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3885
byte* rec, /*!< in: record to clear */
3886
dict_index_t* index, /*!< in: index of rec */
3887
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
3890
page_t* page = page_align(rec);
3891
/* page_zip_validate() would fail here if a record
3892
containing externally stored columns is being deleted. */
3893
ut_ad(rec_offs_validate(rec, index, offsets));
3894
ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
3895
ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
3896
ut_ad(page_zip_header_cmp(page_zip, page));
3898
heap_no = rec_get_heap_no_new(rec);
3899
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
3901
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3902
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3903
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3904
rec_offs_extra_size(offsets));
3907
#ifdef UNIV_ZIP_DEBUG
3908
!page_zip_clear_rec_disable &&
3909
#endif /* UNIV_ZIP_DEBUG */
3911
+ 1 + ((heap_no - 1) >= 64)/* size of the log entry */
3912
+ page_zip_get_trailer_len(page_zip,
3913
dict_index_is_clust(index), NULL)
3914
< page_zip_get_size(page_zip)) {
3917
/* Clear only the data bytes, because the allocator and
3918
the decompressor depend on the extra bytes. */
3919
memset(rec, 0, rec_offs_data_size(offsets));
3921
if (!page_is_leaf(page)) {
3922
/* Clear node_ptr on the compressed page. */
3923
byte* storage = page_zip->data
3924
+ page_zip_get_size(page_zip)
3925
- (page_dir_get_n_heap(page)
3926
- PAGE_HEAP_NO_USER_LOW)
3927
* PAGE_ZIP_DIR_SLOT_SIZE;
3929
memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
3930
0, REC_NODE_PTR_SIZE);
3931
} else if (dict_index_is_clust(index)) {
3932
/* Clear trx_id and roll_ptr on the compressed page. */
3933
byte* storage = page_zip->data
3934
+ page_zip_get_size(page_zip)
3935
- (page_dir_get_n_heap(page)
3936
- PAGE_HEAP_NO_USER_LOW)
3937
* PAGE_ZIP_DIR_SLOT_SIZE;
3939
memset(storage - (heap_no - 1)
3940
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
3941
0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3944
/* Log that the data was zeroed out. */
3945
data = page_zip->data + page_zip->m_end;
3947
if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3948
*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3951
*data++ = (byte) ((heap_no - 1) << 1 | 1);
3953
ut_ad((ulint) (data - page_zip->data)
3954
< page_zip_get_size(page_zip));
3955
page_zip->m_end = data - page_zip->data;
3956
page_zip->m_nonempty = TRUE;
3957
} else if (page_is_leaf(page) && dict_index_is_clust(index)) {
3958
/* Do not clear the record, because there is not enough space
3959
to log the operation. */
3961
if (rec_offs_any_extern(offsets)) {
3964
for (i = rec_offs_n_fields(offsets); i--; ) {
3965
/* Clear all BLOB pointers in order to make
3966
page_zip_validate() pass. */
3967
if (rec_offs_nth_extern(offsets, i)) {
3969
byte* field = rec_get_nth_field(
3970
rec, offsets, i, &len);
3972
- BTR_EXTERN_FIELD_REF_SIZE,
3973
0, BTR_EXTERN_FIELD_REF_SIZE);
3979
#ifdef UNIV_ZIP_DEBUG
3980
ut_a(page_zip_validate(page_zip, page));
3981
#endif /* UNIV_ZIP_DEBUG */
3984
/**********************************************************************//**
3985
Write the "deleted" flag of a record on a compressed page. The flag must
3986
already have been written on the uncompressed page. */
3989
page_zip_rec_set_deleted(
3990
/*=====================*/
3991
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3992
const byte* rec, /*!< in: record on the uncompressed page */
3993
ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */
3995
byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
3997
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3999
*slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
4001
*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
4003
#ifdef UNIV_ZIP_DEBUG
4004
ut_a(page_zip_validate(page_zip, page_align(rec)));
4005
#endif /* UNIV_ZIP_DEBUG */
4008
/**********************************************************************//**
4009
Write the "owned" flag of a record on a compressed page. The n_owned field
4010
must already have been written on the uncompressed page. */
4013
page_zip_rec_set_owned(
4014
/*===================*/
4015
page_zip_des_t* page_zip,/*!< in/out: compressed page */
4016
const byte* rec, /*!< in: record on the uncompressed page */
4017
ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
4019
byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
4021
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4023
*slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
4025
*slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
4029
/**********************************************************************//**
4030
Insert a record to the dense page directory. */
4033
page_zip_dir_insert(
4034
/*================*/
4035
page_zip_des_t* page_zip,/*!< in/out: compressed page */
4036
const byte* prev_rec,/*!< in: record after which to insert */
4037
const byte* free_rec,/*!< in: record from which rec was
4038
allocated, or NULL */
4039
byte* rec) /*!< in: record to insert */
4045
ut_ad(prev_rec != rec);
4046
ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
4047
ut_ad(page_zip_simple_validate(page_zip));
4049
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4051
if (page_rec_is_infimum(prev_rec)) {
4052
/* Use the first slot. */
4053
slot_rec = page_zip->data + page_zip_get_size(page_zip);
4055
byte* end = page_zip->data + page_zip_get_size(page_zip);
4056
byte* start = end - page_zip_dir_user_size(page_zip);
4058
if (UNIV_LIKELY(!free_rec)) {
4059
/* PAGE_N_RECS was already incremented
4060
in page_cur_insert_rec_zip(), but the
4061
dense directory slot at that position
4062
contains garbage. Skip it. */
4063
start += PAGE_ZIP_DIR_SLOT_SIZE;
4066
slot_rec = page_zip_dir_find_low(start, end,
4067
page_offset(prev_rec));
4071
/* Read the old n_dense (n_heap may have been incremented). */
4072
n_dense = page_dir_get_n_heap(page_zip->data)
4073
- (PAGE_HEAP_NO_USER_LOW + 1);
4075
if (UNIV_LIKELY_NULL(free_rec)) {
4076
/* The record was allocated from the free list.
4077
Shift the dense directory only up to that slot.
4078
Note that in this case, n_dense is actually
4079
off by one, because page_cur_insert_rec_zip()
4080
did not increment n_heap. */
4081
ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
4082
+ PAGE_HEAP_NO_USER_LOW);
4083
ut_ad(rec >= free_rec);
4084
slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
4086
slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4088
/* The record was allocated from the heap.
4089
Shift the entire dense directory. */
4090
ut_ad(rec_get_heap_no_new(rec) == n_dense
4091
+ PAGE_HEAP_NO_USER_LOW);
4093
/* Shift to the end of the dense page directory. */
4094
slot_free = page_zip->data + page_zip_get_size(page_zip)
4095
- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4098
/* Shift the dense directory to allocate place for rec. */
4099
memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
4100
slot_rec - slot_free);
4102
/* Write the entry for the inserted record.
4103
The "owned" and "deleted" flags must be zero. */
4104
mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
4107
/**********************************************************************//**
4108
Shift the dense page directory and the array of BLOB pointers
4109
when a record is deleted. */
4112
page_zip_dir_delete(
4113
/*================*/
4114
page_zip_des_t* page_zip,/*!< in/out: compressed page */
4115
byte* rec, /*!< in: record to delete */
4116
dict_index_t* index, /*!< in: index of rec */
4117
const ulint* offsets,/*!< in: rec_get_offsets(rec) */
4118
const byte* free) /*!< in: previous start of the free list */
4123
page_t* page = page_align(rec);
4125
ut_ad(rec_offs_validate(rec, index, offsets));
4126
ut_ad(rec_offs_comp(offsets));
4128
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4129
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
4130
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
4131
rec_offs_extra_size(offsets));
4133
slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
4137
/* This could not be done before page_zip_dir_find(). */
4138
page_header_set_field(page, page_zip, PAGE_N_RECS,
4139
(ulint)(page_get_n_recs(page) - 1));
4141
if (UNIV_UNLIKELY(!free)) {
4142
/* Make the last slot the start of the free list. */
4143
slot_free = page_zip->data + page_zip_get_size(page_zip)
4144
- PAGE_ZIP_DIR_SLOT_SIZE
4145
* (page_dir_get_n_heap(page_zip->data)
4146
- PAGE_HEAP_NO_USER_LOW);
4148
slot_free = page_zip_dir_find_free(page_zip,
4150
ut_a(slot_free < slot_rec);
4151
/* Grow the free list by one slot by moving the start. */
4152
slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4155
if (UNIV_LIKELY(slot_rec > slot_free)) {
4156
memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
4158
slot_rec - slot_free);
4161
/* Write the entry for the deleted record.
4162
The "owned" and "deleted" flags will be cleared. */
4163
mach_write_to_2(slot_free, page_offset(rec));
4165
if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
4166
ut_ad(!rec_offs_any_extern(offsets));
4170
n_ext = rec_offs_n_extern(offsets);
4171
if (UNIV_UNLIKELY(n_ext)) {
4172
/* Shift and zero fill the array of BLOB pointers. */
4177
blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
4178
ut_a(blob_no + n_ext <= page_zip->n_blobs);
4180
externs = page_zip->data + page_zip_get_size(page_zip)
4181
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
4182
* (PAGE_ZIP_DIR_SLOT_SIZE
4183
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4185
ext_end = externs - page_zip->n_blobs
4186
* BTR_EXTERN_FIELD_REF_SIZE;
4187
externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
4189
page_zip->n_blobs -= n_ext;
4190
/* Shift and zero fill the array. */
4191
memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
4192
(page_zip->n_blobs - blob_no)
4193
* BTR_EXTERN_FIELD_REF_SIZE);
4194
memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
4198
/* The compression algorithm expects info_bits and n_owned
4199
to be 0 for deleted records. */
4200
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
4202
page_zip_clear_rec(page_zip, rec, index, offsets);
4205
/**********************************************************************//**
4206
Add a slot to the dense page directory. */
4209
page_zip_dir_add_slot(
4210
/*==================*/
4211
page_zip_des_t* page_zip, /*!< in/out: compressed page */
4212
ulint is_clustered) /*!< in: nonzero for clustered index,
4219
ut_ad(page_is_comp(page_zip->data));
4220
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4222
/* Read the old n_dense (n_heap has already been incremented). */
4223
n_dense = page_dir_get_n_heap(page_zip->data)
4224
- (PAGE_HEAP_NO_USER_LOW + 1);
4226
dir = page_zip->data + page_zip_get_size(page_zip)
4227
- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4229
if (!page_is_leaf(page_zip->data)) {
4230
ut_ad(!page_zip->n_blobs);
4231
stored = dir - n_dense * REC_NODE_PTR_SIZE;
4232
} else if (UNIV_UNLIKELY(is_clustered)) {
4233
/* Move the BLOB pointer array backwards to make space for the
4234
roll_ptr and trx_id columns and the dense directory slot. */
4237
stored = dir - n_dense
4238
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4240
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4242
- (PAGE_ZIP_DIR_SLOT_SIZE
4243
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4244
PAGE_ZIP_DIR_SLOT_SIZE
4245
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4246
memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
4247
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4248
externs, stored - externs);
4251
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4252
ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
4253
PAGE_ZIP_DIR_SLOT_SIZE);
4256
/* Move the uncompressed area backwards to make space
4257
for one directory slot. */
4258
memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
4261
/***********************************************************//**
4262
Parses a log record of writing to the header of a page.
4263
@return end of log record or NULL */
4266
page_zip_parse_write_header(
4267
/*========================*/
4268
byte* ptr, /*!< in: redo log buffer */
4269
byte* end_ptr,/*!< in: redo log buffer end */
4270
page_t* page, /*!< in/out: uncompressed page */
4271
page_zip_des_t* page_zip)/*!< in/out: compressed page */
4276
ut_ad(ptr && end_ptr);
4277
ut_ad(!page == !page_zip);
4279
if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
4284
offset = (ulint) *ptr++;
4285
len = (ulint) *ptr++;
4287
if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
4289
recv_sys->found_corrupt_log = TRUE;
4294
if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
4300
if (UNIV_UNLIKELY(!page_zip)) {
4304
#ifdef UNIV_ZIP_DEBUG
4305
ut_a(page_zip_validate(page_zip, page));
4306
#endif /* UNIV_ZIP_DEBUG */
4308
memcpy(page + offset, ptr, len);
4309
memcpy(page_zip->data + offset, ptr, len);
4311
#ifdef UNIV_ZIP_DEBUG
4312
ut_a(page_zip_validate(page_zip, page));
4313
#endif /* UNIV_ZIP_DEBUG */
4319
#ifndef UNIV_HOTBACKUP
4320
/**********************************************************************//**
4321
Write a log record of writing to the uncompressed header portion of a page. */
4324
page_zip_write_header_log(
4325
/*======================*/
4326
const byte* data, /*!< in: data on the uncompressed page */
4327
ulint length, /*!< in: length of the data */
4328
mtr_t* mtr) /*!< in: mini-transaction */
4330
byte* log_ptr = mlog_open(mtr, 11 + 1 + 1);
4331
ulint offset = page_offset(data);
4333
ut_ad(offset < PAGE_DATA);
4334
ut_ad(offset + length < PAGE_DATA);
4336
# error "PAGE_DATA > 255"
4338
ut_ad(length < 256);
4340
/* If no logging is requested, we may return now */
4341
if (UNIV_UNLIKELY(!log_ptr)) {
4346
log_ptr = mlog_write_initial_log_record_fast(
4347
(byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
4348
*log_ptr++ = (byte) offset;
4349
*log_ptr++ = (byte) length;
4350
mlog_close(mtr, log_ptr);
4352
mlog_catenate_string(mtr, data, length);
4354
#endif /* !UNIV_HOTBACKUP */
4356
/**********************************************************************//**
4357
Reorganize and compress a page. This is a low-level operation for
4358
compressed pages, to be used when page_zip_compress() fails.
4359
On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
4360
The function btr_page_reorganize() should be preferred whenever possible.
4361
IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
4362
non-clustered index, the caller must update the insert buffer free
4363
bits in the same mini-transaction in such a way that the modification
4364
will be redo-logged.
4365
@return TRUE on success, FALSE on failure; page and page_zip will be
4366
left intact on failure. */
4369
page_zip_reorganize(
4370
/*================*/
4371
buf_block_t* block, /*!< in/out: page with compressed page;
4372
on the compressed page, in: size;
4374
m_start, m_end, m_nonempty */
4375
dict_index_t* index, /*!< in: index of the B-tree node */
4376
mtr_t* mtr) /*!< in: mini-transaction */
4378
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
4379
page_t* page = buf_block_get_frame(block);
4380
buf_block_t* temp_block;
4384
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4385
ut_ad(page_is_comp(page));
4386
ut_ad(!dict_index_is_ibuf(index));
4387
/* Note that page_zip_validate(page_zip, page) may fail here. */
4388
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
4389
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4391
/* Disable logging */
4392
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
4394
#ifndef UNIV_HOTBACKUP
4395
temp_block = buf_block_alloc(0);
4396
btr_search_drop_page_hash_index(block);
4397
block->check_index_page_at_flush = TRUE;
4398
#else /* !UNIV_HOTBACKUP */
4399
ut_ad(block == back_block1);
4400
temp_block = back_block2;
4401
#endif /* !UNIV_HOTBACKUP */
4402
temp_page = temp_block->frame;
4404
/* Copy the old page to temporary space */
4405
buf_frame_copy(temp_page, page);
4407
/* Recreate the page: note that global data on page (possible
4408
segment headers, next page-field, etc.) is preserved intact */
4410
page_create(block, mtr, TRUE);
4412
/* Copy the records from the temporary space to the recreated page;
4413
do not copy the lock bits yet */
4415
page_copy_rec_list_end_no_locks(block, temp_block,
4416
page_get_infimum_rec(temp_page),
4419
if (!dict_index_is_clust(index) && page_is_leaf(temp_page)) {
4420
/* Copy max trx id to recreated page */
4421
trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
4422
page_set_max_trx_id(block, NULL, max_trx_id, NULL);
4423
ut_ad(!ut_dulint_is_zero(max_trx_id));
4426
/* Restore logging. */
4427
mtr_set_log_mode(mtr, log_mode);
4429
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
4431
/* Restore the old page and exit. */
4432
buf_frame_copy(page, temp_page);
4434
#ifndef UNIV_HOTBACKUP
4435
buf_block_free(temp_block);
4436
#endif /* !UNIV_HOTBACKUP */
4440
lock_move_reorganize_page(block, temp_block);
4442
#ifndef UNIV_HOTBACKUP
4443
buf_block_free(temp_block);
4444
#endif /* !UNIV_HOTBACKUP */
4448
#ifndef UNIV_HOTBACKUP
4449
/**********************************************************************//**
4450
Copy the records of a page byte for byte. Do not copy the page header
4451
or trailer, except those B-tree header fields that are directly
4452
related to the storage of records. Also copy PAGE_MAX_TRX_ID.
4453
NOTE: The caller must update the lock table and the adaptive hash index. */
4458
page_zip_des_t* page_zip, /*!< out: copy of src_zip
4459
(n_blobs, m_start, m_end,
4460
m_nonempty, data[0..size-1]) */
4461
page_t* page, /*!< out: copy of src */
4462
const page_zip_des_t* src_zip, /*!< in: compressed page */
4463
const page_t* src, /*!< in: page */
4464
dict_index_t* index, /*!< in: index of the B-tree */
4465
mtr_t* mtr) /*!< in: mini-transaction */
4467
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
4468
ut_ad(mtr_memo_contains_page(mtr, (page_t*) src, MTR_MEMO_PAGE_X_FIX));
4469
ut_ad(!dict_index_is_ibuf(index));
4470
#ifdef UNIV_ZIP_DEBUG
4471
/* The B-tree operations that call this function may set
4472
FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
4473
mismatch. A strict page_zip_validate() will be executed later
4474
during the B-tree operations. */
4475
ut_a(page_zip_validate_low(src_zip, src, TRUE));
4476
#endif /* UNIV_ZIP_DEBUG */
4477
ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
4478
if (UNIV_UNLIKELY(src_zip->n_blobs)) {
4479
ut_a(page_is_leaf(src));
4480
ut_a(dict_index_is_clust(index));
4483
/* The PAGE_MAX_TRX_ID must be set on leaf pages of secondary
4484
indexes. It does not matter on other pages. */
4485
ut_a(dict_index_is_clust(index) || !page_is_leaf(src)
4486
|| !ut_dulint_is_zero(page_get_max_trx_id(src)));
4488
UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
4489
UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
4490
UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
4491
UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
4493
/* Copy those B-tree page header fields that are related to
4494
the records stored in the page. Also copy the field
4495
PAGE_MAX_TRX_ID. Skip the rest of the page header and
4496
trailer. On the compressed page, there is no trailer. */
4497
#if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END
4498
# error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END"
4500
memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
4501
PAGE_HEADER_PRIV_END);
4502
memcpy(PAGE_DATA + page, PAGE_DATA + src,
4503
UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
4504
memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
4505
PAGE_HEADER_PRIV_END);
4506
memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
4507
page_zip_get_size(page_zip) - PAGE_DATA);
4509
/* Copy all fields of src_zip to page_zip, except the pointer
4510
to the compressed data page. */
4512
page_zip_t* data = page_zip->data;
4513
memcpy(page_zip, src_zip, sizeof *page_zip);
4514
page_zip->data = data;
4516
ut_ad(page_zip_get_trailer_len(page_zip,
4517
dict_index_is_clust(index), NULL)
4518
+ page_zip->m_end < page_zip_get_size(page_zip));
4520
if (!page_is_leaf(src)
4521
&& UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
4522
&& UNIV_LIKELY(mach_read_from_4(page
4523
+ FIL_PAGE_PREV) != FIL_NULL)) {
4524
/* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
4525
ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
4527
if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
4528
rec_t* rec = page + offs;
4529
ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
4530
& REC_INFO_MIN_REC_FLAG);
4531
rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
4535
#ifdef UNIV_ZIP_DEBUG
4536
ut_a(page_zip_validate(page_zip, page));
4537
#endif /* UNIV_ZIP_DEBUG */
4539
page_zip_compress_write_log(page_zip, page, index, mtr);
4541
#endif /* !UNIV_HOTBACKUP */
4543
/**********************************************************************//**
4544
Parses a log record of compressing an index page.
4545
@return end of log record or NULL */
4548
page_zip_parse_compress(
4549
/*====================*/
4550
byte* ptr, /*!< in: buffer */
4551
byte* end_ptr,/*!< in: buffer end */
4552
page_t* page, /*!< out: uncompressed page */
4553
page_zip_des_t* page_zip)/*!< out: compressed page */
4558
ut_ad(ptr && end_ptr);
4559
ut_ad(!page == !page_zip);
4561
if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
4566
size = mach_read_from_2(ptr);
4568
trailer_size = mach_read_from_2(ptr);
4571
if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
4577
if (UNIV_UNLIKELY(!page_zip)
4578
|| UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
4580
recv_sys->found_corrupt_log = TRUE;
4585
memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
4586
memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
4587
memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
4588
memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
4589
page_zip_get_size(page_zip) - trailer_size
4590
- (FIL_PAGE_TYPE + size));
4591
memcpy(page_zip->data + page_zip_get_size(page_zip)
4592
- trailer_size, ptr + 8 + size, trailer_size);
4594
if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page))) {
4600
return(ptr + 8 + size + trailer_size);
4603
/**********************************************************************//**
4604
Calculate the compressed page checksum.
4605
@return page checksum */
4608
page_zip_calc_checksum(
4609
/*===================*/
4610
const void* data, /*!< in: compressed page */
4611
ulint size) /*!< in: size of compressed page */
4613
/* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
4614
and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
4616
const Bytef* s = data;
4619
ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4621
adler = adler32(0L, s + FIL_PAGE_OFFSET,
4622
FIL_PAGE_LSN - FIL_PAGE_OFFSET);
4623
adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
4624
adler = adler32(adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
4625
size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4627
return((ulint) adler);