~ignacio-nin/percona-server/5.1-issue26684

102.1.1 by kinoyasu
add header and rule, as the first step of the reordering patch for separate release
1
# name       : innodb_buffer_pool_shm.patch
2
# introduced : 12
3
# maintainer : Yasufumi
4
#
5
#!!! notice !!!
6
# Any small change to this file in the main branch
7
# should be done or reviewed by the maintainer!
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
8
diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c
9
--- a/storage/innodb_plugin/buf/buf0buf.c	2010-07-14 16:32:49.669501663 +0900
10
+++ b/storage/innodb_plugin/buf/buf0buf.c	2010-07-14 16:40:16.149438645 +0900
71 by kinoyasu
adjust innodb_buffer_pool_shm.patch to be built with UNIV_DEBUG definition
11
@@ -53,6 +53,10 @@
12
 #include "page0zip.h"
13
 #include "trx0trx.h"
14
 #include "srv0start.h"
15
+#include "que0que.h"
16
+#include "read0read.h"
17
+#include "row0row.h"
18
+#include "ha_prototypes.h"
19
 
20
 /* prototypes for new functions added to ha_innodb.cc */
21
 trx_t* innobase_get_trx();
22
@@ -310,6 +314,30 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
23
 UNIV_INTERN ibool		buf_debug_prints = FALSE;
24
 #endif /* UNIV_DEBUG */
25
 
26
+/* Header stored at the start of the buffer pool shared memory segment (chunk->mem); it describes the segment so that a later start can validate and reuse it */
27
+typedef	struct buf_shm_info_struct	buf_shm_info_t;
28
+
29
+struct buf_shm_info_struct {
30
+	char	head_str[8];	/* BUF_SHM_INFO_HEAD marker, written on creation and compared on reuse */
31
+	ulint	binary_id;	/* id derived from function address differences; must match on reuse */
32
+	ibool	is_new;		/* TRUE from segment creation until initialization is finished */
33
+	ibool	clean;		/* TRUE once the segment is released with srv_fast_shutdown < 2; reset to FALSE when the segment is taken into use */
34
+	ibool	reusable;	/* set to TRUE when the segment is released; if FALSE on a reuse attempt the segment is re-initialized */
35
+	ulint	buf_pool_size;	/* srv_buf_pool_size at creation; must match on reuse */
36
+	ulint	page_size;	/* srv_page_size at creation; must match on reuse */
37
+	ulint	frame_offset;	/* offset of the first frame (blocks[0].frame) from chunk->mem */
38
+	ulint	zip_hash_offset;	/* offset of the zip_hash table from chunk->mem */
39
+	ulint	zip_hash_n;	/* size value passed to hash_create_init() for zip_hash */
40
+
41
+	ulint	checksum;	/* ut_fold_binary_32() of the segment after this header, or BUF_NO_CHECKSUM_MAGIC */
42
+
43
+	buf_pool_t	buf_pool_backup;	/* copy of *buf_pool taken when the segment is released */
44
+	buf_chunk_t	chunk_backup;	/* copy of the chunk descriptor taken when the segment is released */
45
+
46
+	ib_uint64_t	dummy;	/* NOTE(review): purpose not evident here; presumably padding/alignment - confirm */
47
+};
48
+
49
+#define BUF_SHM_INFO_HEAD "XTRA_SHM"
50
 #endif /* !UNIV_HOTBACKUP */
51
 
52
 /********************************************************************//**
71 by kinoyasu
adjust innodb_buffer_pool_shm.patch to be built with UNIV_DEBUG definition
53
@@ -756,6 +784,45 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
54
 #endif /* UNIV_SYNC_DEBUG */
55
 }
56
 
57
+static
58
+void
59
+buf_block_reuse(
60
+/*============*/
61
+	buf_block_t*	block,	/*!< in/out: block descriptor found in the reused shared memory segment */
62
+	ptrdiff_t	frame_offset)	/*!< in: byte offset added to the stored frame and zip.data pointers */
63
+{
64
+	/* block_init equivalent for a descriptor kept in the reused shared memory segment: rebase the stored frame pointer */
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
65
+	block->frame += frame_offset;
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
66
+
67
+	UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block);
68
+
69
+	block->index = NULL;
70
+
71
+#ifdef UNIV_DEBUG
72
+	/* reset the debug-only hash flags; in_page_hash is set again when the page is re-inserted into page_hash */
73
+	block->page.in_page_hash = FALSE;
74
+	block->page.in_zip_hash = FALSE;
75
+#endif /* UNIV_DEBUG */
76
+
77
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
78
+	block->n_pointers = 0;
79
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
80
+
81
+	if (block->page.zip.data)
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
82
+		block->page.zip.data += frame_offset;	/* rebased by the same offset as the frame */
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
83
+
84
+	block->is_hashed = FALSE;
85
+
86
+	mutex_create(&block->mutex, SYNC_BUF_BLOCK);	/* create fresh mutex and latch objects for the reused descriptor */
87
+
88
+	rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
89
+	ut_ad(rw_lock_validate(&(block->lock)));
90
+
91
+#ifdef UNIV_SYNC_DEBUG
92
+	rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
93
+#endif /* UNIV_SYNC_DEBUG */
94
+}
95
+
96
 /********************************************************************//**
97
 Allocates a chunk of buffer frames.
98
 @return	chunk, or NULL on failure */
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
99
@@ -768,26 +835,190 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
100
 {
101
 	buf_block_t*	block;
102
 	byte*		frame;
103
+	ulint		zip_hash_n = 0;
104
+	ulint		zip_hash_mem_size = 0;
83.2.3 by kinoyasu
adjust not to cause warnings with -Wall
105
+	hash_table_t*	zip_hash_tmp = NULL;
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
106
 	ulint		i;
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
107
+	ulint		size_target;
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
108
+	buf_shm_info_t*	shm_info = NULL;
109
 
110
 	/* Round down to a multiple of page size,
111
 	although it already should be. */
112
 	mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
113
+	size_target = (mem_size / UNIV_PAGE_SIZE) - 1;
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
114
+
124.1.1 by kinoyasu
fix bug649408
115
+	srv_buffer_pool_shm_is_reused = FALSE;
116
+
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
117
+	if (srv_buffer_pool_shm_key) {
118
+		/* zip_hash size */
119
+		zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2;
120
+		zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n)
121
+						  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
122
+	}
123
+
124
 	/* Reserve space for the block descriptors. */
125
 	mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
126
 				  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
127
+	if (srv_buffer_pool_shm_key) {
128
+		 mem_size += ut_2pow_round(sizeof(buf_shm_info_t)
129
+					   + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
130
+		 mem_size += zip_hash_mem_size;
131
+	}
132
 
133
 	chunk->mem_size = mem_size;
134
+
135
+	if (srv_buffer_pool_shm_key) {
136
+		ulint	binary_id;
137
+		ibool	is_new;
138
+
139
+		ut_a(buf_pool->n_chunks == 1);
140
+
141
+		fprintf(stderr,
108.2.1 by Vadim Tkachenko
Fix error messages for buffer pool in shm
142
+		"InnoDB: Warning: The innodb_buffer_pool_shm_key option has been specified.\n"
143
+		"InnoDB: Do not change the following between restarts of the server while this option is being used:\n"
144
+		"InnoDB:   * the mysqld executable between restarts of the server.\n"
112 by Vadim Tkachenko
fix mistyping
145
+		"InnoDB:   * the value of innodb_buffer_pool_size.\n"
146
+		"InnoDB:   * the value of innodb_page_size.\n"
147
+		"InnoDB:   * datafiles created by InnoDB during this session.\n"
108.2.1 by Vadim Tkachenko
Fix error messages for buffer pool in shm
148
+		"InnoDB: Otherwise, data corruption in datafiles may result.\n");
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
149
+
150
+		/* FIXME: This is vague id still */
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
151
+		binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get)
152
+			  + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum)
153
+			  + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal)
154
+			  + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str)
155
+			  + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version)
156
+			  + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low)
157
+			  + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func)
158
+			  + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread)
159
+			  + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside)
160
+			  + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity)
161
+			  + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup);
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
162
+
163
+		chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new);
164
+
165
+		if (UNIV_UNLIKELY(chunk->mem == NULL)) {
166
+			return(NULL);
167
+		}
119.1.1 by kinoyasu
- New static bool option innodb_buffer_pool_shm_checksum for bug643650
168
+init_again:
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
169
+#ifdef UNIV_SET_MEM_TO_ZERO
170
+		if (is_new) {
171
+			memset(chunk->mem, '\0', chunk->mem_size);
172
+		}
173
+#endif
117.1.1 by kinoyasu
fix bug643650: change to use ut_fold_binary_32 for checksum
174
+		/* for ut_fold_binary_32(), these values should be 32-bit aligned */
175
+		ut_a(sizeof(buf_shm_info_t) % 4 == 0);
176
+		ut_a((ulint)chunk->mem % 4 == 0);
177
+		ut_a(chunk->mem_size % 4 == 0);
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
178
+
179
+		shm_info = chunk->mem;
180
+
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
181
+		zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size);
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
182
+
183
+		if (is_new) {
184
+			strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8);
185
+			shm_info->binary_id = binary_id;
186
+			shm_info->is_new = TRUE;	/* changed to FALSE when the initialization is finished */
187
+			shm_info->clean = FALSE;	/* changed to TRUE when free the segment. */
188
+			shm_info->reusable = FALSE;	/* changed to TRUE when validation is finished. */
189
+			shm_info->buf_pool_size = srv_buf_pool_size;
190
+			shm_info->page_size = srv_page_size;
191
+			shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size;
192
+			shm_info->zip_hash_n = zip_hash_n;
193
+		} else {
194
+			ulint	checksum;
195
+
196
+			if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) {
197
+				fprintf(stderr,
198
+				"InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n");
199
+				return(NULL);
200
+			}
201
+			if (shm_info->binary_id != binary_id) {
202
+				fprintf(stderr,
203
+				"InnoDB: Error: The shared memory segment seems not to be for this binary.\n");
204
+				return(NULL);
205
+			}
206
+			if (shm_info->is_new) {
207
+				fprintf(stderr,
208
+				"InnoDB: Error: The shared memory was not initialized yet.\n");
209
+				return(NULL);
210
+			}
211
+			if (shm_info->buf_pool_size != srv_buf_pool_size) {
212
+				fprintf(stderr,
213
+				"InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n",
214
+				shm_info->buf_pool_size, srv_buf_pool_size);
215
+				return(NULL);
216
+			}
217
+			if (shm_info->page_size != srv_page_size) {
218
+				fprintf(stderr,
219
+				"InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n",
220
+				shm_info->page_size, srv_page_size);
221
+				return(NULL);
222
+			}
119.1.1 by kinoyasu
- New static bool option innodb_buffer_pool_shm_checksum for bug643650
223
+			if (!shm_info->reusable) {
224
+				fprintf(stderr,
225
+				"InnoDB: Warning: The shared memory has unrecoverable contents.\n"
226
+				"InnoDB: The shared memory segment is initialized.\n");
227
+				is_new = TRUE;
228
+				goto init_again;
229
+			}
230
+			if (!shm_info->clean) {
231
+				fprintf(stderr,
232
+				"InnoDB: Warning: The shared memory was not shut down cleanly.\n"
233
+				"InnoDB: The shared memory segment is initialized.\n");
234
+				is_new = TRUE;
235
+				goto init_again;
236
+			}
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
237
+
238
+			ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size);
239
+			ut_a(shm_info->zip_hash_n == zip_hash_n);
240
+
241
+			/* check checksum */
119.1.1 by kinoyasu
- New static bool option innodb_buffer_pool_shm_checksum for bug643650
242
+			if (srv_buffer_pool_shm_checksum) {
243
+				checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
244
+							     chunk->mem_size - sizeof(buf_shm_info_t));
245
+			} else {
246
+				checksum = BUF_NO_CHECKSUM_MAGIC;
247
+			}
248
+
249
+			if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC
250
+			    && shm_info->checksum != checksum) {
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
251
+				fprintf(stderr,
252
+				"InnoDB: Error: checksum of the shared memory is not match. "
253
+				"(stored=%lu calculated=%lu)\n",
254
+				shm_info->checksum, checksum);
255
+				return(NULL);
256
+			}
257
+
258
+			/* flag to use the segment. */
259
+			shm_info->clean = FALSE;	/* changed to TRUE when free the segment. */
260
+		}
261
+
262
+		/* init zip_hash contents */
263
+		if (is_new) {
264
+			hash_create_init(zip_hash_tmp, zip_hash_n);
265
+		} else {
266
+			/* adjust offset is done later */
267
+			hash_create_reuse(zip_hash_tmp);
124.1.1 by kinoyasu
fix bug649408
268
+
269
+			srv_buffer_pool_shm_is_reused = TRUE;
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
270
+		}
271
+	} else {
272
 	chunk->mem = os_mem_alloc_large(&chunk->mem_size);
273
 
274
 	if (UNIV_UNLIKELY(chunk->mem == NULL)) {
275
 
276
 		return(NULL);
277
 	}
278
+	}
279
 
280
 	/* Allocate the block descriptors from
281
 	the start of the memory block. */
282
+	if (srv_buffer_pool_shm_key) {
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
283
+		chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t));
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
284
+	} else {
285
 	chunk->blocks = chunk->mem;
286
+	}
287
 
288
 	/* Align a pointer to the first frame.  Note that when
289
 	os_large_page_size is smaller than UNIV_PAGE_SIZE,
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
290
@@ -795,8 +1026,13 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
291
 	it is bigger, we may allocate more blocks than requested. */
292
 
293
 	frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
294
+	if (srv_buffer_pool_shm_key) {
295
+		/* reserve zip_hash space and always -1 for reproducibility */
296
+		chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1;
297
+	} else {
298
 	chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
299
 		- (frame != chunk->mem);
300
+	}
301
 
302
 	/* Subtract the space needed for block descriptors. */
303
 	{
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
304
@@ -810,6 +1046,102 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
305
 		chunk->size = size;
306
 	}
307
 
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
308
+	if (chunk->size > size_target) {
309
+		chunk->size = size_target;
310
+	}
311
+
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
312
+	if (shm_info && !(shm_info->is_new)) {
313
+		/* convert the shared memory segment for reuse */
314
+		ptrdiff_t	phys_offset;
315
+		ptrdiff_t	logi_offset;
316
+		ptrdiff_t	blocks_offset;
317
+		void*		previous_frame_address;
318
+
319
+		if (chunk->size < shm_info->chunk_backup.size) {
320
+			fprintf(stderr,
321
+			"InnoDB: Error: The buffer pool became smaller because of allocated address.\n"
322
+			"InnoDB: Retrying may avoid this situation.\n");
323
+			shm_info->clean = TRUE; /* release the flag for retrying */
324
+			return(NULL);
325
+		}
326
+
327
+		chunk->size = shm_info->chunk_backup.size;
119.1.1 by kinoyasu
- New static bool option innodb_buffer_pool_shm_checksum for bug643650
328
+		phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset);
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
329
+		logi_offset = frame - chunk->blocks[0].frame;
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
330
+		previous_frame_address = chunk->blocks[0].frame;
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
331
+		blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks;
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
332
+
333
+		if (phys_offset || logi_offset || blocks_offset) {
334
+			fprintf(stderr,
335
+			"InnoDB: Buffer pool in the shared memory segment should be converted.\n"
336
+			"InnoDB: Previous frames in address      : %p\n"
337
+			"InnoDB: Previous frames were located    : %p\n"
338
+			"InnoDB: Current frames should be located: %p\n"
339
+			"InnoDB: Pysical offset                  : %ld (%#lx)\n"
340
+			"InnoDB: Logical offset (frames)         : %ld (%#lx)\n"
341
+			"InnoDB: Logical offset (blocks)         : %ld (%#lx)\n",
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
342
+				(byte*)chunk->mem + shm_info->frame_offset,
343
+				chunk->blocks[0].frame, frame,
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
344
+				phys_offset, phys_offset, logi_offset, logi_offset,
345
+				blocks_offset, blocks_offset);
346
+		} else {
347
+			fprintf(stderr,
348
+			"InnoDB: Buffer pool in the shared memory segment can be used as it is.\n");
349
+		}
350
+
351
+		if (phys_offset) {
352
+			fprintf(stderr,
353
+			"InnoDB: Aligning physical offset...");
354
+
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
355
+			memmove(frame, (byte*)chunk->mem + shm_info->frame_offset,
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
356
+				chunk->size * UNIV_PAGE_SIZE);
357
+
358
+			fprintf(stderr,
359
+			" Done.\n");
360
+		}
361
+
119.1.1 by kinoyasu
- New static bool option innodb_buffer_pool_shm_checksum for bug643650
362
+		/* buf_block_t */
363
+		block = chunk->blocks;
364
+		for (i = chunk->size; i--; ) {
365
+			buf_block_reuse(block, logi_offset);
366
+			block++;
367
+		}
368
+
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
369
+		if (logi_offset || blocks_offset) {
370
+			fprintf(stderr,
371
+			"InnoDB: Aligning logical offset...");
372
+
373
+
374
+			/* buf_pool_t buf_pool_backup */
375
+			UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list,
376
+					previous_frame_address, logi_offset, blocks_offset);
377
+			UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free,
378
+					previous_frame_address, logi_offset, blocks_offset);
379
+			UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU,
380
+					previous_frame_address, logi_offset, blocks_offset);
381
+			if (shm_info->buf_pool_backup.LRU_old)
382
+				shm_info->buf_pool_backup.LRU_old =
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
383
+					(buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old)
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
384
+						+ (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
385
+						  ? logi_offset : blocks_offset));
386
+
387
+			UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU,
388
+					previous_frame_address, logi_offset, blocks_offset);
389
+
390
+			UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean,
391
+					previous_frame_address, logi_offset, blocks_offset);
392
+			for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
393
+				UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i],
394
+					previous_frame_address, logi_offset, blocks_offset);
395
+			}
396
+
397
+			HASH_OFFSET(zip_hash_tmp, buf_page_t, hash,
398
+					previous_frame_address, logi_offset, blocks_offset);
399
+
400
+			fprintf(stderr,
401
+			" Done.\n");
402
+		}
403
+	} else {
404
 	/* Init block structs and assign frames for them. Then we
405
 	assign the frames to the first blocks (we already mapped the
406
 	memory above). */
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
407
@@ -833,6 +1165,11 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
408
 		block++;
409
 		frame += UNIV_PAGE_SIZE;
410
 	}
411
+	}
412
+
413
+	if (shm_info) {
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
414
+		shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem;
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
415
+	}
416
 
417
 	return(chunk);
418
 }
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
419
@@ -1014,6 +1351,8 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
420
 		UNIV_MEM_UNDESC(block);
421
 	}
422
 
423
+	ut_a(!srv_buffer_pool_shm_key);
424
+
425
 	os_mem_free_large(chunk->mem, chunk->mem_size);
426
 }
427
 
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
428
@@ -1063,7 +1402,10 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
429
 	srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
430
 
431
 	buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
432
+	/* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */
433
+	if (!srv_buffer_pool_shm_key) {
434
 	buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
435
+	}
436
 
437
 	buf_pool->last_printout_time = time(NULL);
438
 
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
439
@@ -1078,6 +1420,86 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
440
 	--------------------------- */
441
 	/* All fields are initialized by mem_zalloc(). */
442
 
443
+	if (srv_buffer_pool_shm_key) {
444
+		buf_shm_info_t*	shm_info;
445
+
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
446
+		ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
447
+		shm_info = chunk->mem;
448
+
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
449
+		buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset);
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
450
+
451
+		if(shm_info->is_new) {
452
+			shm_info->is_new = FALSE; /* initialization was finished */
453
+		} else {
454
+			buf_block_t*	block = chunk->blocks;
455
+			buf_page_t*	b;
456
+
457
+			/* shm_info->buf_pool_backup should already have been converted */
458
+			/* in buf_chunk_init(), so simply copy it. */
459
+			buf_pool->flush_list 		= shm_info->buf_pool_backup.flush_list;
460
+			buf_pool->freed_page_clock 	= shm_info->buf_pool_backup.freed_page_clock;
461
+			buf_pool->free			= shm_info->buf_pool_backup.free;
462
+			buf_pool->LRU			= shm_info->buf_pool_backup.LRU;
463
+			buf_pool->LRU_old		= shm_info->buf_pool_backup.LRU_old;
464
+			buf_pool->LRU_old_len		= shm_info->buf_pool_backup.LRU_old_len;
465
+			buf_pool->unzip_LRU		= shm_info->buf_pool_backup.unzip_LRU;
466
+			buf_pool->zip_clean		= shm_info->buf_pool_backup.zip_clean;
467
+			for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
468
+				buf_pool->zip_free[i]	= shm_info->buf_pool_backup.zip_free[i];
469
+			}
470
+
471
+			for (i = 0; i < chunk->size; i++, block++) {
472
+				if (buf_block_get_state(block)
473
+				    == BUF_BLOCK_FILE_PAGE) {
474
+					ut_d(block->page.in_page_hash = TRUE);
475
+					HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
476
+						    buf_page_address_fold(
477
+							    block->page.space,
478
+							    block->page.offset),
479
+						    &block->page);
480
+				}
481
+			}
482
+
483
+			for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
484
+			     b = UT_LIST_GET_NEXT(zip_list, b)) {
485
+				ut_ad(!b->in_flush_list);
486
+				ut_ad(b->in_LRU_list);
487
+
488
+				ut_d(b->in_page_hash = TRUE);
489
+				HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
490
+					    buf_page_address_fold(b->space, b->offset), b);
491
+			}
492
+
493
+			for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
494
+			     b = UT_LIST_GET_NEXT(flush_list, b)) {
495
+				ut_ad(b->in_flush_list);
496
+				ut_ad(b->in_LRU_list);
497
+
498
+				switch (buf_page_get_state(b)) {
499
+				case BUF_BLOCK_ZIP_DIRTY:
500
+					ut_d(b->in_page_hash = TRUE);
501
+					HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
502
+						    buf_page_address_fold(b->space,
503
+							    		  b->offset), b);
504
+					break;
505
+				case BUF_BLOCK_FILE_PAGE:
506
+					/* uncompressed page */
507
+					break;
508
+				case BUF_BLOCK_ZIP_FREE:
509
+				case BUF_BLOCK_ZIP_PAGE:
510
+				case BUF_BLOCK_NOT_USED:
511
+				case BUF_BLOCK_READY_FOR_USE:
512
+				case BUF_BLOCK_MEMORY:
513
+				case BUF_BLOCK_REMOVE_HASH:
514
+					ut_error;
515
+					break;
516
+				}
517
+			}
518
+
519
+
520
+		}
521
+	}
522
+
523
 	mutex_exit(&LRU_list_mutex);
524
 	rw_lock_x_unlock(&page_hash_latch);
525
 	buf_pool_mutex_exit();
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
526
@@ -1102,6 +1524,34 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
527
 	buf_chunk_t*	chunk;
528
 	buf_chunk_t*	chunks;
529
 
530
+	if (srv_buffer_pool_shm_key) {
531
+		buf_shm_info_t*	shm_info;
532
+
533
+		ut_a(buf_pool->n_chunks == 1);
534
+
535
+		chunk = buf_pool->chunks;
536
+		shm_info = chunk->mem;
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
537
+		ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
538
+
539
+		/* Validate that the shared memory segment doesn't have unrecoverable contents. */
540
+		/* Currently, this validation is no longer needed. */
541
+		shm_info->reusable = TRUE;
542
+
543
+		memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t));
544
+		memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t));
545
+
546
+		if (srv_fast_shutdown < 2) {
119.1.1 by kinoyasu
- New static bool option innodb_buffer_pool_shm_checksum for bug643650
547
+			if (srv_buffer_pool_shm_checksum) {
548
+				shm_info->checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
549
+								       chunk->mem_size - sizeof(buf_shm_info_t));
550
+			} else {
551
+				shm_info->checksum = BUF_NO_CHECKSUM_MAGIC;
552
+			}
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
553
+			shm_info->clean = TRUE;
554
+		}
555
+
556
+		os_shm_free(chunk->mem, chunk->mem_size);
557
+	} else {
558
 	chunks = buf_pool->chunks;
559
 	chunk = chunks + buf_pool->n_chunks;
560
 
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
561
@@ -1110,10 +1560,13 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
562
 		would fail at shutdown. */
563
 		os_mem_free_large(chunk->mem, chunk->mem_size);
564
 	}
565
+	}
566
 
567
 	mem_free(buf_pool->chunks);
568
 	hash_table_free(buf_pool->page_hash);
569
+	if (!srv_buffer_pool_shm_key) {
570
 	hash_table_free(buf_pool->zip_hash);
571
+	}
572
 	mem_free(buf_pool);
573
 	buf_pool = NULL;
574
 }
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
575
@@ -1308,6 +1761,11 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
576
 	//buf_pool_mutex_enter();
577
 	mutex_enter(&LRU_list_mutex);
578
 
579
+	if (srv_buffer_pool_shm_key) {
580
+		/* Cannot support shrink */
581
+		goto func_done;
582
+	}
583
+
584
 shrink_again:
585
 	if (buf_pool->n_chunks <= 1) {
586
 
199.4.1 by kinoyasu
constant number of the buffer pool pages for same buffer_pool_size
587
@@ -1551,6 +2009,11 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
588
 buf_pool_resize(void)
589
 /*=================*/
590
 {
591
+	if (srv_buffer_pool_shm_key) {
592
+		/* Cannot support resize */
593
+		return;
594
+	}
595
+
596
 	//buf_pool_mutex_enter();
597
 	mutex_enter(&LRU_list_mutex);
598
 
599
diff -ruN a/storage/innodb_plugin/ha/hash0hash.c b/storage/innodb_plugin/ha/hash0hash.c
600
--- a/storage/innodb_plugin/ha/hash0hash.c	2010-06-04 00:49:59.000000000 +0900
601
+++ b/storage/innodb_plugin/ha/hash0hash.c	2010-07-14 16:40:16.150438366 +0900
602
@@ -128,6 +128,70 @@
603
 }
604
 
605
 /*************************************************************//**
606
+*/
607
+UNIV_INTERN
608
+ulint	/*!< out: bytes needed for a hash table header plus its cell array */
609
+hash_create_needed(
610
+/*===============*/
611
+	ulint	n)	/*!< in: table size value, passed to ut_find_prime() */
612
+{
613
+	ulint	prime;	/* number of cells the size is computed for */
614
+	ulint	offset;	/* size of hash_table_t rounded up to a multiple of 8 */
615
+
616
+	prime = ut_find_prime(n);
617
+
618
+	offset = (sizeof(hash_table_t) + 7) / 8;
619
+	offset *= 8;
620
+
621
+	return(offset + sizeof(hash_cell_t) * prime);
622
+}
623
+
624
+UNIV_INTERN
625
+void	/* initializes a hash table in caller-provided memory instead of allocating it */
626
+hash_create_init(
627
+/*=============*/
628
+	hash_table_t*	table,	/*!< out: table header; the cell array is placed right after it in the same memory */
629
+	ulint		n)	/*!< in: table size value, passed to ut_find_prime() */
630
+{
631
+	ulint	prime;	/* number of cells */
632
+	ulint	offset;	/* size of hash_table_t rounded up to a multiple of 8 */
633
+
634
+	prime = ut_find_prime(n);
635
+
636
+	offset = (sizeof(hash_table_t) + 7) / 8;
637
+	offset *= 8;
638
+
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
639
+	table->array = (hash_cell_t*)(((byte*)table) + offset);	/* same layout as hash_create_needed() accounts for */
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
640
+	table->n_cells = prime;
641
+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
642
+	table->adaptive = FALSE;
643
+# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
644
+	table->n_mutexes = 0;
645
+	table->mutexes = NULL;
646
+	table->heaps = NULL;
647
+	table->heap = NULL;
648
+	ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
649
+
650
+	/* Initialize the cell array */
651
+	hash_table_clear(table);
652
+}
653
+
654
+UNIV_INTERN
655
+void	/* re-attaches a hash table previously set up by hash_create_init(); the cells are not cleared */
656
+hash_create_reuse(
657
+/*==============*/
658
+	hash_table_t*	table)	/*!< in/out: table header located in the reused memory */
659
+{
660
+	ulint	offset;	/* size of hash_table_t rounded up to a multiple of 8 */
661
+
662
+	offset = (sizeof(hash_table_t) + 7) / 8;
663
+	offset *= 8;
664
+
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
665
+	table->array = (hash_cell_t*)(((byte*)table) + offset);	/* recompute the array pointer relative to the table's current address */
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
666
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
667
+}
668
+
669
+/*************************************************************//**
670
 Frees a hash table. */
671
 UNIV_INTERN
672
 void
673
diff -ruN a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc
674
--- a/storage/innodb_plugin/handler/ha_innodb.cc	2010-07-14 16:34:18.597725479 +0900
675
+++ b/storage/innodb_plugin/handler/ha_innodb.cc	2010-07-14 16:40:16.159323612 +0900
166.3.1 by kinoyasu
backport adaptive_checkpoint=keep_average and innodb_log_block_size from XtraDB on 5.5.7 dev-branch
676
@@ -198,6 +198,7 @@
119.1.1 by kinoyasu
- New static bool option innodb_buffer_pool_shm_checksum for bug643650
677
 static my_bool	innobase_create_status_file		= FALSE;
678
 static my_bool	innobase_stats_on_metadata		= TRUE;
679
 static my_bool	innobase_use_sys_stats_table		= FALSE;
680
+static my_bool	innobase_buffer_pool_shm_checksum	= TRUE;
681
 
682
 static char*	internal_innobase_data_file_path	= NULL;
683
 
208 by Yasufumi Kinoshita
Yasufumi patches and several are ported to 5.1.56; Note: option innodb_stats_method was removed from innodb_stats.patch, because implemented officially. And, bug733317 should be fixed before release
684
@@ -2476,6 +2477,7 @@
119.1.1 by kinoyasu
- New static bool option innodb_buffer_pool_shm_checksum for bug643650
685
 	srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
686
 	srv_use_checksums = (ibool) innobase_use_checksums;
687
 	srv_fast_checksum = (ibool) innobase_fast_checksum;
688
+	srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum;
689
 
690
 #ifdef HAVE_LARGE_PAGES
691
         if ((os_use_large_pages = (ibool) my_use_large_pages))
215.1.1 by Yasufumi Kinoshita
fix bug733317
692
@@ -11476,6 +11478,16 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
693
   "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
77 by kinoyasu
port and adjust the part of the patches to 5.1.49: But query_cache_with_comments.patch, response-time-distribution.patch, mysql_remove_eol_carret.patch, log_connection_error.patch, status_wait_query_cache_mutex.patch and sql_no_fcache.patch were not ported yet. Please port them.
694
   NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
695
 
696
+static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key,
697
+  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
108.2.1 by Vadim Tkachenko
Fix error messages for buffer pool in shm
698
+  "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.",
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
699
+  NULL, NULL, 0, 0, INT_MAX32, 0);
700
+
119.1.1 by kinoyasu
- New static bool option innodb_buffer_pool_shm_checksum for bug643650
701
+static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum,
702
+  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
703
+  "Enable buffer_pool_shm checksum validation (enabled by default).",
704
+  NULL, NULL, TRUE);
705
+
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
706
 static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
707
   PLUGIN_VAR_RQCMDARG,
708
   "Helps in performance tuning in heavily concurrent environments.",
215.1.1 by Yasufumi Kinoshita
fix bug733317
709
@@ -11764,6 +11776,8 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
710
   MYSQL_SYSVAR(additional_mem_pool_size),
711
   MYSQL_SYSVAR(autoextend_increment),
712
   MYSQL_SYSVAR(buffer_pool_size),
713
+  MYSQL_SYSVAR(buffer_pool_shm_key),
119.1.1 by kinoyasu
- New static bool option innodb_buffer_pool_shm_checksum for bug643650
714
+  MYSQL_SYSVAR(buffer_pool_shm_checksum),
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
715
   MYSQL_SYSVAR(checksums),
716
   MYSQL_SYSVAR(fast_checksum),
717
   MYSQL_SYSVAR(commit_concurrency),
718
diff -ruN a/storage/innodb_plugin/handler/innodb_patch_info.h b/storage/innodb_plugin/handler/innodb_patch_info.h
719
--- a/storage/innodb_plugin/handler/innodb_patch_info.h	2010-07-14 16:34:18.603733950 +0900
720
+++ b/storage/innodb_plugin/handler/innodb_patch_info.h	2010-07-14 16:40:16.164323927 +0900
721
@@ -47,5 +47,6 @@
722
 {"innodb_fast_checksum","Using the checksum on 32bit-unit calculation","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"},
723
 {"innodb_files_extend","allow >4GB transaction log files, and can vary universal page size of datafiles","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"},
724
 {"innodb_sys_tables_sys_indexes","Expose InnoDB SYS_TABLES and SYS_INDEXES schema tables","","http://www.percona.com/docs/wiki/percona-xtradb"},
725
+{"innodb_buffer_pool_shm","Put buffer pool contents to shared memory segment and reuse it at clean restart [experimental]","","http://www.percona.com/docs/wiki/percona-xtradb"},
726
 {NULL, NULL, NULL, NULL}
727
 };
728
diff -ruN a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h
729
--- a/storage/innodb_plugin/include/buf0buf.h	2010-07-14 16:33:23.823323393 +0900
730
+++ b/storage/innodb_plugin/include/buf0buf.h	2010-07-14 16:40:16.166323436 +0900
71 by kinoyasu
adjust innodb_buffer_pool_shm.patch to be built with UNIV_DEBUG definition
731
@@ -36,6 +36,7 @@
732
 #include "ut0rbt.h"
733
 #ifndef UNIV_HOTBACKUP
734
 #include "os0proc.h"
735
+#include "srv0srv.h"
736
 
737
 /** @name Modes for buf_page_get_gen */
738
 /* @{ */
208 by Yasufumi Kinoshita
Yasufumi patches and several are ported to 5.1.56; Note: option innodb_stats_method was removed from innodb_stats.patch, because implemented officially. And, bug733317 should be fixed before release
739
@@ -1300,7 +1301,10 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
740
 /**********************************************************************//**
741
 Compute the hash fold value for blocks in buf_pool->zip_hash. */
742
 /* @{ */
743
-#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
744
+/* when srv_buffer_pool_shm_key is set, fold the offset from the first block's frame instead of the absolute address */
745
+#define BUF_POOL_ZIP_FOLD_PTR(ptr) (!srv_buffer_pool_shm_key\
746
+					?((ulint) (ptr) / UNIV_PAGE_SIZE)\
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
747
+					:((ulint) ((byte*)ptr - (byte*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE))
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
748
 #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
749
 #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
750
 /* @} */
751
diff -ruN a/storage/innodb_plugin/include/hash0hash.h b/storage/innodb_plugin/include/hash0hash.h
752
--- a/storage/innodb_plugin/include/hash0hash.h	2010-06-04 00:49:59.000000000 +0900
753
+++ b/storage/innodb_plugin/include/hash0hash.h	2010-07-14 16:40:16.168323262 +0900
754
@@ -49,6 +49,28 @@
755
 hash_create(
756
 /*========*/
757
 	ulint	n);	/*!< in: number of array cells */
758
+
759
+/*************************************************************//**
760
+*/
761
+UNIV_INTERN
762
+ulint
763
+hash_create_needed(
764
+/*===============*/
765
+	ulint	n);
766
+
767
+UNIV_INTERN
768
+void
769
+hash_create_init(
770
+/*=============*/
771
+	hash_table_t*	table,
772
+	ulint		n);
773
+
774
+UNIV_INTERN
775
+void
776
+hash_create_reuse(
777
+/*==============*/
778
+	hash_table_t*	table);
779
+
780
 #ifndef UNIV_HOTBACKUP
781
 /*************************************************************//**
782
 Creates a mutex array to protect a hash table. */
783
@@ -327,6 +349,33 @@
784
 		}\
785
 	}\
786
 } while (0)
787
+
788
+/********************************************************************//**
789
+Relocates the node pointers of a hash table: FOFFSET is added to pointers above FADDR, BOFFSET to the others.*/
790
+#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \
791
+do {\
792
+	ulint		i2222;\
793
+	ulint		cell_count2222;\
794
+\
795
+	cell_count2222 = hash_get_n_cells(TABLE);\
796
+\
797
+	for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
798
+		NODE_TYPE*	node2222;\
799
+\
800
+		if ((TABLE)->array[i2222].node) \
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
801
+			(TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \
802
+			+ (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
803
+		node2222 = HASH_GET_FIRST((TABLE), i2222);\
804
+\
805
+		while (node2222) {\
806
+			if (node2222->PTR_NAME) \
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
807
+				node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \
808
+				+ ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
809
+\
810
+			node2222 = node2222->PTR_NAME;\
811
+		}\
812
+	}\
813
+} while (0)
814
 
815
 /************************************************************//**
816
 Gets the mutex index for a fold value in a hash table.
817
diff -ruN a/storage/innodb_plugin/include/os0proc.h b/storage/innodb_plugin/include/os0proc.h
818
--- a/storage/innodb_plugin/include/os0proc.h	2010-06-04 00:49:59.000000000 +0900
819
+++ b/storage/innodb_plugin/include/os0proc.h	2010-07-14 16:40:16.169321536 +0900
820
@@ -32,6 +32,11 @@
821
 #ifdef UNIV_LINUX
822
 #include <sys/ipc.h>
823
 #include <sys/shm.h>
824
+#else
825
+# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
826
+#include <sys/ipc.h>
827
+#include <sys/shm.h>
828
+# endif
829
 #endif
830
 
831
 typedef void*			os_process_t;
832
@@ -70,6 +75,29 @@
833
 	ulint	size);			/*!< in: size returned by
834
 					os_mem_alloc_large() */
835
 
836
+
837
+/****************************************************************//**
838
+Allocates a new shared memory segment, or attaches to an existing one for reuse.
839
+The content is not cleared automatically.
840
+@return	allocated memory */
841
+UNIV_INTERN
842
+void*
843
+os_shm_alloc(
844
+/*=========*/
845
+	ulint*	n,			/*!< in/out: number of bytes */
846
+	uint	key,
847
+	ibool*	is_new);
848
+
849
+/****************************************************************//**
850
+Detach shared memory segment. */
851
+UNIV_INTERN
852
+void
853
+os_shm_free(
854
+/*========*/
855
+	void	*ptr,			/*!< in: pointer returned by
856
+					os_shm_alloc() */
857
+	ulint	size);			/*!< in: size returned by
858
+					os_shm_alloc() */
859
 #ifndef UNIV_NONINL
860
 #include "os0proc.ic"
861
 #endif
862
diff -ruN a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h
863
--- a/storage/innodb_plugin/include/srv0srv.h	2010-07-14 16:32:49.695323045 +0900
864
+++ b/storage/innodb_plugin/include/srv0srv.h	2010-07-14 16:40:16.171325784 +0900
124.1.1 by kinoyasu
fix bug649408
865
@@ -156,6 +156,10 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
866
 extern ulint	srv_mem_pool_size;
867
 extern ulint	srv_lock_table_size;
868
 
869
+extern uint	srv_buffer_pool_shm_key;
124.1.1 by kinoyasu
fix bug649408
870
+extern ibool	srv_buffer_pool_shm_is_reused;
119.1.1 by kinoyasu
- New static bool option innodb_buffer_pool_shm_checksum for bug643650
871
+extern ibool	srv_buffer_pool_shm_checksum;
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
872
+
873
 extern ibool	srv_thread_concurrency_timer_based;
874
 
875
 extern ulint	srv_n_file_io_threads;
876
diff -ruN a/storage/innodb_plugin/include/ut0lst.h b/storage/innodb_plugin/include/ut0lst.h
877
--- a/storage/innodb_plugin/include/ut0lst.h	2010-06-04 00:49:59.000000000 +0900
878
+++ b/storage/innodb_plugin/include/ut0lst.h	2010-07-14 16:40:16.172321547 +0900
879
@@ -257,5 +257,48 @@
880
 	ut_a(ut_list_node_313 == NULL);					\
881
 } while (0)
882
 
883
+/********************************************************************//**
884
+Relocates the pointers of a list whose nodes have moved in memory, then checks the list in both directions.
885
+@param NAME		the name of the list
886
+@param TYPE		node type
887
+@param BASE		base node (not a pointer to it)
888
+@param FADDR, FOFFSET, BOFFSET	pointers above FADDR are shifted by FOFFSET, all others by BOFFSET */
889
+#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET)	\
890
+do {									\
891
+	ulint	ut_list_i_313;						\
892
+	TYPE*	ut_list_node_313;					\
893
+									\
894
+	if ((BASE).start)						\
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
895
+		(BASE).start = (void*)((byte*)((BASE).start)			\
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
896
+			+ (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\
897
+	if ((BASE).end)							\
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
898
+		(BASE).end   = (void*)((byte*)((BASE).end)			\
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
899
+			+ (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\
900
+									\
901
+	ut_list_node_313 = (BASE).start;				\
902
+									\
903
+	for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {		\
904
+		ut_a(ut_list_node_313);					\
905
+		if ((ut_list_node_313->NAME).prev)			\
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
906
+			(ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
907
+				+ (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\
908
+		if ((ut_list_node_313->NAME).next)			\
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
909
+			(ut_list_node_313->NAME).next =	(void*)((byte*)((ut_list_node_313->NAME).next)\
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
910
+				+ (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\
911
+		ut_list_node_313 = (ut_list_node_313->NAME).next;	\
912
+	}								\
913
+									\
914
+	ut_a(ut_list_node_313 == NULL);					\
915
+									\
916
+	ut_list_node_313 = (BASE).end;					\
917
+									\
918
+	for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {		\
919
+		ut_a(ut_list_node_313);					\
920
+		ut_list_node_313 = (ut_list_node_313->NAME).prev;	\
921
+	}								\
922
+									\
923
+	ut_a(ut_list_node_313 == NULL);					\
924
+} while (0)
925
+
926
 #endif
927
 
124.1.1 by kinoyasu
fix bug649408
928
diff -ruN a/storage/innodb_plugin/log/log0recv.c b/storage/innodb_plugin/log/log0recv.c
929
--- a/storage/innodb_plugin/log/log0recv.c	2010-10-01 15:25:27.106299166 +0900
930
+++ b/storage/innodb_plugin/log/log0recv.c	2010-10-01 15:26:33.689261436 +0900
133 by kinoyasu
port maintainer-Yasufumi patches for 5.1.52
931
@@ -2899,6 +2899,7 @@
124.1.1 by kinoyasu
fix bug649408
932
 /*==========================*/
933
 {
934
 	ut_a(!recv_needed_recovery);
935
+	ut_a(!srv_buffer_pool_shm_is_reused);
936
 
937
 	recv_needed_recovery = TRUE;
938
 
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
939
diff -ruN a/storage/innodb_plugin/os/os0proc.c b/storage/innodb_plugin/os/os0proc.c
940
--- a/storage/innodb_plugin/os/os0proc.c	2010-06-04 00:49:59.000000000 +0900
941
+++ b/storage/innodb_plugin/os/os0proc.c	2010-07-14 16:40:16.174322953 +0900
942
@@ -229,3 +229,173 @@
943
 	}
944
 #endif
945
 }
946
+
947
+/****************************************************************//**
948
+Allocates a new shared memory segment, or attaches to an existing one for reuse.
949
+The content is not cleared automatically.
950
+@return	allocated memory */
951
+UNIV_INTERN
952
+void*
953
+os_shm_alloc(
954
+/*=========*/
955
+	ulint*	n,			/*!< in/out: number of bytes */
956
+	uint	key,
957
+	ibool*	is_new)
958
+{
959
+	void*	ptr;
115.1.1 by kinoyasu
fix innodb_buffer_pool_shm.patch for -Wpointer-arith warnings of GCC
960
+#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
961
+	ulint	size;
962
+	int	shmid;
963
+
964
+	*is_new = FALSE;
965
+	fprintf(stderr,
108.2.1 by Vadim Tkachenko
Fix error messages for buffer pool in shm
966
+		"InnoDB: The shared memory segment containing the buffer pool is: key  %#x (%d).\n",
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
967
+		key, key);
968
+# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
969
+	if (!os_use_large_pages || !os_large_page_size) {
970
+		goto skip;
971
+	}
972
+
973
+	/* Align block size to os_large_page_size */
974
+	ut_ad(ut_is_2pow(os_large_page_size));
975
+	size = ut_2pow_round(*n + (os_large_page_size - 1),
976
+			     os_large_page_size);
977
+
978
+	shmid = shmget((key_t)key, (size_t)size,
979
+			IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W);
980
+	if (shmid < 0) {
981
+		if (errno == EEXIST) {
982
+			fprintf(stderr,
108.2.1 by Vadim Tkachenko
Fix error messages for buffer pool in shm
983
+				"InnoDB: HugeTLB: The shared memory segment exists.\n");
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
984
+			shmid = shmget((key_t)key, (size_t)size,
985
+					SHM_HUGETLB | SHM_R | SHM_W);
986
+			if (shmid < 0) {
987
+				fprintf(stderr,
108.2.1 by Vadim Tkachenko
Fix error messages for buffer pool in shm
988
+					"InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
989
+					size, errno);
990
+				goto skip;
991
+			} else {
992
+				fprintf(stderr,
993
+					"InnoDB: HugeTLB: The existent shared memory segment is used.\n");
994
+			}
995
+		} else {
996
+			fprintf(stderr,
108.2.1 by Vadim Tkachenko
Fix error messages for buffer pool in shm
997
+				"InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
998
+				size, errno);
999
+			goto skip;
1000
+		}
1001
+	} else {
1002
+		*is_new = TRUE;
1003
+		fprintf(stderr,
108.2.1 by Vadim Tkachenko
Fix error messages for buffer pool in shm
1004
+			"InnoDB: HugeTLB: A new shared memory segment has been created .\n");
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
1005
+	}
1006
+
1007
+	ptr = shmat(shmid, NULL, 0);
1008
+	if (ptr == (void *)-1) {
1009
+		fprintf(stderr,
1010
+			"InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n",
1011
+			errno);
1012
+		ptr = NULL;
1013
+	}
1014
+
1015
+	if (ptr) {
1016
+		*n = size;
1017
+		os_fast_mutex_lock(&ut_list_mutex);
1018
+		ut_total_allocated_memory += size;
1019
+		os_fast_mutex_unlock(&ut_list_mutex);
1020
+		UNIV_MEM_ALLOC(ptr, size);
1021
+		return(ptr);
1022
+	}
1023
+skip:
1024
+	*is_new = FALSE;
1025
+# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */
1026
+# ifdef HAVE_GETPAGESIZE
1027
+	size = getpagesize();
1028
+# else
1029
+	size = UNIV_PAGE_SIZE;
1030
+# endif
1031
+	/* Align block size to system page size */
1032
+	ut_ad(ut_is_2pow(size));
1033
+	size = *n = ut_2pow_round(*n + (size - 1), size);
1034
+
1035
+	shmid = shmget((key_t)key, (size_t)size,
1036
+			IPC_CREAT | IPC_EXCL | SHM_R | SHM_W);
1037
+	if (shmid < 0) {
1038
+		if (errno == EEXIST) {
1039
+			fprintf(stderr,
108.2.1 by Vadim Tkachenko
Fix error messages for buffer pool in shm
1040
+				"InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n");
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
1041
+			shmid = shmget((key_t)key, (size_t)size,
1042
+					SHM_R | SHM_W);
1043
+			if (shmid < 0) {
1044
+				fprintf(stderr,
108.2.1 by Vadim Tkachenko
Fix error messages for buffer pool in shm
1045
+					"InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
1046
+					size, errno);
1047
+				ptr = NULL;
1048
+				goto end;
1049
+			} else {
1050
+				fprintf(stderr,
1051
+					"InnoDB: The existent shared memory segment is used.\n");
1052
+			}
1053
+		} else {
1054
+			fprintf(stderr,
108.2.1 by Vadim Tkachenko
Fix error messages for buffer pool in shm
1055
+				"InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
1056
+				size, errno);
1057
+			ptr = NULL;
1058
+			goto end;
1059
+		}
1060
+	} else {
1061
+		*is_new = TRUE;
1062
+		fprintf(stderr,
108.2.1 by Vadim Tkachenko
Fix error messages for buffer pool in shm
1063
+			"InnoDB: A new shared memory segment has been created.\n");
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
1064
+	}
1065
+
1066
+	ptr = shmat(shmid, NULL, 0);
1067
+	if (ptr == (void *)-1) {
1068
+		fprintf(stderr,
1069
+			"InnoDB: Warning: Failed to attach shared memory segment, errno %d\n",
1070
+			errno);
1071
+		ptr = NULL;
1072
+	}
1073
+
1074
+	if (ptr) {
1075
+		*n = size;
1076
+		os_fast_mutex_lock(&ut_list_mutex);
1077
+		ut_total_allocated_memory += size;
1078
+		os_fast_mutex_unlock(&ut_list_mutex);
1079
+		UNIV_MEM_ALLOC(ptr, size);
1080
+	}
1081
+end:
1082
+#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1083
+	fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
1084
+	ptr = NULL;
1085
+#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1086
+	return(ptr);
1087
+}
1088
+
1089
+/****************************************************************//**
1090
+Detach shared memory segment. */
1091
+UNIV_INTERN
1092
+void
1093
+os_shm_free(
1094
+/*========*/
1095
+	void	*ptr,			/*!< in: pointer returned by
1096
+					os_shm_alloc() */
1097
+	ulint	size)			/*!< in: size returned by
1098
+					os_shm_alloc() */
1099
+{
1100
+	os_fast_mutex_lock(&ut_list_mutex);
1101
+	ut_a(ut_total_allocated_memory >= size);
1102
+	os_fast_mutex_unlock(&ut_list_mutex);
1103
+
1104
+#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
1105
+	if (!shmdt(ptr)) {
1106
+		os_fast_mutex_lock(&ut_list_mutex);
1107
+		ut_a(ut_total_allocated_memory >= size);
1108
+		ut_total_allocated_memory -= size;
1109
+		os_fast_mutex_unlock(&ut_list_mutex);
1110
+		UNIV_MEM_FREE(ptr, size);
1111
+	}
1112
+#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1113
+	fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
1114
+#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1115
+}
1116
diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
1117
--- a/storage/innodb_plugin/srv/srv0srv.c	2010-07-14 16:33:23.848391648 +0900
1118
+++ b/storage/innodb_plugin/srv/srv0srv.c	2010-07-14 16:40:16.177323553 +0900
124.1.1 by kinoyasu
fix bug649408
1119
@@ -211,6 +211,11 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
1120
 UNIV_INTERN ulint	srv_mem_pool_size	= ULINT_MAX;
1121
 UNIV_INTERN ulint	srv_lock_table_size	= ULINT_MAX;
1122
 
1123
+/* key value for shm */
1124
+UNIV_INTERN uint	srv_buffer_pool_shm_key	= 0;
124.1.1 by kinoyasu
fix bug649408
1125
+UNIV_INTERN ibool	srv_buffer_pool_shm_is_reused = FALSE;
119.1.1 by kinoyasu
- New static bool option innodb_buffer_pool_shm_checksum for bug643650
1126
+UNIV_INTERN ibool	srv_buffer_pool_shm_checksum = TRUE;
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
1127
+
1128
 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
1129
 instead. */
1130
 UNIV_INTERN ulint	srv_n_file_io_threads	= ULINT_MAX;
1131
diff -ruN a/storage/innodb_plugin/srv/srv0start.c b/storage/innodb_plugin/srv/srv0start.c
1132
--- a/storage/innodb_plugin/srv/srv0start.c	2010-07-14 16:33:23.851391514 +0900
1133
+++ b/storage/innodb_plugin/srv/srv0start.c	2010-07-14 16:40:16.180321173 +0900
175 by kinoyasu
Yasufumi patches are ported to 5.1.54
1134
@@ -1744,6 +1744,8 @@
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
1135
 		Note that this is not as heavy weight as it seems. At
1136
 		this point there will be only ONE page in the buf_LRU
1137
 		and there must be no page in the buf_flush list. */
124.1.1 by kinoyasu
fix bug649408
1138
+		/* buffer_pool_shm should not be reused when recovery was needed. */
1139
+		if (!srv_buffer_pool_shm_is_reused)
63.1.1 by kinoyasu
add innodb_buffer_pool_shm_key option
1140
 		buf_pool_invalidate();
1141
 
1142
 		/* We always try to do a recovery, even if the database had