1
%%% ``The contents of this file are subject to the Erlang Public License,
2
%%% Version 1.1, (the "License"); you may not use this file except in
3
%%% compliance with the License. You should have received a copy of the
4
%%% Erlang Public License along with this software. If not, it can be
5
%%% retrieved via the world wide web at http://www.erlang.org/.
7
%%% Software distributed under the License is distributed on an "AS IS"
8
%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
9
%%% the License for the specific language governing rights and limitations
10
%%% under the License.
12
%%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
13
%%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
14
%%% AB. All Rights Reserved.''
16
%%% $Id: mnesia_frag.erl,v 1.1 2008/12/17 09:53:38 mikpe Exp $
18
%%%----------------------------------------------------------------------
19
%%% Purpose : Support tables so large that they need
20
%%% to be divided into several fragments.
21
%%%----------------------------------------------------------------------
26
-behaviour(mnesia_access).
28
%% Callback functions when accessed within an activity
31
write/5, delete/5, delete_object/5,
32
read/5, match_object/5, all_keys/4,
34
index_match_object/6, index_read/6,
53
-include("mnesia.hrl").
55
-define(OLD_HASH_MOD, mnesia_frag_old_hash).
56
-define(DEFAULT_HASH_MOD, mnesia_frag_hash).
57
%%-define(DEFAULT_HASH_MOD, ?OLD_HASH_MOD). %% BUGBUG: New should be default
65
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
70
%% Callback functions which provides transparent
71
%% access of fragmented tables from any activity
74
lock(ActivityId, Opaque, {table , Tab}, LockKind) ->
75
case frag_names(Tab) of
77
mnesia:lock(ActivityId, Opaque, {table, Tab}, LockKind);
79
DeepNs = [mnesia:lock(ActivityId, Opaque, {table, F}, LockKind) ||
81
mnesia_lib:uniq(lists:append(DeepNs))
84
lock(ActivityId, Opaque, LockItem, LockKind) ->
85
mnesia:lock(ActivityId, Opaque, LockItem, LockKind).
87
write(ActivityId, Opaque, Tab, Rec, LockKind) ->
88
Frag = record_to_frag_name(Tab, Rec),
89
mnesia:write(ActivityId, Opaque, Frag, Rec, LockKind).
91
delete(ActivityId, Opaque, Tab, Key, LockKind) ->
92
Frag = key_to_frag_name(Tab, Key),
93
mnesia:delete(ActivityId, Opaque, Frag, Key, LockKind).
95
delete_object(ActivityId, Opaque, Tab, Rec, LockKind) ->
96
Frag = record_to_frag_name(Tab, Rec),
97
mnesia:delete_object(ActivityId, Opaque, Frag, Rec, LockKind).
99
read(ActivityId, Opaque, Tab, Key, LockKind) ->
100
Frag = key_to_frag_name(Tab, Key),
101
mnesia:read(ActivityId, Opaque, Frag, Key, LockKind).
103
match_object(ActivityId, Opaque, Tab, HeadPat, LockKind) ->
104
MatchSpec = [{HeadPat, [], ['$_']}],
105
select(ActivityId, Opaque, Tab, MatchSpec, LockKind).
107
select(ActivityId, Opaque, Tab, MatchSpec, LockKind) ->
108
do_select(ActivityId, Opaque, Tab, MatchSpec, LockKind).
110
all_keys(ActivityId, Opaque, Tab, LockKind) ->
111
Match = [mnesia:all_keys(ActivityId, Opaque, Frag, LockKind)
112
|| Frag <- frag_names(Tab)],
115
index_match_object(ActivityId, Opaque, Tab, Pat, Attr, LockKind) ->
117
[mnesia:index_match_object(ActivityId, Opaque, Frag, Pat, Attr, LockKind)
118
|| Frag <- frag_names(Tab)],
121
index_read(ActivityId, Opaque, Tab, Key, Attr, LockKind) ->
123
[mnesia:index_read(ActivityId, Opaque, Frag, Key, Attr, LockKind)
124
|| Frag <- frag_names(Tab)],
127
foldl(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
128
Fun2 = fun(Frag, A) ->
129
mnesia:foldl(ActivityId, Opaque, Fun, A, Frag, LockKind)
131
lists:foldl(Fun2, Acc, frag_names(Tab)).
133
foldr(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
134
Fun2 = fun(Frag, A) ->
135
mnesia:foldr(ActivityId, Opaque, Fun, A, Frag, LockKind)
137
lists:foldr(Fun2, Acc, frag_names(Tab)).
139
table_info(ActivityId, Opaque, {Tab, Key}, Item) ->
140
Frag = key_to_frag_name(Tab, Key),
141
table_info2(ActivityId, Opaque, Tab, Frag, Item);
142
table_info(ActivityId, Opaque, Tab, Item) ->
143
table_info2(ActivityId, Opaque, Tab, Tab, Item).
145
table_info2(ActivityId, Opaque, Tab, Frag, Item) ->
148
SumFun = fun({_, Size}, Acc) -> Acc + Size end,
149
lists:foldl(SumFun, 0, frag_size(ActivityId, Opaque, Tab));
151
SumFun = fun({_, Size}, Acc) -> Acc + Size end,
152
lists:foldl(SumFun, 0, frag_memory(ActivityId, Opaque, Tab));
154
lookup_prop(Tab, base_table);
156
lookup_prop(Tab, node_pool);
158
FH = lookup_frag_hash(Tab),
159
FH#frag_state.n_fragments;
161
FH = lookup_frag_hash(Tab),
162
FH#frag_state.foreign_key;
164
lookup_foreigners(Tab);
166
length(val({Tab, ram_copies}));
168
length(val({Tab, disc_copies}));
169
n_disc_only_copies ->
170
length(val({Tab, disc_only_copies}));
177
frag_size(ActivityId, Opaque, Tab);
179
frag_memory(ActivityId, Opaque, Tab);
181
mnesia:table_info(ActivityId, Opaque, Frag, Item)
185
frag_size(ActivityId, Opaque, Tab) ->
186
[{F, remote_table_info(ActivityId, Opaque, F, size)} || F <- frag_names(Tab)].
188
frag_memory(ActivityId, Opaque, Tab) ->
189
[{F, remote_table_info(ActivityId, Opaque, F, memory)} || F <- frag_names(Tab)].
193
remote_table_info(ActivityId, Opaque, Tab, Item) ->
194
N = val({Tab, where_to_read}),
195
case rpc:call(N, mnesia, table_info, [ActivityId, Opaque, Tab, Item]) of
197
mnesia:abort({no_exists, Tab, Item});
202
do_select(ActivityId, Opaque, Tab, MatchSpec, LockKind) ->
203
case ?catch_val({Tab, frag_hash}) of
205
mnesia:select(ActivityId, Opaque, Tab, MatchSpec, LockKind);
207
HashState = FH#frag_state.hash_state,
209
case FH#frag_state.hash_module of
210
HashMod when HashMod == ?DEFAULT_HASH_MOD ->
211
?DEFAULT_HASH_MOD:match_spec_to_frag_numbers(HashState, MatchSpec);
213
HashMod:match_spec_to_frag_numbers(HashState, MatchSpec)
215
N = FH#frag_state.n_fragments,
216
VerifyFun = fun(F) when integer(F), F >= 1, F =< N -> false;
219
case catch lists:filter(VerifyFun, FragNumbers) of
222
Name = n_to_frag_name(Tab, Num),
223
Node = val({Name, where_to_read}),
224
mnesia:lock(ActivityId, Opaque, {table, Name}, LockKind),
227
NameNodes = lists:map(Fun, FragNumbers),
229
fun(PatchedMatchSpec) ->
230
Match = [mnesia:dirty_select(Name, PatchedMatchSpec)
231
|| {Name, _Node} <- NameNodes],
234
case [{Name, Node} || {Name, Node} <- NameNodes, Node /= node()] of
236
%% All fragments are local
237
mnesia:fun_select(ActivityId, Opaque, Tab, MatchSpec, none, '_', SelectAllFun);
240
fun(PatchedMatchSpec) ->
242
Args = [self(), Ref, RemoteNameNodes, PatchedMatchSpec],
243
Pid = spawn_link(?MODULE, local_select, Args),
244
LocalMatch = [mnesia:dirty_select(Name, PatchedMatchSpec)
245
|| {Name, Node} <- NameNodes, Node == node()],
246
OldSelectFun = fun() -> SelectAllFun(PatchedMatchSpec) end,
247
local_collect(Ref, Pid, lists:append(LocalMatch), OldSelectFun)
249
mnesia:fun_select(ActivityId, Opaque, Tab, MatchSpec, none, '_', SelectFun)
252
mnesia:abort({"match_spec_to_frag_numbers: Fragment numbers out of range",
253
BadFrags, {range, 1, N}})
257
local_select(ReplyTo, Ref, RemoteNameNodes, MatchSpec) ->
258
RemoteNodes = mnesia_lib:uniq([Node || {_Name, Node} <- RemoteNameNodes]),
259
Args = [ReplyTo, Ref, RemoteNameNodes, MatchSpec],
260
{Replies, BadNodes} = rpc:multicall(RemoteNodes, ?MODULE, remote_select, Args),
261
case mnesia_lib:uniq(Replies) -- [ok] of
262
[] when BadNodes == [] ->
263
ReplyTo ! {local_select, Ref, ok};
264
_ when BadNodes /= [] ->
265
ReplyTo ! {local_select, Ref, {error, {node_not_running, hd(BadNodes)}}};
266
[{badrpc, {'EXIT', Reason}} | _] ->
267
ReplyTo ! {local_select, Ref, {error, Reason}};
269
ReplyTo ! {local_select, Ref, {error, Reason}}
274
remote_select(ReplyTo, Ref, NameNodes, MatchSpec) ->
275
do_remote_select(ReplyTo, Ref, NameNodes, MatchSpec).
277
do_remote_select(ReplyTo, Ref, [{Name, Node} | NameNodes], MatchSpec) ->
280
Res = (catch {ok, mnesia:dirty_select(Name, MatchSpec)}),
281
ReplyTo ! {remote_select, Ref, Node, Res},
282
do_remote_select(ReplyTo, Ref, NameNodes, MatchSpec);
284
do_remote_select(ReplyTo, Ref, NameNodes, MatchSpec)
286
do_remote_select(_ReplyTo, _Ref, [], _MatchSpec) ->
289
local_collect(Ref, Pid, LocalMatch, OldSelectFun) ->
291
{local_select, Ref, LocalRes} ->
292
remote_collect(Ref, LocalRes, LocalMatch, OldSelectFun);
293
{'EXIT', Pid, Reason} ->
294
remote_collect(Ref, {error, Reason}, [], OldSelectFun)
297
remote_collect(Ref, LocalRes = ok, Acc, OldSelectFun) ->
299
{remote_select, Ref, Node, RemoteRes} ->
302
remote_collect(Ref, LocalRes, RemoteMatch ++ Acc, OldSelectFun);
304
remote_collect(Ref, {error, {node_not_running, Node}}, [], OldSelectFun)
309
remote_collect(Ref, LocalRes = {error, Reason}, _Acc, OldSelectFun) ->
311
{remote_select, Ref, _Node, _RemoteRes} ->
312
remote_collect(Ref, LocalRes, [], OldSelectFun)
317
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
318
%% Returns a list of cstructs
320
expand_cstruct(Cs) ->
321
expand_cstruct(Cs, create).
323
expand_cstruct(Cs, Mode) ->
324
Tab = Cs#cstruct.name,
325
Props = Cs#cstruct.frag_properties,
326
mnesia_schema:verify({alt, [nil, list]}, mnesia_lib:etype(Props),
327
{badarg, Tab, Props}),
329
ValidKeys = [foreign_key, n_fragments, node_pool,
330
n_ram_copies, n_disc_copies, n_disc_only_copies,
331
hash_module, hash_state],
332
Keys = mnesia_schema:check_keys(Tab, Props, ValidKeys),
333
mnesia_schema:check_duplicates(Tab, Keys),
335
%% Pick fragmentation props
336
ForeignKey = mnesia_schema:pick(Tab, foreign_key, Props, undefined),
337
{ForeignKey2, N, Pool, DefaultNR, DefaultND, DefaultNDO} =
338
pick_props(Tab, Cs, ForeignKey),
341
BadPool = {bad_type, Tab, {node_pool, Pool}},
342
mnesia_schema:verify(list, mnesia_lib:etype(Pool), BadPool),
343
NotAtom = fun(A) when atom(A) -> false;
346
mnesia_schema:verify([], [P || P <- Pool, NotAtom(P)], BadPool),
348
NR = mnesia_schema:pick(Tab, n_ram_copies, Props, 0),
349
ND = mnesia_schema:pick(Tab, n_disc_copies, Props, 0),
350
NDO = mnesia_schema:pick(Tab, n_disc_only_copies, Props, 0),
352
PosInt = fun(I) when integer(I), I >= 0 -> true;
355
mnesia_schema:verify(true, PosInt(NR),
356
{bad_type, Tab, {n_ram_copies, NR}}),
357
mnesia_schema:verify(true, PosInt(ND),
358
{bad_type, Tab, {n_disc_copies, ND}}),
359
mnesia_schema:verify(true, PosInt(NDO),
360
{bad_type, Tab, {n_disc_only_copies, NDO}}),
362
%% Verify n_fragments
363
Cs2 = verify_n_fragments(N, Cs, Mode),
365
%% Verify hash callback
366
HashMod = mnesia_schema:pick(Tab, hash_module, Props, ?DEFAULT_HASH_MOD),
367
HashState = mnesia_schema:pick(Tab, hash_state, Props, undefined),
368
HashState2 = HashMod:init_state(Tab, HashState), %% BUGBUG: Catch?
370
FH = #frag_state{foreign_key = ForeignKey2,
372
hash_module = HashMod,
373
hash_state = HashState2},
375
NR == 0, ND == 0, NDO == 0 ->
376
do_expand_cstruct(Cs2, FH, N, Pool, DefaultNR, DefaultND, DefaultNDO, Mode);
378
do_expand_cstruct(Cs2, FH, N, Pool, NR, ND, NDO, Mode)
381
do_expand_cstruct(Cs, FH, N, Pool, NR, ND, NDO, Mode) ->
382
Tab = Cs#cstruct.name,
384
LC = Cs#cstruct.local_content,
385
mnesia_schema:verify(false, LC,
386
{combine_error, Tab, {local_content, LC}}),
388
Snmp = Cs#cstruct.snmp,
389
mnesia_schema:verify([], Snmp,
390
{combine_error, Tab, {snmp, Snmp}}),
392
%% Add empty fragments
393
CommonProps = [{base_table, Tab}],
394
Cs2 = Cs#cstruct{frag_properties = lists:sort(CommonProps)},
395
expand_frag_cstructs(N, NR, ND, NDO, Cs2, Pool, Pool, FH, Mode).
397
verify_n_fragments(N, Cs, Mode) when integer(N), N >= 1 ->
400
Cs#cstruct{ram_copies = [],
402
disc_only_copies = []};
404
Reason = {combine_error, Cs#cstruct.name, {n_fragments, N}},
405
mnesia_schema:verify(1, N, Reason),
408
verify_n_fragments(N, Cs, _Mode) ->
409
mnesia:abort({bad_type, Cs#cstruct.name, {n_fragments, N}}).
411
pick_props(Tab, Cs, {ForeignTab, Attr}) ->
412
mnesia_schema:verify(true, ForeignTab /= Tab,
413
{combine_error, Tab, {ForeignTab, Attr}}),
414
Props = Cs#cstruct.frag_properties,
415
Attrs = Cs#cstruct.attributes,
417
ForeignKey = lookup_prop(ForeignTab, foreign_key),
418
ForeignN = lookup_prop(ForeignTab, n_fragments),
419
ForeignPool = lookup_prop(ForeignTab, node_pool),
420
N = mnesia_schema:pick(Tab, n_fragments, Props, ForeignN),
421
Pool = mnesia_schema:pick(Tab, node_pool, Props, ForeignPool),
423
mnesia_schema:verify(ForeignN, N,
424
{combine_error, Tab, {n_fragments, N},
425
ForeignTab, {n_fragments, ForeignN}}),
427
mnesia_schema:verify(ForeignPool, Pool,
428
{combine_error, Tab, {node_pool, Pool},
429
ForeignTab, {node_pool, ForeignPool}}),
431
mnesia_schema:verify(undefined, ForeignKey,
433
"Multiple levels of foreign_key dependencies",
434
{ForeignTab, Attr}, ForeignKey}),
436
Key = {ForeignTab, mnesia_schema:attr_to_pos(Attr, Attrs)},
437
DefaultNR = length(val({ForeignTab, ram_copies})),
438
DefaultND = length(val({ForeignTab, disc_copies})),
439
DefaultNDO = length(val({ForeignTab, disc_only_copies})),
440
{Key, N, Pool, DefaultNR, DefaultND, DefaultNDO};
441
pick_props(Tab, Cs, undefined) ->
442
Props = Cs#cstruct.frag_properties,
444
DefaultPool = mnesia:system_info(db_nodes),
445
N = mnesia_schema:pick(Tab, n_fragments, Props, DefaultN),
446
Pool = mnesia_schema:pick(Tab, node_pool, Props, DefaultPool),
450
{undefined, N, Pool, DefaultNR, DefaultND, DefaultNDO};
451
pick_props(Tab, _Cs, BadKey) ->
452
mnesia:abort({bad_type, Tab, {foreign_key, BadKey}}).
454
expand_frag_cstructs(N, NR, ND, NDO, CommonCs, Dist, Pool, FH, Mode)
455
when N > 1, Mode == create ->
456
Frag = n_to_frag_name(CommonCs#cstruct.name, N),
457
Cs = CommonCs#cstruct{name = Frag},
458
{Cs2, RevModDist, RestDist} = set_frag_nodes(NR, ND, NDO, Cs, Dist, []),
459
ModDist = lists:reverse(RevModDist),
460
Dist2 = rearrange_dist(Cs, ModDist, RestDist, Pool),
461
%% Adjusts backwards, but it doesn't matter.
462
{FH2, _FromFrags, _AdditionalWriteFrags} = adjust_before_split(FH),
463
CsList = expand_frag_cstructs(N - 1, NR, ND, NDO, CommonCs, Dist2, Pool, FH2, Mode),
465
expand_frag_cstructs(1, NR, ND, NDO, CommonCs, Dist, Pool, FH, Mode) ->
466
BaseProps = CommonCs#cstruct.frag_properties ++
467
[{foreign_key, FH#frag_state.foreign_key},
468
{hash_module, FH#frag_state.hash_module},
469
{hash_state, FH#frag_state.hash_state},
470
{n_fragments, FH#frag_state.n_fragments},
473
BaseCs = CommonCs#cstruct{frag_properties = lists:sort(BaseProps)},
478
{BaseCs2, _, _} = set_frag_nodes(NR, ND, NDO, BaseCs, Dist, []),
482
set_frag_nodes(NR, ND, NDO, Cs, [Head | Tail], Acc) when NR > 0 ->
483
Pos = #cstruct.ram_copies,
484
{Cs2, Head2} = set_frag_node(Cs, Pos, Head),
485
set_frag_nodes(NR - 1, ND, NDO, Cs2, Tail, [Head2 | Acc]);
486
set_frag_nodes(NR, ND, NDO, Cs, [Head | Tail], Acc) when ND > 0 ->
487
Pos = #cstruct.disc_copies,
488
{Cs2, Head2} = set_frag_node(Cs, Pos, Head),
489
set_frag_nodes(NR, ND - 1, NDO, Cs2, Tail, [Head2 | Acc]);
490
set_frag_nodes(NR, ND, NDO, Cs, [Head | Tail], Acc) when NDO > 0 ->
491
Pos = #cstruct.disc_only_copies,
492
{Cs2, Head2} = set_frag_node(Cs, Pos, Head),
493
set_frag_nodes(NR, ND, NDO - 1, Cs2, Tail, [Head2 | Acc]);
494
set_frag_nodes(0, 0, 0, Cs, RestDist, ModDist) ->
495
{Cs, ModDist, RestDist};
496
set_frag_nodes(_, _, _, Cs, [], _) ->
497
mnesia:abort({combine_error, Cs#cstruct.name, "Too few nodes in node_pool"}).
499
set_frag_node(Cs, Pos, Head) ->
500
Ns = element(Pos, Cs),
503
{N, Count} when atom(N), integer(Count), Count >= 0 ->
508
mnesia:abort({bad_type, Cs#cstruct.name, BadNode})
510
Cs2 = setelement(Pos, Cs, [Node | Ns]),
511
{Cs2, {Node, Count2}}.
513
rearrange_dist(Cs, [{Node, Count} | ModDist], Dist, Pool) ->
514
Dist2 = insert_dist(Cs, Node, Count, Dist, Pool),
515
rearrange_dist(Cs, ModDist, Dist2, Pool);
516
rearrange_dist(_Cs, [], Dist, _) ->
519
insert_dist(Cs, Node, Count, [Head | Tail], Pool) ->
521
{Node2, Count2} when atom(Node2), integer(Count2), Count2 >= 0 ->
522
case node_diff(Node, Count, Node2, Count2, Pool) of
524
[{Node, Count}, Head | Tail];
526
[Head | insert_dist(Cs, Node, Count, Tail, Pool)]
528
Node2 when atom(Node2) ->
529
insert_dist(Cs, Node, Count, [{Node2, 0} | Tail], Pool);
531
mnesia:abort({bad_type, Cs#cstruct.name, BadNode})
533
insert_dist(_Cs, Node, Count, [], _Pool) ->
535
insert_dist(_Cs, _Node, _Count, Dist, _Pool) ->
536
mnesia:abort({bad_type, Dist}).
538
node_diff(_Node, Count, _Node2, Count2, _Pool) when Count < Count2 ->
540
node_diff(Node, Count, Node2, Count2, Pool) when Count == Count2 ->
541
Pos = list_pos(Node, Pool, 1),
542
Pos2 = list_pos(Node2, Pool, 1),
549
node_diff(_Node, Count, _Node2, Count2, _Pool) when Count > Count2 ->
552
%% Returns position of element in list
553
list_pos(H, [H | _T], Pos) ->
555
list_pos(E, [_H | T], Pos) ->
556
list_pos(E, T, Pos + 1).
558
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
559
%% Switch function for changing of table fragmentation
561
%% Returns a list of lists of schema ops
563
change_table_frag(Tab, {activate, FragProps}) ->
564
make_activate(Tab, FragProps);
565
change_table_frag(Tab, deactivate) ->
566
make_deactivate(Tab);
567
change_table_frag(Tab, {add_frag, SortedNodes}) ->
568
make_multi_add_frag(Tab, SortedNodes);
569
change_table_frag(Tab, del_frag) ->
570
make_multi_del_frag(Tab);
571
change_table_frag(Tab, {add_node, Node}) ->
572
make_multi_add_node(Tab, Node);
573
change_table_frag(Tab, {del_node, Node}) ->
574
make_multi_del_node(Tab, Node);
575
change_table_frag(Tab, Change) ->
576
mnesia:abort({bad_type, Tab, Change}).
578
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
579
%% Turn a normal table into a fragmented table
581
%% The storage type must be the same on all nodes
583
make_activate(Tab, Props) ->
584
Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
585
mnesia_schema:ensure_active(Cs),
586
case Cs#cstruct.frag_properties of
588
Cs2 = Cs#cstruct{frag_properties = Props},
589
[Cs3] = expand_cstruct(Cs2, activate),
590
TabDef = mnesia_schema:cs2list(Cs3),
591
Op = {op, change_table_frag, activate, TabDef},
594
mnesia:abort({already_exists, Tab, {frag_properties, BadProps}})
597
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
598
%% Turn a table into a normal defragmented table
600
make_deactivate(Tab) ->
601
Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
602
mnesia_schema:ensure_active(Cs),
603
Foreigners = lookup_foreigners(Tab),
604
BaseTab = lookup_prop(Tab, base_table),
605
FH = lookup_frag_hash(Tab),
608
mnesia:abort({combine_error, Tab, "Not a base table"});
610
mnesia:abort({combine_error, Tab, "Too many foreigners", Foreigners});
611
FH#frag_state.n_fragments > 1 ->
612
mnesia:abort({combine_error, Tab, "Too many fragments"});
614
Cs2 = Cs#cstruct{frag_properties = []},
615
TabDef = mnesia_schema:cs2list(Cs2),
616
Op = {op, change_table_frag, deactivate, TabDef},
620
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
621
%% Add a fragment to a fragmented table and fill it with half of
622
%% the records from one of the old fragments
624
make_multi_add_frag(Tab, SortedNs) when list(SortedNs) ->
626
Ops = make_add_frag(Tab, SortedNs),
628
%% Propagate to foreigners
629
MoreOps = [make_add_frag(T, SortedNs) || T <- lookup_foreigners(Tab)],
631
make_multi_add_frag(Tab, SortedNs) ->
632
mnesia:abort({bad_type, Tab, SortedNs}).
635
FH = lookup_frag_hash(Tab),
636
ForeignKey = FH#frag_state.foreign_key,
637
mnesia_schema:verify(undefined, ForeignKey,
639
"Op only allowed via foreign table",
640
{foreign_key, ForeignKey}}).
642
make_frag_names_and_acquire_locks(Tab, N, FragIndecies, DoNotLockN) ->
643
mnesia_schema:get_tid_ts_and_lock(Tab, write),
644
Fun = fun(Index, FN) ->
646
DoNotLockN == true, Index == N ->
647
Name = n_to_frag_name(Tab, Index),
648
setelement(Index, FN, Name);
650
Name = n_to_frag_name(Tab, Index),
651
mnesia_schema:get_tid_ts_and_lock(Name, write),
652
setelement(Index , FN, Name)
655
FragNames = erlang:make_tuple(N, undefined),
656
lists:foldl(Fun, FragNames, FragIndecies).
658
make_add_frag(Tab, SortedNs) ->
659
Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
660
mnesia_schema:ensure_active(Cs),
661
FH = lookup_frag_hash(Tab),
662
{FH2, FromIndecies, WriteIndecies} = adjust_before_split(FH),
663
N = FH2#frag_state.n_fragments,
664
FragNames = make_frag_names_and_acquire_locks(Tab, N, WriteIndecies, true),
665
NewFrag = element(N, FragNames),
667
NR = length(Cs#cstruct.ram_copies),
668
ND = length(Cs#cstruct.disc_copies),
669
NDO = length(Cs#cstruct.disc_only_copies),
670
NewCs = Cs#cstruct{name = NewFrag,
671
frag_properties = [{base_table, Tab}],
674
disc_only_copies = []},
675
{NewCs2, _, _} = set_frag_nodes(NR, ND, NDO, NewCs, SortedNs, []),
676
[NewOp] = mnesia_schema:make_create_table(NewCs2),
678
SplitOps = split(Tab, FH2, FromIndecies, FragNames, []),
680
Cs2 = replace_frag_hash(Cs, FH2),
681
TabDef = mnesia_schema:cs2list(Cs2),
682
BaseOp = {op, change_table_frag, {add_frag, SortedNs}, TabDef},
684
[BaseOp, NewOp | SplitOps].
686
replace_frag_hash(Cs, FH) when record(FH, frag_state) ->
690
{true, {n_fragments, FH#frag_state.n_fragments}};
692
{true, {hash_module, FH#frag_state.hash_module}};
694
{true, {hash_state, FH#frag_state.hash_state}};
695
{next_n_to_split, _} ->
703
Props = lists:zf(Fun, Cs#cstruct.frag_properties),
704
Cs#cstruct{frag_properties = Props}.
706
%% Adjust table info before split
707
adjust_before_split(FH) ->
708
HashState = FH#frag_state.hash_state,
709
{HashState2, FromFrags, AdditionalWriteFrags} =
710
case FH#frag_state.hash_module of
711
HashMod when HashMod == ?DEFAULT_HASH_MOD ->
712
?DEFAULT_HASH_MOD:add_frag(HashState);
714
HashMod:add_frag(HashState)
716
N = FH#frag_state.n_fragments + 1,
717
FromFrags2 = (catch lists:sort(FromFrags)),
718
UnionFrags = (catch lists:merge(FromFrags2, lists:sort(AdditionalWriteFrags))),
719
VerifyFun = fun(F) when integer(F), F >= 1, F =< N -> false;
722
case catch lists:filter(VerifyFun, UnionFrags) of
724
FH2 = FH#frag_state{n_fragments = N,
725
hash_state = HashState2},
726
{FH2, FromFrags2, UnionFrags};
728
mnesia:abort({"add_frag: Fragment numbers out of range",
729
BadFrags, {range, 1, N}})
732
split(Tab, FH, [SplitN | SplitNs], FragNames, Ops) ->
733
SplitFrag = element(SplitN, FragNames),
734
Pat = mnesia:table_info(SplitFrag, wild_pattern),
735
{_Mod, Tid, Ts} = mnesia_schema:get_tid_ts_and_lock(Tab, none),
736
Recs = mnesia:match_object(Tid, Ts, SplitFrag, Pat, read),
737
Ops2 = do_split(FH, SplitN, FragNames, Recs, Ops),
738
split(Tab, FH, SplitNs, FragNames, Ops2);
739
split(_Tab, _FH, [], _FragNames, Ops) ->
742
%% Perform the split of the table
743
do_split(FH, OldN, FragNames, [Rec | Recs], Ops) ->
745
HashKey = element(Pos, Rec),
746
case key_to_n(FH, HashKey) of
747
NewN when NewN == OldN ->
748
%% Keep record in the same fragment. No need to move it.
749
do_split(FH, OldN, FragNames, Recs, Ops);
751
case element(NewN, FragNames) of
752
NewFrag when NewFrag /= undefined ->
753
OldFrag = element(OldN, FragNames),
754
Key = element(2, Rec),
755
NewOid = {NewFrag, Key},
756
OldOid = {OldFrag, Key},
757
Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}},
758
{op, rec, unknown, {OldOid, [OldOid], delete}} | Ops],
759
do_split(FH, OldN, FragNames, Recs, Ops2);
761
%% Tried to move record to fragment that not is locked
762
mnesia:abort({"add_frag: Fragment not locked", NewN})
765
do_split(_FH, _OldN, _FragNames, [], Ops) ->
768
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
769
%% Delete a fragment from a fragmented table
770
%% and merge its records with an other fragment
772
make_multi_del_frag(Tab) ->
774
Ops = make_del_frag(Tab),
776
%% Propagate to foreigners
777
MoreOps = [make_del_frag(T) || T <- lookup_foreigners(Tab)],
780
make_del_frag(Tab) ->
781
FH = lookup_frag_hash(Tab),
782
case FH#frag_state.n_fragments of
784
Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
785
mnesia_schema:ensure_active(Cs),
786
{FH2, FromIndecies, WriteIndecies} = adjust_before_merge(FH),
787
FragNames = make_frag_names_and_acquire_locks(Tab, N, WriteIndecies, false),
789
MergeOps = merge(Tab, FH2, FromIndecies, FragNames, []),
790
LastFrag = element(N, FragNames),
791
[LastOp] = mnesia_schema:make_delete_table(LastFrag, single_frag),
792
Cs2 = replace_frag_hash(Cs, FH2),
793
TabDef = mnesia_schema:cs2list(Cs2),
794
BaseOp = {op, change_table_frag, del_frag, TabDef},
795
[BaseOp, LastOp | MergeOps];
797
%% Cannot remove the last fragment
798
mnesia:abort({no_exists, Tab})
801
%% Adjust tab info before merge
802
adjust_before_merge(FH) ->
803
HashState = FH#frag_state.hash_state,
804
{HashState2, FromFrags, AdditionalWriteFrags} =
805
case FH#frag_state.hash_module of
806
HashMod when HashMod == ?DEFAULT_HASH_MOD ->
807
?DEFAULT_HASH_MOD:del_frag(HashState);
809
HashMod:del_frag(HashState)
811
N = FH#frag_state.n_fragments,
812
FromFrags2 = (catch lists:sort(FromFrags)),
813
UnionFrags = (catch lists:merge(FromFrags2, lists:sort(AdditionalWriteFrags))),
814
VerifyFun = fun(F) when integer(F), F >= 1, F =< N -> false;
817
case catch lists:filter(VerifyFun, UnionFrags) of
819
case lists:member(N, FromFrags2) of
821
FH2 = FH#frag_state{n_fragments = N - 1,
822
hash_state = HashState2},
823
{FH2, FromFrags2, UnionFrags};
825
mnesia:abort({"del_frag: Last fragment number not included", N})
828
mnesia:abort({"del_frag: Fragment numbers out of range",
829
BadFrags, {range, 1, N}})
832
merge(Tab, FH, [FromN | FromNs], FragNames, Ops) ->
833
FromFrag = element(FromN, FragNames),
834
Pat = mnesia:table_info(FromFrag, wild_pattern),
835
{_Mod, Tid, Ts} = mnesia_schema:get_tid_ts_and_lock(Tab, none),
836
Recs = mnesia:match_object(Tid, Ts, FromFrag, Pat, read),
837
Ops2 = do_merge(FH, FromN, FragNames, Recs, Ops),
838
merge(Tab, FH, FromNs, FragNames, Ops2);
839
merge(_Tab, _FH, [], _FragNames, Ops) ->
842
%% Perform the merge of the table
843
do_merge(FH, OldN, FragNames, [Rec | Recs], Ops) ->
845
LastN = FH#frag_state.n_fragments + 1,
846
HashKey = element(Pos, Rec),
847
case key_to_n(FH, HashKey) of
848
NewN when NewN == LastN ->
849
%% Tried to leave a record in the fragment that is to be deleted
850
mnesia:abort({"del_frag: Fragment number out of range",
851
NewN, {range, 1, LastN}});
852
NewN when NewN == OldN ->
853
%% Keep record in the same fragment. No need to move it.
854
do_merge(FH, OldN, FragNames, Recs, Ops);
855
NewN when OldN == LastN ->
856
%% Move record from the fragment that is to be deleted
857
%% No need to create a delete op for each record.
858
case element(NewN, FragNames) of
859
NewFrag when NewFrag /= undefined ->
860
Key = element(2, Rec),
861
NewOid = {NewFrag, Key},
862
Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}} | Ops],
863
do_merge(FH, OldN, FragNames, Recs, Ops2);
865
%% Tried to move record to fragment that not is locked
866
mnesia:abort({"del_frag: Fragment not locked", NewN})
869
case element(NewN, FragNames) of
870
NewFrag when NewFrag /= undefined ->
871
OldFrag = element(OldN, FragNames),
872
Key = element(2, Rec),
873
NewOid = {NewFrag, Key},
874
OldOid = {OldFrag, Key},
875
Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}},
876
{op, rec, unknown, {OldOid, [OldOid], delete}} | Ops],
877
do_merge(FH, OldN, FragNames, Recs, Ops2);
879
%% Tried to move record to fragment that not is locked
880
mnesia:abort({"del_frag: Fragment not locked", NewN})
883
do_merge(_FH, _OldN, _FragNames, [], Ops) ->
886
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
887
%% Add a node to the node pool of a fragmented table
889
make_multi_add_node(Tab, Node) ->
891
Ops = make_add_node(Tab, Node),
893
%% Propagate to foreigners
894
MoreOps = [make_add_node(T, Node) || T <- lookup_foreigners(Tab)],
897
make_add_node(Tab, Node) when atom(Node) ->
898
Pool = lookup_prop(Tab, node_pool),
899
case lists:member(Node, Pool) of
901
Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
902
Pool2 = Pool ++ [Node],
903
Props = Cs#cstruct.frag_properties,
904
Props2 = lists:keyreplace(node_pool, 1, Props, {node_pool, Pool2}),
905
Cs2 = Cs#cstruct{frag_properties = Props2},
906
TabDef = mnesia_schema:cs2list(Cs2),
907
Op = {op, change_table_frag, {add_node, Node}, TabDef},
910
mnesia:abort({already_exists, Tab, Node})
912
make_add_node(Tab, Node) ->
913
mnesia:abort({bad_type, Tab, Node}).
915
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
916
%% Delet a node from the node pool of a fragmented table
918
make_multi_del_node(Tab, Node) ->
920
Ops = make_del_node(Tab, Node),
922
%% Propagate to foreigners
923
MoreOps = [make_del_node(T, Node) || T <- lookup_foreigners(Tab)],
926
make_del_node(Tab, Node) when atom(Node) ->
927
Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
928
mnesia_schema:ensure_active(Cs),
929
Pool = lookup_prop(Tab, node_pool),
930
case lists:member(Node, Pool) of
932
Pool2 = Pool -- [Node],
933
Props = lists:keyreplace(node_pool, 1, Cs#cstruct.frag_properties, {node_pool, Pool2}),
934
Cs2 = Cs#cstruct{frag_properties = Props},
935
TabDef = mnesia_schema:cs2list(Cs2),
936
Op = {op, change_table_frag, {del_node, Node}, TabDef},
939
mnesia:abort({no_exists, Tab, Node})
941
make_del_node(Tab, Node) ->
942
mnesia:abort({bad_type, Tab, Node}).
944
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
945
%% Special case used to remove all references to a node during
946
%% mnesia:del_table_copy(schema, Node)
948
remove_node(Node, Cs) ->
949
Tab = Cs#cstruct.name,
950
case is_top_frag(Tab) of
954
Pool = lookup_prop(Tab, node_pool),
955
case lists:member(Node, Pool) of
957
Pool2 = Pool -- [Node],
958
Props = lists:keyreplace(node_pool, 1,
959
Cs#cstruct.frag_properties,
961
{Cs#cstruct{frag_properties = Props}, true};
967
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
971
case ?catch_val(Var) of
972
{'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
976
set_frag_hash(Tab, Props) ->
977
case props_to_frag_hash(Tab, Props) of
978
FH when record(FH, frag_state) ->
979
mnesia_lib:set({Tab, frag_hash}, FH);
981
mnesia_lib:unset({Tab, frag_hash})
984
props_to_frag_hash(_Tab, []) ->
986
props_to_frag_hash(Tab, Props) ->
987
case mnesia_schema:pick(Tab, base_table, Props, undefined) of
989
Foreign = mnesia_schema:pick(Tab, foreign_key, Props, must),
990
N = mnesia_schema:pick(Tab, n_fragments, Props, must),
992
case mnesia_schema:pick(Tab, hash_module, Props, undefined) of
994
Split = mnesia_schema:pick(Tab, next_n_to_split, Props, must),
995
Doubles = mnesia_schema:pick(Tab, n_doubles, Props, must),
996
FH = {frag_hash, Foreign, N, Split, Doubles},
997
HashState = ?OLD_HASH_MOD:init_state(Tab, FH),
998
#frag_state{foreign_key = Foreign,
1000
hash_module = ?OLD_HASH_MOD,
1001
hash_state = HashState};
1003
HashState = mnesia_schema:pick(Tab, hash_state, Props, must),
1004
#frag_state{foreign_key = Foreign,
1006
hash_module = HashMod,
1007
hash_state = HashState}
1008
%% Old style. Kept for backwards compatibility.
1014
lookup_prop(Tab, Prop) ->
1015
Props = val({Tab, frag_properties}),
1016
case lists:keysearch(Prop, 1, Props) of
1017
{value, {Prop, Val}} ->
1020
mnesia:abort({no_exists, Tab, Prop, {frag_properties, Props}})
1023
lookup_frag_hash(Tab) ->
1024
case ?catch_val({Tab, frag_hash}) of
1025
FH when record(FH, frag_state) ->
1027
{frag_hash, K, N, _S, _D} = FH ->
1028
%% Old style. Kept for backwards compatibility.
1029
HashState = ?OLD_HASH_MOD:init_state(Tab, FH),
1030
#frag_state{foreign_key = K,
1032
hash_module = ?OLD_HASH_MOD,
1033
hash_state = HashState};
1035
mnesia:abort({no_exists, Tab, frag_properties, frag_hash})
1039
case ?catch_val({Tab, frag_hash}) of
1043
[] == lookup_foreigners(Tab)
1046
%% Returns a list of tables
1047
lookup_foreigners(Tab) ->
1048
%% First field in HashPat is either frag_hash or frag_state
1049
HashPat = {'_', {Tab, '_'}, '_', '_', '_'},
1050
[T || [T] <- ?ets_match(mnesia_gvar, {{'$1', frag_hash}, HashPat})].
1052
%% Returns name of fragment table
1053
record_to_frag_name(Tab, Rec) ->
1054
case ?catch_val({Tab, frag_hash}) of
1059
Key = element(Pos, Rec),
1060
N = key_to_n(FH, Key),
1061
n_to_frag_name(Tab, N)
1065
case FH#frag_state.foreign_key of
1068
{_ForeignTab, Pos} ->
1072
%% Returns name of fragment table
1073
key_to_frag_name({BaseTab, _} = Tab, Key) ->
1074
N = key_to_frag_number(Tab, Key),
1075
n_to_frag_name(BaseTab, N);
1076
key_to_frag_name(Tab, Key) ->
1077
N = key_to_frag_number(Tab, Key),
1078
n_to_frag_name(Tab, N).
1080
%% Returns name of fragment table
1081
n_to_frag_name(Tab, 1) ->
1083
n_to_frag_name(Tab, N) when atom(Tab), integer(N) ->
1084
list_to_atom(atom_to_list(Tab) ++ "_frag" ++ integer_to_list(N));
1085
n_to_frag_name(Tab, N) ->
1086
mnesia:abort({bad_type, Tab, N}).
1088
%% Returns name of fragment table
1089
key_to_frag_number({Tab, ForeignKey}, _Key) ->
1090
FH = val({Tab, frag_hash}),
1091
case FH#frag_state.foreign_key of
1092
{_ForeignTab, _Pos} ->
1093
key_to_n(FH, ForeignKey);
1095
mnesia:abort({combine_error, Tab, frag_properties,
1096
{foreign_key, undefined}})
1098
key_to_frag_number(Tab, Key) ->
1099
case ?catch_val({Tab, frag_hash}) of
1106
%% Returns fragment number
1107
key_to_n(FH, Key) ->
1108
HashState = FH#frag_state.hash_state,
1110
case FH#frag_state.hash_module of
1111
HashMod when HashMod == ?DEFAULT_HASH_MOD ->
1112
?DEFAULT_HASH_MOD:key_to_frag_number(HashState, Key);
1114
HashMod:key_to_frag_number(HashState, Key)
1117
integer(N), N >= 1, N =< FH#frag_state.n_fragments ->
1120
mnesia:abort({"key_to_frag_number: Fragment number out of range",
1121
N, {range, 1, FH#frag_state.n_fragments}})
1124
%% Returns a list of frament table names
1126
case ?catch_val({Tab, frag_hash}) of
1130
N = FH#frag_state.n_fragments,
1131
frag_names(Tab, N, [])
1134
frag_names(Tab, 1, Acc) ->
1136
frag_names(Tab, N, Acc) ->
1137
Frag = n_to_frag_name(Tab, N),
1138
frag_names(Tab, N - 1, [Frag | Acc]).
1140
%% Returns a list of {Node, FragCount} tuples
1141
%% sorted on FragCounts
1143
Pool = lookup_prop(Tab, node_pool),
1144
Dist = [{good, Node, 0} || Node <- Pool],
1145
Dist2 = count_frag(frag_names(Tab), Dist),
1148
count_frag([Frag | Frags], Dist) ->
1149
Dist2 = incr_nodes(val({Frag, ram_copies}), Dist),
1150
Dist3 = incr_nodes(val({Frag, disc_copies}), Dist2),
1151
Dist4 = incr_nodes(val({Frag, disc_only_copies}), Dist3),
1152
count_frag(Frags, Dist4);
1153
count_frag([], Dist) ->
1156
incr_nodes([Node | Nodes], Dist) ->
1157
Dist2 = incr_node(Node, Dist),
1158
incr_nodes(Nodes, Dist2);
1159
incr_nodes([], Dist) ->
1162
incr_node(Node, [{Kind, Node, Count} | Tail]) ->
1163
[{Kind, Node, Count + 1} | Tail];
1164
incr_node(Node, [Head | Tail]) ->
1165
[Head | incr_node(Node, Tail)];
1166
incr_node(Node, []) ->
1169
%% Sorts dist according in decreasing count order
1171
Dist2 = deep_dist(Dist, []),
1172
Dist3 = lists:keysort(1, Dist2),
1173
shallow_dist(Dist3).
1175
deep_dist([Head | Tail], Deep) ->
1176
{Kind, _Node, Count} = Head,
1177
{Tag, Same, Other} = pick_count(Kind, Count, [Head | Tail]),
1178
deep_dist(Other, [{Tag, Same} | Deep]);
1179
deep_dist([], Deep) ->
1182
pick_count(Kind, Count, [{Kind2, Node2, Count2} | Tail]) ->
1183
Head = {Node2, Count2},
1184
{_, Same, Other} = pick_count(Kind, Count, Tail),
1187
{bad, [Head | Same], Other};
1189
{Count, Same, [{Kind2, Node2, Count2} | Other]};
1191
{Count, [Head | Same], Other};
1193
{Count, Same, [{Kind2, Node2, Count2} | Other]}
1195
pick_count(_Kind, Count, []) ->
1198
shallow_dist([{_Tag, Shallow} | Deep]) ->
1199
Shallow ++ shallow_dist(Deep);