102
102
%% {prot_vsn, Node} = Vsn - the exchange protocol version (not used now)
103
103
%% {sync_tag_my, Node} = My tag, used at synchronization with Node
104
104
%% {sync_tag_his, Node} = The Node's tag, used at synchronization
105
%% {lock_id, Node} = The resource locking the partitions
105
106
%%-----------------------------------------------------------------
106
107
-record(state, {connect_all, known = [], synced = [],
107
108
resolvers = [], syncers = [], node_name = node(),
108
the_locker, the_deleter}).
109
the_locker, the_deleter, the_registrar, trace,
110
global_lock_down = false
110
113
%%% There are also ETS tables used for bookkeeping of locks and names
111
114
%%% (the first position is the key):
113
%%% global_locks (set): {ResourceId, LockRequesterId, Pids}
114
%%% Pids is a list of all pids locking ResourceId.
115
%%% global_names (set): {Name, Pid, Method}
116
%%% Registered names.
116
%%% global_locks (set): {ResourceId, LockRequesterId, [{Pid,RPid,ref()}]}
117
%%% pid() is locking ResourceId, ref() is the monitor ref.
118
%%% RPid =/= Pid if there is an extra process calling erlang:monitor().
119
%%% global_names (set): {Name, Pid, Method, RPid, ref()}
120
%%% Registered names. ref() is the monitor ref.
121
%%% RPid =/= Pid if there is an extra process calling erlang:monitor().
117
122
%%% global_names_ext (set): {Name, Pid, RegNode}
118
123
%%% External registered names (C-nodes).
124
%%% (The RPid:s can be removed when/if erlang:monitor() returns before
125
%%% trying to connect to the other node.)
120
127
%%% Helper tables:
121
%%% global_pid_names (bag): {Pid, Name}
128
%%% global_pid_names (bag): {Pid, Name} | {ref(), Name}
122
129
%%% Name(s) registered for Pid.
123
%%% global_pid_ids (bag): {Pid, ResourceId}
130
%%% There is one {Pid, Name} and one {ref(), Name} for every Pid.
131
%%% ref() is the same ref() as in global_names.
132
%%% global_pid_ids (bag): {Pid, ResourceId} | {ref(), ResourceId}
124
133
%%% Resources locked by Pid.
125
%%% global_node_pids (duplicated_bag): {{lock, node(Pid)}, Pid} or
126
%%% {{name, Node},Pid},
127
%%% where Node is a node where some name is registered
128
%%% Pids affected by a nodedown.
134
%%% ref() is the same ref() as in global_locks.
130
136
%%% global_pid_names is a 'bag' for backward compatibility.
131
%%% global_node_pids is a 'duplicate_bag' for the same reason.
132
137
%%% (Before vsn 5 more than one name could be registered for a process.)
139
%%% R11B-3 (OTP-6341): The list of pids in the table 'global_locks'
140
%%% was replaced by a list of {Pid, Ref}, where Ref is a monitor ref.
141
%%% It was necessary to use monitors to fix bugs regarding locks that
142
%%% were never removed. The signal {async_del_lock, ...} has been
143
%%% kept for backward compatibility. It can be removed later.
145
%%% R11B-4 (OTP-6428): Monitors are used for registered names.
146
%%% The signal {delete_name, ...} has been kept for backward compatibility.
147
%%% It can be removed later as can the deleter process.
148
%%% An extra process calling erlang:monitor() is sometimes created.
149
%%% The new_nodes message has been augmented with the global lock id.
135
152
gen_server:start({local, global_name_server}, ?MODULE, [], []).
475
514
do_whereis(Name, From),
478
handle_call({register, Name, Pid, Method}, _From, S) ->
479
ins_name(Name, Pid, Method),
517
handle_call({registrar, Fun}, From, S) ->
518
S#state.the_registrar ! {trans_all_known, Fun, From},
521
%% The pattern {register,'_','_','_'} is traced by the inviso
522
%% application. Do not change.
523
handle_call({register, Name, Pid, Method}, {FromPid, _Tag}, S0) ->
524
S = ins_name(Name, Pid, Method, FromPid, [], S0),
482
handle_call({unregister, Name}, _From, S) ->
483
delete_global_name(Name),
527
handle_call({unregister, Name}, _From, S0) ->
528
S = delete_global_name2(Name, S0),
486
handle_call({register_ext, Name, Pid, Method, RegNode}, _F, S) ->
487
ins_name_ext(Name, Pid, Method, RegNode),
531
handle_call({register_ext, Name, Pid, Method, RegNode}, {FromPid,_Tag}, S0) ->
532
S = ins_name_ext(Name, Pid, Method, RegNode, FromPid, [], S0),
490
handle_call({set_lock, Lock}, {Pid, _Tag}, S) ->
491
Reply = handle_set_lock(Lock, Pid),
535
handle_call({set_lock, Lock}, {Pid, _Tag}, S0) ->
536
{Reply, S} = handle_set_lock(Lock, Pid, S0),
492
537
{reply, Reply, S};
494
handle_call({del_lock, Lock}, {Pid, _Tag}, S) ->
495
handle_del_lock(Lock, Pid),
539
handle_call({del_lock, Lock}, {Pid, _Tag}, S0) ->
540
S = handle_del_lock(Lock, Pid, S0),
496
541
{reply, true, S};
498
543
handle_call(get_known, _From, S) ->
665
731
{noreply, NewS#state{synced = NSynced}};
667
733
%% Called when Pid on other node crashed
668
handle_cast({async_del_name, Name, Pid}, S) ->
734
handle_cast({async_del_name, _Name, _Pid}, S) ->
669
735
%% Sent from the_deleter at some node in the partition but node().
670
case ets:lookup(global_names, Name) of
672
delete_global_name(Name, Pid);
736
%% The DOWN message deletes the name.
677
handle_cast({async_del_lock, _ResourceId, Pid}, S) ->
739
handle_cast({async_del_lock, _ResourceId, _Pid}, S) ->
678
740
%% Sent from global_name_server at some node in the partition but node().
741
%% The DOWN message deletes the lock.
744
handle_cast(Request, S) ->
745
error_logger:warning_msg("The global_name_server "
746
"received an unexpected message:\n"
747
"handle_cast(~p, _)\n", [Request]),
682
750
handle_info({'EXIT', Deleter, _Reason}=Exit, #state{the_deleter=Deleter}=S) ->
683
751
{stop, {deleter_died,Exit}, S#state{the_deleter=undefined}};
752
handle_info({'EXIT', Locker, _Reason}=Exit, #state{the_locker=Locker}=S) ->
753
{stop, {locker_died,Exit}, S#state{the_locker=undefined}};
754
handle_info({'EXIT', Registrar, _}=Exit, #state{the_registrar=Registrar}=S) ->
755
{stop, {registrar_died,Exit}, S#state{the_registrar=undefined}};
684
756
handle_info({'EXIT', Pid, _Reason}, S) when is_pid(Pid) ->
685
?trace({global_EXIT,Pid}),
686
%% The process that died was either a synch process started by
687
%% start_sync or a registered process.
688
#state{the_deleter=Deleter, known = Known} = S,
689
check_exit(Deleter, Pid, Known),
757
?trace({global_EXIT,_Reason,Pid}),
758
%% The process that died was a synch process started by start_sync
759
%% or a registered process running on an external node (C-node).
760
%% Links to external names are ignored here (there are also DOWN
690
762
Syncers = lists:delete(Pid, S#state.syncers),
691
763
{noreply, S#state{syncers = Syncers}};
754
830
io:format(">>>> ~p\n",[S#state.known]),
757
handle_info(_Message, S) ->
833
%% "High level trace". For troubleshooting only.
834
handle_info(high_level_trace, S) ->
836
#state{trace = [{Node, _Time, _M, Nodes, _X} | _]} ->
837
send_high_level_trace(),
840
case {CNode, CNodes} of
845
sofs:symmetric_partition(sofs:set([CNode|CNodes]),
846
sofs:set([Node|Nodes])),
847
M = {nodes_changed, {sofs:to_external(New),
848
sofs:to_external(Old)}},
849
{noreply, trace_message(S, M, [])}
854
handle_info({trace_message, M}, S) ->
855
{noreply, trace_message(S, M, [])};
856
handle_info({trace_message, M, X}, S) ->
857
{noreply, trace_message(S, M, X)};
859
handle_info({'DOWN', MonitorRef, process, _Pid, _Info}, S0) ->
860
S1 = delete_lock(MonitorRef, S0),
861
S = del_name(MonitorRef, S1),
864
handle_info(Message, S) ->
865
error_logger:warning_msg("The global_name_server "
866
"received an unexpected message:\n"
867
"handle_info(~p, _)\n", [Message]),
760
871
%%========================================================================
761
872
%%========================================================================
762
873
%%=============================== Internal Functions =====================
763
874
%%========================================================================
764
875
%%========================================================================
877
-define(HIGH_LEVEL_TRACE_INTERVAL, 500). % ms
879
wait_high_level_trace() ->
883
after ?HIGH_LEVEL_TRACE_INTERVAL+1 ->
887
%% Schedule the next 'high_level_trace' tick: sends the atom
%% high_level_trace to the calling process (the global name server)
%% after ?HIGH_LEVEL_TRACE_INTERVAL ms. For troubleshooting only.
send_high_level_trace() ->
    erlang:send_after(?HIGH_LEVEL_TRACE_INTERVAL, self(), high_level_trace).
766
890
-define(GLOBAL_RID, global).
768
892
%% Similar to trans(Id, Fun), but always uses global's own lock
904
1029
put({wait_lock, Node}, {exchange, NameList})
907
resolved(Node, HisResolved, HisKnown, Names_ext, S) ->
1032
resolved(Node, HisResolved, HisKnown, Names_ext, S0) ->
908
1033
Ops = erase({save_ops, Node}) ++ HisResolved,
909
1034
%% Known may have shrunk since the lock was taken (due to nodedowns).
910
Known = S#state.known,
911
Synced = S#state.synced,
1035
Known = S0#state.known,
1036
Synced = S0#state.synced,
912
1037
NewNodes = [Node | HisKnown],
913
1038
sync_others(HisKnown),
914
do_ops(Ops, Names_ext),
915
gen_server:abcast(Known, global_name_server,
916
{new_nodes, node(), Ops, Names_ext, NewNodes,NewNodes}),
1039
ExtraInfo = [{vsn,get({prot_vsn, Node})}, {lock, get({lock_id, Node})}],
1040
S = do_ops(Ops, node(), Names_ext, ExtraInfo, S0),
917
1041
%% I am synced with Node, but not with HisKnown yet
918
1042
lists:foreach(fun(Pid) -> Pid ! {synced, [Node]} end, S#state.syncers),
919
NewS = lists:foldl(fun(Node1, S1) ->
920
Tag1 = get({sync_tag_my, Node1}),
921
?trace({calling_cancel_locker,Tag1,get()}),
922
S2 = cancel_locker(Node1, S1, Tag1),
923
reset_node_state(Node1),
1043
S3 = lists:foldl(fun(Node1, S1) ->
1044
F = fun(Tag) -> cancel_locker(Node1,S1,Tag) end,
1045
cancel_resolved_locker(Node1, F)
1047
%% The locker that took the lock is asked to send
1048
%% the {new_nodes, ...} message. This ensures that
1049
%% {del_lock, ...} is received after {new_nodes, ...}
1050
%% (except when abcast spawns process(es)...).
1051
NewNodesF = fun() ->
1052
gen_server:abcast(Known, global_name_server,
1053
{new_nodes, node(), Ops, Names_ext,
1054
NewNodes, ExtraInfo})
1056
F = fun(Tag) -> cancel_locker(Node, S3, Tag, NewNodesF) end,
1057
S4 = cancel_resolved_locker(Node, F),
926
1058
%% See (*) below... we're node b in that description
927
1059
AddedNodes = (NewNodes -- Known),
928
1060
NewKnown = Known ++ AddedNodes,
929
NewS#state.the_locker ! {add_to_known, AddedNodes},
1061
S4#state.the_locker ! {add_to_known, AddedNodes},
1062
NewS = trace_message(S4, {added, AddedNodes},
1063
[{new_nodes, NewNodes}, {abcast, Known}, {ops,Ops}]),
930
1064
NewS#state{known = NewKnown, synced = [Node | Synced]}.
932
new_nodes(Ops, Names_ext, Nodes, S) ->
933
Known = S#state.known,
1066
cancel_resolved_locker(Node, CancelFun) ->
1067
Tag = get({sync_tag_my, Node}),
1068
?trace({calling_cancel_locker,Tag,get()}),
1070
reset_node_state(Node),
1073
new_nodes(Ops, ConnNode, Names_ext, Nodes, ExtraInfo, S0) ->
1074
Known = S0#state.known,
934
1075
%% (*) This one requires some thought...
935
1076
%% We're node a, other nodes b and c:
936
1077
%% The problem is that {in_sync, a} may arrive before {resolved, [a]} to
937
1078
%% b from c, leading to b sending {new_nodes, [a]} to us (node a).
938
1079
%% Therefore, we make sure we never get duplicates in Known.
939
NewNodes = lists:delete(node(), Nodes -- Known),
940
sync_others(NewNodes),
941
do_ops(Ops, Names_ext),
942
?trace({new_nodes_in_sync,{new_nodes,NewNodes}}),
943
S#state.the_locker ! {add_to_known, NewNodes},
944
S#state{known = Known ++ NewNodes}.
1080
AddedNodes = lists:delete(node(), Nodes -- Known),
1081
sync_others(AddedNodes),
1082
S = do_ops(Ops, ConnNode, Names_ext, ExtraInfo, S0),
1083
?trace({added_nodes_in_sync,{added_nodes,AddedNodes}}),
1084
S#state.the_locker ! {add_to_known, AddedNodes},
1085
S1 = trace_message(S, {added, AddedNodes}, [{ops,Ops}]),
1086
S1#state{known = Known ++ AddedNodes}.
946
1088
do_whereis(Name, From) ->
947
case is_lock_set(?GLOBAL_RID) of
1089
case is_global_lock_set() of
949
1091
gen_server:reply(From, where(Name));
992
ins_name(Name, Pid, Method) ->
1133
ins_name(Name, Pid, Method, FromPidOrNode, ExtraInfo, S0) ->
993
1134
?trace({ins_name,insert,{name,Name},{pid,Pid}}),
994
delete_global_name(Name),
996
insert_global_name(Name, Pid, Method, node(Pid)).
1135
S1 = delete_global_name_keep_pid(Name, S0),
1136
S = trace_message(S1, {ins_name, node(Pid)}, [Name, Pid]),
1137
insert_global_name(Name, Pid, Method, FromPidOrNode, ExtraInfo, S).
998
ins_name_ext(Name, Pid, Method, RegNode) ->
1139
ins_name_ext(Name, Pid, Method, RegNode, FromPidOrNode, ExtraInfo, S0) ->
999
1140
?trace({ins_name_ext, {name,Name}, {pid,Pid}}),
1000
delete_global_name(Name),
1141
S1 = delete_global_name_keep_pid(Name, S0),
1001
1142
dolink_ext(Pid, RegNode),
1002
insert_global_name(Name, Pid, Method, RegNode),
1003
true = ets:insert(global_names_ext, {Name, Pid, RegNode}).
1143
S = trace_message(S1, {ins_name_ext, node(Pid)}, [Name, Pid]),
1144
true = ets:insert(global_names_ext, {Name, Pid, RegNode}),
1145
insert_global_name(Name, Pid, Method, FromPidOrNode, ExtraInfo, S).
1006
1148
case ets:lookup(global_names, Name) of
1007
[{_, Pid, _}] -> Pid;
1149
[{_Name, Pid, _Method, _RPid, _Ref}] -> Pid;
1008
1150
[] -> undefined
1011
handle_set_lock(Id, Pid) ->
1153
handle_set_lock(Id, Pid, S) ->
1012
1154
?trace({handle_set_lock, Id, Pid}),
1013
1155
case can_set_lock(Id) of
1015
case lists:member(Pid, Pids) of
1017
false -> insert_lock(Id, Pid, Pids)
1157
case pid_is_locking(Pid, PidRefs) of
1161
{true, insert_lock(Id, Pid, PidRefs, S)}
1023
1167
can_set_lock({ResourceId, LockRequesterId}) ->
1024
1168
case ets:lookup(global_locks, ResourceId) of
1025
[{ResourceId, LockRequesterId, Pids}] ->
1027
[{ResourceId, _LockRequesterId2, _Pids}] ->
1169
[{ResourceId, LockRequesterId, PidRefs}] ->
1171
[{ResourceId, _LockRequesterId2, _PidRefs}] ->
1033
insert_lock({ResourceId, LockRequesterId}, Pid, Pids) ->
1177
insert_lock({ResourceId, LockRequesterId}=Id, Pid, PidRefs, S) ->
1178
{RPid, Ref} = do_monitor(Pid),
1035
1179
true = ets:insert(global_pid_ids, {Pid, ResourceId}),
1036
true = ets:insert(global_node_pids, {{lock,node(Pid)}, Pid}),
1037
Lock = {ResourceId, LockRequesterId, [Pid | Pids]},
1038
true = ets:insert(global_locks, Lock).
1180
true = ets:insert(global_pid_ids, {Ref, ResourceId}),
1181
Lock = {ResourceId, LockRequesterId, [{Pid,RPid,Ref} | PidRefs]},
1182
true = ets:insert(global_locks, Lock),
1183
trace_message(S, {ins_lock, node(Pid)}, [Id, Pid]).
1185
%% Returns true if the global resource (?GLOBAL_RID) is currently locked.
is_global_lock_set() ->
    is_lock_set(?GLOBAL_RID).
1040
1188
%% Returns true if some entry for ResourceId exists in the
%% global_locks ETS table, i.e. the resource is locked.
is_lock_set(ResourceId) ->
    ets:member(global_locks, ResourceId).
1043
handle_del_lock({ResourceId, LockRequesterId}, Pid) ->
1044
?trace({handle_del_lock, {pid,Pid},{id,{ResourceId, LockRequesterId}}}),
1191
handle_del_lock({ResourceId, LockReqId}, Pid, S0) ->
1192
?trace({handle_del_lock, {pid,Pid},{id,{ResourceId, LockReqId}}}),
1045
1193
case ets:lookup(global_locks, ResourceId) of
1046
[{ResourceId, LockRequesterId, Pids}]->
1047
remove_lock(ResourceId, LockRequesterId, Pid, Pids),
1194
[{ResourceId, LockReqId, PidRefs}]->
1195
remove_lock(ResourceId, LockReqId, Pid, PidRefs, false, S0);
1052
remove_lock(ResourceId, _LockRequesterId, Pid, [Pid]) ->
1199
remove_lock(ResourceId, LockRequesterId, Pid, [{Pid,RPid,Ref}], Down, S0) ->
1053
1200
?trace({remove_lock_1, {id,ResourceId},{pid,Pid}}),
1201
true = erlang:demonitor(Ref, [flush]),
1202
kill_monitor_proc(RPid, Pid),
1054
1203
true = ets:delete(global_locks, ResourceId),
1055
true = ets:delete_object(global_node_pids, {{lock,node(Pid)}, Pid}),
1056
true = ets:delete_object(global_pid_ids, {Pid, ResourceId});
1057
remove_lock(ResourceId, LockRequesterId, Pid, Pids) ->
1204
true = ets:delete_object(global_pid_ids, {Pid, ResourceId}),
1205
true = ets:delete_object(global_pid_ids, {Ref, ResourceId}),
1206
S = case ResourceId of
1207
?GLOBAL_RID -> S0#state{global_lock_down = Down};
1210
trace_message(S, {rem_lock, node(Pid)},
1211
[{ResourceId, LockRequesterId}, Pid]);
1212
remove_lock(ResourceId, LockRequesterId, Pid, PidRefs0, _Down, S) ->
1058
1213
?trace({remove_lock_2, {id,ResourceId},{pid,Pid}}),
1059
Lock = {ResourceId, LockRequesterId, lists:delete(Pid, Pids)},
1214
PidRefs = case lists:keysearch(Pid, 1, PidRefs0) of
1215
{value, {Pid, RPid, Ref}} ->
1216
true = erlang:demonitor(Ref, [flush]),
1217
kill_monitor_proc(RPid, Pid),
1218
true = ets:delete_object(global_pid_ids,
1220
lists:keydelete(Pid, 1, PidRefs0);
1224
Lock = {ResourceId, LockRequesterId, PidRefs},
1060
1225
true = ets:insert(global_locks, Lock),
1061
true = ets:delete_object(global_node_pids, {{lock,node(Pid)}, Pid}),
1062
true = ets:delete_object(global_pid_ids, {Pid, ResourceId}).
1064
do_ops(Ops, Names_ext) ->
1226
true = ets:delete_object(global_pid_ids, {Pid, ResourceId}),
1227
trace_message(S, {rem_lock, node(Pid)},
1228
[{ResourceId, LockRequesterId}, Pid]).
1230
kill_monitor_proc(Pid, Pid) ->
1232
kill_monitor_proc(RPid, _Pid) ->
1235
do_ops(Ops, ConnNode, Names_ext, ExtraInfo, S0) ->
1065
1236
?trace({do_ops, {ops,Ops}}),
1067
1238
XInserts = [{Name, Pid, RegNode, Method} ||
1068
1239
{Name2, Pid2, RegNode} <- Names_ext,
1069
1240
{insert, {Name, Pid, Method}} <- Ops,
1070
1241
Name =:= Name2, Pid =:= Pid2],
1071
lists:foreach(fun({Name, Pid, RegNode, Method}) ->
1072
ins_name_ext(Name, Pid, Method, RegNode)
1242
S1 = lists:foldl(fun({Name, Pid, RegNode, Method}, S1) ->
1243
ins_name_ext(Name, Pid, Method, RegNode,
1244
ConnNode, ExtraInfo, S1)
1075
1247
XNames = [Name || {Name, _Pid, _RegNode, _Method} <- XInserts],
1076
1248
Inserts = [{Name, Pid, node(Pid), Method} ||
1077
1249
{insert, {Name, Pid, Method}} <- Ops,
1078
1250
not lists:member(Name, XNames)],
1079
lists:foreach(fun({Name, Pid, _RegNode, Method}) ->
1080
ins_name(Name, Pid, Method)
1251
S2 = lists:foldl(fun({Name, Pid, _RegNode, Method}, S2) ->
1252
ins_name(Name, Pid, Method, ConnNode,
1083
1256
DelNames = [Name || {delete, Name} <- Ops],
1084
lists:foreach(fun(Name) -> delete_global_name(Name) end, DelNames).
1257
lists:foldl(fun(Name, S) -> delete_global_name2(Name, S)
1086
1260
%% It is possible that a node that was up and running when the
1087
1261
%% operations were assembled has since died. The final {in_sync,...}
1116
1290
?trace({missing_nodedown, {node, Node}}),
1117
1291
error_logger:warning_msg("global: ~w failed to connect to ~w\n",
1118
1292
[node(), Node]),
1119
global_name_server ! {nodedown, Node}
1293
global_name_server ! {extra_nodedown, Node}
1121
1295
gen_server:cast({global_name_server,Node}, {in_sync,node(),true})
1123
1297
% monitor_node(Node, false),
1124
1298
% exit(normal).
1126
insert_global_name(Name, Pid, Method, Node) ->
1300
insert_global_name(Name, Pid, Method, FromPidOrNode, ExtraInfo, S) ->
1301
{RPid, Ref} = do_monitor(Pid),
1302
true = ets:insert(global_names, {Name, Pid, Method, RPid, Ref}),
1127
1303
true = ets:insert(global_pid_names, {Pid, Name}),
1128
true = ets:insert(global_names, {Name, Pid, Method}),
1129
true = ets:insert(global_node_pids, {{name,Node}, Pid}).
1131
delete_global_name(Name) ->
1132
case ets:lookup(global_names, Name) of
1134
delete_global_name(Name, Pid);
1304
true = ets:insert(global_pid_names, {Ref, Name}),
1305
case lock_still_set(FromPidOrNode, ExtraInfo, S) of
1309
%% The node that took the lock has gone down and then up
1310
%% again. The {register, ...} or {new_nodes, ...} message
1311
%% was delayed and arrived after nodeup (maybe it caused
1312
%% the nodeup). The DOWN signal from the monitor of the
1313
%% lock has removed the lock.
1314
%% Note: it is assumed here that the DOWN signal arrives
1315
%% _before_ nodeup and any message that caused nodeup.
1316
%% This is true of Erlang/OTP.
1317
delete_global_name2(Name, S)
1320
lock_still_set(PidOrNode, ExtraInfo, S) ->
1321
case ets:lookup(global_locks, ?GLOBAL_RID) of
1322
[{?GLOBAL_RID, _LockReqId, PidRefs}] when is_pid(PidOrNode) ->
1323
%% Name registration.
1324
lists:keymember(PidOrNode, 1, PidRefs);
1325
[{?GLOBAL_RID, LockReqId, PidRefs}] when is_atom(PidOrNode) ->
1326
case extra_info(lock, ExtraInfo) of
1327
{?GLOBAL_RID, LockId} -> % R11B-4 or later
1328
LockReqId =:= LockId;
1330
lock_still_set_old(PidOrNode, LockReqId, PidRefs)
1333
%% If the global lock was not removed by a DOWN message
1334
%%% then we have a node that does not monitor locking pids
1335
%% (pre R11B-3), or an R11B-3 node (which does not ensure
1336
%% that {new_nodes, ...} arrives before {del_lock, ...}).
1337
not S#state.global_lock_down
1340
%%% The following is probably overkill. It is possible that this node
1341
%%% has been locked again, but it is a rare occasion.
1342
lock_still_set_old(_Node, ReqId, _PidRefs) when is_pid(ReqId) ->
1343
%% Cannot do better than return true.
1345
lock_still_set_old(Node, ReqId, PidRefs) when is_list(ReqId) ->
1346
%% Connection, version > 4, but before R11B-4.
1347
[P || {P, _RPid, _Ref} <- PidRefs, node(P) =:= Node] =/= [].
1349
extra_info(Tag, ExtraInfo) ->
1350
%% ExtraInfo used to be a list of nodes (vsn 2).
1351
case catch lists:keysearch(Tag, 1, ExtraInfo) of
1352
{value, {Tag, Info}} ->
1359
NameL = [{Name, Pid} ||
1360
{_, Name} <- ets:lookup(global_pid_names, Ref),
1361
{_, Pid, _Method, _RPid, Ref1} <-
1362
ets:lookup(global_names, Name),
1364
?trace({async_del_name, self(), NameL, Ref}),
1367
del_names(Name, Pid, S),
1368
delete_global_name2(Name, S);
1373
%% Send {async_del_name, ...} to old nodes (pre R11B-3).
1374
del_names(Name, Pid, S) ->
1375
Send = case ets:lookup(global_names_ext, Name) of
1376
[{Name, Pid, RegNode}] ->
1379
node(Pid) =:= node()
1383
?trace({del_names, {pid,Pid}, {name,Name}}),
1384
S#state.the_deleter ! {delete_name, self(), Name, Pid};
1139
delete_global_name(Name, Pid) ->
1389
%% Keeps the entry in global_names for whereis_name/1.
1390
delete_global_name_keep_pid(Name, S) ->
1391
case ets:lookup(global_names, Name) of
1392
[{Name, Pid, _Method, RPid, Ref}] ->
1393
delete_global_name2(Name, Pid, RPid, Ref, S);
1398
delete_global_name2(Name, S) ->
1399
case ets:lookup(global_names, Name) of
1400
[{Name, Pid, _Method, RPid, Ref}] ->
1401
true = ets:delete(global_names, Name),
1402
delete_global_name2(Name, Pid, RPid, Ref, S);
1407
delete_global_name2(Name, Pid, RPid, Ref, S) ->
1408
true = erlang:demonitor(Ref, [flush]),
1409
kill_monitor_proc(RPid, Pid),
1410
delete_global_name(Name, Pid),
1140
1411
?trace({delete_global_name,{item,Name},{pid,Pid}}),
1141
true = ets:delete(global_names, Name),
1142
1412
true = ets:delete_object(global_pid_names, {Pid, Name}),
1413
true = ets:delete_object(global_pid_names, {Ref, Name}),
1143
1414
case ets:lookup(global_names_ext, Name) of
1144
1415
[{Name, Pid, RegNode}] ->
1145
1416
true = ets:delete(global_names_ext, Name),
1146
1417
?trace({delete_global_name, {name,Name,{pid,Pid},{RegNode,Pid}}}),
1147
true = ets:delete_object(global_node_pids, {{name,RegNode}, Pid}),
1148
1418
dounlink_ext(Pid, RegNode);
1150
1420
?trace({delete_global_name,{name,Name,{pid,Pid},{node(Pid),Pid}}}),
1151
true = ets:delete_object(global_node_pids,{{name,node(Pid)},Pid}),
1423
trace_message(S, {del_name, node(Pid)}, [Name, Pid]).
1425
%% delete_global_name/2 is traced by the inviso application.
1427
delete_global_name(_Name, _Pid) ->
1155
1430
%%-----------------------------------------------------------------
1156
1431
%% The locker is a satellite process to global_name_server. When a
1368
1650
{?GLOBAL_RID, lists:sort([self(), Pid])}.
1370
1652
lock_nodes_safely(LockId, Extra, S0) ->
1371
%% Locking the boss first is an optimization.
1372
First = lists:usort([node(), S0#multi.the_boss]) -- [nonode@nohost],
1373
case set_lock(LockId, First, 0) of
1653
%% Locking node() could stop some node that has already locked the
1654
%% boss, so just check if it is possible to lock node().
1655
First = delete_nonode([S0#multi.the_boss]),
1656
case ([node()] =:= First) orelse (can_set_lock(LockId) =/= false) of
1375
S = update_locker_known(S0),
1376
%% The boss may have changed, but don't bother.
1377
case set_lock(LockId, Extra, 0) of
1658
%% Locking the boss first is an optimization.
1659
case set_lock(LockId, First, 0) of
1379
case set_lock(LockId, S#multi.known, 0) of
1661
S = update_locker_known(S0),
1662
%% The boss may have changed, but don't bother.
1663
Second = delete_nonode([node() | Extra] -- First),
1664
case set_lock(LockId, Second, 0) of
1666
Known = S#multi.known,
1667
case set_lock(LockId, Known -- First, 0) of
1669
locker_trace(S, ok, {First, Known}),
1672
%% Since the boss is locked we should have
1673
%% gotten the lock, at least if there are
1674
%% no version 4 nodes in the partition or
1675
%% someone else is locking 'global'.
1676
%% Calling set_lock with Retries > 0 does
1677
%% not seem to speed things up.
1678
SoFar = First ++ Second,
1679
del_lock(LockId, SoFar),
1680
locker_trace(S, not_ok, {Known,SoFar}),
1383
%% Since the boss is locked we should have
1384
%% gotten the lock, at least if there are
1385
%% no version 4 nodes in the partition or
1386
%% someone else is locking 'global'.
1387
%% Calling set_lock with Retries > 0 does
1388
%% not seem to speed things up.
1389
del_lock(LockId, Extra ++ First),
1684
del_lock(LockId, First),
1685
locker_trace(S, not_ok, {Second, First}),
1393
del_lock(LockId, First),
1689
locker_trace(S0, not_ok, {First, []}),
1697
lists:delete(nonode@nohost, L).
1699
%% Let the server add timestamp.
1700
locker_trace(#multi{do_trace = false}, _, _Nodes) ->
1702
locker_trace(#multi{do_trace = true}, ok, Ns) ->
1703
global_name_server ! {trace_message, {locker_succeeded, node()}, Ns};
1704
locker_trace(#multi{do_trace = true}, not_ok, Ns) ->
1705
global_name_server ! {trace_message, {locker_failed, node()}, Ns};
1706
locker_trace(#multi{do_trace = true}, rejected, Ns) ->
1707
global_name_server ! {trace_message, {lock_rejected, node()}, Ns}.
1400
1709
update_locker_known(S) ->
1402
1711
{add_to_known, Nodes} ->
1435
1744
{lock_set, P, true, _} when node(P) =:= Node ->
1436
gen_server:cast(global_name_server, {lock_is_set, Node, MyTag}),
1745
gen_server:cast(global_name_server,
1746
{lock_is_set, Node, MyTag, LockId}),
1437
1747
?trace({lock_sync_done, {p,P, node(P)}, {me,self()}}),
1439
1749
%% Wait for global to tell us to remove lock. Should the
1440
1750
%% other locker's node die, global_name_server will
1441
%% receive nodedown, and then send {cancel, Node, Tag}.
1751
%% receive nodedown, and then send {cancel, Node, Tag, Fun}.
1443
{cancel, Node, _} ->
1753
{cancel, Node, _, Fun} ->
1444
1754
?trace({lock_set_loop, {known1,Known1}}),
1445
del_lock(LockId, Known1)
1756
delete_global_lock(LockId, Known1)
1447
1758
S#multi{just_synced = true,
1448
1759
local = lists:delete(Him, S#multi.local),
1449
1760
remote = lists:delete(Him, S#multi.remote)};
1450
1761
{lock_set, P, false, _} when node(P) =:= Node ->
1451
1762
?trace({not_both_set, {node,Node},{p, P},{known1,Known1}}),
1452
del_lock(LockId, Known1),
1763
locker_trace(S, rejected, Known1),
1764
delete_global_lock(LockId, Known1),
1454
{cancel, Node, _} ->
1766
{cancel, Node, _, Fun} ->
1455
1767
?trace({the_locker, cancel2, {node,Node}}),
1456
del_lock(LockId, Known1),
1769
locker_trace(S, rejected, Known1),
1770
delete_global_lock(LockId, Known1),
1457
1771
remove_node(Node, S);
1458
1772
{'EXIT', _, _} ->
1459
1773
?trace({the_locker, exit, {node,Node}}),
1460
del_lock(LockId, Known1),
1774
locker_trace(S, rejected, Known1),
1775
delete_global_lock(LockId, Known1),
1666
%% check_exit/3 removes the Pid from affected tables.
%% This function needs to abcast the thingie since only the local
%% server is linked to the registered process (or the owner of the
%% lock). All the other servers rely on the nodedown mechanism.
check_exit(Deleter, Pid, KnownNodes) ->
    del_names(Deleter, Pid),
    del_locks(pid_locks(Pid), Pid, KnownNodes).
1674
%% Delete, from the local tables, every name registered for Pid, and
%% notify the Deleter process of each deletion so it can propagate it.
del_names(Deleter, Pid) ->
    lists:foreach(fun({_Pid, Name}) ->
                          %% The local name is deleted immediately,
                          %% before the lock is taken. It is not known
                          %% exactly why, but it may have something to
                          %% do with supervisors...
                          ?trace({del_names, {pid,Pid}, {name,Name}}),
                          delete_global_name(Name, Pid),
                          Deleter ! {delete_name, self(), Name, Pid}
                  end, ets:lookup(global_pid_names, Pid)).
1686
L = lists:flatmap(fun({_Pid, ResourceId}) ->
1993
%% Returns true if Pid occurs (in the first position) in PidRefs, a
%% list of {Pid, RPid, Ref} tuples, i.e. Pid is one of the lockers.
pid_is_locking(Pid, PidRefs) ->
    %% lists:keymember/3 returns the boolean directly; equivalent to
    %% lists:keysearch(Pid, 1, PidRefs) =/= false.
    lists:keymember(Pid, 1, PidRefs).
1996
delete_lock(Ref, S0) ->
1997
Locks = pid_locks(Ref),
1998
del_locks(Locks, Ref, S0#state.known),
1999
F = fun({ResourceId, LockRequesterId, PidRefs}, S) ->
2000
{value, {Pid, _RPid, Ref}} =
2001
lists:keysearch(Ref, 3, PidRefs),
2002
remove_lock(ResourceId, LockRequesterId, Pid, PidRefs, true,S)
2004
lists:foldl(F, S0, Locks).
2007
L = lists:flatmap(fun({_, ResourceId}) ->
1687
2008
ets:lookup(global_locks, ResourceId)
1688
end, ets:lookup(global_pid_ids, Pid)),
1689
[Lock || Lock = {_Id, _Req, Pids} <- L, lists:member(Pid, Pids)].
1691
%% Remove each of Pid's locks from the local tables and broadcast
%% {async_del_lock, ...} to the known nodes so they do the same.
del_locks([{ResourceId, LockReqId, Pids} | Tail], Pid, KnownNodes) ->
    remove_lock(ResourceId, LockReqId, Pid, Pids),
    gen_server:abcast(KnownNodes, global_name_server,
                      {async_del_lock, ResourceId, Pid}),
    del_locks(Tail, Pid, KnownNodes);
del_locks([], _Pid, _KnownNodes) -> done.
1698
%% Unregister all Name/Pid pairs such that node(Pid) =:= Node
%% and delete all locks where node(Pid) =:= Node.
do_node_down(Node) ->
    ?trace({do_node_down, Node}),
    do_node_down_names(Node),
    do_node_down_locks(Node).
1705
%% Delete every name registered by a process on Node. The list
%% comprehension is evaluated for its side effects only: the
%% 'ok =/= delete_global_name(...)' filter forces the deletion call,
%% and the resulting list is discarded.
do_node_down_names(Node) ->
    [[] || {_, Pid} <- ets:lookup(global_node_pids, {name,Node}),
           {_, Name} <- ets:lookup(global_pid_names, Pid),
           ok =/= delete_global_name(Name, Pid)].
1710
%% Delete all locks held by processes running on Node.
do_node_down_locks(Node) ->
    NodePids = ets:lookup(global_node_pids, {lock, Node}),
    ?trace({do_node_down_locks, {node_pids, NodePids}}),
    %% usort deduplicates: a pid appears once per lock it holds.
    Pids = lists:usort([Pid || {_Node, Pid} <- NodePids]),
    lists:foreach(fun async_del_lock/1, Pids).
1716
%% Remove, from the local tables, every lock held by Pid.
async_del_lock(Pid) ->
    lists:foreach(fun({ResourceId, LockRequesterId, Pids}) ->
                          remove_lock(ResourceId, LockRequesterId, Pid, Pids)
                  end, pid_locks(Pid)).
2009
end, ets:lookup(global_pid_ids, Ref)),
2010
[Lock || Lock = {_Id, _Req, PidRefs} <- L,
2011
rpid_is_locking(Ref, PidRefs)].
2013
%% Returns true if the monitor reference Ref occurs (in the third
%% position) in PidRefs, a list of {Pid, RPid, Ref} tuples.
rpid_is_locking(Ref, PidRefs) ->
    %% lists:keymember/3 returns the boolean directly; equivalent to
    %% lists:keysearch(Ref, 3, PidRefs) =/= false.
    lists:keymember(Ref, 3, PidRefs).
2016
%% Send {async_del_lock, ...} to old nodes (pre R11B-3).
2017
del_locks([{ResourceId, _LockReqId, PidRefs} | Tail], Ref, KnownNodes) ->
2018
{value, {Pid, _RPid, Ref}} = lists:keysearch(Ref, 3, PidRefs),
2019
case node(Pid) =:= node() of
2021
gen_server:abcast(KnownNodes, global_name_server,
2022
{async_del_lock, ResourceId, Pid});
2026
del_locks(Tail, Ref, KnownNodes);
2027
del_locks([], _Ref, _KnownNodes) ->
2030
%% Clean up the synchronization state kept for Node after a nodedown:
%% cancel any resolver, inform the locker, reset the per-node state in
%% the process dictionary, and drop Node from 'known' and 'synced'.
handle_nodedown(Node, S) ->
    %% DOWN signals from monitors have removed locks and registered names.
    #state{known = Known, synced = Syncs} = S,
    NewS = cancel_locker(Node, S, get({sync_tag_my, Node})),
    NewS#state.the_locker ! {remove_from_known, Node},
    reset_node_state(Node),
    NewS#state{known = lists:delete(Node, Known),
               synced = lists:delete(Node, Syncs)}.
1722
ets:tab2list(global_names).
2040
ets:select(global_names,
2041
ets:fun2ms(fun({Name, Pid, Method, _RPid, _Ref}) ->
1724
2045
%% Returns the full contents of the global_names_ext table:
%% all externally (C-node) registered names as {Name, Pid, RegNode}.
get_names_ext() ->
    ets:tab2list(global_names_ext).