1
%%% -----------------------------------------------------------------------------
2
%%% File : inviso_tool.erl
3
%%% Author : Lennart �hman <lennart.ohman@st.se>
6
%%% Created : 22 Sep 2005 by Lennart �hman <lennart.ohman@st.se>
7
%%% This is the tool that uses the inviso trace functionality.
8
%%% It replaces ttb in many aspects.
9
%%% -----------------------------------------------------------------------------
12
%% ------------------------------------------------------------------------------
14
%% ------------------------------------------------------------------------------
15
-export([start/3,start/2,start/1,start/0]).
17
-export([start_session/1,cancel_session/1,stop_session/3]).
20
%% ------------------------------------------------------------------------------
22
%% ------------------------------------------------------------------------------
23
-export([init/1,handle_call/3,handle_info/2]).
27
%% ------------------------------------------------------------------------------
28
%% Module-wide constants.
29
%% ------------------------------------------------------------------------------
31
-define(SERVER,inviso_tool). % The tool control process name.
32
-define(DEFAULT_TIMEOUT,10000). % Gen-server server side timeout.
33
-define(LOCAL_RUNTIME,local_runtime). % Name for node when non-distributed.
36
%% A standard mandatory safety-catch.
38
%% ------------------------------------------------------------------------------
40
%% Constants used as options to the trace control component.
41
%% TraceControlComponentOptions.
42
-define(TCCO_SUBSCRIBE,{subscribe,self()}).
43
%% ------------------------------------------------------------------------------
45
%% Constants describing the events which may arrive from our trace control component.
46
%% TraceControlComponentEvents
47
-define(TCCE_DISCONNECTED,left_cluster).
48
-define(TCCE_CONNECTED,foobar). % TBD!
49
%% ------------------------------------------------------------------------------
51
%% Constants for events generated by inviso_tool to our subscribers.
52
-define(E_DISCONNECT_NODE,disconnect_node).
53
-define(E_CONNECT_NODE,connect_node).
54
-define(E_END_SESSION,session_ended).
55
%% ------------------------------------------------------------------------------
59
%% ==============================================================================
60
%% Exported API functions.
61
%% ==============================================================================
64
%% ------------------------------------------------------------------------------
65
%% start/3,/2,/1,/0 = {ok,Pid}.
66
%% CtrlNode=atom( )Nodename where the tracer control component shall run.
67
%% TracerNodes=list() Nodes where tracing shall be conducted.
68
%% SafetyCatches=[{Mod,Func},...]
70
%% Start function which spawns the trace-tool-builder control process. This function
71
%% comes in two major versions, for a distributed system and for a non-distributed
72
%% system. When non-distributed all components of the trace system are supposed to
73
%% run on this node. Non-distributed functionality of underlaying tracing will be
75
start(CtrlNode,TraceNodes,Options) ->
76
gen_server:start({local,?SERVER},
78
{{node,CtrlNode},TraceNodes,Options},
80
start(CtrlNode,TraceNodes) ->
81
gen_server:start({local,?SERVER},?MODULE,{{node,CtrlNode},TraceNodes,[]},[]).
83
gen_server:start({local,?SERVER},?MODULE,{void,void,Options},[]).
85
gen_server:start({local,?SERVER},?MODULE,{void,void,[]},[]).
86
%% ------------------------------------------------------------------------------
89
%% Nodes=[{NodeName,Status},...].
90
%% Status=running | unavailable
92
%% Returns a list of all nodes we have asked to be part of our set. Meaning having
93
%% a runtime component controlled by "our" control component.
95
gen_server:call(?SERVER,nodes).
96
%% ------------------------------------------------------------------------------
99
% run(InitMod,SuiteMod,SuiteArgs) ->
100
% gen_server:call(?SERVER,{run,InitMod,SuiteMod,SuiteArgs}).
102
%% ==============================================================================
103
%% Exported trace functions.
104
%% ==============================================================================
106
%% start_session(NodeParams) = {ok,{SID,Result}} | {error,Reason}
107
%% start_session(TracerData) =
108
%% NodeParams=[{Node,TracerData},...]
109
%% SID=pid(), tool session identifier.
110
%% Result=[{Node,NodeResult},...] | NodeResult
111
%% NodeResult= TBD!!!
113
%% Function starting a session managed by the control-process. The nodes mentioned in
114
%% the NodeParams list must previously have been specified as our nodes. In the
115
%% non-distributed case, just provide tracer-data as argument.
116
start_session(NodeParams) when list(NodeParams) ->
117
gen_server:call(?SERVER,{start_session,NodeParams});
118
start_session(TracerData) when tuple(TracerData); atom(TracerData) ->
119
gen_server:call(?SERVER,{start_session_non_distributed,TracerData}).
120
%% ------------------------------------------------------------------------------
122
%% stop_session(SID) = {ok,Nodes} | ok | {error,Reason}
123
%% Nodes=[NodeName,...]
124
%% Reason=unknown_session
126
%% Function which stops an ongoing session. This means loosing all information
127
%% related to this session. Lookin in the session handler for further info on
128
%% what happens when a session is stopped.
129
cancel_session(SID) when pid(SID) ->
130
gen_server:call(?SERVER,{cancel_session,SID}).
131
%% ------------------------------------------------------------------------------
134
stop_session(SID,Dir,Prefix) when pid(SID) ->
135
gen_server:call(?SERVER,{stop_session,SID,Dir,Prefix}).
138
%% run_suite(Params) -> {ok,SuiteRef,Result} | {error,Reason}
139
%% SuiteRef=A reference to the suite.
140
%% Result=[{SessionName,SessionReturn},...]
141
%% SessionReturn=ok|{error,Reason}
142
%% Starts a suite, that is one or several sessions. The suite ends when all
143
%% sessions have finished and made to to end by the suite. Note that ok will
144
%% be returned for a session as long as no general failure occured. This means
145
%% that even if no nodes at all were initiated, it may be ok.
146
% run_suite(Params) ->
147
% gen_server:call(?SERVER,{run_suite,Params}).
151
%% ==============================================================================
152
%% Genserver call-backs.
153
%% ==============================================================================
155
%% The init function for the trace tool builder control process.
156
%% It will start the trace control component and run-time components on all
157
%% Erlang nodes participating in the tracing.
158
init({CtrlNode,TraceNodes,Options}) ->
159
process_flag(trap_exit,true),
160
case get_and_check_init_options(Options) of
161
{ok,SafetyCa,TimeOut,EventPid,Dbg,RTtag} ->
163
void -> % Non distributed case.
164
case start_trace_control(Dbg,RTtag) of
167
mk_ld(CtrlNode,Pid,[{local_runtime,Info}],?SC++SafetyCa,
168
TimeOut,EventPid,Dbg,RTtag),
170
{error,Reason} -> % Could not start the tracer!
174
case start_trace_control(NodeName,TraceNodes,Dbg,RTtag) of
177
mk_ld(CtrlNode,Pid,Result,?SC++SafetyCa,TimeOut,EventPid,Dbg,RTtag),
179
{error,Reason} -> % Could not start the tracer!
182
{M,F,Args} when atom(M),atom(F),list(Args) ->
183
case apply(M,F,Args) of
184
NodeName when atom(NodeName) ->
185
case start_trace_control(NodeName,TraceNodes,Dbg,RTtag) of
188
mk_ld({CtrlNode,NodeName},Pid,Result,?SC++SafetyCa,
189
TimeOut,EventPid,Dbg,RTtag),
195
{stop,{faulty_nodename,Else}}
199
{stop,{faulty_options,What}}
201
%% -----------------------------------------------------------------------------
204
%% Help function which starts the tracer-control-component on the
205
%% desired Erlang node. It also tells the control component where to start or
206
%% possibly adopt run-time components.
207
%% This function also makes the inviso tool control process subscribe to event from
208
%% the trace control component.
209
%% Returns {ok,{TracerControlPid,Result}} or {error,Reason}, where
210
%% Result is [{Node,Info},...] or Info (the later in the non distributed case).
211
%% Info is {ok,new}, {ok,{tag,Tag}} or {error,Reason}.
212
start_trace_control(Dbg,RTtag) ->
213
case inviso:start([?TCCO_SUBSCRIBE]) of % This is the non distributed case.
215
case inviso:add_node(trace_c_tag(RTtag)) of
216
{ok,{adopted,State,Status,Tag}} when Tag=/=RTtag -> % Other tag, stop it!
217
case inviso:stop_node() of
219
inviso_tool_lib:debug(stop_wrong_tag,
221
[{ok,{adopted,State,Status,Tag}},ok]),
222
{ok,{Pid,{error,wrong_tag}}};
223
{error,Reason} -> % Extremely strange!
225
{error,{stop_node,Reason}}
227
{ok,Result} -> % All other cases of ok are ok!
228
{ok,{Pid,{ok,Result}}};
229
{error,already_added} -> % Maybe it autoconnected?!
230
{ok,{Pid,{error,already_added}}}; % Wait for the connect event.
231
{error,Reason} -> % Failed to start a local rt component.
232
inviso:stop(), % Stop trace_c!
233
{error,{local_runtime,Reason}}
236
{error,{start_trace_c,Reason}}
239
start_trace_control(CtrlNode,TraceNodes,Dbg,RTtag) ->
240
case inviso_tool_lib:inviso_cmd(CtrlNode,start,[[?TCCO_SUBSCRIBE]]) of
242
case inviso_tool_lib:inviso_cmd(CtrlNode,add_nodes,[TraceNodes,trace_c_tag(RTtag)]) of
243
{ok,Result} -> % May contain some errors too.
244
{ok,Result2}=stop_adopted_nodes(CtrlNode,RTtag,Result,Dbg),
246
{error,Reason} -> % Strange, started trace_c but failed here.
247
inviso_tool_lib:inviso_cmd(CtrlNode,stop_all,[]),
248
{error,{add_nodes,Reason}}
250
{error,Reason} -> % Maybe already started.
251
{error,{start_trace_c,Reason}}
253
%% -----------------------------------------------------------------------------
258
% handle_call({run,InitMod,TraceCases,StopCond},_From,LD) ->
259
% P=spawn_link(?MODULE,trace_case_handler,[TraceCases]),
261
handle_call(nodes,_From,LD) ->
262
NodeData=handle_nodes(get_nodedata_ld(LD)),
263
{reply,NodeData,LD,get_timeout_ld(LD)};
265
handle_call({start_session_non_distributed,TracerData},From,LD) ->
266
handle_call({start_session,[{?LOCAL_RUNTIME,TracerData}]},From,LD);
268
handle_call({start_session,NodeParams},From,LD) ->
269
CtrlNode=get_ctrlnode_node_ld(LD),
270
CtrlPid=get_ctrlpid_ld(LD),
271
SC=get_catches_ld(LD),
273
{NodesIn,NodesNotIn}=not_in_session(ld_nodedata_mk_list(get_nodedata_ld(LD)),NodeParams),
276
void -> % Use non-distributed functions.
277
{_,TracerData}=NodeParams,
278
inviso_tool_sh:start_link(From,TracerData,CtrlPid,SC,Dbg);
280
inviso_tool_sh:start_link(From,NodeParams,CtrlNode,CtrlPid,SC,
281
Dbg,NodesIn,NodesNotIn)
283
{ok,SID} -> % The pid is the SID.
284
NewLD=add_session_ld(NodesNotIn,SID,LD), % Add the new session to loopdata.
285
{noreply,NewLD,get_timeout_ld(NewLD)}; % Reply will come from SID-pid.
287
{noreply,LD,get_timeout_ld(LD)}
290
handle_call({cancel_session,SID},_From,LD) ->
291
Sessions=get_sessions_ld(LD),
292
case lists:member(SID,Sessions) of % Check amongst our SIDs.
293
true -> % Ok, it is a valid SID!
294
inviso_tool_sh:cancel_session(SID),
295
case delete_session_ld(SID,LD) of
296
{NewLD,?LOCAL_RUNTIME} -> % The non-distributed case.
297
{reply,ok,NewLD,get_timeout_ld(NewLD)};
299
{reply,{ok,Nodes},NewLD,get_timeout_ld(NewLD)}
301
false -> % Incorrect SID.
302
{reply,{error,unknown_session},LD,get_timeout_ld(LD)}
305
handle_call({stop_session,SID,Dir,Prefix},_From,LD) ->
306
Sessions=get_sessions_ld(LD),
307
case lists:member(SID,Sessions) of % Check amongst our SIDs.
308
true -> % Ok, it is a valid SID!
309
case inviso_tool_sh:stop_session(SID,Dir,Prefix) of
310
{ok,{FailedNodes,FetchedFiles}} ->
311
case delete_session_ld(SID,LD) of
312
{NewLD,[?LOCAL_RUNTIME]} -> % The non-distributed case.
313
{reply,{ok,[{failed_nodes,FailedNodes},
314
{files,FetchedFiles}]},
316
get_timeout_ld(NewLD)};
317
{NewLD,Nodes} -> % Nodes we removed from the session.
318
{reply,{ok,[{nodes,Nodes},
319
{failed_nodes,FailedNodes},
320
{files,FetchedFiles}]},
322
get_timeout_ld(NewLD)}
325
false -> % Incorrect SID.
326
{reply,{error,unknown_session},LD,get_timeout_ld(LD)}
329
handle_call(What,_From,LD) ->
330
inviso_tool_lib:debug(garbage,get_dbg_ld(LD),[What]),
331
{reply,{error,unknown_request},LD}.
333
% handle_call({run_suite,Sessions},_From,LD) ->
334
% {SuiteID,SData,Result}=handle_run_suite(Sessions,LD),
335
% {reply,{ok,SuiteID,Result},update_sdata_ld(SData,LD)};
339
%% Handling of periodic timeouts if used.
340
%% We search for new nodes, that is runtime components listed as ours but
341
%% not been able to own yet.
342
handle_info(timeout,LD) ->
343
case timeout_check_new_nodes(get_ctrlnode_node_ld(LD),
347
false -> % No new nodes found, or all connected.
348
{noreply,LD,get_timeout_ld(LD)};
349
{ok,NewNodeData} -> % New nodes found and connected.
350
handle_send_event(get_eventpid_ld(LD),?E_CONNECT_NODE),
351
{noreply,put_nodedata_ld(NewNodeData,LD),get_timeout_ld(LD)};
352
{error,_Reason} -> % Some searious problem occured.
353
{noreply,LD,get_timeout_ld(LD)} % Here we can actually become inconsistent!
356
%% Receiving subscribed events from the trace control component.
357
handle_info({trace_event,CtrlPid,_When,{disconnected,Node,_}},LD) ->
358
case get_ctrlpid_ld(LD) of
359
CtrlPid -> % It is indeed from our control component.
360
case trace_event_disconnect_node(Node,get_nodedata_ld(LD)) of
361
{ok,NewNodeData} -> % It was one of our nodes.
362
handle_send_event(get_eventpid_ld(LD),?E_DISCONNECT_NODE),
363
inviso_tool_lib:debug(trace_event,get_dbg_ld(LD),[?TCCE_DISCONNECTED,Node]),
364
{noreply,put_nodedata_ld(NewNodeData,LD),get_timeout_ld(LD)};
365
false -> % It was not one of our nodes.
366
{noreply,LD,get_timeout_ld(LD)} % Ignore it!
368
_ -> % Hmm, not from our control component!
369
{noreply,LD,get_timeout_ld(LD)} % Ignore it!
372
%% A runtime component connects to our trace control component. Either because
373
%% we ordered it to, or because it spontaneously wanted to. We do only accept
374
%% runtime components having our reference tag and that are specified as being
376
handle_info({trace_event,CtrlPid,_When,{connected,Node,{Tag,_}}},LD) ->
377
case get_ctrlpid_ld(LD) of
378
CtrlPid -> % It is indeed from our control component.
379
case trace_event_is_my_tag(Tag,get_rttag_ld(LD)) of
380
true -> % This node is allowed join.
381
case trace_event_connect_node(Node,get_nodedata_ld(LD)) of
383
handle_send_event(get_eventpid_ld(LD),?E_CONNECT_NODE),
384
inviso_tool_lib:debug(trace_event,
386
[?TCCE_CONNECTED,Node]),
387
{noreply,put_nodedata_ld(NewNodeData,LD),get_timeout_ld(LD)};
388
false -> % The node is not set-up as our node.
389
{noreply,LD,get_timeout_ld(LD)}
391
false -> % Not previously started by "us".
392
inviso_tool_lib:inviso_cmd(get_ctrlnode_node_ld(LD),stop_nodes,[[Node]]),
393
inviso_tool_lib:debug(trace_event,get_dbg_ld(LD),[?TCCE_CONNECTED,Node]),
394
inviso_tool_lib:debug(trace_event,get_dbg_ld(LD),[stopped_node,Node]),
395
{noreply,LD,get_timeout_ld(LD)}
397
_ -> % Hmm, not from our control component!
398
{noreply,LD,get_timeout_ld(LD)} % Ignore it!
401
%% We expect exit messages from our session handlers. If a session handler has
402
%% terminated we want to clean up and mark participating nodes as no longer part
403
%% of the no longer existing session.
404
%% We distinguage between the 'normal' reason and other terminations. If it
405
%% terminates normally, the session handler has been stopped due to that we are
406
%% actually stopping the session. No cleaning necessary then (since the handle
407
%% function for stop cleans!).
408
handle_info({'EXIT',_,normal},LD) -> % Do nothing when reason 'normal'.
409
{noreply,LD,get_timeout_ld(LD)}; % The SID is already removed!(?).
410
handle_info({'EXIT',Pid,Reason},LD) ->
411
case lists:member(Pid,get_sessions_ld(LD)) of % Is it a SID?
412
true -> % It was one of our sessions.
413
{NewNodeData,Nodes}=ld_nodedata_remove_sid(Pid,get_nodedata_ld(LD)),
414
io:format("Session handler ~w crashed:~n~p~n",[Pid,Reason]),
415
handle_send_event(get_eventpid_ld(LD),[?E_END_SESSION,Pid,Nodes]),
416
inviso_tool_lib:debug(session_handler,get_dbg_ld(LD),[Pid,Reason,Nodes]),
417
{noreply,put_nodedata_ld(NewNodeData,LD),get_timeout_ld(LD)};
418
false -> % It was not one of our sessions.
419
{noreply,LD,get_timeout_ld(LD)}
423
{noreply,LD,get_timeout_ld(LD)}.
426
%% ==============================================================================
427
%% First level help functions to handle_call functions.
428
%% ==============================================================================
430
%% Help function to nodes/0 call-back. Nodes is the list from our loop data
431
%% containing the status of all nodes, connected and not connected.
432
handle_nodes(NodeData) ->
433
handle_nodes_2(ld_nodedata_mk_list(NodeData)).
435
handle_nodes_2([{Node,Status,_SID}|Rest]) ->
436
[{Node,ld_nodedata_translate_to_ui_indication(Status)}|handle_nodes_2(Rest)];
437
handle_nodes_2([]) ->
439
%% ------------------------------------------------------------------------------
443
%% ------------------------------------------------------------------------------
446
%% ------------------------------------------------------------------------------
447
%% Help functions for run_suite.
448
%% ------------------------------------------------------------------------------
450
%% Help function which starts the sessions of this suite. A session handling
451
%% process is spawned for each session successfully started with the trace control
453
%% Returns {SuiteID,SessionDataStructur,Result}.
454
% handle_run_suite(Sessions,LD) ->
455
% SData=lookup_sdata_ld(LD),
456
% CtrlNode=lookup_ctrlnode_ld(LD),
457
% handle_run_suite_2(Sessions,CtrlNode,SData,make_ref(),[]).
460
% handle_run_suite_2([{SName,Session}|Rest],CtrlNode,SData,SuiteID,Accum) ->
461
% case h_run_suite_start_session(CtrlnNode,Session) of
462
% {ok,SessionID,Result,FileInstr} -> % Started a session..
463
% Pid=session_handler(CtrlNode,SessionID,Result,TraceCases,FileInstr),
464
% NewSData=put_session_sdata(SName,SessionID,SuiteID,Pid),
465
% handle_run_suite_2(Rest,CtrlNode,NewSData,[{SName,ok}|Accum]);
466
% {error,Reason} -> % Could not start the session.
467
% NewSData=put_session_sdata(SName,error,SuiteID,void),
468
% handle_run_suite_2(Rest,CtrlNode,NewSData,SuiteID,[{SName,{error,Reason}}|Accum])
470
% handle_run_suite_2([],_,SData,SuiteID,Accum) ->
471
% {SuiteID,SData,Accum}.
473
% h_run_suite_start_session(void,Session) -> % Non distributed case.
474
% SessionParams=get_s_params_session(Session),
475
% FileInstr=get_fileinstr_session(Session), % How to fetch files when finished.
476
% case inviso:start_session() of
477
% {ok,{SessionID,Result}} -> % Session started.
478
% {ok,SessionID,Result,FileInstr};
479
% {error,Reason} -> % Some general failure.
482
% h_run_suite_start_session(CtrlNode,Session) ->
483
% Nodes=get_nodes_session(Session), % Get all desired Erlang nodes.
484
% SessionParams=get_s_params_session(Session),
485
% FileInstr=get_fileinstr_session(Session), % How to fetch files when finished.
487
% CtrlNode -> % We are at the control node, no rpc.
488
% case inviso:start_session(Nodes) of
489
% {ok,{SessionID,Result}} -> % Session started at control component.
490
% {ok,SessionID,Result,FileInstr};
491
% {error,Reason} -> % Some general failure.
494
% _ -> % We are at other node than control component.
495
% case rpc:call(CtrlNode,inviso,start_session,[Nodes]) of
496
% {ok,{SessionID,Result}} -> % Session started at control component.
497
% {ok,SessionID,Result};
501
% {error,{badrpc,{CtrlNode,Reason}}}
504
%% ------------------------------------------------------------------------------
506
%% ------------------------------------------------------------------------------
507
%% Help functions for handle_info.
508
%% ------------------------------------------------------------------------------
510
%% Help function which traverses the NodeData structure and tries to connect/start
511
%% runtime components on all nodes where we not already have one.
512
%% NodeData contains the current status (according to internal status notation).
513
%% In that we can find the nodes that are not running.
514
%% Returns {ok,NewNodeData} or 'false' if no changes have been made. Can also
515
%% return {error,Reason} if serious problem was encountered.
516
%% If a "serious" problem occurred we may very well be in an undefined state.
517
%% Meaning we have added nodes we don't want and are unable to get rid of.
518
timeout_check_new_nodes(CtrlNode,NodeData,RTtag,Dbg) ->
519
timeout_check_new_nodes_2(CtrlNode,ld_nodedata_mk_list(NodeData),RTtag,Dbg,[],false).
521
timeout_check_new_nodes_2(CtrlNode,[X={Node,Status,SID}|Rest],RTtag,Dbg,Accum,Flag) ->
522
case ld_nodedata_is_running(Status) of
523
false -> % Then try to start it!
524
case timeout_check_new_nodes_3(CtrlNode,Node,RTtag,Dbg) of
525
{ok,NewStatus} -> % Managed to start it this time.
526
timeout_check_new_nodes_2(CtrlNode,Rest,RTtag,Dbg,
527
[{Node,NewStatus,SID}|Accum],true);
528
false -> % No still no success.
529
timeout_check_new_nodes_2(CtrlNode,Rest,RTtag,Dbg,
530
[{Node,Status,SID}|Accum],Flag);
531
{error,Reason} -> % Serious problem, cancel the operation.
534
true -> % Running! No need to start it then.
535
timeout_check_new_nodes_2(CtrlNode,Rest,RTtag,Dbg,[X|Accum],Flag) % Do nothing.
537
timeout_check_new_nodes_2(_,[],_RTtag,_Dbg,Accum,true) -> % There has been some change, report it.
538
{ok,ld_nodedata_mk_struct(lists:reverse(Accum))};
539
timeout_check_new_nodes_2(_,[],_RTtag,_Dbg,_Accum,false) -> % No changes, drop Accum.
542
timeout_check_new_nodes_3(void,_Node,RTtag,Dbg) -> % The non-distributed case.
543
case inviso:add_node(trace_c_tag(RTtag)) of
544
{ok,{{tag,Tag},_State}} when Tag=/=RTtag -> % We don't want this one.
545
case inviso:stop_node() of
547
inviso_tool_lib:debug(stop_wrong_tag,Dbg,[local_runtime,ok]),
549
{error,Reason} -> % Strange could not stop the node!
550
inviso_tool_lib:debug(stop_wrong_tag,Dbg,[local_runtime,{error,Reason}]),
551
{error,{stop_node,local_runtime}}
553
{ok,already_added} -> % We expect a connect event then.
554
false; % Wait for it instead.
556
{ok,ld_nodedata_translate_to_status(Result)};
558
inviso_tool_lib:debug(add_node,Dbg,[local_runtime,{error,Reason}]),
561
timeout_check_new_nodes_3(CtrlNode,Node,RTtag,Dbg) ->
562
case inviso_tool_lib:inviso_cmd(CtrlNode,add_nodes,[[Node],trace_c_tag(RTtag)]) of
563
{ok,[{Node,{ok,already_added}}]} -> % Strange, must have added it self.
564
false; % Wait for the event message instead.
565
{ok,[{Node,{error,_Reason}}]} -> % Can still not start this runtime comp.
567
{ok,[{Node,{ok,{{tag,Tag},_State}}}]} when Tag=/=RTtag -> % Don't want this one!
568
case inviso_tool_lib:inviso_cmd(CtrlNode,stop_nodes,[[Node]]) of
569
{ok,_} -> % Managed to stop it.
570
inviso_tool_lib:debug(stop_wrong_tag,Dbg,[[Node],ok]),
571
false; % Indicate it as no started node!
572
{error,Reason} -> % This is really difficult.
573
inviso_tool_lib:debug(stop_wrong_tag,Dbg,[[Node],{error,Reason}]),
574
{error,{stop_nodes,Reason}}
576
{ok,[{Node,Result}]} -> % We managed to get control over it!
577
{ok,ld_nodedata_translate_to_status(Result)};
578
{error,Reason} -> % Strange, some kind of general failure.
579
inviso_tool_lib:debug(add_nodes,Dbg,[[Node],{error,Reason}]),
580
{error,{add_nodes,Reason}}
582
%% ------------------------------------------------------------------------------
585
%% Help function which changes the internal status for a node in the nodes
586
%% list found the loop data.
587
%% Returns {ok,NewNodeData} or 'false' if no changes were made to NodeData.
588
trace_event_disconnect_node(Node,NodeData) ->
589
case ld_nodedata_get_status(Node,NodeData) of
590
{ok,_Status} -> % It is one of our nodes!
591
{ok,ld_nodedata_update_status(Node,ld_nodedata_disconnected_status(),NodeData)};
592
false -> % No it is not our node.
595
%% ------------------------------------------------------------------------------
597
%% Help function which takes a nodedata structure of nodes from the Loop data
598
%% structure and marks a node as running. Note that we will only do that
599
%% for a node which is part of our set of nodes.
600
%% Returns 'false' or {ok,NewNodeData}.
601
trace_event_connect_node(Node,NodeData) ->
602
case ld_nodedata_get_status(Node,NodeData) of
603
{ok,_Status} -> % It is one of our nodes!
604
{ok,ld_nodedata_update_status(Node,ld_nodedata_running_status(),NodeData)};
605
false -> % No it is not our node.
608
%% ------------------------------------------------------------------------------
610
%% Help function which compares two tags to find out if it is one "we" have
612
%% Returns 'true' or 'false'.
613
trace_event_is_my_tag(MyTag,MyTag) ->
615
trace_event_is_my_tag(_,_MyTag) ->
617
%% ------------------------------------------------------------------------------
620
%% Help function which sends an event to our subscriber. Note that this function
621
%% must handle the case when there is no subscriber registered.
622
%% Returns nothing significant.
623
handle_send_event(Pid,What) when pid(Pid) ->
624
Pid ! {inviso_tool_event,self(),now(),What};
625
handle_send_event(_,_) -> % The case when no subscriber.
627
%% ------------------------------------------------------------------------------
631
%% ==============================================================================
632
%% Various help functions.
633
%% ==============================================================================
636
%% Help function which returns the tag with which we cn identify if a runtime
637
%% component was previously controlled by "us" or not.
638
trace_c_tag(RTtag) ->
640
%% -----------------------------------------------------------------------------
642
%% Help function which takes a return value from add_nodes and stops all
643
%% runtimes that were adopted. I.e they were already running and had another
644
%% reference tag than our own. If the keep connecting mechanism is activated
645
%% the stopped nodes may be connected later.
646
%% Returns a new Result list with the stopped nodes removed.
647
stop_adopted_nodes(NodeName,RTtag,Result,Dbg) ->
648
Nodes=lists:foldl(fun({N,{ok,{adopted,_S,_Status,Tag}}},Acc) when Tag=/=RTtag -> [N|Acc];
652
Result), % Get all "problem" nodes.
653
case inviso_tool_lib:inviso_cmd(NodeName,stop_nodes,[Nodes]) of
655
inviso_tool_lib:debug(stop_wrong_tags,Dbg,[Result,StopResult]),
656
Result2=lists:filter(fun({N,{ok,_}}) ->
657
case lists:member(N,Nodes) of
658
true -> % Remove this node.
660
false -> % Keep in result list.
663
(_) -> % Keep it in all other cases.
666
Result), % A Result without the "wrong" nodes.
669
inviso_tool_lib:debug(stop_wrong_tags,Dbg,[Result,{error,Reason}]),
672
%% -----------------------------------------------------------------------------
674
%% Help function which of the nodes in NodeParams finds out which nodes are not
675
%% already active in a session.
676
%% Returns {NodeInASession,NodesNotInASession}.
677
%% Note that this function can only be used for the distributed situation.
678
%% NodeParams=[{Node,TracerData},...]
679
not_in_session(NodeDataList,NodeParams) ->
680
not_in_session_2(NodeDataList,NodeParams,[],[]).
682
not_in_session_2(NodeDataList,[NodeParam|Rest],AccIn,AccNotIn)
683
when tuple(NodeParam),size(NodeParam)>1 ->
684
Node=element(1,NodeParam),
685
case lists:keysearch(Node,1,NodeDataList) of
686
{value,{_,_,SID}} when pid(SID) -> % This one is in a session already.
687
not_in_session_2(NodeDataList,Rest,[Node|AccIn],AccNotIn);
688
_ -> % Not in a session, add to accum.
689
not_in_session_2(NodeDataList,Rest,AccIn,[Node|AccNotIn])
691
not_in_session_2(NodeDataList,[_|Rest],AccIn,AccNotIn) -> % Don't understand.
692
not_in_session_2(NodeDataList,Rest,AccIn,AccNotIn); % Skip it!
693
not_in_session_2(_,[],AccIn,AccNotIn) ->
694
{lists:reverse(AccIn),lists:reverse(AccNotIn)}.
695
%% -----------------------------------------------------------------------------
698
%% -----------------------------------------------------------------------------
699
%% Functions handling start options.
700
%% -----------------------------------------------------------------------------
702
-define(SAFETY_CATCHES,safety_catches). % Own defined safety catches.
703
-define(KEEP_CONNECTING,keep_connecting). % Periodically try to connect.
704
-define(SUBSCRIBE_EVENTS,subscribe_events). % Have the tool inform someone about events.
705
-define(DEBUG,debug). % Make the tool print own debug info.
706
-define(RTTAG,rtref). % The reference used for our nodes.
708
-define(DBG_OFF,off). % No internal debug indicator.
710
%% This function extracts options given to the start-functions.
711
%% Returns {ok,SafetyCatches,TimeOutMillisec,EventPid,Dbg} or {error,Reason}.
712
get_and_check_init_options(Opts) when list(Opts) ->
713
case (catch get_and_check_init_options_2(Opts)) of
719
get_and_check_init_options(_) ->
722
get_and_check_init_options_2(Opts) ->
724
case lists:keysearch(?SAFETY_CATCHES,1,Opts) of
725
{value,{_,SCs}} when list(SCs) ->
726
case check_safetycatches(SCs) of
729
{error,Reason} -> % Then we will abort everything.
730
throw({error,{bad_safetycatch,Reason}})
732
{value,{_,SC={_,_}}} -> % Lets tolerate a single tuple too.
733
SC; % We already made the check, return it!
737
TimeOut= % Shall we contineously connect to nodes?
738
case lists:member(?KEEP_CONNECTING,Opts) of
739
true -> % Default connect interval.
742
case lists:keysearch(?KEEP_CONNECTING,1,Opts) of
743
{value,{_,T}} when integer(T) ->
745
_ -> % Do not use serverside timeouts!
749
EventPid= % Process subscribing to events from tool.
750
case lists:keysearch(?SUBSCRIBE_EVENTS,1,Opts) of
751
{value,{_,Pid}} when pid(Pid) ->
757
case lists:keysearch(?DEBUG,1,Opts) of
761
?DBG_OFF % Indicates that internal debug is off!
764
case lists:keysearch(?RTTAG,1,Opts) of
765
{value,{_,Tag}} -> % The reference used to indicate that
766
Tag; % it is our runtime component.
770
{ok,SafetyCatches,TimeOut,EventPid,Dbg,RTtag}.
771
%% -----------------------------------------------------------------------------
773
%% Help function which just checks that it is a list of two-tuples with atoms.
774
check_safetycatches([{M,F}|Rest]) when atom(M),atom(F) ->
775
check_safetycatches(Rest);
776
check_safetycatches([Item|_]) ->
778
check_safetycatches([]) ->
780
%% -----------------------------------------------------------------------------
783
%% -----------------------------------------------------------------------------
784
%% Functions handling loop-data
785
%% -----------------------------------------------------------------------------
787
%% Loopdata is a record:
789
%% ctrlnode : The Erlang node where the control component runs, or 'void'
790
%% {node,NodeName} or {{M,F,Args},NodeName}.
791
%% ctrlpid : The pid of the control component.
792
%% safetycatches: A list of {Mod,Func} of all safety catch functions.
793
%% nodedata : A datastructure decribing all our runtime components and
794
%% their current status. See below separate primitives.
795
%% timeout : gen_server server side timeout for the inviso tool.
796
%% eventpid : Subscriber to events generated by the tool.
797
%% dbg : inviso tool internal debug flag.
798
%% rttag : The tag indicating our trace run-time component processes.
801
%% Macro constants used to highlight that we are dependant of the return
802
%% values from trace control component APIs.
803
-record(ld,{ctrlnode,ctrlpid,safetycatches,nodedata,
804
timeout,eventpid,dbg,rttag,sessions}).
806
mk_ld(CtrlNode,Pid,Result,SafetyCatches,TimeOut,EventPid,Dbg,RTtag) ->
807
NodeData=ld_nodedata_mk_nodedata(Result),
811
safetycatches=SafetyCatches,
819
%% -----------------------------------------------------------------------------
821
%% Primitive fetching the nodename component of the ctrlnode field.
822
get_ctrlnode_node_ld(#ld{ctrlnode={_,NodeName}}) -> NodeName;
823
get_ctrlnode_node_ld(#ld{ctrlnode=void}) -> void.
824
%% -----------------------------------------------------------------------------
826
get_ctrlpid_ld(#ld{ctrlpid=CtrlPid}) -> CtrlPid.
827
%% -----------------------------------------------------------------------------
829
get_catches_ld(#ld{safetycatches=SC}) -> SC.
830
%% -----------------------------------------------------------------------------
832
%% Primitive fetching the NodeData structure.
833
get_nodedata_ld(#ld{nodedata=NodeData}) -> NodeData.
834
put_nodedata_ld(NodeData,LD) -> LD#ld{nodedata=NodeData}.
835
%% -----------------------------------------------------------------------------
837
get_timeout_ld(#ld{timeout=TimeOut}) -> TimeOut.
838
%% -----------------------------------------------------------------------------
840
get_eventpid_ld(#ld{eventpid=EventPid}) -> EventPid.
841
%% -----------------------------------------------------------------------------
843
get_dbg_ld(#ld{dbg=DBG}) -> DBG.
844
%% -----------------------------------------------------------------------------
846
get_rttag_ld(#ld{rttag=RTtag}) -> RTtag.
847
%% -----------------------------------------------------------------------------
849
%% Function adding a new session to the loop data structure.
850
%% Note that adding a session involves putting it in the sessions list but
851
%% also updating all nodedata.
852
%% NodeData is a list of all nodes which possibly can be part of the session.
853
%% That means that the SID may be set for a runtime component node that is not
855
add_session_ld(Nodes,SID,LD) ->
856
NewNodeData=add_session_ld_2(Nodes,SID,ld_nodedata_mk_list(LD#ld.nodedata),[]),
857
LD#ld{nodedata=ld_nodedata_mk_struct(NewNodeData),
858
sessions=[SID|LD#ld.sessions]}.
860
add_session_ld_2(Nodes,SID,[X={Node,Status,_SID}|Rest],Accum) ->
861
case lists:member(Node,Nodes) of
862
true -> % Set the SID for this node!
863
add_session_ld_2(Nodes,SID,Rest,[{Node,Status,SID}|Accum]);
865
add_session_ld_2(Nodes,SID,Rest,[X|Accum])
867
add_session_ld_2(_,_,[],Accum) ->
868
lists:reverse(Accum).
869
%% -----------------------------------------------------------------------------
871
%% Function which removes a session from the loopdata structure.
872
%% It both removes it from the nodedata structure and from the sessions list.
873
%% Returns {NewLD,Nodes} where Nodes is a list of all nodes changed in the
874
%% nodedata structure.
875
delete_session_ld(SID,LD) ->
876
{NewNodeDataList,Nodes}=
877
delete_session_ld_2(SID,ld_nodedata_mk_list(get_nodedata_ld(LD)),[],[]),
878
NewSessions=lists:delete(SID,get_sessions_ld(LD)),
879
{LD#ld{nodedata=ld_nodedata_mk_struct(NewNodeDataList),sessions=NewSessions},
882
delete_session_ld_2(SID,[{Node,Status,SID}|Rest],AccNDL,AccNodes) ->
883
delete_session_ld_2(SID,Rest,[{Node,Status,void}|AccNDL],[Node|AccNodes]);
884
delete_session_ld_2(SID,[NodeDataTuple|Rest],AccNDL,AccNodes) ->
885
delete_session_ld_2(SID,Rest,[NodeDataTuple|AccNDL],AccNodes);
886
delete_session_ld_2(_,[],AccNDL,AccNodes) ->
887
{lists:reverse(AccNDL),AccNodes}.
888
%% -----------------------------------------------------------------------------
890
%% Returns a list of all existing sessions and their SIDs. A list of
892
get_sessions_ld(#ld{sessions=Sessions}) -> Sessions.
893
%% -----------------------------------------------------------------------------
896
%% -----------------------------------------------------------------------------
897
%% Functions working on our internal representation of runtime nodes, node_data.
898
%% -----------------------------------------------------------------------------
899
%% The nodedata structure is currently a list of {Node,Status,SID} or {Status,SID}
900
%% in the non-distributed case. where Status describes the current condition of
901
%% the node. SID is a pid if the node currently belongs to a session
902
%% (then handled by that session handler pid).
903
%% All nodes which have been specified to be owned by our trace control component
904
%% will be part of the nodedata structure.
906
%% A node can be in one of these states:
907
%% (1) running : Normal, "we" started a fresh runtime here.
908
%% (2) rejected : Runtime already controlled by someone else.
909
%% (3) {error,Reason}: Unavailable due to some reason.
911
%% It is not the work of the inviso tool master process to know in what state a runtime
912
%% component is. That is the work of a session handler process!
915
%% Help function which converts a list returned from the trace control components
916
%% start function. Returns a nodedata data structure which is supposed to be
917
%% stored in the loopdata field nodedata.
918
%% Note that this function my only be used on a return value comming from a call
919
%% where we do not accept runtime components started with another tag than our own.
920
%% This since there is no mechanism which converts such returned statuses to an
921
%% {error,Reason} instead.
922
ld_nodedata_mk_nodedata(Result) ->
923
lists:map(fun({N,Status}) ->
924
{N,ld_nodedata_translate_to_status(Status),void}
928
%% Help function which converts the status value returned from the trace control
929
%% component when starting a runtime component to our internal status values.
930
ld_nodedata_translate_to_status({error,refused}) -> % The rt is owned by other.
932
ld_nodedata_translate_to_status({error,Reason}) -> {error,Reason};
933
ld_nodedata_translate_to_status({ok,new}) -> % Never a problem when freshly started.
934
ld_nodedata_running_status();
935
ld_nodedata_translate_to_status({ok,{adopted,_State,_Status,_Tag}}) ->
936
ld_nodedata_running_status();
937
ld_nodedata_translate_to_status({ok,already_added}) -> % We will get a connect event.
938
{error,already_added}. % Therefore set it in error for now.
939
%% -----------------------------------------------------------------------------
941
%% Function which returns the current status for a certain Node.
942
%% Returns {ok,Status} or 'false' if Node does not exist in the NodeData structure.
943
ld_nodedata_get_status(Node,NodeData) ->
944
case lists:keysearch(Node,1,NodeData) of
945
{value,{_,Status,_SID}} ->
950
%% -----------------------------------------------------------------------------
952
%% Function changing the status of and existing node.
953
%% Returns a new nodedata structure.
954
ld_nodedata_update_status(NodeName,Status,NodeData) ->
955
case lists:keysearch(NodeName,1,NodeData) of
957
lists:keyreplace(NodeName,1,NodeData,{NodeName,Status,SID});
958
false -> % Then no use changing it.
961
%% -----------------------------------------------------------------------------
963
%% Function which removes a session identifier pid from the nodedata structure.
964
%% This is necessary when a session has been ended and the SID no longer exists.
965
%% Returns {NewNodeData,Nodes} where Nodes is a list of all nodes that were
967
ld_nodedata_remove_sid(SID,NodeData) ->
968
{NewNDList,Nodes}=ld_nodedata_remove_sid_2(SID,ld_nodedata_mk_list(NodeData),[],[]),
969
{ld_nodedata_mk_struct(NewNDList),Nodes}.
971
ld_nodedata_remove_sid_2(SID,[{Node,Status,SID}|Rest],AccND,AccNodes) ->
972
ld_nodedata_remove_sid_2(SID,Rest,[{Node,Status,void}|AccND],[Node|AccNodes]);
973
ld_nodedata_remove_sid_2(SID,[NodeDataTuple|Rest],AccND,AccNodes) ->
974
ld_nodedata_remove_sid_2(SID,Rest,[NodeDataTuple|AccND],AccNodes);
975
ld_nodedata_remove_sid_2(_,[],AccND,AccNodes) ->
976
{lists:reverse(AccND),AccNodes}.
977
%% -----------------------------------------------------------------------------
979
%% Function which takes a nodedata structure and returns a list of
980
%% {Node,Status,SID}.
981
ld_nodedata_mk_list(NodeData) -> NodeData.
982
%% -----------------------------------------------------------------------------
984
%% Function which takes a nodedata list and returns a nodedata structure
985
%% as it is supposed to be stored in the loopdata structure.
986
ld_nodedata_mk_struct(NodeList) -> NodeList.
987
%% -----------------------------------------------------------------------------
989
%% This function translates the internal status indication to the one used
990
%% in the nodes/0 api.
991
ld_nodedata_translate_to_ui_indication(running) -> running;
992
ld_nodedata_translate_to_ui_indication(_) -> unavailable.
993
%% -----------------------------------------------------------------------------
995
%% Help function which returns the internal status indication for a running
996
%% runtime component.
997
ld_nodedata_running_status() -> running.
998
%% -----------------------------------------------------------------------------
1000
%% Help function which returns the internal status indication for a disconnected
1001
%% runtime component.
1002
ld_nodedata_disconnected_status() -> {error,disconnected}.
1003
%% -----------------------------------------------------------------------------
1005
%% Help function which returns 'true' or 'false' depending on whether the
1006
%% argument indicates a node to be running or not.
1007
ld_nodedata_is_running(running) -> true;
1008
ld_nodedata_is_running(_) -> false.
1009
%% -----------------------------------------------------------------------------