3
"$Id: task.c,v 1.15 1997/12/01 22:17:43 pvmsrc Exp $";
6
* PVM version 3.4: Parallel Virtual Machine System
7
* University of Tennessee, Knoxville TN.
8
* Oak Ridge National Laboratory, Oak Ridge TN.
9
* Emory University, Atlanta GA.
10
* Authors: J. J. Dongarra, G. E. Fagg, M. Fischer
11
* G. A. Geist, J. A. Kohl, R. J. Manchek, P. Mucci,
12
* P. M. Papadopoulos, S. L. Scott, and V. S. Sunderam
13
* (C) 1997 All Rights Reserved
17
* Permission to use, copy, modify, and distribute this software and
18
* its documentation for any purpose and without fee is hereby granted
19
* provided that the above copyright notice appear in all copies and
20
* that both the copyright notice and this permission notice appear in
21
* supporting documentation.
23
* Neither the Institutions (Emory University, Oak Ridge National
24
* Laboratory, and University of Tennessee) nor the Authors make any
25
* representations about the suitability of this software for any
26
* purpose. This software is provided ``as is'' without express or
29
* PVM version 3 was funded in part by the U.S. Department of Energy,
30
* the National Science Foundation and the State of Tennessee.
39
* Revision 1.15 1997/12/01 22:17:43 pvmsrc
40
* Fixed tracer registry problem.
41
* - in task_cleanup(), if exited task was the tracer, forward a
42
* DM_SLCONF_TRACE message to all hosts to clear tracer setting.
45
* Revision 1.14 1997/06/27 20:05:27 pvmsrc
46
* Integrated WIN32 changes.
48
* Revision 1.13 1997/06/27 19:22:08 pvmsrc
49
* Task struct updated to hold message state info about its last message
51
* Avoids duplicate messages and helps migrated/new RMs understand task
54
* Revision 1.12 1997/06/02 13:50:05 pvmsrc
55
* Added missing #include host.h for waitc.h.
57
* Revision 1.11 1997/05/12 20:28:20 pvmsrc
58
* Removed duplicate #includes...
60
* Revision 1.10 1997/04/30 21:26:29 pvmsrc
61
* SGI Compiler Warning Cleanup.
63
* Revision 1.9 1997/04/10 17:52:35 pvmsrc
64
* Added WT_RECVINFO case to task_cleanup().
65
* - if task died while waiting for a reply from pvm_recvinfo(),
66
* wipe out the wait context.
68
* Revision 1.8 1997/04/08 19:42:57 pvmsrc
69
* *** Added new system reset protocol / wait linkage.
70
* - new DM_RESET / dm_reset() & DM_RESETACK / dm_resetack().
71
* - new WT_RESET wait type.
72
* - modified dm_db() to include new TMDB_RESET(nnr, noresets) option:
73
* * clean up mboxes, except for "no-reset" tasks.
74
* * for persistent mboxes, set up WT_RESET to remove mbox on task
75
* exit, propagate to task's host via DM_RESET.
76
* * on task exit, WT_RESET wipes mbox out, DM_RESETACK passes
77
* word on to master pvmd (if necessary).
79
* Revision 1.7 1997/04/04 15:45:39 pvmsrc
80
* Take into account the number of system contexts (NUM_SYSCTX)
81
* that are preallocated.
83
* Revision 1.6 1997/02/13 19:05:07 pvmsrc
84
* Fixed mbox cleanup problem:
85
* - in dm_db() for TMDB_PUT case, if successful create master PVMD
86
* notify on inserting task (if task not local, forward DM_NOTIFY).
87
* - then on task exit, call mb_tidy() if WT_TASKX notify wait context
88
* exists (in hostfailentry() and task_cleanup()), or if empty
89
* notify propagates back to master PVMD via DM_NOTIFYACK.
91
* Revision 1.5 1997/02/13 15:10:04 pvmsrc
92
* Removed unnecessary extern for struct waitc *waitlist.
95
* Revision 1.4 1997/01/28 19:27:32 pvmsrc
96
* New Copyright Notice & Authors.
98
* Revision 1.3 1996/10/25 13:58:07 pvmsrc
99
* Replaced old #includes for protocol headers:
100
* - <pvmsdpro.h>, "ddpro.h", "tdpro.h"
101
* With #include of new combined header:
104
* Revision 1.2 1996/10/24 22:10:04 pvmsrc
105
* Moved #include "global.h" below other #include's for typing.
106
* Added #include <pvmtev.h> for tracing constants.
107
* Added extern struct Pvmtracer pvmtracer for tracer info.
108
* Modified checking of trctid / outtid:
109
* - check for > 0, not non-zero, to handle new case where task
110
* denies external collection.
111
* Replaced inline code for pvmd trace events with new routine calls:
112
* - tev_send_endtask().
113
* On task exit, check for tracer registration -> clear tracer info...
115
* Revision 1.1 1996/09/23 23:44:43 pvmsrc
118
* Revision 1.8 1995/09/05 19:26:36 manchek
119
* clear wait WT_HOSTF in task_cleanup
121
* Revision 1.7 1995/05/17 16:53:35 manchek
122
* don't need mytid anymore
124
* Revision 1.6 1995/02/06 18:45:45 manchek
125
* added stuff to task_dump
127
* Revision 1.5 1994/11/08 15:40:59 manchek
128
* shared memory cleanup.
129
* check if wa_tid is zero before sending message in task_cleanup
131
* Revision 1.4 1994/10/15 19:32:49 manchek
132
* added log in task_free()
134
* Revision 1.3 1994/06/03 20:38:28 manchek
137
* Revision 1.2 1993/11/30 15:55:25 manchek
138
* task_free() deletes auth file if one exists
140
* Revision 1.1 1993/08/30 23:26:51 manchek
146
#include <machine/endian.h>
152
#include <sys/endian.h>
155
#include <rpc/types.h>
157
#include <sys/time.h>
158
#include <sys/socket.h>
159
#include <netinet/in.h>
162
#include "..\xdr\types.h"
163
#include "..\xdr\xdr.h"
166
#include <sys/types.h>
175
#include <pvmproto.h>
181
#include "pvmalloc.h"
194
extern void pvmbailout();
196
extern int busyadding; /* from ddpro.c */
197
extern int pvmdebmask; /* from pvmd.c */
198
extern int hostertid; /* from pvmd.c */
199
extern struct htab *hosts; /* from pvmd.c */
200
extern int myhostpart; /* from pvmd.c */
201
extern int taskertid; /* from pvmd.c */
202
extern struct Pvmtracer pvmtracer; /* from pvmd.c */
203
extern int tidlmask; /* from pvmd.c */
205
struct task *locltasks = 0; /* all task structs sorted by tid */
208
void tev_send_endtask();
215
static struct ccon *cconlist = 0;
225
* Call once before any other task stuff.
232
locltasks = TALLOC(1, struct task, "task");
233
BZERO((char*)locltasks, sizeof(struct task));
234
locltasks->t_link = locltasks;
235
locltasks->t_rlink = locltasks;
236
locltasks->t_plink = locltasks;
237
locltasks->t_prlink = locltasks;
239
cconlist = TALLOC(1, struct ccon, "ccon");
240
BZERO((char*)cconlist, sizeof(struct ccon));
241
cconlist->c_link = cconlist->c_rlink = cconlist;
248
* Allocate a new unique context id to a task.
255
static int lastcid = 0;
259
struct ccon *cp, *cp2;
261
if (++lastcid > tidlmask - NUM_SYSCTX)
267
cid = myhostpart + lastcid;
268
while (cp->c_cid < cid)
269
if ((cp = cp->c_link) == cconlist)
272
if (cp->c_cid != cid)
275
/* Wrap at the same upper bound as the initial bump of lastcid earlier in
 * this function (tidlmask - NUM_SYSCTX), so that the collision search
 * stays inside the same cid range instead of running up to tidlmask. */
if (++lastcid > tidlmask - NUM_SYSCTX) {
279
if (lastcid == startcid) {
280
pvmlogprintf("ccon_new() out of cids?\n");
285
if (!(cp2 = TALLOC(1, struct ccon, "ccon"))) {
286
pvmlogprintf("ccon_new() can't get memory\n");
290
cp2->c_tid = tp->t_tid;
291
LISTPUTBEFORE(tp->t_ccs, cp2, c_peer, c_rpeer);
292
LISTPUTBEFORE(cp, cp2, c_link, c_rlink);
293
if (pvmdebmask & PDMTASK) {
294
pvmlogprintf("ccon_new() tid=%x cid=%x\n", cp2->c_tid, cp2->c_cid);
302
* Generates a task id not already in use.
304
* XXX Be sure to call task_new() with tid before calling
311
static int lastind = 0; /* last local-part assigned */
313
int startind; /* to detect when we're out of tids */
317
if (++lastind > tidlmask)
323
tid = myhostpart + lastind;
324
while (tp->t_tid < tid)
325
if ((tp = tp->t_link) == locltasks)
328
if (tp->t_tid != tid)
331
if (++lastind > tidlmask) {
335
if (lastind == startind)
343
task_sethandle(tp,t_handle)
347
tp->t_handle = t_handle;
354
* Make a new task descriptor, add to list of local tasks but not
362
struct task *tp, *tp2;
364
if (!(tp = TALLOC(1, struct task, "task"))) {
365
pvmlogprintf("task_new() can't get memory\n");
368
BZERO((char*)tp, sizeof(struct task));
370
tp->t_txq = pk_new(0);
375
tp->t_ccs = TALLOC(1, struct ccon, "ccon");
376
tp->t_ccs->c_link = tp->t_ccs->c_rlink = 0;
377
tp->t_ccs->c_peer = tp->t_ccs->c_rpeer = tp->t_ccs;
378
tp->t_ccs->c_cid = 0;
379
tp->t_ccs->c_tid = tid;
380
FORLIST (tp2, locltasks, t_link)
381
if (tp2->t_tid > tid)
383
LISTPUTBEFORE(tp2, tp, t_link, t_rlink);
390
* Do low-level cleanup needed when a task exits.
391
* Remove task descriptor from lists and destroy it.
392
* Close any fds, unlink any files, free mbufs.
403
if (pvmdebmask & PDMTASK) {
404
pvmlogprintf("task_free() t%x\n", tp->t_tid);
407
/* XXX this is inside out - mpp_free should call task_free.
408
XXX but for now task_free is what's called.
409
XXX this will change in the portable processor interface cleanup. */
412
if (tp->t_plink && tp->t_prlink) {
413
LISTDELETE(tp, t_plink, t_prlink);
415
if (tp->t_link && tp->t_rlink) {
416
LISTDELETE(tp, t_link, t_rlink);
419
pmsg_unref(tp->t_rxm);
425
wait_delete(tp->t_wait);
427
(void)unlink(tp->t_authnam);
428
PVM_FREE(tp->t_authnam);
430
if (tp->t_sock != -1) {
431
wrk_fds_delete(tp->t_sock, 3);
432
(void)close(tp->t_sock);
434
if (tp->t_out != -1) {
435
wrk_fds_delete(tp->t_out, 1);
436
(void)close(tp->t_out);
438
if (tp->t_outtid > 0) {
440
mp->m_dst = tp->t_outtid;
441
mp->m_ctx = tp->t_outctx;
442
mp->m_tag = tp->t_outtag;
443
pkint(mp, tp->t_tid);
448
if (tp->t_trctid > 0) {
449
tev_send_endtask( tp->t_trctid, tp->t_trcctx, tp->t_trctag,
450
tp->t_tid, tp->t_status,
451
tp->t_utime.tv_sec, tp->t_utime.tv_usec,
452
tp->t_stime.tv_sec, tp->t_stime.tv_usec );
455
if (tp->t_authfd != -1)
456
(void)close(tp->t_authfd);
458
PVM_FREE(tp->t_a_out);
462
while (cp = LISTFIRST(tp->t_ccs, c_peer)) {
463
LISTDELETE(cp, c_link, c_rlink);
464
LISTDELETE(cp, c_peer, c_rpeer);
475
* Find a task in local tasks list by its tid.
485
tp = locltasks->t_link;
486
while (tp != locltasks && tp->t_tid < tid)
488
if (tp->t_tid == tid)
491
return (struct task*)0;
497
* Find a task in local tasks list by its pid.
506
tp = locltasks->t_plink;
507
while (tp != locltasks && tp->t_pid < pid)
509
return (tp->t_pid == pid) ? tp : (struct task*)0;
515
* Set the pid for a task, insert it into by-pid list. Move it
516
* if it's already in the list.
527
LISTDELETE(tp, t_plink, t_prlink);
530
for (tp2 = locltasks->t_plink; tp2 != locltasks; tp2 = tp2->t_plink)
531
if (tp2->t_pid > pid)
533
LISTPUTBEFORE(tp2, tp, t_plink, t_prlink);
539
* Do high-level cleanup needed when a task exits.
540
* Wake up any entities waiting on task, free multicast context.
541
* XXX should flush any partial messages, but that would be hard. hm.
549
struct waitc *wp, *wp2;
551
struct pvmmentry *ep;
557
/* notify anyone who asked */
560
if (pvmdebmask & PDMTASK)
561
pvmlogprintf("task_cleanup() t%x\n", tp->t_tid);
563
for (wp = waitlist->wa_link; wp != waitlist; wp = wp->wa_link) {
565
/* waits depending on this task */
567
if (wp->wa_on == tp->t_tid) {
568
switch (wp->wa_kind) {
572
free_waitc_add((struct waitc_add *)wp->wa_spec);
573
pkint(wp->wa_mesg, PvmDSysErr);
574
sendmessage(wp->wa_mesg);
576
if (pvmdebmask & (PDMTASK|PDMSTARTUP))
578
"task_cleanup() hoster t%x takes wid %d with it\n",
579
tp->t_tid, wp->wa_wid);
584
if (pvmdebmask & PDMTASK) {
586
"task_cleanup() tasker t%x takes t%x with it\n",
587
tp->t_tid, wp->wa_tid);
589
if (tp2 = task_find(wp->wa_tid)) {
598
if (wp->wa_tid && wp->wa_mesg) {
599
sendmessage(wp->wa_mesg);
606
if (wp->wa_tid && wp->wa_mesg) {
607
sendmessage(wp->wa_mesg);
610
mb_tidy_reset(tp->t_tid);
614
/* clean up pending recvinfo */
615
ep = (struct pvmmentry *) wp->wa_spec;
616
if ( ep->me_msg ) /* class name (overload :-Q) */
617
PVM_FREE( ep->me_msg );
626
"task_cleanup() can't deal with wait kind %d\n",
636
/* waits this task was waiting on */
638
if (wp->wa_tid == tp->t_tid) {
639
switch (wp->wa_kind) {
643
wp2 = wp; /* some kinds we can toss now */
649
wp->wa_tid = 0; /* in case tid gets recycled */
655
/* notify the scheduler */
657
/* Logical AND, not bitwise: send SM_TASKX whenever a scheduler tid is set
 * and SM_TASKX was not already the last scheduler message recorded for
 * this task.  The former bitwise '&' combined the tid with the 0/1 result
 * of the comparison, so only schedulers with an odd tid were notified. */
if (tp->t_sched && tp->t_schedlmsg != SM_TASKX) {
659
mp->m_dst = tp->t_sched;
660
mp->m_tag = SM_TASKX;
661
tp->t_schedlmsg = SM_TASKX;
662
if (pvmdebmask & PDMSCHED) {
663
pvmlogprintf("task_cleanup() taskx to t%x status = 0x%x\n",
664
tp->t_sched, tp->t_status);
666
pkint(mp, tp->t_tid);
667
pkint(mp, tp->t_status);
668
pkint(mp, (int)tp->t_utime.tv_sec);
669
pkint(mp, (int)tp->t_utime.tv_usec);
670
pkint(mp, (int)tp->t_stime.tv_sec);
671
pkint(mp, (int)tp->t_stime.tv_usec);
675
/* check if it's the hoster */
677
if (tp->t_tid == hostertid) {
678
if (pvmdebmask & (PDMTASK|PDMSTARTUP)) {
679
pvmlogprintf("task_cleanup() unreg hoster t%x\n", tp->t_tid);
684
/* check if it's the tasker */
686
if (tp->t_tid == taskertid) {
687
if (pvmdebmask & PDMTASK) {
688
pvmlogprintf("task_cleanup() unreg tasker t%x\n", tp->t_tid);
693
/* check if it's the tracer */
695
if (tp->t_tid == pvmtracer.trctid) {
697
if (pvmdebmask & PDMTASK) {
698
pvmlogprintf("task_cleanup() unreg tracer t%x\n",
702
pvmtracer.trctid = 0;
703
pvmtracer.trcctx = 0;
704
pvmtracer.trctag = 0;
705
pvmtracer.outtid = 0;
706
pvmtracer.outctx = 0;
707
pvmtracer.outtag = 0;
708
TEV_MASK_INIT(pvmtracer.tmask);
709
pvmtracer.trcbuf = 0;
710
pvmtracer.trcopt = 0;
712
/* tell the other pvmds */
714
for (hh = hosts->ht_last; hh > 0; hh--) {
715
if (hh != hosts->ht_local
716
&& (hp = hosts->ht_hosts[hh])) {
718
mpd->m_tag = DM_SLCONF;
719
mpd->m_dst = hp->hd_hostpart | TIDPVMD;
720
pkint(mpd, DM_SLCONF_TRACE);
721
sprintf(buf, "%x %d %d %x %d %d %d %d %s",
722
pvmtracer.trctid, pvmtracer.trcctx,
724
pvmtracer.outtid, pvmtracer.outctx,
726
pvmtracer.trcbuf, pvmtracer.trcopt,
735
/* complete multicast */
738
/* XXX should send an EOM frag to all rcpts */
752
pvmlogprintf("task_dump()\n");
753
pvmlogprintf(" tid ptid flag pid soc out wait outtid trctid sched es\n");
754
for (tp = locltasks->t_link; tp != locltasks; tp = tp->t_link) {
755
pvmlogprintf("%8x %8x %4x %6d %3d %3d %8d %8x %8x %8x %4x\n",
762
(tp->t_wait ? tp->t_wait->wa_wid : 0),
767
if (LISTFIRST(tp->t_txq, pk_link)) {
768
pvmlogprintf(" txq:pkt src dst flag len ofs\n");
769
FORLIST (pp, tp->t_txq, pk_link) {
770
pvmlogprintf("%08x %8x %8x %4x %6d %6d\n",
776
pp->pk_dat - pp->pk_buf);
779
if (LISTFIRST(tp->t_ccs, c_peer)) {
780
pvmlogprintf("contexts:");
781
FORLIST (cp, tp->t_ccs, c_peer)
782
pvmlogprintf(" 0x%x", cp->c_cid);
794
pvmlogprintf("ccon_dumpall()\n");
795
FORLIST (cp, cconlist, c_link)
796
pvmlogprintf(" tid=%x cid=%x\n", cp->c_tid, cp->c_cid);