~ubuntu-branches/debian/sid/boinc/sid

« back to all changes in this revision

Viewing changes to client/app_control.cpp

  • Committer: Package Import Robot
  • Author(s): Steffen Moeller
  • Date: 2011-08-08 01:36:51 UTC
  • mfrom: (6.1.11 experimental)
  • Revision ID: package-import@ubuntu.com-20110808013651-m1hs3cltiveuteyn
Tags: 6.13.1+dfsg-2
* Bringing notify patch to unstable.
* Adjusted build dependency to libjpeg-dev (Closes: #641093)
* Further improvements on stripchart.

Show diffs side-by-side

added added

removed removed

Lines of Context:
120
120
//
121
121
#ifdef _WIN32
122
122
bool ACTIVE_TASK::kill_all_children() {
123
 
        unsigned int i,j;
 
123
    unsigned int i,j;
124
124
    std::vector<PROCINFO> ps;
125
125
    std::vector<PROCINFO> tps;
126
126
 
130
130
    pi.id = pid;
131
131
    tps.push_back(pi);
132
132
 
133
 
        for (i=0; i < tps.size(); i++) {
134
 
                PROCINFO tp = tps[i];
135
 
            for (j=0; j < ps.size(); j++) {
136
 
                    PROCINFO p = ps[j];
 
133
    for (i=0; i < tps.size(); i++) {
 
134
        PROCINFO tp = tps[i];
 
135
        for (j=0; j < ps.size(); j++) {
 
136
            PROCINFO p = ps[j];
137
137
            if (tp.id == p.parentid) {
138
138
                if (TerminateProcessById(p.id)) {
139
139
                    tps.push_back(p);
140
140
                }
141
141
            }
142
 
            }
143
 
        }
 
142
        }
 
143
    }
144
144
    return true;
145
145
}
146
146
#endif
151
151
int ACTIVE_TASK::request_exit() {
152
152
    if (!app_client_shm.shm) return 1;
153
153
    process_control_queue.msg_queue_send(
154
 
                "<quit/>",
 
154
        "<quit/>",
155
155
        app_client_shm.shm->process_control_request
156
156
    );
 
157
    set_task_state(PROCESS_QUIT_PENDING, "request_exit()");
157
158
    quit_time = gstate.now;
 
159
    descendants.clear();
 
160
    get_descendants(pid, descendants);
158
161
    return 0;
159
162
}
160
163
 
163
166
int ACTIVE_TASK::request_abort() {
164
167
    if (!app_client_shm.shm) return 1;
165
168
    process_control_queue.msg_queue_send(
166
 
                "<abort/>",
 
169
        "<abort/>",
167
170
        app_client_shm.shm->process_control_request
168
171
    );
169
172
    return 0;
183
186
#endif
184
187
}
185
188
 
 
189
static inline void kill_processes(vector<int> pids) {
 
190
    for (unsigned int i=0; i<pids.size(); i++) {
 
191
        kill_app_process(pids[i]);
 
192
    }
 
193
}
 
194
 
186
195
// Kill the task (and descendants) by OS-specific means.
187
196
//
188
197
int ACTIVE_TASK::kill_task(bool restart) {
193
202
    // all we can do is terminate the main process,
194
203
    // using the handle we got when we created it.
195
204
    //
196
 
    TerminateProcess(process_handle, 1);
 
205
    if (g_use_sandbox) {
 
206
        TerminateProcess(process_handle, 1);
 
207
        return 0;
 
208
    }
197
209
#endif
198
210
    get_descendants(pid, pids);
199
211
    pids.push_back(pid);
200
 
    for (unsigned int i=0; i<pids.size(); i++) {
201
 
        kill_app_process(pids[i]);
202
 
    }
 
212
    kill_processes(pids);
203
213
    cleanup_task();
204
 
        if (restart) {
205
 
                set_task_state(PROCESS_UNINITIALIZED, "kill_task");
 
214
    if (restart) {
 
215
        set_task_state(PROCESS_UNINITIALIZED, "kill_task");
206
216
        char buf[256];
207
217
        sprintf(buf, "restarting %s", result->name);
208
 
                gstate.request_schedule_cpus(buf);
209
 
        } else {
210
 
                set_task_state(PROCESS_ABORTED, "kill_task");
211
 
        }
 
218
        gstate.request_schedule_cpus(buf);
 
219
    } else {
 
220
        set_task_state(PROCESS_ABORTED, "kill_task");
 
221
    }
212
222
    return 0;
213
223
}
214
224
 
276
286
    }
277
287
}
278
288
 
279
 
// handle a task that exited prematurely (i.e. the job isn't done)
 
289
// handle a task that exited prematurely (i.e. no finish file)
280
290
//
281
291
void ACTIVE_TASK::handle_premature_exit(bool& will_restart) {
282
 
    // if it exited because we sent it a quit message, don't count
283
 
    //
284
 
    if (task_state() == PROCESS_QUIT_PENDING) {
 
292
    switch (task_state()) {
 
293
    case PROCESS_QUIT_PENDING:
285
294
        set_task_state(PROCESS_UNINITIALIZED, "handle_premature_exit");
286
295
        will_restart = true;
 
296
        kill_processes(descendants);
 
297
        return;
 
298
    case PROCESS_ABORT_PENDING:
 
299
        set_task_state(PROCESS_UNINITIALIZED, "handle_premature_exit");
 
300
        will_restart = false;
 
301
        kill_processes(descendants);
287
302
        return;
288
303
    }
289
304
 
526
541
void ACTIVE_TASK_SET::send_heartbeats() {
527
542
    unsigned int i;
528
543
    ACTIVE_TASK* atp;
529
 
        char buf[1024];
530
 
        double ar = gstate.available_ram();
 
544
    char buf[1024];
 
545
    double ar = gstate.available_ram();
531
546
 
532
547
    for (i=0; i<active_tasks.size(); i++) {
533
548
        atp = active_tasks[i];
534
549
        if (!atp->process_exists()) continue;
535
550
        if (!atp->app_client_shm.shm) continue;
536
 
                snprintf(buf, sizeof(buf), "<heartbeat/>"
537
 
                        "<wss>%e</wss>"
538
 
                        "<max_wss>%e</max_wss>",
539
 
                        atp->procinfo.working_set_size, ar
540
 
                );
 
551
        snprintf(buf, sizeof(buf), "<heartbeat/>"
 
552
            "<wss>%e</wss>"
 
553
            "<max_wss>%e</max_wss>",
 
554
            atp->procinfo.working_set_size, ar
 
555
        );
541
556
        if (gstate.network_suspended) {
542
557
            strcat(buf, "<network_suspended/>");
543
558
        }
567
582
        if (!atp->process_exists()) continue;
568
583
        if (!atp->app_client_shm.shm) continue;
569
584
 
570
 
                // if app has had the same message in its send buffer for 180 sec,
571
 
                // assume it's hung and restart it
572
 
                //
573
 
                if (atp->process_control_queue.timeout(180)) {
 
585
        // if app has had the same message in its send buffer for 180 sec,
 
586
        // assume it's hung and restart it
 
587
        //
 
588
        if (atp->process_control_queue.timeout(180)) {
574
589
            if (log_flags.task_debug) {
575
590
                msg_printf(atp->result->project, MSG_INFO,
576
591
                    "Restarting %s - message timeout", atp->result->name
577
592
                );
578
593
            }
579
 
                        atp->kill_task(true);
580
 
                } else {
581
 
                        atp->process_control_queue.msg_queue_poll(
582
 
                                atp->app_client_shm.shm->process_control_request
583
 
                        );
584
 
                }
 
594
            atp->kill_task(true);
 
595
        } else {
 
596
            atp->process_control_queue.msg_queue_poll(
 
597
                atp->app_client_shm.shm->process_control_request
 
598
            );
 
599
        }
585
600
    }
586
601
}
587
602
 
681
696
    bool do_disk_check = false;
682
697
    bool did_anything = false;
683
698
 
684
 
        double ram_left = gstate.available_ram();
685
 
        double max_ram = gstate.max_available_ram();
 
699
    double ram_left = gstate.available_ram();
 
700
    double max_ram = gstate.max_available_ram();
686
701
 
687
702
    // Some slot dirs have lots of files,
688
703
    // so only check every min(disk_interval, 300) secs
701
716
                atp->result->name, atp->max_elapsed_time,
702
717
                atp->result->wup->rsc_fpops_bound/1e9,
703
718
                atp->result->avp->flops/1e9
704
 
                        );
705
 
                        atp->abort_task(ERR_RSC_LIMIT_EXCEEDED, "Maximum elapsed time exceeded");
706
 
                        did_anything = true;
707
 
                        continue;
708
 
                }
709
 
                if (atp->procinfo.working_set_size_smoothed > max_ram) {
710
 
                        msg_printf(atp->result->project, MSG_INFO,
711
 
                                "Aborting task %s: exceeded memory limit %.2fMB > %.2fMB\n",
712
 
                                atp->result->name,
713
 
                                atp->procinfo.working_set_size_smoothed/MEGA, max_ram/MEGA
714
 
                        );
715
 
                        atp->abort_task(ERR_RSC_LIMIT_EXCEEDED, "Maximum memory exceeded");
716
 
                        did_anything = true;
717
 
                        continue;
718
 
                }
 
719
            );
 
720
            atp->abort_task(ERR_RSC_LIMIT_EXCEEDED, "Maximum elapsed time exceeded");
 
721
            did_anything = true;
 
722
            continue;
 
723
        }
 
724
        if (atp->procinfo.working_set_size_smoothed > max_ram) {
 
725
            msg_printf(atp->result->project, MSG_INFO,
 
726
                "Aborting task %s: exceeded memory limit %.2fMB > %.2fMB\n",
 
727
                atp->result->name,
 
728
                atp->procinfo.working_set_size_smoothed/MEGA, max_ram/MEGA
 
729
            );
 
730
            atp->abort_task(ERR_RSC_LIMIT_EXCEEDED, "Maximum memory exceeded");
 
731
            did_anything = true;
 
732
            continue;
 
733
        }
719
734
        if (do_disk_check && atp->check_max_disk_exceeded()) {
720
735
            did_anything = true;
721
 
                        continue;
 
736
            continue;
722
737
        }
723
 
                ram_left -= atp->procinfo.working_set_size_smoothed;
724
 
    }
725
 
        if (ram_left < 0) {
726
 
                gstate.request_schedule_cpus("RAM usage limit exceeded");
727
 
        }
 
738
        ram_left -= atp->procinfo.working_set_size_smoothed;
 
739
    }
 
740
    if (ram_left < 0) {
 
741
        gstate.request_schedule_cpus("RAM usage limit exceeded");
 
742
    }
728
743
    if (do_disk_check) {
729
744
        last_disk_check_time = gstate.now;
730
745
    }
741
756
    if (task_state() == PROCESS_EXECUTING || task_state() == PROCESS_SUSPENDED) {
742
757
        set_task_state(PROCESS_ABORT_PENDING, "abort_task");
743
758
        abort_time = gstate.now;
744
 
                request_abort();
 
759
        request_abort();
 
760
        descendants.clear();
 
761
        get_descendants(pid, descendants);
745
762
    } else {
746
763
        set_task_state(PROCESS_ABORTED, "abort_task");
747
764
    }
828
845
}
829
846
 
830
847
 
831
 
// send quit signal to all tasks in the project
 
848
// send quit message to all tasks in the project
832
849
// (or all tasks, if proj==0).
833
850
// If they don't exit in 5 seconds,
834
851
// send them a kill signal and wait up to 5 more seconds to exit.
935
952
            // if we're doing CPU throttling, don't bother suspending apps
936
953
            // that don't use a full CPU
937
954
            //
938
 
            if (atp->result->non_cpu_intensive()) continue;
 
955
            if (atp->result->dont_throttle()) continue;
939
956
            if (atp->app_version->avg_ncpus < 1) continue;
940
957
            atp->preempt(REMOVE_NEVER);
941
958
            break;
1028
1045
//
1029
1046
int ACTIVE_TASK::suspend() {
1030
1047
    if (!app_client_shm.shm) return 0;
1031
 
        if (task_state() != PROCESS_EXECUTING) {
1032
 
                msg_printf(result->project, MSG_INFO,
1033
 
                        "Internal error: expected process %s to be executing", result->name
1034
 
                );
1035
 
        }
1036
 
        int n = process_control_queue.msg_queue_purge("<resume/>");
1037
 
        if (n == 0) {
1038
 
                process_control_queue.msg_queue_send(
1039
 
                        "<suspend/>",
1040
 
                        app_client_shm.shm->process_control_request
1041
 
                );
1042
 
        }
 
1048
    if (task_state() != PROCESS_EXECUTING) {
 
1049
        msg_printf(result->project, MSG_INFO,
 
1050
            "Internal error: expected process %s to be executing", result->name
 
1051
        );
 
1052
    }
 
1053
    int n = process_control_queue.msg_queue_purge("<resume/>");
 
1054
    if (n == 0) {
 
1055
        process_control_queue.msg_queue_send(
 
1056
            "<suspend/>",
 
1057
            app_client_shm.shm->process_control_request
 
1058
        );
 
1059
    }
1043
1060
    set_task_state(PROCESS_SUSPENDED, "suspend");
1044
1061
    return 0;
1045
1062
}
1048
1065
//
1049
1066
int ACTIVE_TASK::unsuspend() {
1050
1067
    if (!app_client_shm.shm) return 0;
1051
 
        if (task_state() != PROCESS_SUSPENDED) {
1052
 
                msg_printf(result->project, MSG_INFO,
1053
 
                        "Internal error: expected process %s to be suspended", result->name
1054
 
                );
1055
 
        }
 
1068
    if (task_state() != PROCESS_SUSPENDED) {
 
1069
        msg_printf(result->project, MSG_INFO,
 
1070
            "Internal error: expected process %s to be suspended", result->name
 
1071
        );
 
1072
    }
1056
1073
    if (log_flags.cpu_sched) {
1057
1074
        msg_printf(result->project, MSG_INFO,
1058
1075
            "[cpu_sched] Resuming %s", result->name
1059
1076
        );
1060
1077
    }
1061
 
        int n = process_control_queue.msg_queue_purge("<suspend/>");
1062
 
        if (n == 0) {
1063
 
                process_control_queue.msg_queue_send(
1064
 
                        "<resume/>",
1065
 
                        app_client_shm.shm->process_control_request
1066
 
                );
1067
 
        }
 
1078
    int n = process_control_queue.msg_queue_purge("<suspend/>");
 
1079
    if (n == 0) {
 
1080
        process_control_queue.msg_queue_send(
 
1081
            "<resume/>",
 
1082
            app_client_shm.shm->process_control_request
 
1083
        );
 
1084
    }
1068
1085
    set_task_state(PROCESS_EXECUTING, "unsuspend");
1069
1086
    return 0;
1070
1087
}