5
##########################################################################
7
# The Contents of this file are made available subject to the terms of
8
# the Sun Industry Standards Source License Version 1.2
10
# Sun Microsystems Inc., March, 2001
13
# Sun Industry Standards Source License Version 1.2
14
# =================================================
15
# The contents of this file are subject to the Sun Industry Standards
16
# Source License Version 1.2 (the "License"); You may not use this file
17
# except in compliance with the License. You may obtain a copy of the
18
# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
20
# Software provided under this License is provided on an "AS IS" basis,
21
# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
22
# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
23
# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
24
# See the License for the specific provisions governing your rights and
25
# obligations concerning the Software.
27
# The Initial Developer of the Original Code is: Sun Microsystems, Inc.
29
# Copyright: 2001 by Sun Microsystems, Inc.
31
# All Rights Reserved.
33
##########################################################################
40
if [ ! -f $ckpt_dir/ckpt.log ]; then
41
# Create the shared checkpoint log and make it readable/writable by everyone.
# The path is quoted so a $ckpt_dir containing whitespace or glob characters
# is passed to touch/chmod as a single argument.
touch "$ckpt_dir/ckpt.log"
chmod 666 "$ckpt_dir/ckpt.log"
45
# create temp directory for holding checkpoint info
47
# Path under $ckpt_dir for this job's checkpoint data, keyed by the first
# script argument (the job id that the log messages print as "Job $1").
tmpdir="${ckpt_dir}/ckpt.${1}"
53
#F=$tmpdir/checkpoint.log
57
# Append a call header (separator, script name, time, caller identity and
# arguments) to the log file $F. Expansions are quoted so that a script path,
# log path or argument containing whitespace or glob characters is logged
# verbatim instead of being word-split or expanded against the current
# directory.
print ------------------------------------------------------------- >> "$F" 2>&1
print "$(basename "$0")" called at "$(date)" >> "$F" 2>&1
print called by: "$(id)" >> "$F" 2>&1
print with args: "$*" >> "$F" 2>&1
62
# Record the restart of job $1 (pid $2) with a timestamp in the shared
# checkpoint log. The log path is quoted so a $ckpt_dir with whitespace
# does not break the redirection.
print "$(date '+%D %T')" Job $1 "(pid=$2)" restarting >> "$ckpt_dir/ckpt.log"
65
if [ "$currcpr" != "2" ]; then
68
# Log the restart command, then run it against the checkpoint named
# cpr_<job id>.<$currcpr>, sending its output to the log file $F.
# The checkpoint name and log path are quoted so they reach cpr and the
# redirection as single words. cpr must stay the last command here so that
# its exit status is what a following "$?" sees.
print Restart command: cpr -r "cpr_$1.$currcpr" >> "$F" 2>&1
cpr -r "cpr_$1.$currcpr" >> "$F" 2>&1
71
# Now be careful: The restart command is the parent process of the restarted
72
# job. SGE is the parent process of the restart command.
73
# If the job was killed (probably due to a migration request), we need to
74
# tell our parent that by killing ourselves. SGE will also detect an
75
# exit status > 128 analogous to a KILL
78
# Note the restart command's exit status in the log file $F (path quoted so
# the redirection target is always a single word).
print Exit status of restart command: "$exit_status" >> "$F" 2>&1
80
# poll for job completion (based on job script pid)
82
if [ $exit_status -gt 0 ]; then
91
# This doesn't work under Irix 6.2, since the variable $$ is not
93
#if [ $exit_status -gt 128 ]; then
94
# signal=`expr $exit_status - 128`
95
# print Killing ourself: kill -$signal $$ >> $F 2>&1
96
# /usr/bin/kill -$signal $pid >> $F 2>&1
99
# If killing ourselves didn't help or the exit_status was < 128 exit
100
# with the exit status of our child
102
# Record in the shared checkpoint log that job $1 (pid $2) is exiting, with
# a timestamp and the exit status. The log path is quoted so a $ckpt_dir
# with whitespace does not break the redirection.
print "$(date '+%D %T')" Job $1 "(pid=$2) exiting, status=$exit_status" >> "$ckpt_dir/ckpt.log"
104
# Note the status this script is about to exit with in the log file $F
# (path quoted so the redirection target is always a single word).
print Exiting with exit status: "$exit_status" >> "$F" 2>&1