5
##########################################################################
7
# The Contents of this file are made available subject to the terms of
8
# the Sun Industry Standards Source License Version 1.2
10
# Sun Microsystems Inc., March, 2001
13
# Sun Industry Standards Source License Version 1.2
14
# =================================================
15
# The contents of this file are subject to the Sun Industry Standards
16
# Source License Version 1.2 (the "License"); You may not use this file
17
# except in compliance with the License. You may obtain a copy of the
18
# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
20
# Software provided under this License is provided on an "AS IS" basis,
21
# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
22
# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
23
# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
24
# See the License for the specific provisions governing your rights and
25
# obligations concerning the Software.
27
# The Initial Developer of the Original Code is: Sun Microsystems, Inc.
29
# Copyright: 2001 by Sun Microsystems, Inc.
31
# All Rights Reserved.
33
##########################################################################
40
# Script body: records a checkpoint-and-kill ("migration") request for a job
# and runs `cpr` to perform it.
#   $1 - job identifier (used in directory/file names and logged as "Job $1")
#   $2 - pid reported in the final log line
# NOTE(review): this view of the file is incomplete -- the bare numeric lines
# look like line-number residue, and the closing `fi`/`else` lines as well as
# the assignments of F, currcpr, prevcpr, popt and cc are not visible here.
# Confirm against the full script before changing any logic.
# NOTE(review): `print` is used throughout, so this presumably runs under
# ksh -- the interpreter line is not visible; confirm.

# Make sure the shared log file $ckpt_dir/ckpt.log exists; when it has to be
# created it is made readable and writable by every user (mode 666).
if [ ! -f $ckpt_dir/ckpt.log ]; then
41
touch $ckpt_dir/ckpt.log
42
chmod 666 $ckpt_dir/ckpt.log
48
# workaround to force job to restart on same queue (svd)
49
# Source the cell's settings.sh; the cell name falls back to "default" when
# sge_cell is unset or empty.
. $sge_root/${sge_cell:-default}/common/settings.sh
50
# QUEUE and JOB_ID are not assigned in the visible code -- presumably they
# come from the job environment; confirm.
qalter -q $QUEUE $JOB_ID
52
# create temp directory for holding checkpoint info
54
# Per-job directory path, derived from the first argument.
tmpdir=$ckpt_dir/ckpt.$1
60
# Disabled assignment of the per-invocation log file. The value of F actually
# used below is set somewhere outside the visible lines.
#F=$tmpdir/checkpoint.log
64
# Append an invocation header to $F: separator, script name and timestamp,
# identity of the caller, and the arguments passed in.
print ------------------------------------------------------------- >> $F 2>&1
65
print `basename $0` called at `date` >> $F 2>&1
66
print called by: `id` >> $F 2>&1
67
print with args: $* >> $F 2>&1
69
# checkpoint the job to one of two different files (i.e. ping-pong)
70
# just in case we go down while checkpointing
73
# Branch on the current slot number; the branch bodies (presumably choosing
# the next currcpr/prevcpr values) are not visible here.
if [ "$currcpr" = "2" ]; then
81
# use the ASH to checkpoint if it is available.
82
# otherwise, use the process group ID
91
# Log the exact cpr command line first, then run it; stdout and stderr of both
# go to $F. popt is set outside the visible lines (per the comment above it is
# presumably an ASH or process-group selector -- confirm).
print Migration command: cpr -c cpr_$1.$currcpr -p $popt -f -k >> $F 2>&1
92
cpr -c cpr_$1.$currcpr -p $popt -f -k >> $F 2>&1
94
# cc is presumably the exit status of the cpr call above (its assignment is
# not visible); it is reported as "status=$cc" in the final log line.
if [ $cc -eq 0 ]; then
95
# Record the slot just written in a file named "currcpr", relative to the
# current working directory.
print $currcpr > currcpr
96
# If a checkpoint directory for the other slot exists, log that and ask cpr
# to delete it.
if [ -d cpr_$1.$prevcpr ]; then
97
print Deleting old checkpoint file >> $F 2>&1
98
cpr -D cpr_$1.$prevcpr >> $F 2>&1
102
# Append a timestamped one-line summary (job, pid, status) to the shared log.
print `date +"%D %T"` Job $1 "(pid=$2) checkpointed and killed, status=$cc" >> $ckpt_dir/ckpt.log