Fix for bug #3714. All LRMS backends should report finished jobs through gm-kick.

parent 4079eeaf
......@@ -29,6 +29,8 @@ pkgdatadir="$basedir"
if [ -z "$1" ] ; then echo "Argument missing" 1>&2 ; exit 1 ; fi
GMKICK=${pkglibexecdir}/gm-kick
# Prints the uid of the owner of the file given as argument
# Perl is used because it's more portable than using the stat command
printuid () {
......@@ -142,6 +144,7 @@ for control_dir in "$@" ; do
debug "got exitcode=$exitcode"
save_commentfile "$uid" "${joboption_directory}.comment" "${control_dir}/job.${job}.errors"
echo "$exitcode $comment" > "${control_dir}/job.${job}.lrms_done"
"${GMKICK}" -j "${job}" "${control_dir}/job.${job}.local"
done
done
......
......@@ -411,7 +411,7 @@ job_write_donefile() {
echo "${LRMSExitcode:--1} $msg" > $donefile || log "failed writing file: $PWD/$donefile"
# wake up GM
"${pkglibexecdir}/gm-kick" "job.$gridid.local"
"${pkglibexecdir}/gm-kick" -j "$gridid" "job.$gridid.local"
}
#
......
......@@ -248,10 +248,9 @@ done
# Kick the GM
if [ -n "${kicklist[*]}" ];then
"${pkglibexecdir}/gm-kick" \
$(for ind in "${kicklist[@]}";do
echo "${basenames[$ind]}.status"
done | xargs)
for ind in "${kicklist[@]}";do
"${pkglibexecdir}/gm-kick" -j "${gridids[$ind]}" "${basenames[$ind]}.status"
done
fi
exit 0
......@@ -211,19 +211,19 @@ do
job_write_diag
${GMKICK} "$jobfile"
${GMKICK} -j "${jobid}" "$jobfile"
continue
fi
# job finished and exit code is known
save_commentfile "$uid" "${sessiondir}.comment" "$errorsfile"
echo "$exitcode Executable finished with exit code $exitcode" >> "$donefile"
${GMKICK} "$jobfile"
${GMKICK} -j "${jobid}" "$jobfile"
continue
fi
exitcode=-1
save_commentfile "$uid" "${sessiondir}.comment" "$errorsfile"
echo "$exitcode Job finished with unknown exit code" >> "$donefile"
${GMKICK} "$jobfile"
${GMKICK} -j "${jobid}" "$jobfile"
done
if [ ! -z "$perflogdir" ]; then
......
......@@ -185,7 +185,7 @@ for ctr_dir in $control_dir ; do
# job finished and exit code is known
save_commentfile "$uid" "${sessiondir}.comment" "${ctr_dir}/job.${gridid}.errors"
echo "$exitcode Executable finished with exit code $exitcode" > "$donefile"
${GMKICK} "$jobfile"
${GMKICK} -j "${gridid}" "$jobfile"
continue
fi
fi
......@@ -203,7 +203,7 @@ for ctr_dir in $control_dir ; do
rm -f "$countfile"
save_commentfile "$uid" "${sessiondir}.comment" "${ctr_dir}/job.${gridid}.errors"
echo "$exitcode Job was lost with unknown exit code" > "$donefile"
${GMKICK} "$jobfile"
${GMKICK} -j "${gridid}" "$jobfile"
else
echo "$counter" > "$countfile"
fi
......
......@@ -168,6 +168,7 @@ process_log_file () {
base_name=`echo "$name" 2>/dev/null | sed -n 's/\.local$//p'`
if [ -z "${base_name}" ] ; then continue ; fi
gridid=`echo "$base_name" 2>/dev/null | sed -n 's/.*\.\([^\.]*\)$/\1/'`
# check if job already reported
if [ -f "${base_name}.lrms_done" ] ; then continue ; fi
......@@ -281,7 +282,7 @@ process_log_file () {
fi
fi
# wake up GM
${GMKICK} "${base_name}.local"
${GMKICK} -j "${gridid}" "${base_name}.local"
done
IFS=$old_IFS
}
......@@ -414,7 +415,7 @@ for ctr_dir in "$@" ; do
# job finished and exit code is known
save_commentfile "$uid" "${session}.comment" "${ctr_dir}/job.${gridid}.errors"
echo "$exitcode Executable finished with exit code $exitcode" > "$donefile"
${GMKICK} "$jobfile"
${GMKICK} -j "$gridid" "$jobfile"
echo "Job $gridid finished with exit code $exitcode"
continue
fi
......@@ -431,7 +432,7 @@ for ctr_dir in "$@" ; do
rm -f "$countfile"
save_commentfile "$uid" "${session}.comment" "${ctr_dir}/job.${gridid}.errors"
echo "$exitcode Job was lost with unknown exit code" > "$donefile"
${GMKICK} "$jobfile"
${GMKICK} -j "$gridid" "$jobfile"
echo "Job $gridid finished with unknown exit code"
else
echo "$counter" > "$countfile"
......
......@@ -266,7 +266,7 @@ add_accounting_to_diag () {
handle_failedcode "$failedcode" "$donefile" "$exitcode" "$sgeexitcode" "$overlimit"
# wake up GM
$GMKICK "$localfile" >> "$errorsfile"
$GMKICK -j "${gridid}" "$localfile" >> "$errorsfile"
rm -f "$countfile"
rm -f "$diagfile_tmp" "$diagfile_acct"
......@@ -307,7 +307,7 @@ handle_missing_accounting () {
echo "$exitcode Job failed with exit code $exitcode" > "$donefile"
fi
$GMKICK "$localfile" >> "$errorsfile"
$GMKICK -j "${gridid}" "$localfile" >> "$errorsfile"
return
fi
......@@ -342,7 +342,7 @@ handle_missing_accounting () {
rm -f "$countfile"
# wake up GM
$GMKICK "$localfile" >> "$errorsfile"
$GMKICK -j "${gridid}" "$localfile" >> "$errorsfile"
else
# test again for job existence, only count if not known
......
......@@ -408,10 +408,10 @@ fi
# Kick the GM
if [ -n "${kicklist[*]}" ];then
"${pkglibexecdir}/gm-kick" \
$(for localid in "${kicklist[@]}";do
echo "${basenames[$localid]}.local"
done | xargs)
for localid in "${kicklist[@]}";do
gridid=`echo "${basenames[$localid]}" | sed 's/.*\.\([^\.]*\)$/\1/'`
"${pkglibexecdir}/gm-kick" -j "${gridid}" "${basenames[$localid]}.local"
done
fi
exit 0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment