#!/bin/bash
# set -x
#
#  Based on globus submission script for pbs
#
#  Submits job to SLURM.
#  Input: path to grami file (same as Globus).
#
# The temporary job script is created for the submission and then removed 
# at the end of this script. 

echo "----- starting submit_slurm_job -----" 1>&2
joboption_lrms=SLURM

# ARC1 passes the config file first: consume "--config <file>", remember the
# file name in ARC_CONFIG and leave the grami file as $1.
if [ "$1" = "--config" ]; then shift; ARC_CONFIG=$1; shift; fi

# Absolute directory this script lives in. Every expansion is quoted so that
# an installation path containing whitespace does not break dirname/cd.
# (cd output is discarded because cd may print the directory when CDPATH is set.)
basedir=$(dirname "$0")
basedir=$(cd "$basedir" > /dev/null && pwd) || exit $?

# The shared submission helpers are looked up relative to the script location.
pkglibdir="$basedir/../../lib/arc"
pkglibdir=$(cd "$pkglibdir" > /dev/null && pwd) || exit $?

# Abort with the failing status if the helper library cannot be sourced.
. "${pkglibdir}/submit_common.sh" || exit $?

##############################################################
# Parse grami file, read arc config
##############################################################

# init is not defined in this file (the script sources submit_common.sh
# above); it is handed the remaining first argument.
init $1

# Failure reasons are appended to this file in the control directory.
failures_file="$joboption_controldir/job.$joboption_gridid.failed"

# When RUNTIME_NODE_SEES_FRONTEND is empty, RUNTIME_LOCAL_SCRATCH_DIR is
# mandatory; give up early if both are empty.
if [ -z "${RUNTIME_NODE_SEES_FRONTEND}" ] && [ -z "${RUNTIME_LOCAL_SCRATCH_DIR}" ] ; then
  printf '%s\n' "Need to know at which directory to run job: RUNTIME_LOCAL_SCRATCH_DIR must be set if RUNTIME_NODE_SEES_FRONTEND is empty" >&2
  printf '%s\n' "Submission: Configuration error." >> "$failures_file"
  exit 1
fi

##############################################################
# Zero stage of runtime environments
##############################################################
RTE_stage0

##############################################################
# create job script
##############################################################
# mktempscript is not defined in this file; the code below writes the job
# script to $LRMS_JOB_SCRIPT (presumably set by this helper - confirm).
mktempscript

is_cluster=true
##############################################################
# Start job script
##############################################################
# The header goes through a single truncating redirection, so the job
# script always starts out empty.
{
  printf '%s\n' "#!/bin/bash -l"
  printf '%s\n' "# SLURM batch job script built by grid-manager"

  # rerun is handled by GM, do not let SLURM requeue jobs itself.
  printf '%s\n' "#SBATCH --no-requeue"

  # write SLURM output to 'comment' file (stderr and stdout share it)
  printf '%s\n' "#SBATCH -e ${joboption_directory}.comment"
  printf '%s\n' "#SBATCH -o ${joboption_directory}.comment"
  printf '\n'
} > $LRMS_JOB_SCRIPT
# choose queue
if [ -n "${joboption_queue}" ] ; then
  echo "#SBATCH -p $joboption_queue" >> "$LRMS_JOB_SCRIPT"
fi
# project name for accounting
# The long option is used on purpose: the one-letter form is not stable
# across SLURM releases (current sbatch documents -A/--account and has no -U),
# whereas --account= is understood regardless of the short spelling.
if [ -n "${joboption_rsl_project}" ] ; then
  echo "#SBATCH --account=$joboption_rsl_project" >> "$LRMS_JOB_SCRIPT"
fi

# Print a restricted version of the job name given as $1:
#   - a leading non-alphabetic character gets an "N" put in front of it,
#   - every non-alphanumeric character becomes "_",
#   - the result is cut to at most 15 characters.
# printf is used instead of echo so that names such as "-n" are treated as
# data and not as echo options.
slurm_sanitize_jobname () {
  printf '%s\n' "$1" | sed \
    -e 's/^\([^[:alpha:]]\)/N\1/' \
    -e 's/[^[:alnum:]]/_/g' \
    -e 's/\(...............\).*/\1/'
}

# job name for convenience
if [ -n "${joboption_jobname}" ] ; then
    #TODO is this necessary? do parts of the infosys need these limitations?
  jobname=$(slurm_sanitize_jobname "$joboption_jobname")
else
  # No name requested: use a fixed default.
  jobname="gridjob"
fi
echo "#SBATCH -J '$jobname'" >> "$LRMS_JOB_SCRIPT"
echo "SLURM jobname: $jobname" 1>&2
# Set up the user's environment on the compute node where the script
# is executed.
echo "#SBATCH --get-user-env=10L" >> "$LRMS_JOB_SCRIPT"

##############################################################
# (non-)parallel jobs
##############################################################
# A missing or non-positive count falls back to a single task.
if [ -z "$joboption_count" ] || [ "$joboption_count" -le 0 ] ; then
  joboption_count=1
fi

nodes_string="#SBATCH -n ${joboption_count}"

# Walk joboption_nodeproperty_0, _1, ... until the first index that is not
# set (a variable set to the empty string still counts) and append each
# value, separated by ':'.
# NOTE(review): the ':'-joined suffix looks inherited from the PBS script
# this file is based on - confirm that sbatch accepts it after "-n <count>".
i=0
while : ; do
  prop_var="joboption_nodeproperty_$i"
  var_is_set=${!prop_var+yes}
  [ -n "${var_is_set}" ] || break
  var_value=${!prop_var}
  nodes_string="${nodes_string}:${var_value}"
  i=$(( $i + 1 ))
done
printf '%s\n' "$nodes_string" >> $LRMS_JOB_SCRIPT

##############################################################
# Execution times (requested in seconds, written as minutes:seconds)
##############################################################
# Append the "#SBATCH -t <min>:<sec>" time limit directives to
# $LRMS_JOB_SCRIPT.
# Globals read:    joboption_cputime, joboption_walltime, walltime_ratio,
#                  LRMS_JOB_SCRIPT
# Globals written: joboption_cputime, joboption_walltime (negative values
#                  are raised to 0; an empty walltime is derived from the
#                  cputime), maxcputime, cputime_min, cputime_sec,
#                  maxwalltime, walltime_min, walltime_sec
# One directive is written for the cputime (if given) and one for the
# walltime (if given or derived), in that order.
slurm_write_time_limits () {
  if [ -n "$joboption_cputime" ] ; then
    if [ "$joboption_cputime" -lt 0 ] ; then
      joboption_cputime=0
    fi
    maxcputime="$joboption_cputime"
    cputime_min=$(( $maxcputime / 60 ))
    cputime_sec=$(( $maxcputime - $cputime_min * 60 ))
    echo "#SBATCH -t ${cputime_min}:${cputime_sec}" >> "$LRMS_JOB_SCRIPT"
  fi

  if [ -z "$joboption_walltime" ] ; then
    if [ -n "$joboption_cputime" ] ; then
      # Set walltime for backward compatibility or incomplete requests.
      # walltime_ratio is not assigned anywhere in this script; default it
      # to 1 so that an unset value cannot turn the arithmetic expansion
      # into a syntax error (which would silently drop the walltime).
      joboption_walltime=$(( $joboption_cputime * ${walltime_ratio:-1} ))
    fi
  fi

  if [ -n "$joboption_walltime" ] ; then
    if [ "$joboption_walltime" -lt 0 ] ; then
      joboption_walltime=0
    fi
    maxwalltime="$joboption_walltime"
    walltime_min=$(( $maxwalltime / 60 ))
    walltime_sec=$(( $maxwalltime - $walltime_min * 60 ))
    echo "#SBATCH -t ${walltime_min}:${walltime_sec}" >> "$LRMS_JOB_SCRIPT"
  fi
}
slurm_write_time_limits

##############################################################
# Requested memory (mb)
##############################################################
# downloader and uploader can take a lot of memory, so with local transfer
# the request is raised to at least 1000 (also when none was given).
if [ "$joboption_localtransfer" = 'yes' ] ; then
  if [ -z "$joboption_memory" ] || [ "$joboption_memory" -lt 1000 ] ; then
    joboption_memory=1000
  fi
fi

{
  # The memory directive is only written when a request exists.
  if [ -n "$joboption_memory" ] ; then
    printf '%s\n' "#SBATCH --mem ${joboption_memory}mb"
  fi

  printf '\n'
  printf '%s\n' "# Overide umask of execution node (sometime values are really strange)"
  printf '%s\n' "umask 077"
} >> $LRMS_JOB_SCRIPT

##############################################################
# Add environment variables
##############################################################
add_user_env

##############################################################
# Check for existence of executable,
# there is no sense to check for executable if files are
# downloaded directly to computing node
##############################################################
# Only the presence of the first argument is checked here; without it the
# temporary files are removed, the failure is recorded and the script stops.
[ -n "${joboption_arg_0}" ] || {
  printf '%s\n' 'Executable is not specified' >&2
  rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
  printf '%s\n' "Submission: Job description error." >> "$failures_file"
  exit 1
}

#######################################################################
# copy information useful for transferring files to/from node directly
#######################################################################
case "$joboption_localtransfer" in
  yes) setup_local_transfer ;;
esac

######################################################################
# Adjust working directory for tweaky nodes
# RUNTIME_GRIDAREA_DIR should be defined by external means on nodes
######################################################################
if [ -n "${RUNTIME_NODE_SEES_FRONTEND}" ] ; then
  setup_runtime_env
else
  # Strip the session directory prefix (and any slashes following it) from
  # the stream paths. A path that comes back unchanged is used verbatim
  # below; any other path is re-rooted under the scratch job directory.
  RUNTIME_STDIN_REL=$(echo "${joboption_stdin}" | sed "s#^${joboption_directory}/*##")
  RUNTIME_STDOUT_REL=$(echo "${joboption_stdout}" | sed "s#^${joboption_directory}/*##")
  RUNTIME_STDERR_REL=$(echo "${joboption_stderr}" | sed "s#^${joboption_directory}/*##")

  {
    echo "RUNTIME_JOB_DIR=$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid"
    echo "RUNTIME_JOB_DIAG=$RUNTIME_LOCAL_SCRATCH_DIR/${joboption_gridid}.diag"
    echo "RUNTIME_GRIDAREA_DIR="

    case "$RUNTIME_STDIN_REL" in
      "${joboption_stdin}")
        echo "RUNTIME_JOB_STDIN=\"${joboption_stdin}\"" ;;
      *)
        echo "RUNTIME_JOB_STDIN=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$RUNTIME_STDIN_REL\"" ;;
    esac

    case "$RUNTIME_STDOUT_REL" in
      "${joboption_stdout}")
        echo "RUNTIME_JOB_STDOUT=\"${joboption_stdout}\"" ;;
      *)
        echo "RUNTIME_JOB_STDOUT=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$RUNTIME_STDOUT_REL\"" ;;
    esac

    case "$RUNTIME_STDERR_REL" in
      "${joboption_stderr}")
        echo "RUNTIME_JOB_STDERR=\"${joboption_stderr}\"" ;;
      *)
        echo "RUNTIME_JOB_STDERR=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$RUNTIME_STDERR_REL\"" ;;
    esac
  } >> $LRMS_JOB_SCRIPT
fi

##############################################################
# Main body of the generated job script.
#
# The statements below must stay in this order: helper calls
# (none of them is defined in this file; the script sources
# submit_common.sh near the top) alternate with fragments that
# are appended to $LRMS_JOB_SCRIPT directly, and the two
# 'if [ "$RESULT" = '0' ]' lines emitted here are only closed
# by the two "fi" lines emitted further down.
##############################################################

##############################################################
# Add std... to job arguments
##############################################################
include_std_streams

##############################################################
#  Move files to local working directory (job is done on node only)
#  RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
##############################################################
move_files_to_node

# Start the job script with RESULT=0; the guards emitted below only let the
# job continue while RESULT is still '0'.
echo "" >> $LRMS_JOB_SCRIPT
echo "RESULT=0" >> $LRMS_JOB_SCRIPT
echo "" >> $LRMS_JOB_SCRIPT


#####################################################
#  Download input files
####################################################
download_input_files

##############################################################
#  Skip execution if something already failed
##############################################################
# Opens the first (outer) RESULT guard in the job script.
echo "if [ \"\$RESULT\" = '0' ] ; then" >> $LRMS_JOB_SCRIPT

##############################################################
#  Runtime configuration at computing node
##############################################################
RTE_stage1

##############################################################
#  Diagnostics
##############################################################
# The escaped '$' signs keep the variables unexpanded here, so they are
# evaluated when the job script runs: the job appends its
# runtimeenvironments value to the file named by RUNTIME_JOB_DIAG.
echo "echo \"runtimeenvironments=\$runtimeenvironments\" >> \"\$RUNTIME_JOB_DIAG\"" >> $LRMS_JOB_SCRIPT
# Quoted here-document: copied into the job script verbatim. At run time, if
# SLURM_NODEFILE is non-empty and readable, each of its lines is appended to
# the diag file as "nodename=<line>"; if it is not readable the variable is
# cleared.
# NOTE(review): SLURM_NODEFILE looks carried over from the PBS script this
# file is based on - confirm that SLURM actually sets it for jobs.
cat >> $LRMS_JOB_SCRIPT <<'EOSCR'
if [ ! "X$SLURM_NODEFILE" = 'X' ] ; then
  if [ -r "$SLURM_NODEFILE" ] ; then
    cat "$SLURM_NODEFILE" | sed 's/\(.*\)/nodename=\1/' >> "$RUNTIME_JOB_DIAG"
  else
    SLURM_NODEFILE=
  fi
fi
EOSCR

##############################################################
#  Check intermediate result again
##############################################################
# Opens the second (inner) RESULT guard in the job script.
echo "if [ \"\$RESULT\" = '0' ] ; then" >> $LRMS_JOB_SCRIPT

##############################################################
#  Execution
##############################################################
cd_and_run

##############################################################
#  End of RESULT checks
##############################################################
# Close the inner and then the outer RESULT guard opened above.
echo "fi" >> $LRMS_JOB_SCRIPT
echo "fi" >> $LRMS_JOB_SCRIPT

##############################################################
#  Runtime (post)configuration at computing node
##############################################################
# Called after both guards are closed, i.e. whatever it adds to the job
# script is not wrapped in the RESULT checks emitted by this section.
configure_runtime
#####################################################
#  Upload output files
####################################################

# Print (on stdout) the job-script fragment that removes everything from the
# job's current directory except the requested output files. The fragment is
# only active on the node when RUNTIME_LOCAL_SCRATCH_DIR is non-empty there.
#
# How the fragment works: all regular files are first made user-writable,
# then the files to keep are made read-only, then every file that is still
# user-writable is deleted, and finally write permission is restored.
#
# Globals read:    joboption_controldir, joboption_gridid,
#                  joboption_rsl_outputfiles, RUNTIME_LOCAL_SCRATCH_DIR
# Globals written: fname, dynlist (loop scratch variables)
#
# Files to keep come from two places:
#   - job.<gridid>.output in the control directory, read now (one name per
#     line, optionally double-quoted, backslash escapes removed);
#   - entries of joboption_rsl_outputfiles starting with '@', which name a
#     list file that the job script itself reads at run time.
slurm_print_scratch_cleanup () {
  echo 'if [ ! -z  "$RUNTIME_LOCAL_SCRATCH_DIR" ] ; then'
  # Delete all files except listed in job.#.output
  echo '  find ./ -type l -exec rm -f "{}" ";"'
  echo '  find ./ -type f -exec chmod u+w "{}" ";"'
  if [ -f "$joboption_controldir/job.$joboption_gridid.output" ] ; then
    sed 's/^ *"\([^"]*\)".*/\1/g;t suc;s/^ *//;s/\([^\\]\) .*/\1/;:suc;s/\\\(.\)/\1/g' \
        "$joboption_controldir/job.$joboption_gridid.output" | \
    while read name ; do
      echo "  chmod -R u-w \"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$name\" 2>/dev/null"
    done
  fi

  # Intentionally unquoted: the list of output files is split on whitespace.
  for fname in $joboption_rsl_outputfiles; do
    if [ "${fname:0:1}" = "@" ]; then
      dynlist=${fname:1}
      echo "dynlist='$dynlist'"
      cat <<'EOSCR'
  chmod -R u-w "./$dynlist" 2>/dev/null
  cat "./$dynlist" \
  | sed 's/^ *"\([^"]*\)".*/\1/g;t suc;s/^ *//;s/\([^\\]\) .*/\1/;:suc;s/\\\(.\)/\1/g' \
  | while true; do
      read name
      if [ $? -ne '0' ]; then break; fi
        chmod -R u-w "./$name" 2>/dev/null
    done
EOSCR
    fi
  done

  # "-perm -200" selects files whose user-write bit is set. The former
  # "-perm +200" spelling is rejected by current GNU find; for a single
  # permission bit "-200" selects exactly the same files and is portable.
  echo '  find ./ -type f -perm -200 -exec rm -f "{}" ";"'
  echo '  find ./ -type f -exec chmod u+w "{}" ";"'
  echo 'fi'
}

if [ "$joboption_localtransfer" = 'yes' ] ; then
  upload_output_files
else
# There is no sense to keep trash till GM runs uploader
  slurm_print_scratch_cleanup >> "$LRMS_JOB_SCRIPT"
fi
echo "" >> "$LRMS_JOB_SCRIPT"

##############################################################
#  Move files back to session directory (job is done on node only)
#  RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
# !!!!!!!!!!!!!!!!!!! would be better to know the names of files !!!!!!!!!!!
##############################################################
move_files_to_frontend

#######################################
#  Submit the job
#######################################
echo "SLURM job script built" 1>&2
# Execute sbatch command from the session directory
cd "$joboption_directory"
echo "SLURM script follows:" 1>&2
echo "-------------------------------------------------------------------" 1>&2
cat "$LRMS_JOB_SCRIPT" 1>&2
echo "-------------------------------------------------------------------" 1>&2
echo "" 1>&2

# Retry policy: up to 10 consecutive ordinary failures, 2 seconds apart.
# A "queue is full" condition (exit code 198 or the 'maximum number of jobs'
# message) waits 60 seconds and resets the failure counter, i.e. the script
# keeps waiting for as long as that condition persists.
SLURM_RESULT=1
SLURM_TRIES=0
while [ "$SLURM_TRIES" -lt '10' ] ; do

    # Unset all environment variables before calling sbatch. Otherwise
    # SLURM will forward them to the job and leak information about
    # the grid-manager.
    # Only names listed by env are unset, and lines containing
    # "LRMS_JOB_SCRIPT" are skipped. Everything happens in a subshell, so
    # this script's own environment is untouched. ${sbatch} is deliberately
    # left unquoted.
    # TODO: Maybe we only should unset $ARC_*, $CONFIG_*, $GLOBUS_* etc?
  (for i in $(env|grep -v "LRMS_JOB_SCRIPT"|cut -d= -f1);do unset $i;done; \
       ${sbatch} "$LRMS_JOB_SCRIPT") 1>"$LRMS_JOB_OUT" 2>"$LRMS_JOB_ERR"
  SLURM_RESULT="$?"
  if [ "$SLURM_RESULT" -eq '0' ] ; then break ; fi
  if [ "$SLURM_RESULT" -eq '198' ] ; then
    echo "Waiting for queue to decrease" 1>&2
    sleep 60
    SLURM_TRIES=0
    continue
  fi
  # The matching lines are shown on stderr like every other diagnostic of
  # this script instead of leaking to stdout.
  if grep 'maximum number of jobs' "$LRMS_JOB_OUT" "$LRMS_JOB_ERR" 1>&2 ; then
    echo "Waiting for queue to decrease" 1>&2
    sleep 60
    SLURM_TRIES=0
    continue
  fi
  SLURM_TRIES=$(( $SLURM_TRIES + 1 ))
  sleep 2
done
# Print the numeric job id from the first "Submitted batch job <id>" line
# found in the files given as arguments; print nothing if there is none.
# "sed -n ... p" emits matching lines only, so warnings or error text in
# sbatch's output can no longer end up in the job id. The message is
# recognised with or without a leading "sbatch: " prefix.
slurm_extract_job_id () {
  sed -n 's/^.*Submitted batch job \([0-9][0-9]*\).*$/\1/p' "$@" 2>/dev/null | head -n 1
}

if [ "$SLURM_RESULT" -eq '0' ] ; then

#TODO test what happens when the jobqueue is full or when the slurm ctld is not responding

   # Both captured streams are searched, so it does not matter on which one
   # sbatch announces the job.
   job_id=$(slurm_extract_job_id "$LRMS_JOB_OUT" "$LRMS_JOB_ERR")
   if [ "${job_id}" = "" ] ; then
      echo "job *NOT* submitted successfully!" 1>&2
      echo "failed getting the slurm jobid for the job!" 1>&2
      echo "Submission: Local submission client behaved unexpectedly.">>"$failures_file"
   else
      # Record the local job id in the file named by $arg_file.
      echo "joboption_jobid=$job_id" >> "$arg_file"
      echo "job submitted successfully!" 1>&2
      echo "local job id: $job_id" 1>&2
      # Remove temporary job script file
      rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
      echo "----- exiting submit_slurm_job -----" 1>&2
      echo "" 1>&2
      exit 0
   fi
else
  echo "job *NOT* submitted successfully!" 1>&2
  echo "got error code from sbatch: $SLURM_RESULT !" 1>&2
  echo "Submission: Local submission client failed.">>"$failures_file"
fi
# Failure path: reached when sbatch failed or no job id could be obtained.
# Show what sbatch produced, remove the temporary files and report failure.
# All of it goes to stderr, including the "Error output is:" heading that
# used to be written to stdout.
echo "Output is:" 1>&2
cat "$LRMS_JOB_OUT" 1>&2
echo "Error output is:" 1>&2
cat "$LRMS_JOB_ERR" 1>&2
rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
echo "----- exiting submit_slurm_job -----" 1>&2
echo "" 1>&2
exit 1
