Raad2: atk

Sample Job File

#!/bin/bash
#SBATCH --job-name=atk_job
#SBATCH --partition=l_long
#SBATCH --qos=ll
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=16
#SBATCH --mem=128400
#SBATCH --time=168:00:00
#SBATCH --hint=nomultithread
#SBATCH --output=atk_job.o%j
#SBATCH --error=atk_job.e%j

# This job file assumes that the resource requirements of this atk run
# are too great for a single node, so the run must be spread across
# two nodes.

# Set --ntasks-per-node to the number of atk slaves desired per node.
# Set --mem to the maximum allowed memory per node (i.e. 128400 MB)
# so that each allocated node can effectively be reserved for the
# atk job in its entirety, even if only a subset of cores will actually
# be used on those nodes.
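
# With the settings above the job gets 2 x 16 = 32 tasks in total, so
# the mpirun command at the end of this script will launch 32 atk
# processes via $SLURM_NTASKS.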

# srun does not launch atk jobs correctly on raad2.  We must therefore
# use mpirun instead, and we must dynamically generate the machinefile
# for mpirun on our own:

MACHINEFILE="nodes.$SLURM_JOB_ID"
srun -l /bin/hostname | sort -n | awk '{print $2}' > $MACHINEFILE
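
# The machinefile now holds one line per task (32 lines here): each
# node's hostname appears once for every task placed on that node, and
# mpirun uses this list to spread the atk processes across both nodes.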

# Switch to the Intel programming environment and set the environment
# variables needed to launch atk

module switch PrgEnv-cray/5.2.82 PrgEnv-intel/17.1.132/64bit

export OMP_NUM_THREADS=1                         # one OpenMP thread per MPI process
export QUANTUM_LICENSE_PATH=6200@192.195.88.224  # port@host of the atk license server
export QUANTUM_AUTOMATIC_SERVER_DISCOVERY=0      # use only the license server given above
export MKL_DYNAMIC=TRUE                          # let MKL adjust its thread count dynamically

ATK_EXECUTABLE="/lustre/sw/xc40ac/quantumwise/vnlatk164/bin/atkpython"
MPIRUN_EXECUTABLE="/lustre/opt/compilers/intel/cluster/17.1.132/compilers_and_libraries_2017.1.132/linux/mpi/intel64/bin/mpirun"

# Launch atk

$MPIRUN_EXECUTABLE -np $SLURM_NTASKS -machinefile $MACHINEFILE $ATK_EXECUTABLE input_file.py > output_file
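
To submit this job, save the script above to a file (for example atk_job.sh; any name will do) and hand it to the scheduler with sbatch. Its progress can then be followed with squeue:

sbatch atk_job.sh
squeue -u $USER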