Skip to content
Snippets Groups Projects
Commit 0898feea authored by Adrian Schmitz
Browse files

Merge branch 'srun-driver' into 'main'

Use srun instead of salloc+ssh

See merge request ci-playground/customdriver!3
parents aa5e5da1 95881c27
No related branches found
No related tags found
No related merge requests found
...@@ -29,7 +29,7 @@ fail-exit-code-job-Singularity: # This job runs in the build stage, which ...@@ -29,7 +29,7 @@ fail-exit-code-job-Singularity: # This job runs in the build stage, which
CI_MODE: "Singularity" CI_MODE: "Singularity"
CONTAINER: "tensorflow" CONTAINER: "tensorflow"
tags: tags:
- "custom" - "custom2"
script: script:
- cd rewagha - cd rewagha
...@@ -41,7 +41,7 @@ fail-timeout-job-Singularity: # This job runs in the build stage, which ru ...@@ -41,7 +41,7 @@ fail-timeout-job-Singularity: # This job runs in the build stage, which ru
CI_MODE: "Singularity" CI_MODE: "Singularity"
CONTAINER: "tensorflow" CONTAINER: "tensorflow"
tags: tags:
- "custom" - "custom2"
script: script:
- echo "Compiling the code..." - echo "Compiling the code..."
- sleep 1200 - sleep 1200
...@@ -51,7 +51,7 @@ fail-exit-code-job: # This job runs in the build stage, which runs first. ...@@ -51,7 +51,7 @@ fail-exit-code-job: # This job runs in the build stage, which runs first.
variables: variables:
SLURM_CPUS_PER_TASK: "1" SLURM_CPUS_PER_TASK: "1"
tags: tags:
- "custom" - "custom2"
script: script:
- cd rewagha - cd rewagha
...@@ -61,7 +61,7 @@ fail-timeout-job: # This job runs in the build stage, which runs first. ...@@ -61,7 +61,7 @@ fail-timeout-job: # This job runs in the build stage, which runs first.
SLURM_CPUS_PER_TASK: "1" SLURM_CPUS_PER_TASK: "1"
SLURM_TIME: "00:01:00" SLURM_TIME: "00:01:00"
tags: tags:
- "custom" - "custom2"
script: script:
- echo "Compiling the code..." - echo "Compiling the code..."
- sleep 1200 - sleep 1200
...@@ -71,7 +71,7 @@ build-job: # This job runs in the build stage, which runs first. ...@@ -71,7 +71,7 @@ build-job: # This job runs in the build stage, which runs first.
variables: variables:
SLURM_CPUS_PER_TASK: "2" SLURM_CPUS_PER_TASK: "2"
tags: tags:
- "custom" - "custom2"
script: script:
- echo "Compiling the code..." - echo "Compiling the code..."
- echo "Compile complete." - echo "Compile complete."
...@@ -83,7 +83,7 @@ build-job-Singularity: # This job runs in the build stage, which runs firs ...@@ -83,7 +83,7 @@ build-job-Singularity: # This job runs in the build stage, which runs firs
CI_MODE: "Singularity" CI_MODE: "Singularity"
CONTAINER: "tensorflow" CONTAINER: "tensorflow"
tags: tags:
- "custom" - "custom2"
script: script:
- echo "Compiling the code..." - echo "Compiling the code..."
- echo "Compile complete." - echo "Compile complete."
...@@ -94,7 +94,7 @@ batch-job: # This job runs in the build stage, which runs first. ...@@ -94,7 +94,7 @@ batch-job: # This job runs in the build stage, which runs first.
CI_MODE: "Batch" CI_MODE: "Batch"
BATCH_SCRIPT: "batch.sh" BATCH_SCRIPT: "batch.sh"
tags: tags:
- "custom" - "custom2"
script: script:
- echo "I do nothing" - echo "I do nothing"
...@@ -104,7 +104,7 @@ fail-batch-job: # This job runs in the build stage, which runs first. ...@@ -104,7 +104,7 @@ fail-batch-job: # This job runs in the build stage, which runs first.
CI_MODE: "Batch" CI_MODE: "Batch"
BATCH_SCRIPT: "doesntexist.sh" BATCH_SCRIPT: "doesntexist.sh"
tags: tags:
- "custom" - "custom2"
script: script:
- echo "I do nothing" - echo "I do nothing"
...@@ -113,7 +113,7 @@ unit-test-job: # This job runs in the test stage. ...@@ -113,7 +113,7 @@ unit-test-job: # This job runs in the test stage.
variables: variables:
SLURM_CPUS_PER_TASK: "4" SLURM_CPUS_PER_TASK: "4"
tags: tags:
- "custom" - "custom2"
script: script:
- echo "Running unit tests... This will take about 60 seconds." - echo "Running unit tests... This will take about 60 seconds."
- sleep 60 - sleep 60
...@@ -124,7 +124,7 @@ lint-test-job: # This job also runs in the test stage. ...@@ -124,7 +124,7 @@ lint-test-job: # This job also runs in the test stage.
variables: variables:
SLURM_CPUS_PER_TASK: "8" SLURM_CPUS_PER_TASK: "8"
tags: tags:
- "custom" - "custom2"
script: script:
- echo "Linting code... This will take about 10 seconds." - echo "Linting code... This will take about 10 seconds."
- sleep 10 - sleep 10
...@@ -133,7 +133,7 @@ lint-test-job: # This job also runs in the test stage. ...@@ -133,7 +133,7 @@ lint-test-job: # This job also runs in the test stage.
deploy-job: # This job runs in the deploy stage. deploy-job: # This job runs in the deploy stage.
stage: deploy # It only runs when *both* jobs in the test stage complete successfully. stage: deploy # It only runs when *both* jobs in the test stage complete successfully.
tags: tags:
- "custom" - "custom2"
script: script:
- echo "Deploying application..." - echo "Deploying application..."
- echo "Application successfully deployed." - echo "Application successfully deployed."
batch.sh 100644 → 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
...@@ -3,6 +3,7 @@ from re import I ...@@ -3,6 +3,7 @@ from re import I
import sys import sys
import subprocess import subprocess
import variableHandle as vh import variableHandle as vh
import subprocess
import random import random
import string import string
...@@ -97,37 +98,36 @@ def handle_run(): ...@@ -97,37 +98,36 @@ def handle_run():
if argv[3] == 'build_script' or argv[3] == 'step_script': if argv[3] == 'build_script' or argv[3] == 'step_script':
Slurm_vars = vh.get_slurm_variables() Slurm_vars = vh.get_slurm_variables()
command = []
if mode == 'Batch': if mode == 'Batch':
full_script_path = get_clone_path() + '/' + script command += ['sbatch', f'{get_clone_path()}/{script}']
print(full_script_path) print('Warning: The contents of the script section in the CI definition '
os.system('. '+ HOME + '/Runner/batchRunstep.sh ' + full_script_path + ' ' + script_hash) 'will be ignored in the batch mode. If you want to work on the results '
print('Warning: The contents of the script section in the CI definition will be ignored in the batch mode. If you want to work on the results please create additional stages and connect them via artifacts.') 'please create additional stages and connect them via artifacts.')
else: else:
command = 'salloc --job-name=CI' command += ['srun', '--job-name=CI']
for x in Slurm_vars: for x in Slurm_vars:
command = command + ' ' + x[0] + x[1] command += [f'{x[0]}{x[1]}']
# Handle Slurm shell and singularity shell environment # Handle Slurm shell and singularity shell environment
if mode == "Slurm": if mode == "Slurm":
command = command + ' ' + HOME + '/Runner/sshRunstep.sh ' + script_hash command += [f'{HOME}/Runner/scripts/script{script_hash}', 'step_script']
elif mode =="Singularity": elif mode =="Singularity":
if os.path.exists(container): if os.path.exists(container):
container = get_clone_path() + '/' + script container = f'{get_clone_path()}/{script}'
command = command + ' ' + HOME + '/Runner/singularityLocalRunstep.sh ' + get_clone_path() + '/' + container + ' ' + script_hash command += [f'{HOME}/Runner/singularityLocalRunstep.sh',
f'{get_clone_path()}/{container}', script_hash]
else: else:
command = command + ' ' + HOME + '/Runner/singularityRunstep.sh ' + container + ' ' + script_hash command += [f'{HOME}/Runner/singularityRunstep.sh', container, script_hash]
print(command)
os.system(command)
file1 = open(HOME + '/Runner/errorCodes/' + script_hash + '.txt', 'r') print(command)
Lines = file1.readlines() cmd_ret = subprocess.run(command)
error_code = Lines[0].rstrip('\n') # $HOME/Runner/errorCodes/$1.txt first line
print(error_code)
file1.close()
os.remove(HOME + '/Runner/scripts/script' + script_hash) os.remove(HOME + '/Runner/scripts/script' + script_hash)
os.remove(HOME + '/Runner/errorCodes/' + script_hash + '.txt') if int(cmd_ret.returncode) != 0:
exit(int(error_code)) exit(1)
else:
exit(0)
else: else:
os.system('. ' + '$HOME/Runner/scripts/script' + script_hash + ' ' + argv[3]) os.system('. ' + '$HOME/Runner/scripts/script' + script_hash + ' ' + argv[3])
......
main.sh 100644 → 100755
...@@ -2,4 +2,4 @@ ...@@ -2,4 +2,4 @@
module load python module load python
python $HOME/Runner/driver.py $@ python3 $HOME/Runner/driver.py $@
\ No newline at end of file
...@@ -5,23 +5,10 @@ then ...@@ -5,23 +5,10 @@ then
exit 1 exit 1
fi fi
NODES=($SLURM_JOB_NODELIST)
# starting node
RUNNODE=(${NODES[@]:0:1})
#echo 'target node: '
#echo $RUNNODE
#echo "singularity exec --nv $R_CONTAINER $HOME/Runner/scripts/script$2 " #echo "singularity exec --nv $R_CONTAINER $HOME/Runner/scripts/script$2 "
module load CONTAINERS module load CONTAINERS
dos2unix $HOME/Runner/scripts/script$2 dos2unix $HOME/Runner/scripts/script$2
touch $HOME/Runner/errorCodes/$2.txt singularity exec --nv $R_CONTAINER $HOME/Runner/scripts/script$2 step_script
ssh -T $RUNNODE singularity exec --nv $R_CONTAINER $HOME/Runner/scripts/script$2 step_script
echo $? > $HOME/Runner/errorCodes/$2.txt
\ No newline at end of file
...@@ -5,15 +5,6 @@ then ...@@ -5,15 +5,6 @@ then
exit 1 exit 1
fi fi
NODES=($SLURM_JOB_NODELIST)
# starting node
RUNNODE=(${NODES[@]:0:1})
#echo 'target node: '
#echo $RUNNODE
#echo "singularity exec --nv $R_CONTAINER $HOME/Runner/scripts/script$2 " #echo "singularity exec --nv $R_CONTAINER $HOME/Runner/scripts/script$2 "
module load CONTAINERS module load CONTAINERS
...@@ -22,8 +13,4 @@ module load $1 ...@@ -22,8 +13,4 @@ module load $1
dos2unix $HOME/Runner/scripts/script$2 dos2unix $HOME/Runner/scripts/script$2
touch $HOME/Runner/errorCodes/$2.txt singularity exec --nv $R_CONTAINER $HOME/Runner/scripts/script$2 step_script
ssh -T $RUNNODE singularity exec --nv $R_CONTAINER $HOME/Runner/scripts/script$2 step_script
echo $? > $HOME/Runner/errorCodes/$2.txt
\ No newline at end of file
sshRunstep.sh 100644 → 100755
File mode changed from 100644 to 100755
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment