egs/wsj/s5/utils/parallel/limit_num_gpus.sh
#!/bin/bash

# This script functions as a wrapper for a bash command that uses GPUs.
#
# It sets the CUDA_VISIBLE_DEVICES variable to limit the number of GPUs
# visible to the program. This is necessary for running a job on the grid
# if the job would otherwise automatically grab all GPUs available on the
# system, e.g. a TensorFlow program.

num_gpus=1  # how many GPUs we will allow the command passed to this script
            # to run on. We achieve this by setting the CUDA_VISIBLE_DEVICES
            # variable.

set -e

if [ "$1" == "--num-gpus" ]; then
  num_gpus=$2
  shift 2
fi

# Check that $num_gpus is a non-negative integer.
if ! printf "%d" "$num_gpus" >/dev/null 2>&1 || [ "$num_gpus" -le -1 ]; then
  echo "$0: Must pass a positive integer or 0 after --num-gpus"
  echo "e.g. $0 --num-gpus 2 local/tfrnnlm/run_lstm.sh"
  exit 1
fi

if [ $# -eq 0 ]; then
  echo "Usage: $0 [--num-gpus <num-gpus>] <command> [<arg1>...]"
  echo "Runs <command> with args after setting CUDA_VISIBLE_DEVICES to"
  echo "make sure exactly <num-gpus> GPUs are visible (default: 1)."
  exit 1
fi

CUDA_VISIBLE_DEVICES=
num_total_gpus=$(nvidia-smi -L | wc -l)
num_gpus_assigned=0

if [ "$num_gpus" -eq 0 ]; then
  echo "$0: Running the job on CPU. Disabling submission to GPU."
  export CUDA_VISIBLE_DEVICES=""
else
  for i in $(seq 0 $((num_total_gpus - 1))); do
    # Go over all GPUs, check whether each is idle, and add it to the list if so.
    if nvidia-smi -i $i | grep "No running processes found" >/dev/null; then
      CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}$i,
      num_gpus_assigned=$((num_gpus_assigned + 1))
    fi
    # Once we have enough GPUs, break out of the loop.
    [ $num_gpus_assigned -eq $num_gpus ] && break
  done
  if [ $num_gpus_assigned -ne $num_gpus ]; then
    echo "$0: Could not find enough idle GPUs"
    exit 1
  fi
  # Strip the trailing comma from the accumulated device list.
  export CUDA_VISIBLE_DEVICES=$(echo $CUDA_VISIBLE_DEVICES | sed "s=,$==g")
  echo "$0: Running the job on GPU(s) $CUDA_VISIBLE_DEVICES"
fi

"$@"
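A minimal usage sketch. The wrapped command, local/tfrnnlm/run_lstm.sh, is the example from the script's own error message; any GPU-using command and its arguments can stand in its place, and the path assumes it is run from a Kaldi egs/*/s5 recipe directory:

    # Run the TensorFlow RNNLM script with exactly 2 idle GPUs visible:
    utils/parallel/limit_num_gpus.sh --num-gpus 2 local/tfrnnlm/run_lstm.sh

    # Force CPU-only execution (CUDA_VISIBLE_DEVICES is set to the empty string):
    utils/parallel/limit_num_gpus.sh --num-gpus 0 local/tfrnnlm/run_lstm.sh

Because the wrapper ends with "$@", the wrapped command inherits the exported CUDA_VISIBLE_DEVICES and the wrapper's exit status is that of the command itself.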