Workflow 7081, Stage 1
Priority | 50 |
Processors | 1 |
Wall seconds | 80000 |
Image | /cvmfs/singularity.opensciencegrid.org/fermilab/fnal-wn-sl7:latest |
RSS bytes | 3144679424 (2999 MiB) |
Max distance for inputs | 30.0 |
Enabled input RSEs |
CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC |
Enabled output RSEs |
CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC |
Enabled sites |
BR_CBPF, CA_SFU, CA_Victoria, CERN, CH_UNIBE-LHEP, ES_CIEMAT, ES_PIC, FR_CCIN2P3, IN_TIFR, IT_CNAF, NL_SURFsara, UK_Bristol, UK_Brunel, UK_Durham, UK_Edinburgh, UK_Lancaster, UK_Manchester, UK_Oxford, UK_QMUL, UK_RAL-PPD, UK_RAL-Tier1, UK_Sheffield, US_Caltech, US_Colorado, US_FNAL-FermiGrid, US_FNAL-T1, US_Michigan, US_MIT, US_Nebraska, US_NotreDame, US_PuertoRico, US_SU-ITS, US_Swan, US_UChicago, US_UConn-HPC, US_UCSD, US_Wisconsin |
Scope | usertests |
Events for this stage |
Output patterns
| Destination | Pattern | Lifetime | For next stage | RSE expression |
---|
1 | Rucio usertests:calcuttj_test_convert_unet-w7081s1p1 | *.pt | 604800 | False | |
Environment variables
Name | Value |
---|
DOCOPY | 1 |
NFILES | 5 |
UNET_DIR | /cvmfs/fifeuser3.opensciencegrid.org/sw/dune/b5765e0d70ac8df928f55219574a442063a5eb63 |
File states
Total files | Finding | Unallocated | Allocated | Outputting | Processed | Not found | Failed |
---|
|
10 | 0 | 9 | 0 | 0 | 1 | 0 | 0 |
Job states
Total | Submitted | Started | Processing | Outputting | Finished | Notused | Aborted | Stalled | Jobscript error | Outputting failed | None processed |
---|
44 | 0 | 0 | 0 | 0 | 34 | 0 | 8 | 2 | 0 | 0 | 0 |
RSEs used
Name | Inputs | Outputs |
---|
DUNE_UK_MANCHESTER_CEPH | 12 | 0 |
RAL_ECHO | 6 | 0 |
RAL-PP | 6 | 5 |
DUNE_US_FNAL_DISK_STAGE | 4 | 0 |
DUNE_FR_CCIN2P3_DISK | 1 | 0 |
Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)
File reset events, by site
Site | Allocated | Outputting |
---|
UK_RAL-PPD | 11 | 0 |
UK_Edinburgh | 8 | 0 |
FR_CCIN2P3 | 5 | 0 |
US_Colorado | 4 | 0 |
Jobscript
#!/bin/bash
#
# justIN jobscript: fetch input HDF5 files and convert them to PyTorch
# .pt files with convert_to_pt.py from the UNET_DIR software area.
# Required env var: UNET_DIR — directory containing convert_to_pt.py.
if [ -z "${UNET_DIR}" ]; then
  echo "fatal must provide UNET_DIR env var"
  exit 1
fi
# Set up the DUNE software environment (python + xrootd from cvmfs).
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
setup python v3_9_15
setup xrootd v5_5_5a -q e26:p3915:prof

# Default to a single processor when justIN does not say otherwise.
JUSTIN_PROCESSORS=${JUSTIN_PROCESSORS:-1}
echo "Justin processors: ${JUSTIN_PROCESSORS}"

# Cap the thread pool of every common numeric runtime at our allocation
# so the job does not oversubscribe the worker node.
for prefix in TF OPENBLAS JULIA MKL NUMEXPR OMP; do
  export "${prefix}_NUM_THREADS=${JUSTIN_PROCESSORS}"
done
# Build the list of input files to process.
# If TESTFILE is set, use it directly; otherwise pull up to NFILES file
# allocations from justIN via justin-get-file (each line: "did pfn rse").
NFILES=${NFILES:-1}
files=()
nfiles=0
if [ -n "${TESTFILE}" ]; then
  echo "Using testfile"
  # Deliberately unquoted: TESTFILE may hold several space-separated paths.
  files+=(${TESTFILE})
  nfiles=1
else
  for i in $(seq 1 ${NFILES}); do
    echo $i
    DID_PFN_RSE=$(${JUSTIN_PATH}/justin-get-file)
    if [ "${DID_PFN_RSE}" == "" ] ; then
      echo "Could not get file -- exiting loop"
      break
    fi
    echo "did_pfn_rse: ${DID_PFN_RSE}"
    # Second space-separated field is the PFN (physical file name).
    THISFILE=$(echo ${DID_PFN_RSE} | cut -f2 -d' ')
    echo $THISFILE
    files+=("${THISFILE}")
    nfiles=$(( nfiles + 1 ))
  done
fi
echo "Got $nfiles Files"
echo ${files[@]}
if [ $nfiles -eq 0 ]; then
  echo "Got no files. Exiting safely"
  # BUG FIX: previously fell through without exiting, so the job went on
  # to install torch and run the conversion with no inputs at all.
  exit 0
fi
echo "Justin specific env vars"
env | grep JUSTIN

# Timestamp and identifiers available for labelling this job's outputs.
now=$(date -u +"%Y%m%dT%H%M%SZ")
# jobid: e.g. "12345.0@schedd" -> "12345_0" (drop schedd, first '.' -> '_')
jobid=$(echo "${JUSTIN_JOBSUB_ID:-1}" | cut -f1 -d'@' | sed -e "s/\./_/")
stageid=${JUSTIN_STAGE_ID:-1}

# Provide torch/h5py: user-level pip install, or a pre-built venv when
# TORCHDIR points at one.
if [ -z "${TORCHDIR}" ]; then
  echo "installing torch"
  pip install --user torch h5py
  exitcode=$?
  if [ $exitcode -ne 0 ]; then
    echo "Error installing torch. Exiting with ${exitcode}"
    exit $exitcode
  fi
else
  echo "using venv"
  source "${TORCHDIR}/bin/activate"
fi
# Choose input mode: stream the remote PFNs directly (DOCOPY unset), or
# xrdcp each file to the local scratch area and process the local copies.
if [ -z "${DOCOPY}" ]; then
  echo "Not copying"
  input=${files[@]}
else
  for f in "${files[@]}"; do
    echo "Copying $f"
    xrdcp "$f" .
  done
  input=$(ls *h5)
fi
echo "input ${input[@]}"
## Run convert on the output
# Convert each input HDF5 file to .pt. The XRootD posix preload shim lets
# python open root:// URLs directly when inputs were not copied locally.
for f in $input; do
  echo "$f"
  LD_PRELOAD=$XROOTD_LIB/libXrdPosixPreload.so python "$UNET_DIR/convert_to_pt.py" "$f"
  exitcode=$?
  if [ $exitcode -ne 0 ]; then
    echo "Error converting to torch. Exiting with ${exitcode}"
    exit $exitcode
  fi
done
# Tell justIN which input PFNs were fully processed so it marks them
# done instead of re-allocating them to another job.
for pfn in "${files[@]}"; do
  echo "${pfn}" >> justin-processed-pfns.txt
done