Workflow 7106, Stage 1
| Parameter | Value |
| --- | --- |
| Priority | 50 |
| Processors | 1 |
| Wall seconds | 80000 |
| Image | /cvmfs/singularity.opensciencegrid.org/fermilab/fnal-wn-sl7:latest |
| RSS bytes | 8387559424 (7999 MiB) |
| Max distance for inputs | 100.0 |
| Scope | usertests |
Enabled input RSEs |
CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC |
Enabled output RSEs |
CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC |
Enabled sites |
BR_CBPF, CA_SFU, CA_Victoria, CERN, CH_UNIBE-LHEP, ES_CIEMAT, ES_PIC, FR_CCIN2P3, IN_TIFR, IT_CNAF, NL_SURFsara, UK_Bristol, UK_Brunel, UK_Durham, UK_Edinburgh, UK_Lancaster, UK_Manchester, UK_Oxford, UK_QMUL, UK_RAL-PPD, UK_RAL-Tier1, UK_Sheffield, US_Caltech, US_Colorado, US_FNAL-FermiGrid, US_FNAL-T1, US_Michigan, US_MIT, US_Nebraska, US_NotreDame, US_PuertoRico, US_SU-ITS, US_Swan, US_UChicago, US_UConn-HPC, US_UCSD, US_Wisconsin |
Events for this stage |
Output patterns
| # | Destination | Pattern | Lifetime (s) | For next stage | RSE expression |
| --- | --- | --- | --- | --- | --- |
| 1 | Rucio usertests:calcuttj_unet_input-w7106s1p1 | *.h5 | 7776000 (90 days) | False | |
Environment variables
| Name | Value |
| --- | --- |
| DUNESW_TAR | /cvmfs/fifeuser3.opensciencegrid.org/sw/dune/c9be5504be824ed976a7e79d7471f1ecffc87d20 |
| DUNESW_VERSION | v10_01_03d00 |
| MCJob | 1 |
| NEVENTS | 5 |
| pipyaml | 1 |
| UNET_DIR | /cvmfs/fifeuser3.opensciencegrid.org/sw/dune/b5765e0d70ac8df928f55219574a442063a5eb63 |
| UTIL_TAR | /cvmfs/fifeuser2.opensciencegrid.org/sw/dune/d0e7b0ea355210af5e3f44d4e21e4b90d8f11883 |
| YAMLFILE | pdhd_unet.yaml |
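
These environment variables, together with the parameter table at the top, define the stage's runtime configuration. As a rough illustration, a comparable stage could be submitted with the justin command-line tool along the following lines. This is a hedged sketch, not the recorded submission command: the flag names follow the justIN tutorial and should be checked against `justin --help`, and the output dataset name is illustrative.

```bash
# Hedged sketch of an equivalent submission (flags assumed from the justIN
# tutorial; verify with `justin --help` before use).
justin simple-workflow \
  --monte-carlo 8746 \
  --jobscript pdhd_unet.jobscript \
  --scope usertests \
  --rss-mib 7999 \
  --wall-seconds 80000 \
  --max-distance 100.0 \
  --env DUNESW_TAR=/cvmfs/fifeuser3.opensciencegrid.org/sw/dune/c9be5504be824ed976a7e79d7471f1ecffc87d20 \
  --env DUNESW_VERSION=v10_01_03d00 \
  --env MCJob=1 \
  --env NEVENTS=5 \
  --env pipyaml=1 \
  --env UNET_DIR=/cvmfs/fifeuser3.opensciencegrid.org/sw/dune/b5765e0d70ac8df928f55219574a442063a5eb63 \
  --env UTIL_TAR=/cvmfs/fifeuser2.opensciencegrid.org/sw/dune/d0e7b0ea355210af5e3f44d4e21e4b90d8f11883 \
  --env YAMLFILE=pdhd_unet.yaml \
  --output-pattern '*.h5:calcuttj_unet_input'
```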
File states
Job states
RSEs used
| Name | Inputs | Outputs |
| --- | --- | --- |
| MONTECARLO | 8746 | 0 |
| DUNE_US_FNAL_DISK_STAGE | 0 | 1118 |
| RAL_ECHO | 0 | 954 |
| DUNE_UK_MANCHESTER_CEPH | 0 | 621 |
| DUNE_CERN_EOS | 0 | 257 |
| RAL-PP | 0 | 158 |
| QMUL | 0 | 57 |
| DUNE_CA_SFU | 0 | 12 |
| DUNE_FR_CCIN2P3_DISK | 0 | 1 |
Stats of processed input files and of uploaded output files are available for download as CSV or JSON (up to 10000 files included).
File reset events, by site
| Site | Allocated | Outputting |
| --- | --- | --- |
| UK_RAL-PPD | 596 | 0 |
| US_NotreDame | 22 | 0 |
| US_FNAL-T1 | 7 | 0 |
| ES_PIC | 6 | 0 |
| US_FNAL-FermiGrid | 2 | 0 |
Jobscript

```bash
#!/bin/bash
#
# These must be defined
if [ -z "$UTIL_TAR" ]; then
echo "Fatal: must provide UTIL_TAR env var"
exit 1
fi
export PYTHONPATH=$UTIL_TAR:$PYTHONPATH
if [ -z "$UNET_DIR" ]; then
echo "Fatal: must provide UNET_DIR env var"
exit 1
fi
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
setup metacat
setup python v3_9_15
export METACAT_SERVER_URL=https://metacat.fnal.gov:9443/dune_meta_prod/app
export METACAT_AUTH_SERVER_URL=https://metacat.fnal.gov:8143/auth/dune
if [ -z "${JUSTIN_PROCESSORS}" ]; then
JUSTIN_PROCESSORS=1
fi
echo "Justin processors: ${JUSTIN_PROCESSORS}"
export TF_NUM_THREADS=${JUSTIN_PROCESSORS}
export OPENBLAS_NUM_THREADS=${JUSTIN_PROCESSORS}
export JULIA_NUM_THREADS=${JUSTIN_PROCESSORS}
export MKL_NUM_THREADS=${JUSTIN_PROCESSORS}
export NUMEXPR_NUM_THREADS=${JUSTIN_PROCESSORS}
export OMP_NUM_THREADS=${JUSTIN_PROCESSORS}
echo "Will use justin-get-file"
DID_PFN_RSE=`$JUSTIN_PATH/justin-get-file`
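# justin-get-file prints one "<did> <pfn> <rse>" line for the file allocated
# to this job; an empty reply means nothing is left to process, so the
# script exits 0 rather than flagging an error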
if [ "${DID_PFN_RSE}" == "" ] ; then
echo "Could not get file"
exit 0
fi
export pfn=`echo ${DID_PFN_RSE} | cut -f2 -d' '`
export did=`echo ${DID_PFN_RSE} | cut -f1 -d' '`
echo "Justin specific env vars"
env | grep JUSTIN
now=$(date -u +"%Y%m%dT%H%M%SZ")
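# JUSTIN_JOBSUB_ID is of the form "<cluster>.<process>@<schedd>"; drop the
# schedd and replace the "." with "_" to get a filename-safe job id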
jobid=`echo "${JUSTIN_JOBSUB_ID:-1}" | cut -f1 -d'@' | sed -e "s/\./_/"`
stageid=${JUSTIN_STAGE_ID:-1}
a=1
###Subshell for lar
#(
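# If a custom dunesw tarball was supplied, prepend it to the UPS PRODUCTS
# search path so it overrides the corresponding CVMFS release set up below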
if [ -n "$DUNESW_TAR" ]; then
stat ${DUNESW_TAR}
if [ $? -ne 0 ]; then
echo "Failed to stat $DUNESW_TAR. Exiting safely"
exit 0
fi
export PRODUCTS=$DUNESW_TAR:$PRODUCTS
echo "Set dunesw to $DUNESW_TAR"
fi
if [ -n "$WIRECELL_TAR" ]; then
stat ${WIRECELL_TAR}
if [ $? -ne 0 ]; then
echo "Failed to stat $WIRECELL_TAR. Exiting safely"
exit 0
fi
export PRODUCTS=$WIRECELL_TAR:$PRODUCTS
echo "Set wirecell to $WIRECELL_TAR"
fi
#Setup recent lar software suite
DUNESW_VERSION=${DUNESW_VERSION:-v10_02_02d00}
setup dunesw \
"${DUNESW_VERSION}" \
-q "${DUNE_QUALIFIER:-e26:prof}"
setup_exit=$?
if [ $setup_exit -ne 0 ]; then
echo "Failed to setup dunesw $DUNESW_VERSION ${DUNE_QUALIFIER:-e26:prof}"
exit $setup_exit
fi
echo "DUNESW loc:"
ups active | grep dunesw
echo "WIRECELL loc:"
ups active | grep wirecell
echo "printing env"
env
echo "DUNE specific env vars"
env | grep DUNE
# User can specify to pip install pyyaml
if [ -n "$pipyaml" ]; then
echo "Installing yaml"
pip install --user pyyaml
echo "Done"
# elif [ -n "${YAML_TAR}" ]; then
# echo "Using venv at ${YAML_TAR}"
# source ${YAML_TAR}/yaml_venv/bin/activate
fi
input_filename=`echo $did | cut -f2 -d':'`
echo "input file: $input_filename"
echo "jobsub_id: ${JUSTIN_JOBSUB_ID:-1}"
#Get run/subrun from parent if using input -- else from justin job ID/process
if [ -z "$MCJob" ]; then
echo "Getting run subrun from $did"
run_subrun=`python -m beam_job_utils get_run_subrun -i $did`
get_run_ret=$?
if [ $get_run_ret -ne 0 ]; then
echo "error in get_run_subrun"
fi
echo $run_subrun
run=`echo $run_subrun | cut -f1 -d' '`
subrun=`echo $run_subrun | cut -f2 -d' '`
else
run=`echo $jobid | cut -f1 -d'_'` ##TODO -- check order here
subrun=`echo $jobid | cut -f2 -d'_'`
fi
nevents=${NEVENTS:--1} #Set from env override or -1
NSKIP=${NSKIP:-0}
echo "nevents: $nevents"
overrides="core.data_tier=full-reconstructed \
core.application.version=${DUNESW_VERSION} \
core.application.name=reco \
core.application.family=dunesw \
core.application=dunesw.reco \
"
#User specifies whether they'd like to use an input file or not
if [ -n "$MCJob" ]; then
inputflag=""
else
inputflag="-i $pfn --parent $did"
fi
echo "input flag: $inputflag"
# TODO -- metadata file configurable
python -m beam_job_utils run_job \
--json ${UTIL_TAR}/${JSONFILE:-pdsp_beam_base_meta.json} \
--overrides ${overrides} \
--run $run \
--subrun $subrun \
--yaml ${UTIL_TAR}/${YAMLFILE:-pdsp_beam_gen.yaml} \
--nevents $nevents \
--set_esr \
$inputflag
exitcode=$?
if [ $exitcode -ne 0 ]; then
echo "Error running. Exiting with ${exitcode}"
exit $exitcode
fi
#)
#if [ -n $TESTFILE ]; then
# echo $TESTFILE
# ln -s $TESTFILE extracted.h5
# ls -lht
#fi
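# Rename each extracted*.h5 output with the UTC timestamp, job id, and stage
# id so file names are unique across jobs when uploaded via the *.h5 pattern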
h5files=extracted*h5
for i in $h5files; do
echo $i
mv $i `echo $i | sed -e "s/\.h5/_${now}_${jobid}_${stageid}.h5/"`
done
###subshell for convert
#(
# echo "installing torch"
# pip install --user torch h5py
#
# exitcode=$?
# if [ $exitcode -ne 0 ]; then
# echo "Error installing torch. Exiting with ${exitcode}"
# exit $exitcode
# fi
#
#
# ### Run convert on the output
# #python $UNET_DIR/convert_to_pt.py extracted*h5
# #exitcode=$?
# #if [ $exitcode -ne 0 ]; then
# # echo "Error converting to torch. Exiting with ${exitcode}"
# # exit $exitcode
# #fi
# ### Run convert on the output
# for f in `ls extracted*h5`; do
# echo $f
# python $UNET_DIR/convert_to_pt.py $f #$files #$pfn #extracted*h5
# exitcode=$?
# if [ $exitcode -ne 0 ]; then
# echo "Error converting to torch. Exiting with ${exitcode}"
# exit $exitcode
# fi
# done
#)
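# Record the input PFN so justIN marks it as successfully processed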
echo "$pfn" > justin-processed-pfns.txt