Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)
Jobscript
#!/bin/bash
:<<'EOF'
To use this jobscript to process 10 files from the dc4-vd-coldbox-bottom
data and put the output in the usertests namespace (MetaCat) and
scope (Rucio), and in the usertests:output-test-01 dataset in MetaCat and
Rucio, use this command to create the workflow:
justin simple-workflow \
--mql \
"files from dune:all where core.run_type='dc4-vd-coldbox-bottom' and dune.campaign='dc4' limit 10" \
--jobscript dc4-vd-coldbox-bottom.jobscript --max-distance 30 --rss-mb 4000 \
--scope usertests --output-pattern '*_reco_data_*.root:output-test-01'
The following optional environment variables can be set when creating the
workflow/stage: FCL_FILE, NUM_EVENTS, DUNE_VERSION, DUNE_QUALIFIER
EOF
# Job configuration. Each setting below takes its value from the environment
# when one was supplied at workflow/stage creation, and otherwise falls back
# to the default given here.
# fcl file and DUNE software version/qualifier to be used
FCL_FILE=${FCL_FILE:-crp4_data_reco.fcl}
DUNE_VERSION=${DUNE_VERSION:-v09_89_01d00}
DUNE_QUALIFIER=${DUNE_QUALIFIER:-e26:prof}

# Number of events to process from the input file. events_option is only
# assigned when NUM_EVENTS is non-empty, so that by default no -n option is
# added to the lar command line.
if [ -n "${NUM_EVENTS:-}" ]; then
  events_option="-n $NUM_EVENTS"
fi
# First get an unprocessed file from this stage. The reply is treated below
# as space-separated fields: field 1 is recorded as the DID and field 2 is
# used as the PFN.
did_pfn_rse=$("$JUSTIN_PATH/justin-get-file")
if [ -z "$did_pfn_rse" ]; then
  # An empty reply means there is no file for this job to work on
  echo "Nothing to process - exit jobscript"
  exit 0
fi

# Keep a record of all input DIDs, for pdjson2meta file -> DID mapping
echo "$did_pfn_rse" | cut -f1 -d' ' >>all-input-dids.txt

# pfn is also needed when creating justin-processed-pfns.txt
# (quoted so the reply is not word-split or glob-expanded before cut sees it)
pfn=$(echo "$did_pfn_rse" | cut -f2 -d' ')
echo "Input PFN = $pfn"
# Setup DUNE environment
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh

# Put the input tar directory in front of the existing PRODUCTS path before
# setting up dunesw
export PRODUCTS="${INPUT_TAR_DIR_LOCAL}:${PRODUCTS}"
echo "PRODUCTS: ${PRODUCTS}"
setup dunesw "$DUNE_VERSION" -q "$DUNE_QUALIFIER"
echo "DUNESW_DIR: ${DUNESW_DIR}"

# Properly setup custom code with INPUT_TAR_DIR_LOCAL: the variables below
# are re-pointed at the dunesw / dunereco / duneprototypes trees under
# INPUT_TAR_DIR_LOCAL.
# NOTE(review): the slf7.x86_64.e26.prof and Linux64bit+3.10-2.17-e26-prof
# directory names are hard-coded and do not follow DUNE_QUALIFIER - confirm
# they are still right before overriding the qualifier.
export DUNESW_DIR=${INPUT_TAR_DIR_LOCAL}/dunesw/${DUNE_VERSION}
export DUNESW_FQ_DIR=${DUNESW_DIR}/slf7.x86_64.e26.prof
export DUNESW_LIB=${DUNESW_FQ_DIR}/lib

export DUNERECO_DIR=${INPUT_TAR_DIR_LOCAL}/dunereco/${DUNE_VERSION}
# The include directory goes in DUNERECO_INC (same layout as
# DUNEPROTOTYPES_INC below); DUNERECO_LIB is the lib directory only.
export DUNERECO_INC=${DUNERECO_DIR}/include
export DUNERECO_FQ_DIR=${DUNERECO_DIR}/slf7.x86_64.e26.prof
export DUNERECO_LIB=${DUNERECO_FQ_DIR}/lib

# WIRECELL_PATH is rebuilt from scratch; each later entry is prepended, so
# the dunereco wire-cell-cfg directory ends up first in the path.
export WIRECELL_PATH=${DUNE_PARDATA_DIR}/WireCellData
export WIRECELL_PATH=${WIRECELL_DIR}/Linux64bit+3.10-2.17-e26-prof/share/wirecell:${WIRECELL_PATH}
export WIRECELL_PATH=${DUNERECO_DIR}/wire-cell-cfg:${WIRECELL_PATH}

export DUNEPROTOTYPES_DIR=${INPUT_TAR_DIR_LOCAL}/duneprototypes/${DUNE_VERSION}
export DUNEPROTOTYPES_INC=${DUNEPROTOTYPES_DIR}/include
export DUNEPROTOTYPES_FQ_DIR=${DUNEPROTOTYPES_DIR}/slf7.x86_64.e26.prof
export DUNEPROTOTYPES_LIB=${DUNEPROTOTYPES_FQ_DIR}/lib

# Prepend the custom fcl directories to the existing FHICL_FILE_PATH; the
# last one prepended (dunereco) ends up first.
export FHICL_FILE_PATH=${DUNESW_DIR}/fcl:${FHICL_FILE_PATH}
export FHICL_FILE_PATH=${DUNEPROTOTYPES_DIR}/fcl:${FHICL_FILE_PATH}
export FHICL_FILE_PATH=${DUNERECO_DIR}/fcl:${FHICL_FILE_PATH}

# Uncomment for debugging the resulting environment
#echo "DUNESW_DIR: ${DUNESW_DIR}"
#echo "DUNERECO_DIR: ${DUNERECO_DIR}"
#echo "DUNEPROTOTYPES_DIR: ${DUNEPROTOTYPES_DIR}"
#echo "WIRECELL_PATH: ${WIRECELL_PATH}"
#echo "FHICL_FILE_PATH: ${FHICL_FILE_PATH}"

# OpenMP thread count is taken from JUSTIN_PROCESSORS
export OMP_NUM_THREADS=${JUSTIN_PROCESSORS}
# Construct outFile from input $pfn
# UTC timestamp used to make the output and log file names unique
now=$(date -u +"%Y-%m-%dT_%H%M%SZ")
# Last path component of the PFN (everything after the final '/')
Ffname=${pfn##*/}
# ... cut at its first '.', i.e. the file name without any extensions
fname=${Ffname%%.*}
outFile=${fname}_crp4_data_reco_${now}.root
# NOTE(review): campaign is not referenced again in the visible script -
# confirm whether it is still needed.
campaign="justIN.r${JUSTIN_WORKFLOW_ID}s${JUSTIN_STAGE_ID}"
# Run lar on the input PFN; stdout and stderr go to a per-file log.
(
  # Do the scary preload stuff in a subshell, so the exported LD_PRELOAD
  # does not leak into the commands that follow the closing parenthesis
  export LD_PRELOAD=${XROOTD_LIB}/libXrdPosixPreload.so
  echo "$LD_PRELOAD"
  # $events_option is deliberately left unquoted: it is either unset or
  # "-n <count>" and has to split into two separate arguments.
  # shellcheck disable=SC2086
  lar -c "$FCL_FILE" $events_option -T "$outFile" "$pfn" \
    > "${fname}_crp4_data_reco_${now}.log" 2>&1
)
# Subshell exits with exit code of last command
larExit=$?
echo "lar exit code $larExit"

# Only report the input as processed when lar actually succeeded
if [ "$larExit" -eq 0 ]; then
  echo "$pfn" > justin-processed-pfns.txt
fi

ls -lRS

# Create compressed tar file with all log files; any '@' in the jobsub ID
# is replaced by '_' in the archive name
tar zcf "${JUSTIN_JOBSUB_ID//@/_}.logs.tgz" *.log

# The jobscript's exit status is lar's exit status
exit "$larExit"
justIN time: 2024-11-23 12:02:46 UTC justIN version: 01.01.09