Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)
Jobscript
#!/bin/bash
:<<'EOF'
To use this jobscript to process n files through the reco2 stage
and put the output in pnfs scratch,
use this command to create the workflow:
justin simple-workflow \
--mql "$MQL_QUERY" \
--jobscript my-reco1.jobscript --max-distance 30 --rss-mb 4000 --env NUM_EVENTS=10 \
--scope usertests --output-pattern "*_reco_data_*.root:afm-ana-test" --lifetime-days 1
The following optional environment variables can be set when creating the
workflow/stage: FCL_FILE, NUM_EVENTS, DUNE_VERSION, DUNE_QUALIFIER
EOF
echo 'AFM ana jobscript.'

# Job configuration. Each setting keeps the value supplied through the
# workflow/stage environment and only falls back to the default given here
# when the variable is unset or empty.
: "${FCL_FILE:=rsj_standard_ana_dune10kt_1x2x6.fcl}" # fcl file passed to lar
: "${DUNE_VERSION:=v09_75_03d00}"                    # dunesw version to set up
: "${DUNE_QUALIFIER:=e20:prof}"                      # dunesw qualifier to set up

# Optional limit on the number of events to process from the input file.
# When NUM_EVENTS is empty, events_option is deliberately left untouched so
# that no -n option is added to the lar command line.
if [[ -n "${NUM_EVENTS}" ]]; then
  events_option="-n ${NUM_EVENTS}"
fi
# First get an unprocessed file from this stage.
# The reply is expected to be space-separated fields, of which the first is
# used as the DID and the second as the PFN.
did_pfn_rse=$("$JUSTIN_PATH/justin-get-file")

# An empty reply means there is nothing to process: report it and finish
# with a zero exit status.
if [[ -z "$did_pfn_rse" ]]; then
  echo "No input files provided."
  exit 0
fi

# Keep a record of all input DIDs, for pdjson2meta file -> DID mapping
printf '%s\n' "$did_pfn_rse" | cut -f1 -d' ' >>all-input-dids.txt

# pfn is also needed when creating justin-processed-pfns.txt.
# The second field is taken with read rather than an unquoted echo, so that
# characters such as '?' or '*' in the PFN are never treated as glob patterns.
read -r _ pfn _ <<<"$did_pfn_rse"
echo "Input PFN = $pfn"
# Setup DUNE environment
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh

# the xroot lib for streaming non-root files is in testproducts,
# so add it to the start of the path
export PRODUCTS="/cvmfs/dune.opensciencegrid.org/products/dune/testproducts:${PRODUCTS}"
setup dunesw "${DUNE_VERSION}" -q "${DUNE_QUALIFIER}"

# Export the value of JUSTIN_PROCESSORS through each of the thread-count
# variables below, one export per name.
for thread_var in \
  TF_NUM_THREADS \
  OPENBLAS_NUM_THREADS \
  JULIA_NUM_THREADS \
  MKL_NUM_THREADS \
  NUMEXPR_NUM_THREADS \
  OMP_NUM_THREADS; do
  export "${thread_var}=${JUSTIN_PROCESSORS}"
done
unset thread_var
# Construct outFile from input $pfn
# Parameter expansion is used instead of echo|awk pipelines: the result is the
# same, but $pfn is never exposed to word splitting or pathname expansion.
now=$(date -u +"%Y-%m-%dT_%H%M%SZ") # UTC timestamp, e.g. 2024-11-25T_062839Z
Ffname=${pfn##*/}                   # text after the last '/' of the PFN
fname=${Ffname%%.*}                 # text before the first '.' of that name
outFile="${fname}_reco_data_${now}.root"

# Campaign label built from the justIN workflow and stage IDs
campaign="justIN.w${JUSTIN_WORKFLOW_ID}s${JUSTIN_STAGE_ID}"
# Run lar on the input file, with all of its stdout/stderr sent to a log file
# named after the input file and the timestamp.
larLog="${fname}_reco_${now}.log"
(
  # Do the scary preload stuff in a subshell!
  export LD_PRELOAD="${XROOTD_LIB}/libXrdPosixPreload.so"
  echo "$LD_PRELOAD"
  # events_option is either empty or "-n N" and must split into two words.
  # shellcheck disable=SC2086
  lar -c "$FCL_FILE" $events_option -o "$outFile" "$pfn" >"$larLog" 2>&1
)
# Subshell exits with exit code of last command (lar). It has to be recorded
# right here: $? is overwritten by every later command, so reading it after
# the echo/tail lines below would always yield the status of echo.
larExit=$?

echo '=== Start last 50 lines of lar log file ==='
tail -n 50 "$larLog"
echo '=== End last 50 lines of lar log file ==='
echo "lar exit code $larExit"
# The job counts as failed unless lar succeeded AND both metadata steps
# below succeeded.
jobscriptExit=1

if [ "${larExit}" -eq 0 ]; then
  # write metadata file if lar succeeded
  extractor_prod.py --infile "${outFile}" --no_crc --appname reco \
    --appversion "${DUNE_VERSION}" --appfamily art \
    --campaign "${campaign}" >"${outFile}.ext.json"
  extractorExit=$?
  echo "extractor_prod.py exit code $extractorExit"

  # Run pdjson2meta. THIS SHOULD MOVE TO SOMEWHERE LIKE duneutil ?
  # It is given the extractor output plus the list of input DIDs and its
  # stdout becomes the final metadata JSON for the output file.
  /cvmfs/dune.opensciencegrid.org/products/dune/justin/pro/NULL/jobutils/pdjson2metadata \
    "${outFile}.ext.json" all-input-dids.txt >"${outFile}.json"
  p2mExit=$?
  echo "pdjson2metadata exit code $p2mExit"

  # Only when both tools returned zero is the input PFN recorded as
  # processed and the job marked successful.
  if [ "${extractorExit}" -eq 0 ] && [ "${p2mExit}" -eq 0 ]; then
    echo "Metadata extraction succeeds"
    echo "$pfn" >justin-processed-pfns.txt
    echo "===Metadata JSON==="
    cat "${outFile}.json"
    echo
    echo "==================="
    jobscriptExit=0
  fi
fi
# Recursive long listing of the working directory, largest files first
ls -lRS

# Create compressed tar file with all log files.
# The archive is named after JUSTIN_JOBSUB_ID with every '@' replaced by '_'.
logs_archive="${JUSTIN_JOBSUB_ID//@/_}.logs.tgz"
tar zcf "${logs_archive}" *.log

# Finish with the status decided by the metadata step
exit $jobscriptExit
justIN time: 2024-11-25 06:28:39 UTC justIN version: 01.01.09