Workflow 2802, Stage 1
Parameter | Value
---|---
Priority | 50
Processors | 1
Wall seconds | 18000
RSS bytes | 4193255424 (3999 MiB)
Max distance for inputs | 30.0
Enabled input RSEs | CERN_PDUNE_EOS, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MANCHESTER, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled output RSEs | CERN_PDUNE_EOS, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MANCHESTER, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled sites | BR_CBPF, CA_SFU, CA_Victoria, CERN, CH_UNIBE-LHEP, CZ_FZU, ES_CIEMAT, ES_PIC, FR_CCIN2P3, IN_TIFR, IT_CNAF, NL_NIKHEF, NL_SURFsara, UK_Bristol, UK_Brunel, UK_Durham, UK_Edinburgh, UK_Imperial, UK_Lancaster, UK_Liverpool, UK_Manchester, UK_Oxford, UK_RAL-Tier1, UK_Sheffield, US_BNL, US_Caltech, US_Colorado, US_FNAL-FermiGrid, US_FNAL-T1, US_Michigan, US_MIT, US_Nebraska, US_NotreDame, US_PuertoRico, US_SU-ITS, US_Swan, US_UChicago, US_UConn-HPC, US_UCSD, US_Wisconsin
Scope | hd-protodune-det-reco
Events for this stage |
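For reference, the RSS limit of 4193255424 bytes is exactly 3999 × 1024² bytes (just under 4 GiB per job), and the wall-time limit of 18000 seconds corresponds to 5 hours.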
Output patterns
 | Destination | Pattern | Lifetime | For next stage
---|---|---|---|---
1 | Rucio hd-protodune-det-reco:hd-protodune_reconstruction_keepup_20240810_set1_v09_91_02d01_v0_2802 | *keepup.root | 7776000 | False
2 | Rucio hd-protodune-det-reco:hd-protodune_reconstruction_keepup_ntuples_20240810_set1_v09_91_02d01_v0_2802 | *hists.root | 7776000 | False
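Both datasets carry a lifetime of 7776000 seconds (90 days) and neither feeds a further stage. Assuming a Rucio client configured for DUNE, the files collected into either dataset can be listed afterwards with, for example:

  rucio list-files hd-protodune-det-reco:hd-protodune_reconstruction_keepup_20240810_set1_v09_91_02d01_v0_2802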
Environment variables
Name | Value
---|---
METADATA_DIR | /cvmfs/fifeuser2.opensciencegrid.org/sw/dune/71f643ddd59465043e3cd3712a1e24b9cd0fa631 |
Condor Class Ads
Name | Value
---|---
HAS_CVMFS_dune_osgstorage_org | true |
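This ClassAd steers the jobs to worker nodes that advertise the dune.osgstorage.org CVMFS repository.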
File states
Total files | Finding | Unallocated | Allocated | Outputting | Processed | Not found | Failed
---|---|---|---|---|---|---|---
13275 | 0 | 0 | 8 | 0 | 13247 | 19 | 1
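The per-state counts add up to the total: 8 allocated + 13247 processed + 19 not found + 1 failed = 13275 files.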
Job states
Total | Submitted | Started | Processing | Outputting | Finished | Notused | Aborted | Stalled | Jobscript error | Outputting failed | None processed
---|---|---|---|---|---|---|---|---|---|---|---
15892 | 0 | 0 | 0 | 0 | 14943 | 0 | 221 | 524 | 1 | 0 | 203
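The job counts are likewise consistent: 14943 finished + 221 aborted + 524 stalled + 1 jobscript error + 203 none processed = 15892 jobs.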
RSEs used
Name | Inputs | Outputs
---|---|---
DUNE_CERN_EOS | 13845 | 1101 |
MANCHESTER | 0 | 8575 |
PRAGUE | 0 | 6221 |
SURFSARA | 0 | 5444 |
NIKHEF | 0 | 3977 |
DUNE_FR_CCIN2P3_DISK | 0 | 461 |
DUNE_ES_PIC | 0 | 406 |
RAL_ECHO | 0 | 314 |
RAL-PP | 0 | 11 |
Stats of processed input files and of uploaded output files are available for download as CSV or JSON (up to 10000 files included).
File reset events, by site
Site | Allocated | Outputting
---|---|---
UK_Brunel | 213 | 0 |
UK_Sheffield | 45 | 2 |
NL_NIKHEF | 45 | 34 |
NL_SURFsara | 43 | 27 |
UK_Manchester | 36 | 20 |
CZ_FZU | 27 | 31 |
CERN | 11 | 7 |
IT_CNAF | 6 | 5 |
ES_PIC | 6 | 1 |
UK_RAL-Tier1 | 5 | 5 |
UK_Lancaster | 4 | 2 |
FR_CCIN2P3 | 3 | 3 |
UK_Liverpool | 2 | 2 |
UK_Durham | 1 | 0 |
UK_Imperial | 1 | 1 |
Jobscript
#!/bin/bash
#
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
setup metacat
export METACAT_SERVER_URL=https://metacat.fnal.gov:9443/dune_meta_prod/app
export METACAT_AUTH_SERVER_URL=https://metacat.fnal.gov:8143/auth/dune
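# If the workflow environment supplies locally staged product areas (DUNESW_DIR,
# LARRECO_DIR, DUNEPROTOTYPES_DIR), put them ahead of the CVMFS release on the
# UPS PRODUCTS path so they override the corresponding products.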
if [ -n "${DUNESW_DIR}" ]; then
stat ${DUNESW_DIR}
if [ $? -ne 0 ]; then
echo "failed to stat dunesw dir"
exit 1
fi
export PRODUCTS=$DUNESW_DIR:$PRODUCTS
fi
if [ -n "${LARRECO_DIR}" ]; then
stat ${LARRECO_DIR}
if [ $? -ne 0 ]; then
echo "failed to stat larreco dir"
exit 1
fi
export PRODUCTS=$LARRECO_DIR:$PRODUCTS
fi
if [ -n "${DUNEPROTOTYPES_DIR}" ]; then
stat ${DUNEPROTOTYPES_DIR}
if [ $? -ne 0 ]; then
echo "failed to stat dunedetdataformats dir"
exit 1
fi
export PRODUCTS=$DUNEPROTOTYPES_DIR:$PRODUCTS
fi
echo "PRODUCTS $PRODUCTS"
# Set up recent lar software suite
DUNE_VERSION=${DUNE_VERSION:-v09_91_02d01}
setup dunesw \
"${DUNE_VERSION}" \
-q "${DUNE_QUALIFIER:-e26:prof}"
if [ $? -ne 0 ]; then
echo "Failed to setup dunesw $DUNE_VERSION $DUNE_QUALIFIER"
exit 1
fi
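# Optionally pick up user-supplied fcl files: if USE_INPUT_FCL is set, INPUT_DIR
# must exist and is prepended to FHICL_FILE_PATH.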
if [ -n "${USE_INPUT_FCL}" ]; then
if [ -z "${INPUT_DIR}" ]; then
echo "Error, INPUT_DIR is undefined but user requested USE_INPUT_FCL"
exit 1
fi
stat ${INPUT_DIR}
if [ $? -ne 0 ]; then
echo "Failed to stat input dir. Exiting safely"
exit 0
fi
FHICL_FILE_PATH=${INPUT_DIR}:${FHICL_FILE_PATH}
echo "FCL PATH: $FHICL_FILE_PATH"
fi
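# METADATA_DIR (set in the workflow environment) holds the meta_maker module used
# below for building output metadata; add it to PYTHONPATH.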
if [ -n "${METADATA_DIR}" ]; then
stat ${METADATA_DIR}
if [ $? -ne 0 ]; then
echo "failed to stat metadata dir"
fi
echo "metadata dir contents:"
ls $METADATA_DIR
export PYTHONPATH=${METADATA_DIR}:$PYTHONPATH
fi
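# Default fcl names for the two reco stages; fhicl-dump is run first as a sanity
# check that each configuration resolves before any events are processed.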
FCL1=${FCL1:-"standard_reco_stage1_protodunehd_keepup.fcl"}
echo "FCL1 dump:" ${FCL1}
fhicl-dump ${FCL1}
if [ $? -ne 0 ]; then
echo "fhicl-dump ${FCL1} failed"
exit 1
fi
FCL2=${FCL2:-"standard_reco_stage2_calibration_protodunehd_keepup.fcl"}
echo "FCL2 dump:" ${FCL2}
fhicl-dump ${FCL2}
if [ $? -ne 0 ]; then
echo "fhicl-dump ${FCL2} failed"
exit 1
fi
echo "DUNESW loc:"
ups active | grep dunesw
if [ -z "${JUSTIN_PROCESSORS}" ]; then
JUSTIN_PROCESSORS=1
fi
echo "Justin processors: ${JUSTIN_PROCESSORS}"
export TF_NUM_THREADS=${JUSTIN_PROCESSORS}
export OPENBLAS_NUM_THREADS=${JUSTIN_PROCESSORS}
export JULIA_NUM_THREADS=${JUSTIN_PROCESSORS}
export MKL_NUM_THREADS=${JUSTIN_PROCESSORS}
export NUMEXPR_NUM_THREADS=${JUSTIN_PROCESSORS}
export OMP_NUM_THREADS=${JUSTIN_PROCESSORS}
echo "printing env"
env
echo "Will use justin-get-file"
#
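# justin-get-file returns a single "DID PFN RSE" line for the next unprocessed
# input file; an empty reply means no work is available and is not an error.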
DID_PFN_RSE=`$JUSTIN_PATH/justin-get-file`
##Check that any file was returned
if [ "${DID_PFN_RSE}" == "" ] ; then
echo "Could not get file"
exit 0
fi
pfn=`echo ${DID_PFN_RSE} | cut -f2 -d' '`
did=`echo ${DID_PFN_RSE} | cut -f1 -d' '`
echo "pfn: ${pfn}"
echo "did: ${did}"
now=$(date -u +"%Y%m%dT%H%M%SZ")
nevents=${NEVENTS:--1}
extra_line=""
if [ -n "${SKIPFCL2}" ]; then
jobsub_id=`echo ${JUSTIN_JOBSUB_ID:-1.1@1} | cut -f1 -d'@' | sed -e"s/\./_/"`
extra_line="-T pdhd_${jobsub_id}_${JUSTIN_WORKFLOW_ID}_${now}_decoder.root"
fi
echo "Running reco stage1"
touch reco.log
starttime=`date +"%s"`.0
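# Preload the XRootD POSIX shim so lar can read the input directly from its
# xrootd PFN. Note the redirect of lar output into reco.log is currently
# commented out, so reco.log stays empty.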
LD_PRELOAD=$XROOTD_LIB/libXrdPosixPreload.so lar \
-c ${FCL1} \
-n ${nevents} \
${extra_line} ${pfn} #>reco.log 2>&1
larExit=$?
endtime=`date +"%s"`.0
if [ $larExit -ne 0 ]; then
echo "Error in reco1"
cat reco.log
exit $larExit
fi
if [ -n "${SKIPFCL2}" ]; then
echo "$pfn" > justin-processed-pfns.txt
exit 0
fi
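# Stage 2: run the calibration fcl over the stage-1 output file.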
output_stage1_file=`ls *stage1.root`
starttime=`date +"%s"`.0
lar -c ${FCL2} \
$output_stage1_file #>reco.log 2>&1
larExit=$?
endtime=`date +"%s"`.0
if [ $larExit -ne 0 ]; then
echo "Error in reco2"
cat reco.log
exit $larExit
fi
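# Collect the outputs and rename the histogram file so its name is derived from
# the reco file name while still matching the *hists.root output pattern.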
output_reco_file=`ls *keepup.root`
output_mr_file=`ls *keepup_hists.root`
new_mr_file=`echo $output_reco_file | sed -e "s/keepup/keepup_hists/"`
mv $output_mr_file $new_mr_file
output_mr_file=$new_mr_file
echo "Output files:"
echo "\tReco: ${output_reco_file}"
echo "\tHists: ${output_mr_file}"
echo "Forming reco metadata"
python -m meta_maker --start_time $starttime --end_time $endtime --file_format "artroot" \
--app_family "dunesw" --app_name "reco" --app_version ${DUNE_VERSION} \
--data_tier "full-reconstructed" --get_events -p "$did" \
--campaign "hd-protodune-reco-keepup-v0" \
--fcl $FCL2 \
--past_fcls $FCL1 --past_apps "reco1" \
-f "${JUSTIN_SCOPE}:$output_reco_file" -j "${output_reco_file}.json"
if [ $? -ne 0 ]; then
echo "Error in reco metadata"
exit 1
fi
echo "Ran successfully"
## TODO -- CHECK
cat ${output_reco_file}.json
echo "Forming hist metadata"
python -m meta_maker --start_time $starttime --end_time $endtime --file_format "root" \
--app_family "dunesw" --app_name "reco" --app_version ${DUNE_VERSION} \
--data_tier "root-tuple-virtual" -p "$did" \
--campaign "hd-protodune-reco-keepup-v0" \
--fcl $FCL2 \
--past_fcls $FCL1 --past_apps "reco1" \
-f "${JUSTIN_SCOPE}:$output_mr_file" -j "${output_mr_file}.json"
#--parent_as_json \
if [ $? -ne 0 ]; then
echo "Error in hist metadata"
exit 1
fi
echo "formed"
cat ${output_mr_file}.json
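# Recording the input PFN in justin-processed-pfns.txt tells justIN that this
# file was processed successfully.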
echo "$pfn" > justin-processed-pfns.txt