justIN           Dashboard       Workflows       Jobs       AWT       Sites       Storages       Docs       Login

Workflow 2339, Stage 1

Priority: 50
Processors: 1
Wall seconds: 80000
RSS bytes: 4193255424 (3999 MiB)
Max distance for inputs: 30.0
Enabled input RSEs CERN_PDUNE_EOS, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_LANCASTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MANCHESTER, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled output RSEs CERN_PDUNE_EOS, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_LANCASTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MANCHESTER, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled sites CA_SFU, CA_Victoria, CERN, CH_UNIBE-LHEP, CZ_FZU, ES_CIEMAT, ES_PIC, FR_CCIN2P3, IN_TIFR, IT_CNAF, NL_NIKHEF, NL_SURFsara, UK_Bristol, UK_Brunel, UK_Durham, UK_Edinburgh, UK_Imperial, UK_Lancaster, UK_Liverpool, UK_Manchester, UK_Oxford, UK_QMUL, UK_RAL-Tier1, UK_Sheffield, US_BNL, US_Caltech, US_Colorado, US_FNAL-FermiGrid, US_FNAL-T1, US_Michigan, US_MIT, US_Nebraska, US_NotreDame, US_PuertoRico, US_SU-ITS, US_Swan, US_UChicago, US_UConn-HPC, US_UCSD, US_Wisconsin
Scope: usertests
Events for this stage

Output patterns

# | Destination | Pattern | Lifetime | For next stage
1 | https://fndcadoor.fnal.gov:2880/dune/scratch/users/calcuttj/justin/keepup_calib_tests_061824_2/02339/1 | *keepup.root | |
2 | https://fndcadoor.fnal.gov:2880/dune/scratch/users/calcuttj/justin/keepup_calib_tests_hists_061824_2/02339/1 | *hists.root | |

Environment variables

Name | Value
DUNESW_DIR | /cvmfs/fifeuser2.opensciencegrid.org/sw/dune/1278f2de98395119b04ecc3e0704f4dc96722ea2
LARRECO_DIR | /cvmfs/fifeuser1.opensciencegrid.org/sw/dune/aa06030a4a5fd60378390331c4033b08c6680a0b
METADATA_DIR | /cvmfs/fifeuser2.opensciencegrid.org/sw/dune/f03367f34ddfb69d43216cdfec78c499daf5977d

Condor Class Ads

Name | Value
HAS_CVMFS_dune_osgstorage_org | True

File states

Total files | Finding | Unallocated | Allocated | Outputting | Processed | Not found | Failed
1332 | 0 | 0 | 0 | 0 | 1282 | 1 | 49

Job states

TotalSubmittedStartedProcessingOutputtingFinishedNotusedAbortedStalledJobscript errorOutputting failedNone processed
301100002284140549117389
Files processed00100100200200300300400400500500600600700700800800Jun-18 06:00Jun-18 15:00Jun-19 00:00Jun-19 09:00Jun-19 18:00Jun-20 03:00Jun-20 12:00Jun-20 21:00Jun-21 06:00Jun-21 15:00Jun-22 00:00Jun-22 09:00Jun-22 18:00Jun-23 03:00Jun-23 12:00Jun-23 21:00Jun-24 06:00Jun-24 15:00Jun-25 00:00Jun-25 09:00Files processedBin start timesNumber per binCERNUK_BrunelUK_DurhamUK_ImperialUK_SheffieldNL_SURFsaraFR_CCIN2P3NL_NIKHEFUK_ManchesterIT_CNAFUK_LancasterUK_QMULUK_LiverpoolES_PIC
Replicas per RSE1331490.025244.51331269.975244.50000000000003Replicas per RSEDUNE_CERN_EOS (50%)FNAL_DCACHE (50%)

RSEs used

NameInputsOutputs
DUNE_CERN_EOS19040

Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)

File reset events, by site

SiteAllocatedOutputting
CERN951
NL_SURFsara920
NL_NIKHEF731
IT_CNAF400
FR_CCIN2P3331
UK_Manchester280
UK_Sheffield190
UK_Imperial130
UK_RAL-Tier1932
UK_Brunel80
UK_Edinburgh70
UK_QMUL40
ES_PIC30
UK_Liverpool01

Jobscript

#!/bin/bash
#
# Environment preparation for the job: source the DUNE setup script, set up
# metacat, and optionally prepend user-supplied directories to PRODUCTS.
#
# Optional environment variables read here:
#   DUNESW_DIR   - if non-empty, prepended to PRODUCTS
#   LARRECO_DIR  - if non-empty, prepended to PRODUCTS (after DUNESW_DIR, so
#                  it ends up first in the resulting search path)

source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
setup metacat
export METACAT_SERVER_URL=https://metacat.fnal.gov:9443/dune_meta_prod/app
export METACAT_AUTH_SERVER_URL=https://metacat.fnal.gov:8143/auth/dune

#######################################
# Put a directory at the front of PRODUCTS, warning if it cannot be stat'ed.
# A failed stat is only reported; the directory is prepended regardless.
# Globals:
#   PRODUCTS (read, written, exported)
# Arguments:
#   $1 - short label used in the warning message (e.g. "dunesw")
#   $2 - directory to prepend
# Outputs:
#   stat's report for the directory, or a "failed to stat" line, on stdout
#######################################
prepend_products_dir() {
  local label=$1
  local dir=$2

  # Quoted so that a path containing whitespace or glob characters is
  # passed to stat as a single argument.
  if ! stat "${dir}"; then
    echo "failed to stat ${label} dir"
  fi

  export PRODUCTS="${dir}:${PRODUCTS}"
}

if [ -n "${DUNESW_DIR}" ]; then
  prepend_products_dir dunesw "${DUNESW_DIR}"
fi

if [ -n "${LARRECO_DIR}" ]; then
  prepend_products_dir larreco "${LARRECO_DIR}"
fi


echo "PRODUCTS $PRODUCTS"

# Set up the dunesw software suite.
#   DUNE_VERSION   - version to set up (default v09_90_02d00); the resolved
#                    value stays in DUNE_VERSION for later steps.
#   DUNE_QUALIFIER - qualifier passed to "setup -q" (default e26:prof).
DUNE_VERSION=${DUNE_VERSION:-v09_90_02d00}
# Resolve the qualifier once so the failure message reports the value that
# was really used, including the default, instead of an empty string.
dune_qualifier=${DUNE_QUALIFIER:-e26:prof}

if ! setup dunesw "${DUNE_VERSION}" -q "${dune_qualifier}"; then
  echo "Failed to setup dunesw ${DUNE_VERSION} ${dune_qualifier}"
  exit 1
fi

#######################################
# Optionally put a user-supplied directory at the front of FHICL_FILE_PATH.
# Globals:
#   USE_INPUT_FCL   (read) - any non-empty value enables this step
#   INPUT_DIR       (read) - directory to prepend; required when enabled
#   FHICL_FILE_PATH (read, written, exported)
# Outputs:
#   stat's report for INPUT_DIR and the resulting path, on stdout
# Exits the script:
#   1 - USE_INPUT_FCL is set but INPUT_DIR is empty
#   0 - INPUT_DIR cannot be stat'ed (job ends without an error status)
#######################################
use_input_fcl_dir() {
  if [ -z "${USE_INPUT_FCL}" ]; then
    return 0
  fi

  # Quoted: an unquoted, whitespace-containing value would make the test
  # itself fail with a syntax error rather than evaluate the string.
  if [ -z "${INPUT_DIR}" ]; then
    echo "Error, INPUT_DIR is undefined but user requested USE_INPUT_FCL"
    exit 1
  fi

  if ! stat "${INPUT_DIR}"; then
    echo "Failed to stat input dir. Exiting safely"
    exit 0
  fi

  # Exported so child processes see the new path even when FHICL_FILE_PATH
  # was not already in the environment.
  export FHICL_FILE_PATH="${INPUT_DIR}:${FHICL_FILE_PATH}"
  echo "FCL PATH: $FHICL_FILE_PATH"
}

use_input_fcl_dir

#######################################
# Optionally put METADATA_DIR at the front of PYTHONPATH and list it.
# A failed stat is only reported; the directory is listed and prepended
# regardless.
# Globals:
#   METADATA_DIR (read) - directory to add; the step is skipped when empty
#   PYTHONPATH   (read, written, exported)
# Outputs:
#   stat's report (or a "failed to stat" line) and the directory listing,
#   on stdout
#######################################
add_metadata_dir() {
  if [ -z "${METADATA_DIR}" ]; then
    return 0
  fi

  if ! stat "${METADATA_DIR}"; then
    echo "failed to stat metadata dir"
  fi

  echo "metadata dir contents:"
  ls "${METADATA_DIR}"
  # Exported so python child processes see the new entry even when
  # PYTHONPATH was not already in the environment.
  export PYTHONPATH="${METADATA_DIR}:${PYTHONPATH}"
}

add_metadata_dir


# Choose the FHiCL configuration: keep a caller-supplied FCL, otherwise fall
# back to the standard keepup configuration. Then log the expanded
# configuration and the active dunesw product.
: "${FCL:=standard_reco_calibration_protodunehd_keepup.fcl}"

printf '%s\n' "FCL dump:"
fhicl-dump ${FCL}

printf '%s\n' "DUNESW loc:"
ups active | grep dunesw

# Fall back to a single processor when JUSTIN_PROCESSORS is unset or empty.
: "${JUSTIN_PROCESSORS:=1}"

echo "Justin processors: ${JUSTIN_PROCESSORS}"

# Export the same count under each *_NUM_THREADS variable.
for thread_var in \
  TF_NUM_THREADS \
  OPENBLAS_NUM_THREADS \
  JULIA_NUM_THREADS \
  MKL_NUM_THREADS \
  NUMEXPR_NUM_THREADS \
  OMP_NUM_THREADS; do
  export "${thread_var}=${JUSTIN_PROCESSORS}"
done
unset thread_var

# Dump the complete environment into the job output.
echo "printing env"
env

echo "Will use justin-get-file"
# Request an input file. The reply is handled as space-separated fields:
# field 1 is taken as the DID and field 2 as the PFN.
DID_PFN_RSE=$($JUSTIN_PATH/justin-get-file)

# An empty reply means no file was handed out; stop with status 0.
if [ -z "${DID_PFN_RSE}" ]; then
  echo "Could not get file"
  exit 0
fi

did=$(echo ${DID_PFN_RSE} | cut -d' ' -f1)
pfn=$(echo ${DID_PFN_RSE} | cut -d' ' -f2)
echo "pfn: ${pfn}"
echo "did: ${did}"
# UTC timestamp of when the file was obtained (not referenced by the lines
# visible in this script).
now=$(date -u +"%Y%m%dT%H%M%SZ")

# Number of events handed to lar: -1 unless NEVENTS is set and non-empty.
nevents=-1
if [ -n "${NEVENTS}" ]; then
  nevents=${NEVENTS}
fi

echo "Running reco"
touch reco.log

# starttime/endtime are epoch seconds with a ".0" suffix; they are passed to
# the metadata step later in the script.
starttime="$(date +%s).0"
# NOTE(review): the XRootD POSIX preload library is presumably what lets lar
# open the remote PFN directly - confirm. The redirection into reco.log is
# commented out, so lar's output goes straight to the job's stdout/stderr.
LD_PRELOAD=$XROOTD_LIB/libXrdPosixPreload.so \
  lar -c ${FCL} -n ${nevents} ${pfn} #>reco.log 2>&1
larExit=$?
endtime="$(date +%s).0"

# Propagate lar's exit status on failure. Nothing in this script writes to
# reco.log while the redirection above is disabled.
if ! [ "${larExit}" -eq 0 ]; then
  echo "Error in reco"
  cat reco.log
  exit $larExit
fi

#######################################
# Locate the ROOT files produced by the reco step and report them.
# Globals:
#   output_reco_file (written) - names in the current directory matching
#                                *keepup.root (empty when nothing matches)
#   output_mr_file   (written) - names matching *keepup_hists.root
# Outputs:
#   A three-line report on stdout, the two file lines indented by a tab.
#######################################
find_output_files() {
  # compgen -G expands the glob inside bash, so file names are not obtained
  # by parsing ls output and a missing file does not produce an ls error.
  output_reco_file=$(compgen -G '*keepup.root')
  output_mr_file=$(compgen -G '*keepup_hists.root')

  echo "Output files:"
  # printf interprets \t; bash's plain echo printed the two characters
  # backslash and "t" literally.
  printf '\tReco: %s\n' "${output_reco_file}"
  printf '\tHists: %s\n' "${output_mr_file}"
}

find_output_files

# Create the metadata JSON files for both outputs with the meta_maker module.
# Each call is checked the same way the lar step is: on a non-zero status the
# script reports the failure and exits with that status, so the success
# messages below are only printed when the JSON was really produced.
#
# Reads: starttime, endtime, DUNE_VERSION, did, FCL, JUSTIN_SCOPE,
#        output_reco_file, output_mr_file
# Writes: <output_reco_file>.json and <output_mr_file>.json

echo "Forming reco metadata"
python -m meta_maker --start_time "$starttime" --end_time "$endtime" --file_format "artroot" \
                     --app_family "dunesw" --app_name "reco" --app_version "${DUNE_VERSION}" \
                     --data_tier "full-reconstructed" --get_events -p "$did" \
                     --fcl "$FCL" \
                     -f "${JUSTIN_SCOPE}:$output_reco_file" -j "${output_reco_file}.json"
metaExit=$?
if [ $metaExit -ne 0 ]; then
  echo "Error forming reco metadata"
  exit $metaExit
fi
echo "Ran successfully"
cat "${output_reco_file}.json"

echo "Forming hist metadata"
# The reco JSON written above is passed as the parent (--parent_as_json).
python -m meta_maker --start_time "$starttime" --end_time "$endtime" --file_format "root" \
                     --app_family "dunesw" --app_name "reco" --app_version "${DUNE_VERSION}" \
                     --data_tier "root-tuple" -p "${output_reco_file}.json" \
                     --parent_as_json \
                     --fcl "$FCL" \
                     -f "${JUSTIN_SCOPE}:$output_mr_file" -j "${output_mr_file}.json"
metaExit=$?
if [ $metaExit -ne 0 ]; then
  echo "Error forming hist metadata"
  exit $metaExit
fi
echo "formed"
cat "${output_mr_file}.json"

####TEMPORARY FOR HIT CHECKER RUNNING
#output_reco_file=`ls *keepup.root`
#echo "Reco? ${output_reco_file}"
#output_mr_file=`echo ${output_reco_file}  | sed -e "s/keepup/keepup_hists/"`
#echo lar -c run_pdhd_hit_checker.fcl \
#     -T ${output_mr_file} \
#     ${output_reco_file}
#lar -c run_pdhd_hit_checker.fcl \
#     -T ${output_mr_file} \
#     ${output_reco_file}
#larExit=$?
#
#if [ $larExit -ne 0 ]; then
#  echo "Error in hit check"
#  exit $larExit
#fi
#



# Record the PFN handled by this job in justin-processed-pfns.txt.
# NOTE(review): presumably justIN reads this file to mark the input as
# processed - confirm against the justIN jobscript documentation.
printf '%s\n' "$pfn" > justin-processed-pfns.txt
justIN time: 2024-09-29 11:15:16 UTC       justIN version: 01.01.08