justIN           Dashboard       Workflows       Jobs       AWT       Sites       Storages       Docs       Login

Workflow 1616, Stage 1

Priority: 50
Processors: 1
Wall seconds: 80000
RSS bytes: 2621440000 (2500 MiB)
Max distance for inputs: 30.0
Enabled input RSEs
Enabled output RSEs
Enabled sites
Scope: fardet-vd
Events for this stage

Output patterns

 DestinationPatternLifetimeFor next stage
0Rucio fardet-vd:fardet-vd-reco2_ritm1780305_nu_numu2nue_nue2nutau_fhc_skip10000_limit5000_1616*reco2.root2592000False
0Rucio fardet-vd:fardet-vd-reco2ana_ritm1780305_nu_numu2nue_nue2nutau_fhc_skip10000_limit5000_1616*reco2_ana.root2592000False
0Rucio fardet-vd:fardet-vd-pandora_ritm1780305_nu_numu2nue_nue2nutau_fhc_skip10000_limit5000_1616*.pndr2592000False
0Rucio fardet-vd:fardet-vd-validation_ritm1780305_nu_numu2nue_nue2nutau_fhc_skip10000_limit5000_1616*Validation.root2592000False

Environment variables

Name | Value
CONVERT_DIR | /cvmfs/fifeuser2.opensciencegrid.org/sw/dune/075c0e56c49d2ffca32bc443c9c2fcc3efb66d49
DETPROD | VD
HCPROD | FHC

File states

Total files | Finding | Unallocated | Allocated | Outputting | Processed | Not found | Failed
5000 | 0 | 0 | 0 | 0 | 4428 | 0 | 572

Job states

Total | Submitted | Started | Processing | Outputting | Finished | Notused | Aborted | Stalled | Jobscript error | Outputting failed | None processed
15538 | 0 | 0 | 0 | 0 | 5228 | 0 | 2645 | 4431 | 2732 | 502 | 0
Files processed0020020040040060060080080010001000120012001400140016001600180018002000200022002200240024002600260028002800Feb-21 10:00Feb-21 14:00Feb-21 18:00Feb-21 22:00Feb-22 02:00Feb-22 06:00Feb-22 10:00Feb-22 14:00Feb-22 18:00Feb-22 22:00Feb-23 02:00Feb-23 06:00Feb-23 10:00Feb-23 14:00Feb-23 18:00Feb-23 22:00Feb-24 02:00Feb-24 06:00Feb-24 10:00Files processedBin start timesNumber per binUK_RAL-PPDUS_UChicagoUK_SheffieldCZ_FZUUK_RAL-Tier1US_FNAL-T1US_FNAL-FermiG…US_FNAL-FermiGridUS_BNLUS_SU-ITSCERNCA_VictoriaFR_CCIN2P3US_UConn-HPCCH_UNIBE-LHEPUK_LiverpoolUK_QMULCA_SFUES_CIEMATUK_ManchesterUS_NotreDameIT_CNAFUS_WisconsinUK_OxfordUK_LancasterNL_NIKHEFUK_BristolUS_ColoradoUK_BrunelES_PICUK_ImperialUS_PuertoRicoUK_Edinburgh
Replicas per RSE5000490.025244.55000269.975244.50000000000003Replicas per RSEDUNE_FR_CCIN2P3_DISK (50%)DUNE_US_FNAL_DISK_STAGE (50%)

RSEs used

NameInputsOutputs
DUNE_FR_CCIN2P3_DISK729382
DUNE_US_FNAL_DISK_STAGE51515502
RAL_ECHO03519
DUNE_US_BNL_SDCC02214
RAL-PP01970
NIKHEF01859
PRAGUE01375
DUNE_CERN_EOS01331
DUNE_ES_PIC0306
SURFSARA0138
DUNE_UK_LANCASTER_CEPH031

Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)

File reset events, by site

SiteAllocatedOutputting
US_FNAL-FermiGrid187842
NL_NIKHEF10559
US_BNL105138
UK_RAL-Tier1104981
US_UChicago101346
US_SU-ITS9493
CZ_FZU92152
UK_Manchester89358
CERN55131
US_Colorado3965
US_NotreDame3832
UK_Imperial3463
UK_RAL-PPD33145
US_Wisconsin2469
UK_QMUL2477
IT_CNAF1368
CA_Victoria1129
CH_UNIBE-LHEP928
UK_Oxford930
ES_CIEMAT925
UK_Lancaster85
UK_Brunel723
UK_Sheffield617
US_PuertoRico64
US_FNAL-T1540
ES_PIC535
UK_Bristol428
US_UConn-HPC314
UK_Liverpool114
CA_SFU112
UK_Edinburgh119
NL_SURFsara055
FR_CCIN2P3010
US_UCSD03

Jobscript

#!/bin/bash
#
# justIN jobscript: runs a reco2 `lar` step followed by an analysis `lar` step
# on one input file and prepares metadata JSON files for the outputs.
#
# Environment variables read by this script (defaults in brackets):
#   DETPROD              detector selection: HD, VD, TEST or TEST2 [HD]
#   HCPROD               FHC or RHC [FHC]
#   DUNE_VERSION         dunesw version to set up [v09_81_00d02]
#   DUNE_QUALIFIER       dunesw qualifier [e26:prof]
#   NEVENTS              value passed to `lar -n` [-1]
#   CAMPAIGN             campaign passed to extractor_prod.py [fd_mc_2023a_reco2]
#   CONVERT_DIR          directory holding convert_metadata.py
#   INPUT_TAR_DIR_LOCAL  directory holding test.fcl / test2.fcl (TEST modes)
#   JUSTIN_PATH          directory holding justin-get-file
#   JUSTIN_PROCESSORS    value copied into the *_NUM_THREADS variables [1]
#
# Bring in the DUNE software environment and the metacat client.
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
setup metacat

# Point the metacat client at the server and auth endpoints used below.
METACAT_SERVER_URL=https://metacat.fnal.gov:9443/dune_meta_prod/app
METACAT_AUTH_SERVER_URL=https://metacat.fnal.gov:8143/auth/dune
export METACAT_SERVER_URL METACAT_AUTH_SERVER_URL


##TODO -- write usage



DETECTOR="${DETPROD:-HD}"
HC="${HCPROD:-FHC}"

#######################################
# Choose the fcl files and metadata detector type for a detector / HC pair.
# Globals:
#   RECOFCL, ANAFCL, MDDETTYPE (written)
#   INPUT_TAR_DIR_LOCAL (read, TEST and TEST2 only)
# Arguments:
#   $1 - detector: HD, VD, TEST or TEST2
#   $2 - HC setting: FHC or RHC (not consulted for TEST / TEST2)
# Outputs:
#   A message on stdout when the request is not supported.
# Returns:
#   0 on success, 1 for an unsupported detector or HC setting.
#######################################
select_fcls() {
  local detector=$1
  local hc=$2
  local nu_tag

  case "${detector}" in
    TEST)
      RECOFCL="${INPUT_TAR_DIR_LOCAL}/test.fcl"
      return 0
      ;;
    TEST2)
      RECOFCL="${INPUT_TAR_DIR_LOCAL}/test2.fcl"
      return 0
      ;;
    HD | VD) ;;
    *)
      echo "NEED TO REQUEST EITHER HD OR VD. USER REQUESTED ${detector}"
      return 1
      ;;
  esac

  # Without this check an unknown HC value left RECOFCL unset and the reco
  # step was later started with an empty fcl name.
  case "${hc}" in
    FHC) nu_tag="nu" ;;
    RHC) nu_tag="anu" ;;
    *)
      echo "NEED TO REQUEST EITHER FHC OR RHC. USER REQUESTED ${hc}"
      return 1
      ;;
  esac

  if [ "${detector}" = "HD" ]; then
    ANAFCL="standard_ana_dune10kt_1x2x6.fcl"
    MDDETTYPE="fardet-hd"
    RECOFCL="standard_reco2_dune10kt_${nu_tag}_1x2x6.fcl"
  else
    ANAFCL="anatree_dunevd10kt_1x8x6_3view_30deg_geov3.fcl"
    MDDETTYPE="fardet-vd"
    RECOFCL="reco2_dunevd10kt_${nu_tag}_1x8x6_3view_30deg_geov3.fcl"
  fi
  return 0
}

## Build up the fcls to use
if ! select_fcls "${DETECTOR}" "${HC}"; then
  exit 1
fi

echo "Using Reco fcl: ${RECOFCL}"
echo "and Ana fcl: ${ANAFCL}"


# Set up the dunesw release; version and qualifier may be overridden through
# DUNE_VERSION and DUNE_QUALIFIER.
setup dunesw "${DUNE_VERSION:-v09_81_00d02}" -q "${DUNE_QUALIFIER:-e26:prof}"

# Dump the resulting environment into the job log.
echo "printing env"
env

# Fall back to a single processor when justIN did not provide a count.
JUSTIN_PROCESSORS="${JUSTIN_PROCESSORS:-1}"

echo "Justin processors: ${JUSTIN_PROCESSORS}"

# Force every *_NUM_THREADS variable to the processor count.
for thread_var in \
  TF_NUM_THREADS \
  OPENBLAS_NUM_THREADS \
  JULIA_NUM_THREADS \
  MKL_NUM_THREADS \
  NUMEXPR_NUM_THREADS \
  OMP_NUM_THREADS; do
  export "${thread_var}=${JUSTIN_PROCESSORS}"
done
unset thread_var

#
echo "Will use justin-get-file"
#
# Ask justIN for the next input. The reply is read as whitespace-separated
# fields: the DID first, the file location second; anything after is ignored.
DID_PFN_RSE=$("${JUSTIN_PATH}/justin-get-file")

##Check that any file was returned
# An empty reply is not treated as an error: the job simply ends.
if [ -z "${DID_PFN_RSE}" ]; then
  echo "Could not get file"
  exit 0
fi

# Split with `read` instead of two `echo | cut` pipelines; with cut a reply
# holding a single field silently produced FILE equal to the DID.
read -r DID FILE _rest <<< "${DID_PFN_RSE}"
if [ -z "${DID}" ] || [ -z "${FILE}" ]; then
  echo "Could not parse justin-get-file reply: ${DID_PFN_RSE}"
  exit 1
fi
echo "DID: ${DID}"

# Keep the input file's metadata in old_md.json; it is handed to the
# metadata converter later in the script.
if metacat file show -mj "${DID}" > old_md.json; then
  echo "old metadata:"
  cat old_md.json
else
  echo "Could not retrieve old metadata"
  exit 1
fi

#
#
# UTC timestamp; it becomes part of the output file names further down.
now=$(date -u +"%Y%m%dT%H%M%SZ")

#### Run reco2
# All lar output goes to reco2.log; only its tail is shown on failure.
lar -c "${RECOFCL}" -n "${NEVENTS:--1}" "$FILE" >reco2.log 2>&1
larExit=$?
echo "Reco step lar exit code $larExit"

# Error -- show the end of the log and exit immediately with lar's status.
if (( larExit != 0 )); then
  tail -100 reco2.log
  exit $larExit
fi

# Success -- move on to the next step.
echo "Moving on to analysis "

# The TEST configurations stop after the reco step.
case "${DETECTOR}" in
  TEST | TEST2)
    echo "Done testing. Exiting"
    exit 0
    ;;
esac
# Locate the reco2 output by globbing directly instead of parsing `ls`.
# Exactly one match is required: with zero or several matches the old code
# built a broken name and the failure only surfaced in a later step.
reco2_candidates=( *reco2.root )
if [ "${#reco2_candidates[@]}" -ne 1 ] || [ ! -e "${reco2_candidates[0]}" ]; then
  echo "Expected exactly one *reco2.root output, found: ${reco2_candidates[*]}"
  exit 1
fi
ORIG_OUTFILE="${reco2_candidates[0]}"

# New base name: the original name with its first "reco2.root" removed,
# followed by _<timestamp>_reco2 (the extension is re-added where needed).
OUTFILE="${ORIG_OUTFILE/reco2.root/}_${now}_reco2"

if ! mv -- "${ORIG_OUTFILE}" "${OUTFILE}.root"; then
  echo "Failed to rename ${ORIG_OUTFILE} to ${OUTFILE}.root"
  exit 1
fi

### Get the metadata
##TODO -- convert this is the right file extensions
# Run the extractor on the renamed reco2 file and capture its stdout as the
# JSON file next to it. Paths and the campaign are quoted so that unusual
# values are passed through as single arguments.
extractor_prod.py --infile "${OUTFILE}.root" \
                  --campaign "${CAMPAIGN:-fd_mc_2023a_reco2}" \
                  --requestid ritm1780305 --no_crc > "${OUTFILE}.root.json"
extractorExit=$?

# Error -- exit immediately with the extractor's own status.
if [ "${extractorExit}" -ne 0 ]; then
  echo "Failed to extract md"
  exit "${extractorExit}"
fi

# Success !
echo "Extracted metadata"

### Convert the metadata to metacat
##TODO -- make sure the RCDS behavior is correct
# Inputs handed to the converter: the extracted JSON, the reco fcl name, the
# input file's old metadata, and the input DID as parent. Every expansion is
# quoted so each value reaches the converter as exactly one argument.
python "${CONVERT_DIR}/convert_metadata.py" \
  -i "${OUTFILE}.root.json" \
  -c "${RECOFCL}" \
  -j old_md.json \
  --app "art.reco2" \
  --app_ver "${DUNE_VERSION:-v09_81_00d02}" \
  --det "${MDDETTYPE}" \
  --parent "${DID}"
converterExit=$?

# Error -- exit immediately with the converter's own status.
if [ "${converterExit}" -ne 0 ]; then
  echo "Failed to convert md"
  exit "${converterExit}"
fi

# Success !
echo "Converted metadata"

#######################################
# Write a metadata JSON for a pandora side output, derived from the reco2
# metadata: the "full-reconstructed" text becomes "pandora-info", "artroot"
# becomes the given file format, and lines matching art.file_format are
# dropped.
# Arguments:
#   $1 - source JSON (reco2 metadata)
#   $2 - destination JSON
#   $3 - replacement for "artroot" (e.g. binary, root)
# Returns:
#   sed's exit status (non-zero when the source cannot be read).
#######################################
derive_pandora_metadata() {
  local src_json=$1
  local dst_json=$2
  local file_format=$3

  sed -e 's/full-reconstructed/pandora-info/' \
      -e "s/artroot/${file_format}/" \
      -e '/art.file_format/d' \
      -- "${src_json}" > "${dst_json}"
}

##If running VD, there will also be pandora files. Rename these
if [ "${DETECTOR}" = "VD" ]; then
  all_good=true
  mv Validation.root "${OUTFILE}_Validation.root" || all_good=false
  mv Pandora_Events.pndr "${OUTFILE}_Pandora_Events.pndr" || all_good=false

  ##Copy over the metadata for pndr and edit the fields correspondingly
  derive_pandora_metadata "${OUTFILE}.root.json" \
    "${OUTFILE}_Pandora_Events.pndr.json" binary || all_good=false

  ##Copy over the metadata for validation.root and edit the fields correspondingly
  derive_pandora_metadata "${OUTFILE}.root.json" \
    "${OUTFILE}_Validation.root.json" root || all_good=false

  # Any failed rename or edit above aborts the job.
  if [ "${all_good}" = false ]; then
    echo "Something failed when editing pandora metadata"
    exit 1
  fi

fi

ANAOUTFILE="${OUTFILE}_ana"
echo "Will output ana to $ANAOUTFILE"

### Run ana
# Input is the renamed reco2 file; lar output is collected in ana.log.
lar -c "${ANAFCL}" -n "${NEVENTS:--1}" -T "$ANAOUTFILE.root" "$OUTFILE.root" >ana.log 2>&1
larExit=$?
echo "Ana step lar exit code $larExit"

### TODO: Handle this correctly
# Error -- show the end of the log and exit immediately with lar's status.
if (( larExit != 0 )); then
  tail -100 ana.log
  exit $larExit
fi

# Success -- log the input file for justin.
echo "$FILE" > justin-processed-pfns.txt

# Derive the ana metadata from the reco2 metadata in one sed pass, applying
# the same edits in the same order as before:
#   - data tier text full-reconstructed -> root-tuple-virtual
#   - artroot -> root
#   - drop lines matching art.file_format and art.process
#   - reco fcl name -> ana fcl name
#   - reco2 -> anatree on lines that do not mention "campaign"
#   - the input DID -> the reco2 output file name
# NOTE(review): RECOFCL and DID are interpolated as sed regular expressions,
# so a '.' in them matches any character and a '/' would break the command.
# File paths are quoted, and the single pass stops at the first failure
# instead of continuing with further in-place edits.
if ! sed -e 's/full-reconstructed/root-tuple-virtual/' \
         -e 's/artroot/root/' \
         -e '/art.file_format/d' \
         -e '/art.process/d' \
         -e "s/${RECOFCL}/${ANAFCL}/" \
         -e '/campaign/!s/reco2/anatree/' \
         -e "s/${DID}/${OUTFILE}.root/" \
         -- "${OUTFILE}.root.json" > "${ANAOUTFILE}.root.json"; then
  echo "Something failed when editing ana metadata"
  exit 1
fi

# List the working directory in the job log before finishing.
ls
exit 0
justIN time: 2024-09-29 11:14:19 UTC       justIN version: 01.01.08