justIN           Dashboard       Workflows       Jobs       AWT       Sites       Storages       Docs       Login

Workflow 2174, Stage 1

Priority: 50
Processors: 1
Wall seconds: 80000
RSS bytes: 5767168000 (5500 MiB)
Max distance for inputs: 30.0
Enabled input RSEs
Enabled output RSEs
Enabled sites
Scope: usertests
Events for this stage

Output patterns

  | Destination | Pattern | Lifetime | For next stage
0 | Rucio usertests:pdhd_1gev_rerun_test_sce_plus_reco_skip0_2174 | *reco.root | 604800 | False
0 | Rucio usertests:pdhd_1gev_rerun_test_sce_plus_pdspana_skip0_2174 | *pdspana.root | 604800 | False

Environment variables

Name | Value
DUNESIM_TAR | /cvmfs/fifeuser1.opensciencegrid.org/sw/dune/5aa1c030e818b0176375b02e7ca9d1d1c99f27fa
FCL_DIR | /cvmfs/fifeuser2.opensciencegrid.org/sw/dune/120c67294d536d3c4cc0fbab7129d1deeffd2a82
INPUT_DIR | /cvmfs/fifeuser2.opensciencegrid.org/sw/dune/99ff67fc99d00f6015d396607593e600a84e7b63
TYPE | plus

File states

Total filesFindingUnallocatedAllocatedOutputtingProcessedNot foundFailed
77100000771000

Job states

TotalSubmittedStartedProcessingOutputtingFinishedNotusedAbortedStalledJobscript errorOutputting failedNone processed
117650000917702388200000
Files processed0010001000200020003000300040004000May-16 16:00May-16 20:00May-17 00:00May-17 04:00May-17 08:00May-17 12:00May-17 16:00May-17 20:00May-18 00:00May-18 04:00May-18 08:00May-18 12:00May-18 16:00May-18 20:00May-19 00:00May-19 04:00May-19 08:00May-19 12:00May-19 16:00May-19 20:00May-20 00:00Files processedBin start timesNumber per binUK_BrunelIT_CNAFCERNES_PICUK_SheffieldNL_SURFsaraUK_LancasterNL_NIKHEFUK_RAL-Tier1UK_QMULUK_EdinburghUK_ManchesterUK_ImperialCZ_FZUUK_LiverpoolUK_OxfordUK_Durham
Replicas per RSE7710380.00057375369.7499999985656Replicas per RSEMANCHESTER (100%)

RSEs used

NameInputsOutputs
MANCHESTER101060
SURFSARA06318
RAL_ECHO02712
QMUL01930
DUNE_CERN_EOS01842
NIKHEF01310
RAL-PP0940
PRAGUE0156
DUNE_ES_PIC0152
DUNE_FR_CCIN2P3_DISK060

Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)

File reset events, by site

SiteAllocatedOutputting
NL_SURFsara9141
UK_RAL-Tier13550
CERN2250
UK_Manchester1781
UK_QMUL1590
NL_NIKHEF1551
IT_CNAF1500
UK_Brunel841
UK_Sheffield290
UK_Edinburgh260
UK_Oxford240
ES_PIC220
UK_Liverpool200
UK_Lancaster180
CZ_FZU130
UK_Imperial110
UK_Durham90

Jobscript

#!/bin/bash
#

#######################################
# Check that a path can be stat'ed before the job relies on it.
# The stat output itself is left on stdout so it shows up in the job log.
# Arguments:
#   $1 - path to check
# Outputs:
#   stat's report on success; "Failed to stat <path>. Exiting safely" on
#   failure (stdout).
# Returns:
#   0 when the path is reachable. When it is not, the whole script exits
#   with status 0 (the "safe" exit named in the message) instead of
#   returning.
#######################################
check_tar() {
  local target=$1
  # Quoted so that a path containing whitespace is passed to stat as a
  # single argument rather than being split into several bogus paths.
  if ! stat -- "${target}"; then
    echo "Failed to stat ${target}. Exiting safely"
    exit 0
  fi
}


# These environment variables must be defined and non-empty: they are used
# below as the locations of the fcl files (FCL_DIR), of the products area
# prepended to PRODUCTS (DUNESIM_TAR) and of the python helpers (INPUT_DIR).
for required_var in FCL_DIR DUNESIM_TAR INPUT_DIR; do
  # ${!name} is bash indirect expansion: the value of the variable whose
  # name is stored in required_var. Quoted so that a value containing
  # whitespace cannot break the test.
  if [ -z "${!required_var}" ]; then
    echo "Fatal Must provide ${required_var} env var"
    exit 1
  fi
done
unset required_var

# All three are set; now make sure each path is actually reachable on this
# worker. check_tar ends the job with status 0 if one is not.
check_tar "${FCL_DIR}"
check_tar "${DUNESIM_TAR}"
check_tar "${INPUT_DIR}"

#stat ${FCL_DIR}
#if [ $? -ne 0 ]; then
#  echo "Failed to stat $FCL_DIR. Exiting safely"
#  exit 0
#fi
#
#stat ${DUNESIM_TAR}
#if [ $? -ne 0 ]; then
#  echo "Failed to stat $DUNESIM_TAR. Exiting safely"
#  exit 0
#fi

# Pick the G4 stage 2 and reco fcl files for the requested variation.
# TYPE defaults to "nominal"; "plus" and "minus" select the *_sigma_sce
# variants. Sets:
#   base_g4_name / base_reco_name - bare fcl file names (used in metadata)
#   g4_stage2_fcl / reco_fcl      - the same files under ${FCL_DIR}
TYPE=${TYPE:-"nominal"}
case "${TYPE}" in
  nominal) fcl_prefix="filtered" ;;
  plus)    fcl_prefix="plus_sigma_sce" ;;
  minus)   fcl_prefix="minus_sigma_sce" ;;
  *)
    # Fail before any file is requested or processed: with an unknown TYPE
    # the fcl variables would be empty and the later lar calls could not
    # work.
    echo "Fatal Unknown TYPE '${TYPE}' (expected nominal, plus or minus)"
    exit 1
    ;;
esac

base_g4_name=${fcl_prefix}_g4_stage2.fcl
base_reco_name=${fcl_prefix}_reco.fcl
g4_stage2_fcl=${FCL_DIR}/${base_g4_name}
reco_fcl=${FCL_DIR}/${base_reco_name}

echo "$TYPE sce"
echo "g4_stage2: $g4_stage2_fcl"
echo "reco: $reco_fcl"

# Bring the `setup` command into the shell, then set up metacat and point
# the client at the server/auth URLs below.
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
setup metacat
export METACAT_SERVER_URL=https://metacat.fnal.gov:9443/dune_meta_prod/app
export METACAT_AUTH_SERVER_URL=https://metacat.fnal.gov:8143/auth/dune

# Put the user-supplied area first in PRODUCTS.
# NOTE(review): presumably so that `setup dunesw` resolves to the build
# under DUNESIM_TAR rather than a central one -- confirm.
export PRODUCTS=$DUNESIM_TAR:$PRODUCTS

# Set up the lar software suite. Version and qualifier may be overridden
# through DUNE_VERSION / DUNE_QUALIFIER.
DUNE_VERSION=${DUNE_VERSION:-v09_85_00d00}
dune_qualifier=${DUNE_QUALIFIER:-e26:prof}
setup dunesw \
   "${DUNE_VERSION}" \
   -q "${dune_qualifier}"

# Capture the status once and test the captured value: testing $? after the
# assignment would only see the assignment's own (always zero) status.
setup_exit=$?
if [ "${setup_exit}" -ne 0 ]; then
  echo "Failed to setup dunesw $DUNE_VERSION ${dune_qualifier}"
  exit $setup_exit
fi


# Log which dunesw product ended up active.
echo "DUNESW loc:"
ups active | grep dunesw

# Fall back to a single processor when JUSTIN_PROCESSORS is not provided.
JUSTIN_PROCESSORS=${JUSTIN_PROCESSORS:-1}

echo "Justin processors: ${JUSTIN_PROCESSORS}"

# Give every threading-related variable the same value as JUSTIN_PROCESSORS.
for thread_var in \
    TF_NUM_THREADS \
    OPENBLAS_NUM_THREADS \
    JULIA_NUM_THREADS \
    MKL_NUM_THREADS \
    NUMEXPR_NUM_THREADS \
    OMP_NUM_THREADS; do
  export "${thread_var}=${JUSTIN_PROCESSORS}"
done

# Set fhicl file path to include the RCDS fcls
export FHICL_FILE_PATH=${FCL_DIR}:${FHICL_FILE_PATH}

# Dump the full environment, then the JUSTIN-related subset, into the log.
echo "printing env"
env

echo "Justin specific env vars"
env | grep JUSTIN

echo "Will use justin-get-file"
#

# Ask for one input file. The reply is handled below as space-separated
# fields: field 1 is used as the DID and field 2 as the PFN.
DID_PFN_RSE=$($JUSTIN_PATH/justin-get-file)

## Check that any file was returned; with nothing to do, end with status 0
if [ -z "${DID_PFN_RSE}" ]; then
  echo "Could not get file"
  exit 0
fi

did=$(echo ${DID_PFN_RSE} | cut -d' ' -f1)
pfn=$(echo ${DID_PFN_RSE} | cut -d' ' -f2)
now=$(date -u +"%Y%m%dT%H%M%SZ")

##TODO -- edit this
# Keep only the part of the jobsub id before the first '.', defaulting to 1
# when JUSTIN_JOBSUB_ID is not set.
jobid=${JUSTIN_JOBSUB_ID:-1}
jobid=${jobid%%.*}
G4Stage1File="PDSP_RerunG4_${jobid}_${JUSTIN_STAGE_ID}_${JUSTIN_WORKFLOW_ID}_${now}.root"

# Number of events handed to lar via -n; the default is -1.
nevents=${NEVENTS:--1}

## Only doing ntuple on input
# When ANAONLY is non-empty, run just the pdspana ntuple step directly on
# the input file, write its metadata json, record the input as processed
# and stop. Every path through this branch ends in an exit.
if [ -n "$ANAONLY" ]; then
  # Output name: the input DID without its scope, with the trailing ".root"
  # replaced by "_pdspana.root". Only a literal ".root" suffix is stripped,
  # so names that merely contain "root" elsewhere are left intact.
  input_name=$(printf '%s\n' "${did}" | cut -d':' -f2)
  AnaFile="${input_name%.root}_pdspana.root"
  echo "Running pdspana only. Output: $AnaFile"

  lar -c "${FCL_DIR}/pduneana_filtered.fcl" \
      -T "${AnaFile}" \
      -n "${nevents}" \
      "${pfn}" >ana.log 2>&1
  larExit=$?

  if [ ${larExit} -ne 0 ]; then
    # Put the lar log into the job output before propagating lar's status.
    echo "Error in ana production"
    cat ana.log
    exit ${larExit}
  fi
  echo "Ran successfully"

  echo "Forming ana metadata"

  # The ntuple's only input is the file we were handed.
  echo "${did}" > input_dids.list

  ### Get the metadata
  python "${INPUT_DIR}/ntuple_prod_utils.py" \
    metadata \
    --root_file "${AnaFile}" \
    --dids input_dids.list \
    --version "${DUNE_VERSION}" \
    --fcl_name pduneana_filtered.fcl \
    --log_file ana.log \
    -o "${AnaFile}.json"
  mdExit=$?
  if [ ${mdExit} -ne 0 ]; then
    echo "Error in ntuple metdata production"
    exit ${mdExit}
  fi

  echo "formed"
  cat "${AnaFile}.json"

  # Record the input PFN as processed only after everything succeeded.
  echo "$pfn" > justin-processed-pfns.txt
  exit 0
fi



#######################################
# Run one lar stage with stdout+stderr captured in a log file.
# Globals:
#   larExit (written) - exit status of the lar invocation
# Arguments:
#   $1   - stage label used in the error message
#   $2   - log file receiving lar's output
#   $3.. - arguments passed to lar unchanged
# Outputs:
#   "Ran successfully" on success; on failure an error line followed by
#   the contents of the log file.
# Returns:
#   Only on success. On failure the script exits with lar's status.
#######################################
run_lar_stage() {
  local stage_label=$1
  local stage_log=$2
  shift 2

  lar "$@" >"${stage_log}" 2>&1
  larExit=$?

  if [ ${larExit} -ne 0 ]; then
    echo "Error in ${stage_label} production"
    cat "${stage_log}"
    exit ${larExit}
  fi
  echo "Ran successfully"
}

# Each stage's output name is the previous one with its trailing ".root"
# replaced by a stage-specific suffix.

### FILTERED G4 STAGE 1 ###
echo "Running rerun g4"
# starttime/endtime bracket stage 1; nothing later in this script reads them.
starttime=$(date +"%s").0
run_lar_stage "stage 1" g4.log \
    -c "${FCL_DIR}/rerun_g4_stage1.fcl" \
    -n "${nevents}" \
    -o "${G4Stage1File}" \
    "${pfn}"
endtime=$(date +"%s").0
##########################

#------------ Generic Path -------------------#
G4Stage2File="${G4Stage1File%.root}_${TYPE}_g4_stage2.root"
echo "Running ${TYPE} G4 Stage 2"
run_lar_stage "stage 2" g4_s2.log \
    -c "${g4_stage2_fcl}" \
    -o "${G4Stage2File}" \
    "${G4Stage1File}"

DetsimFile="${G4Stage2File%.root}_detsim.root"
echo "Running Detsim"
run_lar_stage "detsim" detsim.log \
    -c "${FCL_DIR}/filtered_detsim.fcl" \
    -o "${DetsimFile}" \
    "${DetsimFile%_detsim.root}.root"

RecoFile="${DetsimFile%.root}_reco.root"
echo "Running ${TYPE} SCE Reco"
run_lar_stage "reco" reco.log \
    -c "${reco_fcl}" \
    -o "${RecoFile}" \
    "${DetsimFile}"

AnaFile="${RecoFile%.root}_pdspana.root"
echo "Running sigma pdspana"
# NOTE(review): recostart/recoend bracket this pdspana step, yet they are
# later written into the *reco* file's metadata as core.start_time /
# core.end_time. Confirm whether they were meant to bracket the reco stage
# above instead; behaviour is left unchanged here.
recostart=$(date +"%s").0
run_lar_stage "sigma ana" ana.log \
    -c "${FCL_DIR}/pduneana_filtered.fcl" \
    -T "${AnaFile}" \
    "${RecoFile}"
recoend=$(date +"%s").0

# Make metadata
# key=value pairs handed to the metadata writer after --overrides. Held in
# an array so each pair is passed as exactly one argument instead of
# relying on word-splitting of a multi-line string.
overrides=(
  "core.data_tier=full-reconstructed"
  "core.application.version=${DUNE_VERSION}"
  "dune.config_file=${base_reco_name}"
  "core.start_time=${recostart}"
  "core.end_time=${recoend}"
  "core.application.name=reco"
  "core.application=art.reco"
  "dune_mc.space_charge=yes"
)

# Scope used when the reco file is referred to as a DID further down.
namespace=${JUSTIN_SCOPE:-"usertests"}

echo "Forming reco metadata"
# Write ${RecoFile}.json starting from the base json, with the input DID as
# parent and the earlier stages' fcls/apps listed in the order they ran.
# NOTE(review): JUSTIN_JOBSUB_ID is deliberately left unquoted so that the
# existing behaviour when it is unset (no value follows --jobid) is kept;
# decide whether a default or an explicit failure is wanted.
if ! python "${INPUT_DIR}/pdhd_meta_writer.py" \
       --json "${INPUT_DIR}/pdhd_base_meta.json" \
       --overrides "${overrides[@]}" \
       --nevents "${nevents}" \
       --jobid ${JUSTIN_JOBSUB_ID} \
       --parent "${did}" \
       --past_fcls rerun_g4_stage1.fcl \
                   "${base_g4_name}" \
                   filtered_detsim.fcl \
       --past_apps g4_stage1 g4_stage2 detsim \
       --inherit_run \
       --exclude "dune.requestid" \
       -o "${RecoFile}.json"; then
  echo "Error writing reco json"
  exit 1
fi

cat "${RecoFile}.json"
echo "formed"

echo "Forming ana metadata"

# The ntuple was made from the reco file produced by this job, so that file
# (written as <namespace>:<name>) is listed as its input.
echo "${namespace}:$RecoFile" > input_dids.list

### Get the metadata
# Paths are quoted so each is passed as a single argument. The reco json
# written earlier is supplied through --parent_metas.
python "${INPUT_DIR}/ntuple_prod_utils.py" \
  metadata \
  --root_file "${AnaFile}" \
  --dids input_dids.list \
  --version "${DUNE_VERSION}" \
  --fcl_name pduneana_filtered.fcl \
  --log_file ana.log \
  --parent_metas "${RecoFile}.json" \
  -o "${AnaFile}.json"
mdExit=$?
if [ ${mdExit} -ne 0 ]; then
  echo "Error in ntuple metdata production"
  exit ${mdExit}
fi

echo "formed"
cat "${AnaFile}.json"


###############################################
# Reached only when every stage and both metadata steps succeeded: record
# the input PFN in justin-processed-pfns.txt.
# NOTE(review): presumably this file is how the input gets marked as
# processed -- confirm against the justIN jobscript documentation.
echo "$pfn" > justin-processed-pfns.txt
justIN time: 2024-11-17 03:17:18 UTC       justIN version: 01.01.09