justIN           Dashboard       Workflows       Jobs       AWT       Sites       Storages       Docs       Login

Workflow 2175, Stage 1

Priority: 50
Processors: 1
Wall seconds: 80000
RSS bytes: 5767168000 (5500 MiB)
Max distance for inputs: 30.0
Enabled input RSEs
Enabled output RSEs
Enabled sites
Scope: usertests
Events for this stage

Output patterns

 DestinationPatternLifetimeFor next stage
0Rucio usertests:pdhd_1gev_rerun_test_sce_plus_reco_skip0_2175*reco.root604800False
0Rucio usertests:pdhd_1gev_rerun_test_sce_plus_pdspana_skip0_2175*pdspana.root604800False

Environment variables

Name | Value
DUNESIM_TAR | /cvmfs/fifeuser1.opensciencegrid.org/sw/dune/5aa1c030e818b0176375b02e7ca9d1d1c99f27fa
FCL_DIR | /cvmfs/fifeuser2.opensciencegrid.org/sw/dune/120c67294d536d3c4cc0fbab7129d1deeffd2a82
INPUT_DIR | /cvmfs/fifeuser2.opensciencegrid.org/sw/dune/99ff67fc99d00f6015d396607593e600a84e7b63
TYPE | plus

File states

Total files | Finding | Unallocated | Allocated | Outputting | Processed | Not found | Failed
7414 | 0 | 0 | 0 | 0 | 7413 | 0 | 1

Job states

Total | Submitted | Started | Processing | Outputting | Finished | Notused | Aborted | Stalled | Jobscript error | Outputting failed | None processed
11332 | 0 | 0 | 0 | 0 | 8855 | 0 | 2468 | 8 | 1 | 0 | 0
Files processed00400400800800120012001600160020002000240024002800280032003200May-16 16:00May-16 19:00May-16 22:00May-17 01:00May-17 04:00May-17 07:00May-17 10:00May-17 13:00May-17 16:00May-17 19:00May-17 22:00May-18 01:00May-18 04:00May-18 07:00May-18 10:00May-18 13:00May-18 16:00Files processedBin start timesNumber per binUK_BrunelCERNES_PICUK_SheffieldCZ_FZUNL_SURFsaraUK_LancasterNL_NIKHEFUK_QMULUK_ManchesterIT_CNAFUK_ImperialUK_OxfordUK_RAL-Tier1UK_EdinburghUK_LiverpoolUK_Durham
Replicas per RSE7414380.00057375369.7499999985656Replicas per RSEDUNE_UK_LANCASTER_CEPH (100%)

RSEs used

Name | Inputs | Outputs
DUNE_UK_LANCASTER_CEPH | 9884 | 0
SURFSARA | 0 | 6236
RAL_ECHO | 0 | 2274
DUNE_CERN_EOS | 0 | 2032
QMUL | 0 | 1746
NIKHEF | 0 | 1298
RAL-PP | 0 | 826
PRAGUE | 0 | 194
DUNE_ES_PIC | 0 | 160
DUNE_FR_CCIN2P3_DISK | 0 | 60

Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)

File reset events, by site

Site | Allocated | Outputting
NL_SURFsara | 890 | 0
UK_RAL-Tier1 | 350 | 0
CERN | 328 | 0
NL_NIKHEF | 164 | 0
IT_CNAF | 154 | 1
UK_QMUL | 152 | 0
UK_Manchester | 149 | 0
UK_Brunel | 101 | 0
UK_Edinburgh | 38 | 0
CZ_FZU | 26 | 0
UK_Sheffield | 23 | 0
ES_PIC | 19 | 0
UK_Oxford | 19 | 0
UK_Imperial | 16 | 0
UK_Durham | 15 | 0
UK_Lancaster | 14 | 0
UK_Liverpool | 10 | 0

Jobscript

#!/bin/bash
#

#######################################
# Verify that a path can be stat'ed before the job relies on it.
# Arguments: $1 - path to check
# Outputs:   stat's own output; on failure, a "Failed to stat" message
# Returns:   returns normally when stat succeeds. When stat fails the whole
#            script is terminated with exit status 0 ("Exiting safely"), so
#            the caller never sees a non-zero status from this function.
#######################################
check_tar() {
  # Quote the argument: unquoted, a path containing whitespace or glob
  # characters would be split/expanded into several words and stat would
  # report a spurious failure.
  if ! stat "${1}"; then
    echo "Failed to stat ${1}. Exiting safely"
    exit 0
  fi
}


#These must be defined
# Abort with exit status 1 if any of the required environment variables is
# unset or empty. All three are checked before any path is stat'ed.
for required_var in FCL_DIR DUNESIM_TAR INPUT_DIR; do
  # ${!required_var} is bash indirect expansion: the value of the variable
  # whose name is stored in required_var. Quoted so that values containing
  # whitespace do not break the test.
  if [ -z "${!required_var}" ]; then
    echo "Fatal Must provide ${required_var} env var"
    exit 1
  fi
done
unset required_var

# check_tar ends the job with exit status 0 if a path cannot be stat'ed.
check_tar "${FCL_DIR}"
check_tar "${DUNESIM_TAR}"
check_tar "${INPUT_DIR}"

#stat ${FCL_DIR}
#if [ $? -ne 0 ]; then
#  echo "Failed to stat $FCL_DIR. Exiting safely"
#  exit 0
#fi
#
#stat ${DUNESIM_TAR}
#if [ $? -ne 0 ]; then
#  echo "Failed to stat $DUNESIM_TAR. Exiting safely"
#  exit 0
#fi

# Choose the G4 stage 2 and reco fcl files according to TYPE
# (nominal | plus | minus). TYPE defaults to "nominal" when unset or empty.
#
# Sets:
#   base_g4_name   - file name of the G4 stage 2 fcl
#   base_reco_name - file name of the reco fcl
#   g4_stage2_fcl  - ${FCL_DIR}/<base_g4_name>
#   reco_fcl       - ${FCL_DIR}/<base_reco_name>
TYPE=${TYPE:-"nominal"}
case "${TYPE}" in
  nominal) fcl_prefix=filtered ;;
  plus)    fcl_prefix=plus_sigma_sce ;;
  minus)   fcl_prefix=minus_sigma_sce ;;
  *)
    # Previously an unrecognised TYPE fell through silently and left every
    # fcl variable empty, so the failure only surfaced at a later lar step.
    echo "Fatal Unknown TYPE '${TYPE}': expected nominal, plus or minus"
    exit 1
    ;;
esac

base_g4_name=${fcl_prefix}_g4_stage2.fcl
base_reco_name=${fcl_prefix}_reco.fcl
g4_stage2_fcl=${FCL_DIR}/${base_g4_name}
reco_fcl=${FCL_DIR}/${base_reco_name}

echo "$TYPE sce"
echo "g4_stage2: $g4_stage2_fcl"
echo "reco: $reco_fcl"

# Load the DUNE setup script and set up metacat, pointing it at the servers
# given by the two URLs below.
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
setup metacat
export METACAT_SERVER_URL=https://metacat.fnal.gov:9443/dune_meta_prod/app
export METACAT_AUTH_SERVER_URL=https://metacat.fnal.gov:8143/auth/dune

# Put the user-supplied DUNESIM_TAR area first in PRODUCTS.
export PRODUCTS=$DUNESIM_TAR:$PRODUCTS

#Setup recent lar software suite
#Set up local
DUNE_VERSION=${DUNE_VERSION:-v09_85_00d00}
setup dunesw \
   "${DUNE_VERSION}" \
   -q "${DUNE_QUALIFIER:-e26:prof}"

# Capture the status once and test the captured value. Testing $? after the
# assignment would look at the assignment's own status (always 0), so a
# failed setup would never be noticed.
setup_exit=$?
if [ "${setup_exit}" -ne 0 ]; then
  # Report the qualifier that was actually requested, including the default.
  echo "Failed to setup dunesw $DUNE_VERSION ${DUNE_QUALIFIER:-e26:prof}"
  exit "${setup_exit}"
fi


echo "DUNESW loc:"
ups active | grep dunesw

# Use 1 when JUSTIN_PROCESSORS is unset or empty.
: "${JUSTIN_PROCESSORS:=1}"

echo "Justin processors: ${JUSTIN_PROCESSORS}"

# Export the same value under each of these thread-count variable names.
for thread_var in \
  TF_NUM_THREADS \
  OPENBLAS_NUM_THREADS \
  JULIA_NUM_THREADS \
  MKL_NUM_THREADS \
  NUMEXPR_NUM_THREADS \
  OMP_NUM_THREADS
do
  export "${thread_var}=${JUSTIN_PROCESSORS}"
done
unset thread_var

# Set fhicl file path to include the RCDS fcls
export FHICL_FILE_PATH=${FCL_DIR}:${FHICL_FILE_PATH}

# Dump the full environment, then only the lines mentioning JUSTIN.
echo "printing env"
env

echo "Justin specific env vars"
env | grep JUSTIN

echo "Will use justin-get-file"
#

# Ask justIN for an input file. The reply is treated as space-separated
# fields: field 1 is used as the DID and field 2 as the PFN.
DID_PFN_RSE=$("$JUSTIN_PATH"/justin-get-file)
##Check that any file was returned
if [ -z "${DID_PFN_RSE}" ]; then
  echo "Could not get file"
  exit 0
fi


# ${DID_PFN_RSE} is left unquoted on purpose: word splitting collapses any
# run of whitespace to single spaces before cut splits on ' '.
did=$(echo ${DID_PFN_RSE} | cut -f1 -d' ')
pfn=$(echo ${DID_PFN_RSE} | cut -f2 -d' ')
now=$(date -u +"%Y%m%dT%H%M%SZ")

##TODO -- edit this
# jobid is everything before the first '.' of JUSTIN_JOBSUB_ID
# ("1" when that variable is unset or empty).
jobsub_id=${JUSTIN_JOBSUB_ID:-1}
jobid=${jobsub_id%%.*}
G4Stage1File="PDSP_RerunG4_${jobid}_${JUSTIN_STAGE_ID}_${JUSTIN_WORKFLOW_ID}_${now}.root"


# Value passed to lar -n; -1 unless NEVENTS is set.
nevents=${NEVENTS:--1}

##Only doing ntuple on input
# When ANAONLY is non-empty, run only pduneana_filtered.fcl directly on the
# input file, write its metadata JSON, record the processed PFN and finish
# the job with exit status 0. Any failing step exits with that step's status.
if [ -n "$ANAONLY" ]; then
  # Output name: second ':'-separated field of the DID with its trailing
  # ".root" replaced by "_pdspana.root". The dot is escaped and the pattern
  # anchored so that only a literal ".root" suffix is rewritten (an unescaped
  # ".root" would match the first "<any char>root" anywhere in the name).
  AnaFile=$(echo "${did}" | cut -f2 -d':' | sed -e 's/\.root$/_pdspana.root/')
  echo "Running pdspana only. Output: $AnaFile"
  recostart=$(date +"%s").0
  # pfn is quoted so URL characters such as '?' or '*' are never treated as
  # glob patterns by the shell.
  lar -c "${FCL_DIR}/pduneana_filtered.fcl" \
      -T "${AnaFile}" \
      -n "${nevents}" \
      "${pfn}" >ana.log 2>&1
  larExit=$?
  recoend=$(date +"%s").0

  if [ "${larExit}" -ne 0 ]; then
    echo "Error in ana production"
    cat ana.log
    exit "${larExit}"
  fi
  echo "Ran successfully"

  echo "Forming ana metadata"

  echo "${did}" > input_dids.list

  ### Get the metadata
  python "${INPUT_DIR}/ntuple_prod_utils.py" \
    metadata \
    --root_file "${AnaFile}" \
    --dids input_dids.list \
    --version "${DUNE_VERSION}" \
    --fcl_name pduneana_filtered.fcl \
    --log_file ana.log \
    -o "${AnaFile}.json"
  mdExit=$?
  if [ "${mdExit}" -ne 0 ]; then
    echo "Error in ntuple metdata production"
    exit "${mdExit}"
  fi

  echo "formed"
  cat "${AnaFile}.json"




  echo "$pfn" > justin-processed-pfns.txt
  exit 0
fi



#######################################
# Run one lar step and stop the job if it fails.
# Globals:   larExit (written) - exit status of the lar invocation
# Arguments: $1 - label used in the message "Error in <label> production"
#            $2 - log file receiving lar's stdout and stderr
#            $3... - arguments passed to lar unchanged
# Outputs:   "Ran successfully" on success; on failure the error message
#            followed by the contents of the log file
# Returns:   0 on success; on failure the script exits with lar's status
#######################################
run_lar_stage() {
  local label=$1
  local logfile=$2
  shift 2
  lar "$@" >"${logfile}" 2>&1
  larExit=$?
  if [ "${larExit}" -ne 0 ]; then
    echo "Error in ${label} production"
    cat "${logfile}"
    exit "${larExit}"
  fi
  echo "Ran successfully"
}

# Each step's output name is derived from the previous one by stripping a
# literal ".root" suffix and appending the new suffix. Parameter expansion is
# used instead of sed 's/.root/.../' because that pattern's unescaped dot
# matches the first "<any char>root" anywhere in the name.

### FILTERED G4 STAGE 1 ###
echo "Running rerun g4"
#touch g4.log
starttime=$(date +"%s").0
# pfn is quoted so URL characters such as '?' or '*' are never treated as
# glob patterns by the shell.
run_lar_stage "stage 1" g4.log \
  -c "${FCL_DIR}/rerun_g4_stage1.fcl" \
  -n "${nevents}" \
  -o "${G4Stage1File}" \
  "${pfn}"
endtime=$(date +"%s").0
##########################

#------------ Generic Path -------------------#
G4Stage2File=${G4Stage1File%.root}_${TYPE}_g4_stage2.root
echo "Running ${TYPE} G4 Stage 2"
run_lar_stage "stage 2" g4_s2.log \
  -c "${g4_stage2_fcl}" \
  -o "${G4Stage2File}" \
  "${G4Stage1File}"

DetsimFile=${G4Stage2File%.root}_detsim.root
echo "Running Detsim"
run_lar_stage "detsim" detsim.log \
  -c "${FCL_DIR}/filtered_detsim.fcl" \
  -o "${DetsimFile}" \
  "${G4Stage2File}"

RecoFile=${DetsimFile%.root}_reco.root
echo "Running ${TYPE} SCE Reco"
run_lar_stage "reco" reco.log \
  -c "${reco_fcl}" \
  -o "${RecoFile}" \
  "${DetsimFile}"

AnaFile=${RecoFile%.root}_pdspana.root
echo "Running sigma pdspana"
# NOTE(review): recostart/recoend bracket the pdspana step, not the reco step
# above, yet the reco metadata overrides further down use them as
# core.start_time/core.end_time. Left unchanged; confirm which interval the
# metadata is meant to record.
recostart=$(date +"%s").0
run_lar_stage "sigma ana" ana.log \
  -c "${FCL_DIR}/pduneana_filtered.fcl" \
  -T "${AnaFile}" \
  "${RecoFile}"
recoend=$(date +"%s").0

# Make metadata
# key=value overrides handed to pdhd_meta_writer.py. Kept in an array so that
# each pair reaches the program as exactly one argument, without relying on
# word splitting of a single long string.
overrides=(
  "core.data_tier=full-reconstructed"
  "core.application.version=${DUNE_VERSION}"
  "dune.config_file=${base_reco_name}"
  "core.start_time=${recostart}"
  "core.end_time=${recoend}"
  "core.application.name=reco"
  "core.application=art.reco"
  "dune_mc.space_charge=yes"
)

# Scope prefix used when the reco file is listed as a DID; "usertests" unless
# JUSTIN_SCOPE is set.
namespace=${JUSTIN_SCOPE:-"usertests"}

echo "Forming reco metadata"
#-- ${filenum} \
# ${JUSTIN_JOBSUB_ID} is deliberately left unquoted: when it is unset the
# --jobid option is passed without a value, exactly as before, rather than
# with an empty-string value.
if ! python "${INPUT_DIR}/pdhd_meta_writer.py" \
       --json "${INPUT_DIR}/pdhd_base_meta.json" \
       --overrides "${overrides[@]}" \
       --nevents "${nevents}" \
       --jobid ${JUSTIN_JOBSUB_ID} \
       --parent "${did}" \
       --past_fcls rerun_g4_stage1.fcl \
                   "${base_g4_name}" \
                   filtered_detsim.fcl \
       --past_apps g4_stage1 g4_stage2 detsim \
       --inherit_run \
       --exclude "dune.requestid" \
       -o "${RecoFile}.json"
then
  echo "Error writing reco json"
  exit 1
fi

cat "${RecoFile}.json"
echo "formed"

echo "Forming ana metadata"

# The reco file produced by this job, written as <namespace>:<file name>, is
# the only entry in the DID list handed to ntuple_prod_utils.py.
echo "${namespace}:${RecoFile}" > input_dids.list

### Get the metadata
# All expansions are quoted so each value reaches python as a single
# argument regardless of whitespace or glob characters.
python "${INPUT_DIR}/ntuple_prod_utils.py" \
  metadata \
  --root_file "${AnaFile}" \
  --dids input_dids.list \
  --version "${DUNE_VERSION}" \
  --fcl_name pduneana_filtered.fcl \
  --log_file ana.log \
  --parent_metas "${RecoFile}.json" \
  -o "${AnaFile}.json"
mdExit=$?
if [ "${mdExit}" -ne 0 ]; then
  echo "Error in ntuple metdata production"
  exit "${mdExit}"
fi

echo "formed"
cat "${AnaFile}.json"


###############################################
# Record the input PFN in justin-processed-pfns.txt.
echo "$pfn" > justin-processed-pfns.txt
justIN time: 2024-11-17 03:10:15 UTC       justIN version: 01.01.09