justIN           Dashboard       Workflows       Jobs       AWT       Sites       Storages       Docs       Login

Workflow 1782, Stage 1

Priority50
Processors1
Wall seconds21000
RSS bytes5767168000 (5500 MiB)
Max distance for inputs100.0
Enabled input RSEs
Enabled output RSEs
Enabled sites
Scopehd-protodune
Events for this stage

Output patterns

 DestinationPatternLifetimeFor next stage
0Rucio hd-protodune:pdhd_1gev_beam_cosmics_official_sce_e500_reco_limit_end_skip64000_1782*sce_E500*reco.root7776000False
0Rucio hd-protodune:pdhd_1gev_beam_cosmics_official_sce_off_reco_limit_end_skip64000_1782*sce_off*reco.root7776000False
0Rucio hd-protodune:pdhd_1gev_beam_cosmics_official_sce_e500_pandora_limit_end_skip64000_1782*sce_E500*.pndr7776000False
0Rucio hd-protodune:pdhd_1gev_beam_cosmics_official_sce_off_pandora_limit_end_skip64000_1782*sce_off*.pndr7776000False

Environment variables

NameValue
INPUT_DIR/cvmfs/fifeuser2.opensciencegrid.org/sw/dune/dced620ad7bd201a97f7ad5df759f50927786057
STARTLINE64000

File states

Total filesFindingUnallocatedAllocatedOutputtingProcessedNot foundFailed
7783900007783900

Job states

TotalSubmittedStartedProcessingOutputtingFinishedNotusedAbortedStalledJobscript errorOutputting failedNone processed
8329300007923403394710630
Files processed00100010002000200030003000400040005000500060006000Apr-08 06:00Apr-08 15:00Apr-09 00:00Apr-09 09:00Apr-09 18:00Apr-10 03:00Apr-10 12:00Apr-10 21:00Apr-11 06:00Apr-11 15:00Apr-12 00:00Apr-12 09:00Apr-12 18:00Apr-13 03:00Apr-13 12:00Apr-13 21:00Apr-14 06:00Apr-14 15:00Apr-15 00:00Apr-15 09:00Apr-15 18:00Files processedBin start timesNumber per binFR_CCIN2P3NL_NIKHEFUS_UChicagoUK_LiverpoolUK_LancasterUS_FNAL-T1UK_BrunelES_PICUK_ImperialCZ_FZUUK_OxfordUK_QMULUK_BristolCA_VictoriaUK_SheffieldUS_BNLUS_WisconsinNL_SURFsaraUS_FNAL-FermiG…US_FNAL-FermiGridUK_EdinburghCA_SFUUK_ManchesterUS_ColoradoUS_UConn-HPCUS_PuertoRicoUS_UCSDIT_CNAFUS_NotreDame

RSEs used

NameInputsOutputs
MONTECARLO809570
SURFSARA085054
DUNE_US_BNL_SDCC056132
DUNE_US_FNAL_DISK_STAGE054408
QMUL032620
RAL-PP018943
RAL_ECHO018937
NIKHEF015740
DUNE_FR_CCIN2P3_DISK015458
PRAGUE09996
DUNE_ES_PIC02274
DUNE_CERN_EOS01554

Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)

File reset events, by site

SiteAllocatedOutputting
US_NotreDame10550
US_FNAL-FermiGrid59511
US_UCSD2252
US_PuertoRico1580
US_UConn-HPC1524
NL_SURFsara1500
US_FNAL-T11400
UK_Brunel1060
UK_Sheffield850
UK_Imperial780
ES_PIC641
US_UChicago331
UK_Bristol290
US_Wisconsin270
CA_Victoria193
NL_NIKHEF120
FR_CCIN2P3120
UK_Manchester110
UK_Oxford70
CA_SFU50
US_BNL50
UK_Lancaster41
CZ_FZU40
US_Colorado31
UK_Liverpool30
UK_Edinburgh30
IT_CNAF20

Jobscript

#!/bin/bash
#
# justIN jobscript for ProtoDUNE-HD 1 GeV beam+cosmics MC production:
# gen -> g4 stage1 -> (SCE-off and/or SCE-on) g4 stage2 -> detsim -> reco.
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh

# First check the existence of the input list;
# there are sometimes spurious site issues preventing CVMFS access.
# Exit 0 ("safe" exit) so justIN treats this as a site problem rather
# than a jobscript error.
if ! stat "${INPUT_DIR}/all_h4_prod_nums.txt"; then
  echo "Failed to stat input list. Exiting safely"
  exit 0
fi

#Setup recent lar software suite
# DUNE_VERSION / DUNE_QUALIFIER may be overridden from the environment.
DUNE_VERSION=${DUNE_VERSION:-v09_85_00d00}
if ! setup dunesw \
   "${DUNE_VERSION}" \
   -q "${DUNE_QUALIFIER:-e26:prof}"; then
  # Previously this only logged the failure and carried on; exit here
  # instead, consistent with every other error check in this script --
  # without dunesw the later lar stages can only fail obscurely.
  echo "Failed to setup dunesw $DUNE_VERSION $DUNE_QUALIFIER"
  exit 1
fi

echo "DUNESW loc:"
ups active | grep dunesw

# Number of processors granted by justIN; default to 1 if unset/empty.
if [ -z "${JUSTIN_PROCESSORS}" ]; then
  JUSTIN_PROCESSORS=1
fi

echo "Justin processors: ${JUSTIN_PROCESSORS}"

# Keep the common math/ML thread pools within our processor allocation.
export TF_NUM_THREADS=${JUSTIN_PROCESSORS}
export OPENBLAS_NUM_THREADS=${JUSTIN_PROCESSORS}
export JULIA_NUM_THREADS=${JUSTIN_PROCESSORS}
export MKL_NUM_THREADS=${JUSTIN_PROCESSORS}
export NUMEXPR_NUM_THREADS=${JUSTIN_PROCESSORS}
export OMP_NUM_THREADS=${JUSTIN_PROCESSORS}

echo "printing env"
env

echo "Will use justin-get-file"
# justin-get-file prints "did pfn rse"; an empty result means no file is
# available, which is a safe (exit 0) condition for justIN.
DID_PFN_RSE=$("$JUSTIN_PATH"/justin-get-file)
if [ "${DID_PFN_RSE}" == "" ] ; then
  echo "Could not get file"
  exit 0
fi
pfn=$(echo "${DID_PFN_RSE}" | cut -f2 -d' ')

# For this MC workflow the "pfn" is really a line number into the input
# list; allow an explicit LINENUMBER override from the environment.
if [ -z "${LINENUMBER}" ] ; then
  LINENUMBER=$pfn
fi

STARTLINE=${STARTLINE:-0}
echo "Adding ${STARTLINE} to linenumber ${LINENUMBER}"
# 10# forces base 10 so zero-padded line numbers are not read as octal.
LINENUMBER=$(( 10#$LINENUMBER + STARTLINE ))
echo "$LINENUMBER"

# Get the file number and iteration within the file that we want to use.
# Each line of all_h4_prod_nums.txt is: "<filenum> <iter> <nevents>".
get_nums=true
line=$(sed -n "${LINENUMBER}p" "${INPUT_DIR}/all_h4_prod_nums.txt") || get_nums=false
echo "using ${LINENUMBER} from input file"
echo "${line}"
# Intentional word-splitting of the whitespace-separated fields.
split=($line)
filenum=${split[0]}
iter=${split[1]}
nevents=${split[2]}
# Plain assignments never fail, so "assignment || get_nums=false" was a
# no-op; validate the parsed fields explicitly instead.
if [ -z "$filenum" ] || [ -z "$iter" ] || [ -z "$nevents" ]; then
  get_nums=false
fi

#art Event counting is 1-indexed,
#but need 0-indexed for particle vector
#so add 1 here -- 1 is subtracted in module
eventnum=$(( iter * 10 + 1 )) || get_nums=false
#run=$((10**6 + 1)) || get_nums=false
#subrun=$(($run*10**5 + $filenum)) || get_nums=false
if [ $get_nums = false ]; then
  echo "Failed to get numbers from input file"
  exit 1
fi
echo "File: ${filenum}"
echo "Iter: ${iter}"
echo "eventnum: ${eventnum}"
echo "subrun: ${subrun}"  # NOTE(review): subrun is never set (its computation is commented out above)


echo "jobsub_id: ${JUSTIN_JOBSUB_ID:-1}"
# Use the numeric part of the jobsub id (before the first '.') as the run.
run=$(echo "${JUSTIN_JOBSUB_ID:-1}" | awk -F '.' '{print $1}')
echo "run: $run"


  #-e "s/StartEvent: 0/StartEvent: ${iter}/" \
  #-e "s/StartEvent: 1/StartEvent: ${eventnum}/" \
# Edit the template fcl in place of hard-coded values: unique output
# file name plus run/subrun/event numbering for this job.
if ! sed "${INPUT_DIR}/pdhd_prod.fcl" \
  -e "s/_1.root/_${filenum}.root/" \
  -e "s/firstRun: 1/firstRun: ${run}/" \
  -e "s/firstSubRun: 1/firstSubRun: ${filenum}/" \
  -e "s/firstEvent: 1/firstEvent: ${eventnum}/" > prod.fcl; then
  echo "Could not edit pdhd_prod.fcl numbers"
  exit 1
fi

echo "prod.fcl"
cat prod.fcl


nevents=${NEVENTS:-$nevents} #Set from env override or from above
echo "nevents: $nevents"

# A zero-event line is not an error: mark the pfn processed and exit
# successfully so justIN does not retry it.
if [ "$nevents" -eq 0 ]; then
  echo "No events in this line -- exiting safely"
  echo "$pfn" > justin-processed-pfns.txt
  exit
fi

# Run Generator
echo "Running prod"
now=$(date -u +"%Y%m%dT%H%M%SZ")
prodname="prod_beam_p1GeV_cosmics_protodunehd_${now}_${run}_${pfn}"
lar -c prod.fcl \
    -o "${prodname}.root" \
    -n "${nevents}" >prod.log 2>&1
prodExit=$?
if [ $prodExit -ne 0 ]; then
  echo "Error in prod"
  tail -100 prod.log
  exit $prodExit
fi


# Stage 1 G4 (shared by both the SCE-off and SCE-on chains below)
echo "Running g4stage1"
g4stage1_name="${prodname}_g4_stage1"
# Epoch seconds with ".0" appended (float-style timestamp for metadata).
g4start=$(date +"%s").0
lar -c standard_g4_protodunehd_stage1.fcl \
    "${prodname}.root" \
    -o "${g4stage1_name}.root" >g4stage1.log 2>&1
g4stage1Exit=$?
if [ $g4stage1Exit -ne 0 ]; then
  echo "Error in g4stage1"
  tail -100 g4stage1.log
  exit $g4stage1Exit
fi
g4end=$(date +"%s").0

# Make metadata
#overrides="dune_mc.h4_input_file=H4_v34b_1GeV_-27.7_10M_${filenum}.root \
# core.start_time=${g4start} \
# core.end_time=${g4end} \
#"
#python ${INPUT_DIR}/pdhd_meta_writer.py \
#       --json ${INPUT_DIR}/pdhd_base_meta.json \
#       --overrides ${overrides} \
#       --run ${filenum} \
#       --event ${eventnum} \
#       --nevents ${nevents} \
#       -o ${g4stage1_name}.root.json
#if [ $? -ne 0 ]; then
#  echo "Error writing g4stage1 json"
#  exit 1
#fi

# SCE-off chain (runs unless NO_SCE_OFF is set in the environment):
# G4 stage 2 -> detsim -> reco -> metadata json + Pandora output.
# NOTE(review): the unquoted "[ -z ${NO_SCE_OFF} ]" collapses to the
# one-argument test "[ -z ]" when the variable is unset, which is true,
# so the branch runs by default as intended.
if [ -z ${NO_SCE_OFF} ]; then
  # Stage 2 G4 -- No SCE
  echo "Running g4stage2"
  g4stage2_name="${g4stage1_name}_g4_stage2_sce_off"
  lar -c  standard_g4_protodunehd_stage2.fcl \
      ${g4stage1_name}.root \
      -o ${g4stage2_name}.root >g4stage2.log 2>&1
  g4stage2Exit=$?
  if [ $g4stage2Exit -ne 0 ]; then
    echo "Error in g4stage2"
    tail -100 g4stage2.log
    exit $g4stage2Exit
  fi
  
  # Detsim
  echo "Running detsim"
  detsim_name="${g4stage2_name}_detsim"
  lar -c standard_detsim_protodunehd.fcl \
      ${g4stage2_name}.root \
      -o ${detsim_name}.root  >detsim.log 2>&1
  detsimExit=$?
  if [ $detsimExit -ne 0 ]; then
    echo "Error in detsim"
    tail -100 detsim.log
    exit $detsimExit
  fi
  
  # Reco
  echo "Running reco"
  reco_name="${detsim_name}_reco"
  # Wall-clock start/end: epoch seconds with ".0" appended so the
  # metadata writer receives float-style timestamps.
  recostart=`date +"%s"`.0
  lar -c standard_reco_protodunehd.fcl \
      ${detsim_name}.root \
      -o ${reco_name}.root >reco.log 2>&1
  recoExit=$?
  if [ $recoExit -ne 0 ]; then
    echo "Error in reco"
    tail -100 reco.log
    exit $recoExit
  fi
  recoend=`date +"%s"`.0
  
  # Make metadata
  # NOTE(review): ${overrides} is expanded UNQUOTED below, apparently on
  # purpose, so each key=value pair word-splits into its own --overrides
  # argument; do not quote it without checking pdhd_meta_writer.py.
  overrides="core.data_tier=full-reconstructed \
   core.application.version=${DUNE_VERSION} \
   dune.config_file=standard_reco_protodunehd.fcl \
   core.start_time=${recostart} \
   core.end_time=${recoend} \
   core.application.name=reco \
   core.application=art.reco \
   dune_mc.h4_input_file=H4_v34b_1GeV_-27.7_10M_${filenum}.root \
  "
  
  namespace=${JUSTIN_SCOPE:-"usertests"}
  
  #-- ${filenum} \
  python ${INPUT_DIR}/pdhd_meta_writer.py \
         --json ${INPUT_DIR}/pdhd_base_meta.json \
         --overrides ${overrides} \
         --event ${eventnum} \
         --nevents ${nevents} \
         --filenum ${filenum} \
         --jobid ${JUSTIN_JOBSUB_ID} \
         --past_fcls prod_beam_cosmics_1GeV_protodunehd.fcl \
                     standard_g4_protodunehd_stage1.fcl \
                     standard_g4_protodunehd_stage2.fcl \
                     standard_detsim_protodunehd.fcl \
         --past_apps gen g4_stage1 g4_stage2 detsim \
         -o ${reco_name}.root.json
  
  
  if [ $? -ne 0 ]; then
    echo "Error writing reco json"
    exit 1
  fi

  ## TODO -- CHECK WITH LEIGH IF WE NEED EVERY PNDR FILE
  mv Pandora_Events.pndr ${reco_name}_Pandora_Events.pndr
  if [ $? -ne 0 ]; then
    echo "Error mving/renaming pndr file"
    exit 1
  fi

  ## Write-out PNDR metadata
  ## Copy from reco json but change data tier and file format
  overrides="core.data_tier=pandora_info \
   core.file_format=binary \
  "
  # NOTE(review): unlike the reco json above, the exit status of this
  # metadata writer call is not checked -- confirm this is intentional.
  python ${INPUT_DIR}/pdhd_meta_writer.py \
         --json ${reco_name}.root.json \
         --overrides ${overrides} \
         --filenum ${filenum} \
         --event ${eventnum} \
         --nevents ${nevents} \
         --jobid ${JUSTIN_JOBSUB_ID} \
         -o ${reco_name}_Pandora_Events.pndr.json

  # Dump the per-stage logs into the job stdout for debugging.
  echo "All logs:"
  echo "-----------G4 STAGE2------------"
  cat g4stage2.log
  echo "--------------------------------"
  echo "-----------DETSIM---------------"
  cat detsim.log
  echo "--------------------------------"
  echo "-----------RECO-----------------"
  cat reco.log
  echo "--------------------------------"
fi

# Stage 2 G4 -- With SCE
# SCE-on chain (runs unless NO_SCE_ON is set): same pipeline as the
# SCE-off branch but using the space-charge-enabled E500 fcls, reusing
# the shared g4 stage1 output.
if [ -z ${NO_SCE_ON} ]; then
 echo "Running g4stage2 sce on"
 g4stage2_name="${g4stage1_name}_g4_stage2_sce_E500"
 lar -c  standard_g4_protodunehd_stage2_sce_E500.fcl \
     ${g4stage1_name}.root \
     -o ${g4stage2_name}.root >g4stage2.log 2>&1
 g4stage2Exit=$?
 if [ $g4stage2Exit -ne 0 ]; then
   echo "Error in g4stage2"
   tail -100 g4stage2.log
   exit $g4stage2Exit
 fi
 
 # Detsim
 echo "Running detsim sce on"
 detsim_name="${g4stage2_name}_detsim"
 lar -c standard_detsim_protodunehd.fcl \
     ${g4stage2_name}.root \
     -o ${detsim_name}.root  >detsim.log 2>&1
 detsimExit=$?
 if [ $detsimExit -ne 0 ]; then
   echo "Error in detsim"
   tail -100 detsim.log
   exit $detsimExit
 fi
 
 # Reco -- With SCE Corrections
 ##TODO -- MAKE SURE TO REPLACE WITH THE SCE ON ONE
 echo "Running reco sce on"
 reco_name="${detsim_name}_reco"
 #reco_sce_fcl=${INPUT_DIR}/test_sce_E500.fcl
 reco_sce_fcl=standard_reco_protodunehd_sce_E500.fcl
 recostart=`date +"%s"`.0
 lar -c  $reco_sce_fcl \
     ${detsim_name}.root \
     -o ${reco_name}.root >reco.log 2>&1
 recoExit=$?
 if [ $recoExit -ne 0 ]; then
   echo "Error in reco"
   tail -100 reco.log
   exit $recoExit
 fi
 recoend=`date +"%s"`.0
 
 # Make metadata
 # NOTE(review): ${overrides} is expanded UNQUOTED below so each
 # key=value pair becomes its own --overrides argument; keep it unquoted
 # unless pdhd_meta_writer.py is known to accept a single string.
 overrides="core.data_tier=full-reconstructed \
  core.application.version=${DUNE_VERSION} \
  dune.config_file=${reco_sce_fcl} \
  core.start_time=${recostart} \
  core.end_time=${recoend} \
  core.application.name=reco \
  core.application=art.reco \
  dune_mc.space_charge=yes \
  dune_mc.h4_input_file=H4_v34b_1GeV_-27.7_10M_${filenum}.root \
 "
 
 namespace=${JUSTIN_SCOPE:-"usertests"}
 
 python ${INPUT_DIR}/pdhd_meta_writer.py \
        --json ${INPUT_DIR}/pdhd_base_meta.json \
        --overrides ${overrides} \
        --filenum ${filenum} \
        --event ${eventnum} \
        --nevents ${nevents} \
        --jobid ${JUSTIN_JOBSUB_ID} \
        --past_fcls prod_beam_cosmics_1GeV_protodunehd.fcl \
                    standard_g4_protodunehd_stage1.fcl \
                    standard_g4_protodunehd_stage2_sce_E500.fcl \
                    standard_detsim_protodunehd.fcl \
        --past_apps gen g4_stage1 g4_stage2 detsim \
        -o ${reco_name}.root.json
        #--parent "${namespace}:${g4stage1_name}.root" \
  if [ $? -ne 0 ]; then
    echo "Error writing reco json"
    exit 1
  fi


  ## TODO -- CHECK WITH LEIGH IF WE NEED EVERY PNDR FILE
  mv Pandora_Events.pndr ${reco_name}_Pandora_Events.pndr
  if [ $? -ne 0 ]; then
    echo "Error mving/renaming pndr file"
    exit 1
  fi

  ## Write-out PNDR metadata
  ## Copy from reco json but change data tier and file format
  overrides="core.data_tier=pandora_info \
   core.file_format=binary \
  "
  # NOTE(review): exit status of this writer call is not checked --
  # confirm this is intentional (it matches the SCE-off branch).
  python ${INPUT_DIR}/pdhd_meta_writer.py \
         --json ${reco_name}.root.json \
         --overrides ${overrides} \
         --filenum ${filenum} \
         --event ${eventnum} \
         --nevents ${nevents} \
         --jobid ${JUSTIN_JOBSUB_ID} \
         -o ${reco_name}_Pandora_Events.pndr.json

  # Dump the per-stage logs into the job stdout for debugging.
  echo "All logs:"
  echo "-----------G4 STAGE2------------"
  cat g4stage2.log
  echo "--------------------------------"
  echo "-----------DETSIM---------------"
  cat detsim.log
  echo "--------------------------------"
  echo "-----------RECO-----------------"
  cat reco.log
  echo "--------------------------------"

fi

echo "$pfn" > justin-processed-pfns.txt
justIN time: 2024-11-17 03:59:23 UTC       justIN version: 01.01.09