

Workflow 7874, Stage 1

Priority: 50
Processors: 1
Wall seconds: 80000
Image: /cvmfs/singularity.opensciencegrid.org/fermilab/fnal-wn-sl7:latest
RSS bytes: 2097152000 (2000 MiB)
Max distance for inputs: 100.0
Enabled input RSEs: CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled output RSEs: CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled sites: BR_CBPF, CA_Victoria, CERN, CH_UNIBE-LHEP, CZ_FZU, ES_CIEMAT, ES_PIC, FR_CCIN2P3, IN_TIFR, IT_CNAF, NL_NIKHEF, NL_SURFsara, UK_Bristol, UK_Brunel, UK_Durham, UK_Edinburgh, UK_Lancaster, UK_Manchester, UK_Oxford, UK_QMUL, UK_RAL-PPD, UK_RAL-Tier1, UK_Sheffield, US_Caltech, US_Colorado, US_FNAL-FermiGrid, US_FNAL-T1, US_Michigan, US_MIT, US_Nebraska, US_NotreDame, US_PuertoRico, US_SU-ITS, US_Swan, US_UChicago, US_UConn-HPC, US_UCSD, US_Wisconsin
Scope: usertests
Events for this stage

Output patterns

   Destination                                                                 Pattern            Lifetime   For next stage   RSE expression
1  https://fndcadoor.fnal.gov:2880/dune/scratch/users/ahimmel/justin/07874/1   larsmc_*.root
2  https://fndcadoor.fnal.gov:2880/dune/scratch/users/ahimmel/justin/07874/1   otherfiles_*.tgz

Environment variables

Name            Value
INPUT_TAR_DIR   /cvmfs/fifeuser2.opensciencegrid.org/sw/dune/184bb80486b0553a1fe3a909c5796c3d641d2d95

File states

Total files   Finding   Unallocated   Allocated   Outputting   Processed   Not found   Failed
6             0         0             0           0            6           0           0

Job states

Total   Submitted   Started   Processing   Outputting   Finished   Notused   Aborted   Stalled   Jobscript error   Outputting failed   None processed
18      0           0         0            0            18        0         0         0         0                 0                   0
[Plot: Files processed, number per bin vs. bin start times (Jun-23 20:00 to Jun-23 22:00), UK_RAL-Tier1]

RSEs used

Name         Inputs   Outputs
MONTECARLO   6        0
None         0        12

Stats of processed input files are available as CSV or JSON, and stats of uploaded output files as CSV or JSON (up to 10000 files included).
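
For reference, the stage parameters and output patterns above are set at submission time. A minimal sketch of a comparable submission with the justIN command line follows; the flag names and the use of the default scratch output destination are assumptions based on the DUNE justIN tutorial, not taken from this page, and should be checked against "justin simple-workflow --help".

# Hypothetical submission command; flag names are assumptions, not from this page.
justin simple-workflow \
  --monte-carlo 6 \
  --jobscript lar_smc.jobscript \
  --env INPUT_TAR_DIR=/cvmfs/fifeuser2.opensciencegrid.org/sw/dune/184bb80486b0553a1fe3a909c5796c3d641d2d95 \
  --rss-mb 2000 \
  --wall-seconds 80000 \
  --max-distance 100 \
  --scope usertests \
  --output-pattern 'larsmc_*.root' \
  --output-pattern 'otherfiles_*.tgz'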

Jobscript

#!/bin/bash
# lar_smc.jobscript: run one tiny MC job

#set -euo pipefail
echo "Node $(hostname)    start $(date)"

# --- Check that INPUT_TAR_DIR is set -----------------------------------
if [[ -z "${INPUT_TAR_DIR:-}" ]]; then
    echo "ERROR: INPUT_TAR_DIR environment variable is not set" >&2
    exit 101
fi

echo "INPUT_TAR_DIR: $INPUT_TAR_DIR"

if [[ ! -d "$INPUT_TAR_DIR" ]]; then
    echo "ERROR: INPUT_TAR_DIR is set but directory does not exist" >&2
    exit 102
fi

export UPS_OVERRIDE="-H Linux64bit+3.10-2.17"
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
setup -B geant4 v4_10_6_p01c -q+e20:prof
setup -B root v6_20_08a -q+e20:p383b:prof
setup -B cmake v3_19_6
export CXX=$(which g++)
export CC=$(which gcc)
export G4INSTALL=/cvmfs/larsoft.opensciencegrid.org/products/geant4/v4_10_6_p01/Linux64bit+2.6-2.12-e19-prof
export G4DIR=$G4INSTALL/lib64

export LARSMC_HOME="$INPUT_TAR_DIR"
export LD_LIBRARY_PATH="$LARSMC_HOME/lib:$LD_LIBRARY_PATH"

EXE="$LARSMC_HOME/TBMC"

# --- Environment validation ------------------------------------------------
echo "=== Environment Check ==="
echo "Hostname: $(hostname)"
echo "Working directory: $(pwd)"
echo "User: $(whoami)"
echo "Available memory: $(free -h | grep '^Mem:' || echo 'unknown')"
echo "CPU info: $(nproc) cores"
echo "Load average: $(uptime)"
echo "=========================="

# --- Determine which MC job this is ---------------------------------------
did_pfn_rse=$($JUSTIN_PATH/justin-get-file)

# Check if no input files are available
if [[ -z "$did_pfn_rse" ]]; then
    echo "No input files available - exiting successfully"
    exit 0
fi

did=$(echo "$did_pfn_rse" | cut -f1 -d' ')
pfn=$(echo "$did_pfn_rse" | cut -f2 -d' ')
rse=$(echo "$did_pfn_rse" | cut -f3 -d' ')
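
# Note: justin-get-file returns a single line of the form "<did> <pfn> <rse>".
# In this MONTECARLO workflow the inputs are counters rather than real files,
# so the PFN field is a plain integer (1..6 here) and is used directly as an
# index into the GDML list below.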

echo "did=$did"
echo "pfn=$pfn"
echo "rse=$rse"

# --- Set the GDML based on the number in PFN -----------------------------
# Check if GDML is already set
if [[ -n "${GDML:-}" ]]; then
    echo "GDML already set to: $GDML"
else
    # Extract the number from pfn (assuming pfn is just an integer)
    pfn_number=$pfn

    # Hard-coded list of GDML files in order
    declare -a gdml_files=(
        "lars_tallbo_true_h2d22_v2.gdml"
        "lars_tallbo_true_h25d34_v2.gdml"
        "lars_tallbo_true_h50d97_v2.gdml"
        "lars_tallbo_true_h76d33_v2.gdml"
        "lars_tallbo_true_h101d83_v2.gdml"
        "lars_tallbo_true_h127d22_v2.gdml"
    )

    # Check if PFN number is valid
    if [[ $pfn_number -lt 1 || $pfn_number -gt ${#gdml_files[@]} ]]; then
        echo "ERROR: PFN number $pfn_number is out of range. Must be between 1 and ${#gdml_files[@]}." >&2
        echo "Available GDML files: ${#gdml_files[@]}" >&2
        for i in "${!gdml_files[@]}"; do
            echo "  PFN $((i+1)): ${gdml_files[$i]}" >&2
        done
        exit 103
    fi

    # Select GDML file (array is 0-indexed, pfn starts at 1)
    gdml_index=$((pfn_number - 1))
    export GDML=${gdml_files[$gdml_index]}

    echo "PFN number: $pfn_number"
    echo "Selected GDML: $GDML"
fi
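
# Worked example of the mapping above: pfn=1 selects gdml_files[0]
# (lars_tallbo_true_h2d22_v2.gdml) and pfn=6 selects gdml_files[5]
# (lars_tallbo_true_h127d22_v2.gdml).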

echo "Completed setup"
echo "Working directory: $(pwd)"
echo "Available disk space:"
df -h .

# --- Set output filename based on GDML unique identifier -------------------
# Extract the unique part from the GDML filename (e.g., "h2d22" from "lars_tallbo_true_h2d22_v2.gdml")
gdml_uid=$(echo "$GDML" | sed 's/lars_tallbo_true_\(.*\)_v2\.gdml/\1/')
OUTPUT_FILENAME="larsmc_${gdml_uid}.root"
OTHER_FILES_TAR="otherfiles_${gdml_uid}.tgz"

echo "GDML unique ID: $gdml_uid"
echo "Output filename: $OUTPUT_FILENAME"
echo "Other files archive: $OTHER_FILES_TAR"

# --- Check that LARSMC_HOME has TBMC executable --------------------------------
if [[ ! -x "$EXE" ]]; then
    echo "ERROR: LARSMC_HOME does not contain LArSMC executable" >&2
    echo "Expected executable at: $EXE" >&2
    echo "Current contents of LARSMC_HOME:" >&2
    ls -l $LARSMC_HOME >&2
    exit 104
fi

# --- Check that GDML file exists -----------------------------------------------
if [[ -z "${GDML:-}" ]]; then
    echo "ERROR: GDML environment variable is not set" >&2
    exit 105
fi
if [[ ! -f "$LARSMC_HOME/gdml/$GDML" ]]; then
    echo "ERROR: GDML file $GDML not found in LARSMC_HOME/gdml/" >&2
    echo "Current contents of $LARSMC_HOME/gdml:" >&2
    ls -l $LARSMC_HOME/gdml >&2
    exit 106
fi

# --- Check if we have enough time left (assuming we know wall time limit) ---
if [[ -n "${JUSTIN_WALL_SECONDS:-}" ]]; then
    start_time=$(date +%s)
    echo "Job started at timestamp: $start_time"
    echo "Wall time limit: ${JUSTIN_WALL_SECONDS} seconds"
    
    # Reserve some time for cleanup and output
    reserved_time=300  # 5 minutes
    max_sim_time=$((JUSTIN_WALL_SECONDS - reserved_time))
    echo "Maximum simulation time: $max_sim_time seconds"
fi
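
# Note: max_sim_time is computed for logging only; nothing below enforces it.
# One possible guard (not used here) would be coreutils timeout, e.g.
#   timeout "$max_sim_time" "$EXE" ...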

echo "Running TBMC"
echo $EXE -g $LARSMC_HOME/gdml/$GDML -m $LARSMC_HOME/vis.mac -o $OUTPUT_FILENAME

# --- Run the actual simulation -------------------------------------------------
$EXE -g $LARSMC_HOME/gdml/$GDML -m $LARSMC_HOME/vis.mac -o $OUTPUT_FILENAME
exit_code=$?
echo "TBMC simulation completed at $(date) with exit code $exit_code"

# --- Check if simulation succeeded ---------------------------------------------
if [[ $exit_code -ne 0 ]]; then
    echo "ERROR: TBMC simulation failed with exit code $exit_code" >&2
    exit $exit_code
fi

# --- Check if the output file was created -------------------------------------
if [[ ! -f "$OUTPUT_FILENAME" ]]; then
    echo "ERROR: Output file $OUTPUT_FILENAME was not created" >&2
    echo "Current directory contents:" >&2
    ls -l >&2
    exit 107
fi

# --- Validate output file -----------------------------------------------------
output_size=$(stat -c%s "$OUTPUT_FILENAME" 2>/dev/null || echo "0")
if [[ $output_size -lt 1024 ]]; then
    echo "WARNING: Output file $OUTPUT_FILENAME is very small ($output_size bytes)"
    echo "This might indicate a problem with the simulation"
fi

echo "Output file validation:"
echo "  File: $OUTPUT_FILENAME"
echo "  Size: $output_size bytes ($(du -h $OUTPUT_FILENAME | cut -f1))"
echo "  Type: $(file $OUTPUT_FILENAME 2>/dev/null || echo 'unknown')"

# --- Record successful processing of the Monte Carlo counter file -------------
echo "$did" >> justin-processed-dids.txt
echo "Successfully processed Monte Carlo counter: $did"

# --- Create archive of other files ---------------------------------------------
echo "Creating archive of other files..."
echo "Files in working directory before archiving:"
ls -la

# Create list of files to exclude from tarball (not just the main output)
exclude_patterns=(
    "$OUTPUT_FILENAME"
    "justin-processed-dids.txt"
    "core.*"
    "*.tmp"
)

# Build exclude arguments for tar
exclude_args=()
for pattern in "${exclude_patterns[@]}"; do
    exclude_args+=(--exclude="$pattern")
done

# Create tarball with selective exclusions
if tar "${exclude_args[@]}" -czf "$OTHER_FILES_TAR" * 2>/dev/null; then
    echo "Created other files archive: $OTHER_FILES_TAR"
    echo "Archive size: $(du -h $OTHER_FILES_TAR | cut -f1)"
else
    echo "WARNING: Failed to create other files archive or no files to archive"
fi
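
# Optional check (not done here): the archive contents could be listed in the
# job log for later inspection, e.g.  tar -tzf "$OTHER_FILES_TAR" | head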

# --- Resource Usage Summary ---
echo "=== Resource Usage Summary ==="
echo "Peak memory usage:"
if command -v ps >/dev/null 2>&1; then
    ps -o pid,vsz,rss,comm -p $$
fi
echo "Final disk usage:"
du -sh .
echo "Final disk space:"
df -h .
echo "==============================="

# --- Print elapsed time and remaining wall time -------------------------------
if [[ -n "${JUSTIN_WALL_SECONDS:-}" ]]; then
    end_time=$(date +%s)
    elapsed_time=$((end_time - start_time))
    echo "Simulation took $elapsed_time seconds"
    echo "Remaining time: $((JUSTIN_WALL_SECONDS - elapsed_time)) seconds"
fi

echo "Node $(hostname)    end $(date)"