Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
2b6a44e
add data avaibility scripts
Feb 18, 2025
c79c331
remove files
Feb 18, 2025
b063746
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 18, 2025
475f7db
use pwd for .py
Feb 18, 2025
a246967
avoid strax_data
Feb 18, 2025
44db985
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 18, 2025
24c06b1
update
Feb 18, 2025
4423a6d
Merge branch 'data_avaibility_per_tag' of github.com:XENONnT/utilix i…
Feb 18, 2025
c826773
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 18, 2025
96ceb80
remove print
Feb 18, 2025
3d00cfe
update
Feb 18, 2025
edfa8b7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 18, 2025
15674e6
use cutax for offline
Feb 27, 2025
9f30b0e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 27, 2025
6519698
update
Feb 28, 2025
b4734cf
update
Feb 28, 2025
1f6dadb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 28, 2025
576e922
update
Feb 28, 2025
57dd087
Merge branch 'data_avaibility_per_tag' of github.com:XENONnT/utilix i…
Feb 28, 2025
29337b9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 28, 2025
f81f2d3
add print format for wiki
Feb 28, 2025
e153748
Merge branch 'data_avaibility_per_tag' of github.com:XENONnT/utilix i…
Feb 28, 2025
e3e4c2e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 28, 2025
1cd36b5
Merge branch 'master' into data_avaibility_per_tag
GiovanniVolta Feb 28, 2025
53562ce
add try/except for data corrupted and not available
Feb 28, 2025
9ae197f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 28, 2025
3d89438
add try/except for data corrupted and not available
Feb 28, 2025
1da9f35
add st to safe_is_stored
Feb 28, 2025
1a7b0b8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 28, 2025
144a42d
debug
Feb 28, 2025
9713690
Merge branch 'data_avaibility_per_tag' of github.com:XENONnT/utilix i…
Feb 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ dist/
build/
.venv
*.log
**/strax_data/
244 changes: 244 additions & 0 deletions utilix/scripts/data_avaibility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
import argparse
import sys
import platform
import strax
import straxen
import pandas as pd
import numpy as np
from datetime import datetime


def _str2bool(value):
    """Convert a command-line string to a boolean for argparse.

    Accepts (case-insensitively) ``true/t/yes/y/1`` and ``false/f/no/n/0``.
    A value that already is a ``bool`` is returned unchanged.

    Raises:
        argparse.ArgumentTypeError: If the value is none of the spellings above.
    """
    if isinstance(value, bool):
        return value
    lowered = value.lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected (True/False).")


def parse_args(argv=None):
    """Parse the command line arguments of the data-availability script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``container``, ``cutax_location``,
        ``context``, ``global_config``, ``include_tags``, ``exclude_tags``,
        ``plugins``, ``time_range`` and ``check_peaks``.

    Exits via ``parser.error`` (``SystemExit`` with code 2) when the arguments
    are invalid, including ``--context offline`` without ``--global_config``.
    """
    parser = argparse.ArgumentParser(
        description="Process Strax context and calculate availability percentages."
    )

    parser.add_argument(
        "--container",
        type=str,
        help="Container name for setting up the environment.",
        required=True,
    )

    parser.add_argument(
        "--cutax_location",
        type=str,
        help="Cutax location, as read from the setup.sh of the container.",
        required=True,
    )

    parser.add_argument(
        "--context",
        choices=["online", "offline"],
        required=True,
        help="Choose the Strax context: online or offline.",
    )

    parser.add_argument(
        "--global_config",
        type=str,
        help="Global config for offline context (required for offline).",
    )

    parser.add_argument(
        "--include_tags", type=str, nargs="*", help='Tags to include, e.g., "*sr0*"'
    )

    parser.add_argument(
        "--exclude_tags",
        type=str,
        nargs="*",
        default=[
            "flash",
            "ramp_up",
            "ramp_down",
            "anode_off",
            "abandon",
            "hot_spot",
            "missing_one_pmt",
            "messy",
            "bad",
        ],
        help="Tags to exclude (default: predefined tags)",
    )

    parser.add_argument(
        "--plugins",
        type=str,
        nargs="*",
        default=None,
        help="Plugins to include for availability calculation, e.g. "
        "--plugins peak_basics event_basics. "
        "If not provided, the list is determined by the --check_peaks flag.",
    )

    # "--time_range" is accepted as an alias so that every option can be written
    # with underscores; argparse derives dest="time_range" from the first name.
    parser.add_argument(
        "--time-range",
        "--time_range",
        type=str,
        nargs=2,
        metavar=("START_DATE", "END_DATE"),
        help="Time range for filtering, given as two separate values in the format "
        "YYYY-MM-DD YYYY-MM-DD (e.g., --time-range 2023-01-01 2023-12-31)",
    )

    parser.add_argument(
        "--check_peaks",
        type=_str2bool,
        nargs="?",
        const=True,  # If argument is given without value, default to True
        default=False,  # Default to False if not provided
        help="Check below peaks if True, above peaks if False (default). "
        "Below peaks: lone_hits, peaklets, merged_s2s, hitlets_nv. "
        "Above peaks: peak_basics, event_basics. "
        "Ignored when --plugins is given.",
    )

    args = parser.parse_args(argv)

    # Fail fast with a usage message instead of erroring later, after the
    # environment has already been partially set up.
    if args.context == "offline" and not args.global_config:
        parser.error("--global_config is required when --context is offline.")

    return args


def _container_at_most(container, reference):
    """Return True if the *container* tag is not newer than *reference*.

    Dotted tags made only of integers (e.g. "2023.05.10") are compared
    component-wise as numbers, so multi-digit components order correctly
    ("2023.05.10" is newer than "2023.05.2"). Any other tag falls back to a
    plain string comparison.
    """
    try:
        container_key = tuple(int(part) for part in container.split("."))
        reference_key = tuple(int(part) for part in reference.split("."))
    except ValueError:
        return container <= reference
    return container_key <= reference_key


def initialize_straxen(
    context_type, global_config, container, cutax=None, output_folder="./strax_data"
):
    """Create the strax context used for the availability check.

    Args:
        context_type: "online" (``straxen.contexts.xenonnt_online``) or
            "offline" (``cutax.contexts.xenonnt_offline``).
        global_config: Passed as ``xedocs_version`` to the offline context;
            must be non-empty when ``context_type`` is "offline".
        container: Container tag; on hosts whose name contains "midway" it
            decides whether the local rucio path is configured.
        cutax: The imported ``cutax`` module; required for the offline context.
        output_folder: Forwarded to the context as ``output_folder``.

    Returns:
        The created context, with extra read-only data directories appended
        when running on a "midway" host.

    Raises:
        ValueError: If ``context_type`` is unknown, or if the offline context
            is requested without ``global_config`` or without ``cutax``.
    """
    # Validate up front so that a bad call fails with a clear message before
    # any context is built.
    if context_type not in ("online", "offline"):
        raise ValueError(
            f"Unknown context type {context_type!r}; expected 'online' or 'offline'."
        )
    if context_type == "offline":
        if not global_config:
            raise ValueError("Global config is required for offline context.")
        if cutax is None:
            raise ValueError("The cutax module is required for offline context.")

    # Initialize the context arguments
    context_args = {"output_folder": output_folder}

    node = platform.node()
    on_midway = "midway" in node

    print("")
    print("Login node:\n", node)

    # Handle Midway or Dali configurations
    if on_midway:
        if _container_at_most(container, "2023.05.2"):
            context_args.update(
                {"_rucio_local_path": "/project/lgrandi/rucio", "include_rucio_local": True}
            )
    elif "dali" in node:
        context_args.update(
            {
                "_auto_append_rucio_local": False,
                "_rucio_local_path": "/dali/lgrandi/rucio",
                "include_rucio_local": True,
            }
        )

    if context_type == "online":
        st = straxen.contexts.xenonnt_online(**context_args)
    else:
        st = cutax.contexts.xenonnt_offline(xedocs_version=global_config, **context_args)

    if on_midway:
        for processed_path in (
            "/project2/lgrandi/xenonnt/processed/",
            "/project/lgrandi/xenonnt/processed/",
        ):
            st.storage.append(strax.DataDirectory(processed_path, readonly=True))

    print("")
    straxen.print_versions()

    print("\nStorage")
    for item in st.storage:
        print(f"- {item}")

    return st


def safe_is_stored(st, r, p):
    """Return ``st.is_stored(r, p)``, treating unreadable data as not stored.

    Args:
        st: Object providing ``is_stored(run, plugin)``.
        r: Run identifier forwarded to ``is_stored``.
        p: Plugin / data type name forwarded to ``is_stored``.

    Returns:
        The result of ``st.is_stored(r, p)``, or ``False`` when that call
        raises ``strax.DataCorrupted`` or ``strax.DataNotAvailable`` (the error
        is printed together with the run).
    """
    try:
        stored = st.is_stored(r, p)
    except (strax.DataCorrupted, strax.DataNotAvailable) as err:
        print(f"Error for run {r}: {err}")
        stored = False
    return stored


def calculate_percentage(df, st, plugins):
    """Summarise, per run mode, how many runs have each plugin stored.

    Args:
        df: DataFrame with at least the columns "mode" and "name".
        st: Context handed to ``safe_is_stored`` for every (run, plugin) pair.
        plugins: Iterable of plugin / data type names to check.

    Returns:
        pandas.DataFrame with one row per distinct value of ``df["mode"]``:
        a "Mode" column plus one "<plugin>_available" column per plugin,
        formatted as "stored/total (percent%)".
    """
    rows = []

    for run_mode in df["mode"].unique():
        run_names = df.loc[df["mode"] == run_mode, "name"]
        row = {"Mode": run_mode}

        for p in plugins:
            stored_flags = np.array([safe_is_stored(st, run, p) for run in run_names])
            n_runs = len(stored_flags)
            n_stored = np.count_nonzero(stored_flags)
            fraction = 100 * n_stored / n_runs
            row[f"{p}_available"] = f"{n_stored}/{n_runs} ({fraction:.2f}%)"

        rows.append(row)

    return pd.DataFrame(rows)


def _plugins_to_check(args):
    """Return the list of plugins whose availability is reported.

    An explicit, non-empty ``args.plugins`` wins. Otherwise ``args.check_peaks``
    selects lone_hits/peaklets/merged_s2s/hitlets_nv when true and
    peak_basics/event_basics when false.
    """
    # An empty list (``--plugins`` given without values) is treated like
    # "not provided"; it would otherwise yield a table without any columns.
    if args.plugins:
        return list(args.plugins)
    if args.check_peaks:
        return ["lone_hits", "peaklets", "merged_s2s", "hitlets_nv"]
    return ["peak_basics", "event_basics"]


def main():
    """Run the availability report and print it between <code> tags.

    Steps: parse the command line, build the strax context (importing cutax
    from ``--cutax_location`` for the offline context), select runs by tags and
    optional time range, then print the per-mode availability table.
    """
    # The output is wrapped in <code> ... </code>
    # (presumably so it can be pasted into a wiki page -- TODO confirm).
    print("<code>")
    # Get the current date
    current_date = datetime.today().date()
    print("\nToday's date is:", current_date)

    args = parse_args()

    print("")
    for arg, value in vars(args).items():
        print(f"{arg}: {value}")

    # For `offline` context we try to import cutax from the given location
    if args.context == "offline":
        print("")
        print(f"Setting cutax: {args.cutax_location}")
        sys.path.append(args.cutax_location)
        import cutax

        st = initialize_straxen(args.context, args.global_config, args.container, cutax)
    else:
        st = initialize_straxen(args.context, args.global_config, args.container)

    # Prepare arguments for `select_runs`
    select_runs_kwargs = {"exclude_tags": args.exclude_tags}

    # Only add include_tags if provided
    if args.include_tags:
        select_runs_kwargs["include_tags"] = args.include_tags

    # Select runs
    selection = st.select_runs(**select_runs_kwargs)

    # Apply time filtering if --time-range is provided
    if args.time_range:
        start_date, end_date = pd.to_datetime(args.time_range)
        # Ensure column is in datetime format
        selection["start"] = pd.to_datetime(selection["start"])
        # NOTE(review): a date-only END_DATE parses to midnight, so runs that
        # start later on that day are excluded -- confirm this is intended.
        in_range = (selection["start"] >= start_date) & (selection["start"] <= end_date)
        selection = selection[in_range]

    # Calculate and display the percentage table
    percentage_df = calculate_percentage(selection, st, _plugins_to_check(args))

    print("")
    print(percentage_df)
    print("")
    print("</code>")


if __name__ == "__main__":
    main()
50 changes: 50 additions & 0 deletions utilix/scripts/data_avaibility.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash

# Print the usage line for this wrapper and terminate with exit status 1
usage() {
    printf 'Usage: %s --container <container_name> [other arguments for Python script]\n' "$0"
    exit 1
}

# Ensure at least two arguments are provided (container and other args)
if [ $# -lt 2 ]; then
    usage
fi

# Initialize variables
CONTAINER=""
OTHER_ARGS=()

# Parse command-line arguments: --container is consumed here, everything else
# is forwarded unchanged to the Python script.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --container)
            # A trailing "--container" without a value would make `shift 2`
            # fail without shifting anything, so this loop would never end.
            if [[ $# -lt 2 ]]; then
                echo "Error: --container requires a value." >&2
                usage
            fi
            CONTAINER="$2"
            shift 2 # Move past --container and its value
            ;;
        *)
            OTHER_ARGS+=("$1") # Store all other arguments
            shift
            ;;
    esac
done

# Ensure container name is provided
if [ -z "$CONTAINER" ]; then
    echo "Error: --container argument is required."
    usage
fi

# Locate the Python script: the current directory is tried first (original
# behaviour); if it is not there, fall back to the directory of this wrapper
# so the script can also be started from elsewhere.
current_dir=$(pwd)
script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PY_SCRIPT="$current_dir/data_avaibility.py"
if [ ! -f "$PY_SCRIPT" ]; then
    PY_SCRIPT="$script_dir/data_avaibility.py"
fi

SETUP_SCRIPT="/cvmfs/xenon.opensciencegrid.org/releases/nT/${CONTAINER}/setup.sh"
if [ ! -f "$SETUP_SCRIPT" ]; then
    # Without this check a failed `source` is only reported and python3 would
    # still run, in whatever environment the caller happens to have.
    echo "Error: setup script not found: $SETUP_SCRIPT" >&2
    exit 1
fi

# Run everything inside a subshell to avoid modifying the parent shell
(
    # Source the setup script for the container
    source "$SETUP_SCRIPT"

    # CUTAX_LOCATION is expected to be exported by the container's setup.sh
    _CUTAX_LOCATION="${CUTAX_LOCATION}/cutax"

    # Run the Python script with all arguments, including --container
    python3 "$PY_SCRIPT" --container "$CONTAINER" --cutax_location "${_CUTAX_LOCATION}" "${OTHER_ARGS[@]}"
)