Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions .github/workflows/regression.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
name: regression

# Intended trigger: start the job only for PRs when the 'run-regression' label
# is added. For now the workflow runs on every push; the label-based trigger
# (and the matching job-level `if:`) are kept commented out below.
on: push
# pull_request:
#   types: [labeled]

jobs:
  regression:
    # if: contains(github.event.pull_request.labels.*.name, 'run-regression')
    name: regression
    runs-on: pineko-stbc3

    # container:
    #   image: ghcr.io/nnpdf/lhapdf:v2
    #   credentials:
    #     username: ${{ github.repository_owner }}
    #     password: ${{ github.token }}

    steps:
      - uses: actions/checkout@v2
        with:
          # tags needed for dynamic versioning
          fetch-depth: 0
      - name: Install and configure Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: false
          installer-parallel: true
      - name: Install dependencies 🐍
        run: poetry install --no-interaction --no-root --with test -E nnpdf
      - name: Install project 🐍
        # it is required to repeat extras, otherwise they will be removed from
        # the environment
        run: poetry install --no-interaction -E nnpdf --with test
      - name: Get data files 📦
        id: cache-data-files
        uses: actions/cache@v4
        with:
          path: theory_productions
          key: theory_productions-v3
      - name: Download data files 📦
        # The expression must use the exact id of the cache step above
        # (`cache-data-files`); with a mismatching id the output is empty and
        # the download would run on every build.
        if: steps.cache-data-files.outputs.cache-hit != 'true'
        run: |
          sh download_test_data.sh
      - name: Restore cached numba compile code 📮
        id: cache-numba
        uses: actions/cache@v4
        with:
          path: src/pineko/__pycache__
          key: numba-cache-${{ runner.os }}-${{ hashFiles('**/*.py') }}
          restore-keys: numba-cache-${{ runner.os }}-
      - name: Generate FK table predictions and perform regression tests 💣
        # regression_check.sh uses bash-only features (arrays, `[[ ]]`,
        # `set -o pipefail`), so it must not be run through a plain POSIX sh.
        run: |
          bash regression_check.sh
      - name: Save updated numba cache 📮
        uses: actions/cache@v4
        with:
          path: src/pineko/__pycache__
          key: numba-cache-${{ runner.os }}-${{ hashFiles('**/*.py') }}
1 change: 1 addition & 0 deletions download_test_data.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/bash
# Mirror the test inputs from the NNPDF data server into the working directory.
#
# -nH together with --cut-dirs=1 drops the host name and the leading "pineko/"
# path component, so the first command populates ./theory_productions while the
# other two (because of -P benchmarks) populate ./benchmarks/data_files and
# ./benchmarks/fakepdfs.
#
# The reject pattern is quoted so that the shell hands it to wget verbatim
# instead of expanding it against files such as ./index.html in the current
# directory.
#
# NOTE: the regression workflow invokes this file with `sh`, so keep it free of
# bash-only syntax.
wget -r -np -nH --cut-dirs=1 -l 4 -e robots=off --no-verbose -R 'index.*' https://data.nnpdf.science/pineko/theory_productions/
wget -r -np -nH --cut-dirs=1 -l 4 -e robots=off --no-verbose -P benchmarks -R 'index.*' https://data.nnpdf.science/pineko/data_files/
wget -r -np -nH --cut-dirs=1 -l 4 -e robots=off --no-verbose -P benchmarks -R 'index.*' https://data.nnpdf.science/pineko/fakepdfs/
15 changes: 15 additions & 0 deletions pineko.cli.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Pineko configuration used by regression_check.sh, which passes this file to
# every `pineko` call through `-c pineko.cli.toml`.
[general]
# NOTE(review): presumably switches on the NNPDF integration provided by the
# `nnpdf` extra installed in the regression workflow - confirm against pineko.
nnpdf=true

[paths]
# inputs
# All paths are relative to the directory the command is run from and live
# under ./theory_productions, the directory that download_test_data.sh fetches.
grids = "./theory_productions/data/grids"
operator_card_template_name = "_template.cli.toml"
Comment thread
Radonirinaunimi marked this conversation as resolved.
Outdated
# outputs
# regression_check.sh reads the produced FK tables back from
# <fktables>/<theory id>/ in order to compare them with the grids and with the
# reference FK tables.
operator_cards = "./theory_productions/operator_cards"
ekos = "./theory_productions/data/ekos"
fktables = "./theory_productions/data/fktables"

# Log destinations, one entry for the EKO step and one for the FK step.
[paths.logs]
eko = "./theory_productions/logs/eko"
fk = "./theory_productions/logs/fk"
37 changes: 36 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pandas = "^2.1"
rich = "^12.5.1"
click = "^8.0.4"
tomli = "^2.0.1"
nnpdf-data = { version = "*", optional = true}
nnpdf-data = { version = ">=0.0.3", optional = true}

[tool.poetry.group.docs]
optional = true
Expand All @@ -56,6 +56,7 @@ pytest-cov = "^4.0.0"
pytest-env = "^0.6.2"
pylint = "^3.1.0"
banana-hep = "^0.6.13"
pineappl-cli = "^0.8.7"

[tool.poetry.group.dev.dependencies]
pdbpp = "^0.10.3"
Expand Down
86 changes: 86 additions & 0 deletions regression_check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/bin/bash
#
# Regression check: regenerate the FK tables of a few datasets with pineko and
# compare them against the grids and against stored reference FK tables.

# Abort on the first failing command, on unset variables and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Theory and PDF set used for every prediction of this check. The values are
# constants, so they are marked read-only to catch accidental reassignment.
readonly THEORY_ID=40008005
readonly PDF_NAME="NNPDF40_nnlo_as_01180"

# Datasets exercised by the check, one array entry per dataset.
readonly -a LIST_DIS_DATASETS=("HERA_CC_318GEV_EP-SIGMARED")
readonly -a LIST_HADRONIC_DATASETS=("ATLAS_Z0_7TEV_36PB_ETA")

#######################################
# Produce the FONLL FK tables for a list of DIS datasets.
# For every dataset the theory cards, EKOs and FK tables are generated and the
# seven flavour-scheme FK tables are combined. The scheme theories are numbered
# consecutively starting from <theory id> * 100.
# Arguments:
#   $1     - theory ID
#   $2...  - dataset names, one argument per dataset
#######################################
dis_predictions() {
  local theory_id=$1
  shift
  local nfonll_id=$((theory_id * 100))
  local dataset

  # Iterate over "$@" rather than a copy of $2: a scalar copy can only ever
  # hold a single dataset, whatever the caller passes.
  for dataset in "$@"; do
    pineko fonll -c pineko.cli.toml tcards "$theory_id"
    pineko fonll -c pineko.cli.toml ekos --overwrite "$theory_id" "$dataset"
    pineko fonll -c pineko.cli.toml fks --overwrite "$theory_id" "$dataset"
    pineko fonll -c pineko.cli.toml combine --overwrite "$theory_id" "$dataset" \
      --FFNS3 "$nfonll_id" \
      --FFN03 "$((nfonll_id + 1))" \
      --FFNS4zeromass "$((nfonll_id + 2))" \
      --FFNS4massive "$((nfonll_id + 3))" \
      --FFN04 "$((nfonll_id + 4))" \
      --FFNS5zeromass "$((nfonll_id + 5))" \
      --FFNS5massive "$((nfonll_id + 6))"
  done
}

#######################################
# Produce the FK tables for a list of hadronic datasets and check each one
# against the grid it was derived from.
# Globals:
#   PDF_NAME (read) - PDF set handed to `pineko compare`
# Arguments:
#   $1     - theory ID
#   $2...  - dataset names, one argument per dataset
# Returns:
#   1 if no grid is found for the theory, otherwise the status of the last
#   command executed.
#######################################
hadronic_predictions() {
  local theory_id=$1
  shift
  local dataset

  # Iterate over "$@" rather than a copy of $2: a scalar copy can only ever
  # hold a single dataset, whatever the caller passes.
  for dataset in "$@"; do
    pineko theory -c pineko.cli.toml opcards --overwrite "$theory_id" "$dataset"
    pineko theory -c pineko.cli.toml ekos --overwrite "$theory_id" "$dataset"
    pineko theory -c pineko.cli.toml fks --overwrite "$theory_id" "$dataset"
  done

  # Compare the Hadronic FK tables with the Grids
  local -a grids=(theory_productions/data/grids/"$theory_id"/*.pineappl.lz4)
  # An unmatched glob stays in the array as the literal pattern; report that
  # clearly instead of handing a non-existent path to pineko.
  if [[ ! -e "${grids[0]-}" ]]; then
    printf 'No grids found in theory_productions/data/grids/%s\n' "$theory_id" >&2
    return 1
  fi

  local gridpath gridname
  for gridpath in "${grids[@]}"; do
    gridname=${gridpath##*/}
    # Positional 3 and 0 fill the max_as and max_al arguments of the compare
    # command; --threshold is given in permille.
    pineko compare ./theory_productions/data/fktables/"$theory_id"/"$gridname" \
      ./theory_productions/data/grids/"$theory_id"/"$gridname" 3 0 \
      "$PDF_NAME" --threshold 1
  done
}

#######################################
# Compare two FK tables bin by bin using `pineappl diff`.
# The last column of every data row of the diff output is taken as the
# difference of that bin and its absolute value is checked against the
# threshold. The script terminates with status 1 when
#   - `pineappl diff` itself fails,
#   - no bin could be read from its output,
#   - an entry is not a plain number (e.g. NaN or inf), or
#   - a bin exceeds the threshold.
# Globals:
#   PDF_NAME (read) - PDF set used for the convolution
# Arguments:
#   $1 - reference FK table
#   $2 - FK table to check
#   $3 - optional threshold on the absolute difference, default 0.001
#        (1 permille)
# Outputs:
#   Diagnostics on stderr.
#######################################
compare_predictions() {
  local reference_fk=$1
  local current_fk=$2
  local threshold=${3:-0.001}
  local diff_output verdict kind bin value
  local status=0

  # Capture the output first so that a failure of pineappl is detected
  # explicitly rather than hidden behind the parsing stage.
  if ! diff_output=$(pineappl diff "$reference_fk" "$current_fk" "$PDF_NAME"); then
    printf 'pineappl diff failed for (%s) and (%s).\n' \
      "$reference_fk" "$current_fk" >&2
    exit 1
  fi

  # A single awk pass replaces the per-bin printf/bc calls: skip the two header
  # lines and blank lines, validate the last field and compare its magnitude.
  verdict=$(awk -v limit="$threshold" '
    NR <= 2 || NF == 0 { next }
    {
      value = $NF
      if (value !~ /^[-+]?([0-9]+[.]?[0-9]*|[.][0-9]+)([eE][-+]?[0-9]+)?$/) {
        printf "invalid %d %s\n", bin, value
        failed = 1
        exit 1
      }
      magnitude = value + 0
      if (magnitude < 0) magnitude = -magnitude
      if (magnitude > limit + 0) {
        printf "exceeds %d %s\n", bin, value
        failed = 1
        exit 1
      }
      bin++
    }
    END {
      if (failed) exit 1
      if (bin == 0) { print "empty"; exit 1 }
    }
  ' <<<"$diff_output") || status=$?

  if ((status == 0)); then
    return 0
  fi

  read -r kind bin value <<<"$verdict"
  case "$kind" in
    exceeds)
      printf 'Bin %s: (%s) and (%s) differ by %s, more than the allowed %s.\n' \
        "$bin" "$reference_fk" "$current_fk" "$value" "$threshold" >&2
      ;;
    invalid)
      printf 'Bin %s: (%s) and (%s) give a non-numeric difference: %s\n' \
        "$bin" "$reference_fk" "$current_fk" "$value" >&2
      ;;
    empty)
      printf 'No predictions found when comparing (%s) and (%s).\n' \
        "$reference_fk" "$current_fk" >&2
      ;;
    *)
      printf 'Could not parse the pineappl diff output for (%s) and (%s).\n' \
        "$reference_fk" "$current_fk" >&2
      ;;
  esac
  exit 1
}

#######################################
# Compare every freshly produced FK table of a theory with the reference FK
# table of the same name.
# Arguments:
#   $1 - theory ID
# Returns:
#   1 if no FK table was produced for the theory; otherwise the status of the
#   last comparison (compare_predictions terminates the script on a mismatch).
#######################################
compare_fks_with_reference() {
  local theory_id=$1
  local current_dir="./theory_productions/data/fktables/$theory_id"
  local reference_dir="./theory_productions/reference_fks/$theory_id"
  local -a fktables=("$current_dir"/*.pineappl.lz4)
  local fktable_path fkname

  # An unmatched glob stays in the array as the literal pattern; report that
  # clearly instead of comparing a non-existent file.
  if [[ ! -e "${fktables[0]-}" ]]; then
    printf 'No FK tables found in %s\n' "$current_dir" >&2
    return 1
  fi

  for fktable_path in "${fktables[@]}"; do
    fkname=${fktable_path##*/}
    # compare_predictions expects the reference table first and the table
    # under test second.
    compare_predictions "$reference_dir/$fkname" "$current_dir/$fkname"
  done
}

# Run the three stages in order. The dataset arrays are expanded in full with
# "${ARRAY[@]}" so that every entry reaches the function as its own argument;
# a bare $ARRAY expands to the first element only.
dis_predictions "$THEORY_ID" "${LIST_DIS_DATASETS[@]}"
hadronic_predictions "$THEORY_ID" "${LIST_HADRONIC_DATASETS[@]}"
compare_fks_with_reference "$THEORY_ID"
9 changes: 7 additions & 2 deletions src/pineko/cli/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
@click.argument("pdfs", type=click.STRING, nargs=-1)
@click.option("--xir", default=1.0, help="renormalization scale variation")
@click.option("--xif", default=1.0, help="factorization scale variation")
def subcommand(fktable_path, grid_path, max_as, max_al, pdfs, xir, xif):
@click.option(
"--threshold", default=5.0, help="threshold in permille to accept Grid -> FK"
)
def subcommand(fktable_path, grid_path, max_as, max_al, pdfs, xir, xif, threshold):
"""Compare process level PineAPPL grid and derived FK Table.

The comparison between the grid stored at PINEAPPL_PATH, and the FK table
Expand All @@ -40,5 +43,7 @@ def subcommand(fktable_path, grid_path, max_as, max_al, pdfs, xir, xif):
pdf2 = pdfs[1] if len(pdfs) == 2 else None
# Note that we need to cast to string before printing to avoid ellipsis ...
rich.print(
comparator.compare(pine, fk, max_as, max_al, pdf1, xir, xif, pdf2).to_string()
comparator.compare(
pine, fk, max_as, max_al, pdf1, xir, xif, threshold, pdf2
).to_string()
)
15 changes: 14 additions & 1 deletion src/pineko/comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
import rich


def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None):
class GridtoFKError(Exception):
    """Raised when the difference between the Grid and FK table is above some threshold.

    :func:`compare` raises it when the absolute permille error of any bin
    reaches the requested threshold.
    """


def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, threshold=5.0, pdf2=None):
"""Build comparison table.

Parameters
Expand All @@ -25,6 +29,9 @@ def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None):
renormalization scale variation
xif : float
factorization scale variation
threshold: float
check if the difference between the Grid and FK table is above the
threshold then raise an error
pdf2: str or None
PDF set for the second convolution, if different from the first

Expand Down Expand Up @@ -112,4 +119,10 @@ def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None):
df["PineAPPL"] = before
df["FkTable"] = after
df["permille_error"] = (after / before - 1.0) * 1000.0

if (df["permille_error"].abs() >= threshold).any():
raise GridtoFKError(
f"The difference between the Grid and FK is above {threshold} permille."
)

return df