From f17b5741525cfd038e858ba80b4e01a2f4e84a83 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Wed, 6 May 2026 11:13:04 +0200
Subject: [PATCH 01/30] Draft.

---
 src/hexsample/calibration.py | 118 ++++++++++++++++++++++++++---------
 src/hexsample/tasks.py       |  86 ++++++++++++-------------
 2 files changed, 133 insertions(+), 71 deletions(-)

diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index d70d04f..c46a1b5 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -23,13 +23,16 @@
 import pathlib
 from enum import Enum
 from typing import Tuple
+from time import time
 
 import h5py
 import numpy as np
 from aptapy.hist import Histogram3d
 from scipy.sparse import csr_matrix
 from scipy.sparse.linalg import lsmr
+from iminuit import Minuit
 
+from .hexagon import HexagonalGrid
 from .clustering import Cluster
 from .digi import DigiEventRectangular
 from .recon import DEFAULT_IONIZATION_POTENTIAL
@@ -78,7 +81,7 @@ class CalibrationUnits(str, Enum):
     ENC = "Electrons"
     NOISE = "ADC counts"
     PEDESTAL = "ADC counts"
-    GAIN = "ADC counts / electron"
+    GAIN = ""
 
 
 CALIBRATION_UNITS = {
@@ -639,44 +642,103 @@ def analyze_cluster(self, cluster: Cluster) -> None:
         self._event_count += 1
         self.cal_matrix.num_events += 1
 
+    def _fit_lh_cluster(self, a, col, row, mean):
+        t0 = time()
+        grid = HexagonalGrid()
+        neigh = grid.neighbors(col, row)
+        cols, rows = np.array(neigh).T
+        cols = np.insert(cols, 0, col)
+        rows = np.insert(rows, 0, row)
+        idxs = rows * 304 + cols
+        a_sub = a[:, idxs]
+        col_0 = a_sub[:, 0]
+        mask = col_0.nonzero()[0]
+        a_sub = a_sub[mask]
+        def nll_mono(pars):
+            sum_energy = a_sub @ pars
+            p = np.exp(- (sum_energy - mean)**2 / (2 * 20 **2))
+            return -np.sum(np.log(p + 1e-10))
+        
+        m = Minuit(nll_mono, np.ones(7))
+        m.migrad()
+        t1 = time()
+        print(f"Fit time: {t1 - t0} seconds")
+        return m
+
+    
     def fit(self) -> CalibrationMatrix:
-        """Perform the least squares fit to determine the gain of each pixel.
+        """Perform the likelihood fit to determine the gain of each pixel.
         """
         if self._event_count == 0:
             raise ValueError("No events have been analyzed, cannot perform the fit.")
+        # Create the sparse matrix
         nrows, ncols = self.cal_matrix.shape
         # Create the sparse matrix for the least squares fit. This object allows to store
         # and use efficiently the large and sparse matrix that we need for the fit.
         shape = (self._event_count, nrows * ncols)
+
         a = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape)
-        # Create the vector of the expected number of electrons.
-        b = np.full(self._event_count, self._energy / DEFAULT_IONIZATION_POTENTIAL)
-        # Perform the fit
-        results = lsmr(a, b)
-        # Get the best-fit weight vector and reshape it to the shape of the calibration matrix.
-        weight = results[0].reshape((nrows, ncols))
-        signal_power = np.array(a.multiply(a).sum(axis=0)).reshape((nrows, ncols))
-        # Calculate the number of degrees of freedom and the mean squared error.
-        dof = self._event_count - (ncols * nrows)
-        mse = results[3]**2 / max(dof, 1)
-        # Calculate the uncertainty of the gain best-fit values.
-        sigma_w = np.sqrt(mse / (signal_power + 1e-15))
-        with np.errstate(divide='ignore', invalid='ignore'):
-            sigma_g_rel = sigma_w / np.abs(weight)
-        # Mask for the pixels that have a weight value close to zero (no events)
-        mask = np.abs(weight) > 1e-10
-        values = self.cal_matrix.values.copy()
-        entries = self.cal_matrix.entries
-        # Set the gain value for the pixels that pass the quality cut.
-        values[mask] = 1 / weight[mask]
-        # Set the entries to zero for the pixels that don't pass the quality cut.
-        entries[~mask] = 0
-        # Write back through the setter so updates persist on the shared object.
-        self.cal_matrix.values = values
-        self.cal_matrix.errors = np.where(mask, sigma_g_rel * values, self.cal_matrix.errors)
-        self.cal_matrix.entries = entries
+        # Calculate the mean to normalize the results
+        MEAN = np.array(a.sum(axis=1)).flatten().mean()
+        
+        # Let's try to analyze a small region around the center
+        COL0, ROW0 = 152, 176
+        m = self._fit_lh_cluster(a, COL0, ROW0, MEAN)
+        # values = self.cal_matrix.values.copy()
+        # # values[rows, cols] = 1. / np.array(m.values)
+        # self.cal_matrix.values = values
+
+        print("Fit results:")
+        for i in range(7):
+            print(f"Pixel {i}: Gain = {1/m.values[i]:.2f} +/- {m.errors[i]/m.values[i]**2:.2f}")
+
         return self.cal_matrix
 
+        
+
+
+        
+
+
+
+    # def fit(self) -> CalibrationMatrix:
+    #     """Perform the least squares fit to determine the gain of each pixel.
+    #     """
+    #     if self._event_count == 0:
+    #         raise ValueError("No events have been analyzed, cannot perform the fit.")
+    #     nrows, ncols = self.cal_matrix.shape
+    #     # Create the sparse matrix for the least squares fit. This object allows to store
+    #     # and use efficiently the large and sparse matrix that we need for the fit.
+    #     shape = (self._event_count, nrows * ncols)
+    #     a = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape)
+    #     # Create the vector of the expected number of electrons.
+    #     b = np.full(self._event_count, self._energy / DEFAULT_IONIZATION_POTENTIAL)
+    #     # Perform the fit
+    #     results = lsmr(a, b)
+    #     # Get the best-fit weight vector and reshape it to the shape of the calibration matrix.
+    #     weight = results[0].reshape((nrows, ncols))
+    #     signal_power = np.array(a.multiply(a).sum(axis=0)).reshape((nrows, ncols))
+    #     # Calculate the number of degrees of freedom and the mean squared error.
+    #     dof = self._event_count - (ncols * nrows)
+    #     mse = results[3]**2 / max(dof, 1)
+    #     # Calculate the uncertainty of the gain best-fit values.
+    #     sigma_w = np.sqrt(mse / (signal_power + 1e-15))
+    #     with np.errstate(divide='ignore', invalid='ignore'):
+    #         sigma_g_rel = sigma_w / np.abs(weight)
+    #     # Mask for the pixels that have a weight value close to zero (no events)
+    #     mask = np.abs(weight) > 1e-10
+    #     values = self.cal_matrix.values.copy()
+    #     entries = self.cal_matrix.entries
+    #     # Set the gain value for the pixels that pass the quality cut.
+    #     values[mask] = 1 / weight[mask]
+    #     # Set the entries to zero for the pixels that don't pass the quality cut.
+    #     entries[~mask] = 0
+    #     # Write back through the setter so updates persist on the shared object.
+    #     self.cal_matrix.values = values
+    #     self.cal_matrix.errors = np.where(mask, sigma_g_rel * values, self.cal_matrix.errors)
+    #     self.cal_matrix.entries = entries
+    #     return self.cal_matrix
+
 
 class CalibrateENC:
     """Class for calibrating the equivalent noise charge (ENC) of the readout chip.
diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py
index 463686f..79e5dc7 100644
--- a/src/hexsample/tasks.py
+++ b/src/hexsample/tasks.py
@@ -721,52 +721,52 @@ def calibrate_gain(
     # calibration to correct the bias in the gain matrix.
     # To calculate the mean value, we are excluding the outliers by considering only the values
     # between the 1st and the 99th percentile.
-    gain_sim = CalibrationMatrix(header["num_cols"], header["num_rows"])
-    vals = gain_matrix.values
-    lower_bound, upper_bound = np.nanpercentile(vals, [1, 99])
-    gain_sim.set_value(np.mean(vals[(vals > lower_bound) & (vals < upper_bound)]))
-    simulation_readout = HexagonalReadoutRectangular(HexagonalLayout(header["layout"]),
-        header["num_cols"], header["num_rows"], header["pitch"],
-        enc=noise_matrix, gain=gain_sim, pedestal=pedestal_matrix)
-    output = HEXSAMPLE_DATA / "_tmp_simulation_bias.h5"
-    # Simulate events with the best-fit gain matrix to correct the bias.
-    logger.info("Simulating file to correct the bias...")
-    simulate(
-        source=Source(Line(energy), DiskBeam(radius=0.15)),
-        sensor=Sensor(),
-        readout=simulation_readout,
-        num_events=num_events,
-        output_file_path=output)
-    tmp_input_file = digi_input_file_class("rectangular")(output)
-    tmp_gain_calibration = CalibrateGain(header["num_cols"], header["num_rows"], energy)
-    # Re-run the gain calibration on the simulated events to calculate the correction factor.
-    logger.info("Starting the event loop for the simulated file...")
-    for _, event in tqdm(enumerate(tmp_input_file)):
-        try:
-            cluster = clustering.run(event)
-        except IndexError:
-            continue
-        tmp_gain_calibration.analyze_cluster(cluster)
-    logger.info("Calculating the gain matrix from the simulation...")
-    tmp_gain_matrix = tmp_gain_calibration.fit()
-    # Calculate the correction factor from the simulation.
-    logger.info("Calculating the correction factor...")
-    mask = tmp_gain_matrix.entries > 0
-    # Calculate the residuals between the MC and calibrated gain matrices from the simulation
-    # and calculate the mean residual to be used as a correction factor.
-    residuals = (tmp_gain_matrix.values[mask] - gain_sim.values[mask]) / gain_sim.values[mask]
-    # Exclude the outliers by considering only the values between the 1st and the 99th percentile.
-    lower_bound, upper_bound = np.percentile(residuals, [1, 99])
-    mean_residual = np.mean(residuals[(residuals > lower_bound) & (residuals < upper_bound)])
-    # Apply the correction factor to the gain matrix and save it to a HDF5 file.
-    mask_gain = gain_matrix.entries > 0
-    gain_matrix.values[mask_gain] = gain_matrix.values[mask_gain] / (1 + mean_residual)
+    # gain_sim = CalibrationMatrix(header["num_cols"], header["num_rows"])
+    # vals = gain_matrix.values
+    # lower_bound, upper_bound = np.nanpercentile(vals, [1, 99])
+    # gain_sim.set_value(np.mean(vals[(vals > lower_bound) & (vals < upper_bound)]))
+    # simulation_readout = HexagonalReadoutRectangular(HexagonalLayout(header["layout"]),
+    #     header["num_cols"], header["num_rows"], header["pitch"],
+    #     enc=noise_matrix, gain=gain_sim, pedestal=pedestal_matrix)
+    # output = HEXSAMPLE_DATA / "_tmp_simulation_bias.h5"
+    # # Simulate events with the best-fit gain matrix to correct the bias.
+    # logger.info("Simulating file to correct the bias...")
+    # simulate(
+    #     source=Source(Line(energy), DiskBeam(radius=0.15)),
+    #     sensor=Sensor(),
+    #     readout=simulation_readout,
+    #     num_events=num_events,
+    #     output_file_path=output)
+    # tmp_input_file = digi_input_file_class("rectangular")(output)
+    # tmp_gain_calibration = CalibrateGain(header["num_cols"], header["num_rows"], energy)
+    # # Re-run the gain calibration on the simulated events to calculate the correction factor.
+    # logger.info("Starting the event loop for the simulated file...")
+    # for _, event in tqdm(enumerate(tmp_input_file)):
+    #     try:
+    #         cluster = clustering.run(event)
+    #     except IndexError:
+    #         continue
+    #     tmp_gain_calibration.analyze_cluster(cluster)
+    # logger.info("Calculating the gain matrix from the simulation...")
+    # tmp_gain_matrix = tmp_gain_calibration.fit()
+    # # Calculate the correction factor from the simulation.
+    # logger.info("Calculating the correction factor...")
+    # mask = tmp_gain_matrix.entries > 0
+    # # Calculate the residuals between the MC and calibrated gain matrices from the simulation
+    # # and calculate the mean residual to be used as a correction factor.
+    # residuals = (tmp_gain_matrix.values[mask] - gain_sim.values[mask]) / gain_sim.values[mask]
+    # # Exclude the outliers by considering only the values between the 1st and the 99th percentile.
+    # lower_bound, upper_bound = np.percentile(residuals, [1, 99])
+    # mean_residual = np.mean(residuals[(residuals > lower_bound) & (residuals < upper_bound)])
+    # # Apply the correction factor to the gain matrix and save it to a HDF5 file.
+    # mask_gain = gain_matrix.entries > 0
+    # gain_matrix.values[mask_gain] = gain_matrix.values[mask_gain] / (1 + mean_residual)
     output_file_path = input_file_path.replace(".h5", "_matrix_gain.h5")
-    logger.info(f"Saving corrected gain matrix to {output_file_path}...")
+    # logger.info(f"Saving corrected gain matrix to {output_file_path}...")
     gain_matrix.to_hdf5(output_file_path, CalibrationType.GAIN, False)
     # Close the input files.
-    tmp_input_file.close()
-    os.remove(output)
+    # tmp_input_file.close()
+    # os.remove(output)
     input_file.close()
     logger.info("Done!")
     return output_file_path

From 6756c6495c632e0d496257e5984dc2f76f3d71d2 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Wed, 6 May 2026 15:10:58 +0200
Subject: [PATCH 02/30] Draft.

---
 src/hexsample/calibration.py | 109 +++++++++++++++++++++++++----------
 src/hexsample/tasks.py       |   5 +-
 2 files changed, 84 insertions(+), 30 deletions(-)

diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index c46a1b5..c651655 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -24,6 +24,8 @@
 from enum import Enum
 from typing import Tuple
 from time import time
+from tqdm import tqdm
+from itertools import product
 
 import h5py
 import numpy as np
@@ -642,30 +644,42 @@ def analyze_cluster(self, cluster: Cluster) -> None:
         self._event_count += 1
         self.cal_matrix.num_events += 1
 
-    def _fit_lh_cluster(self, a, col, row, mean):
-        t0 = time()
-        grid = HexagonalGrid()
-        neigh = grid.neighbors(col, row)
-        cols, rows = np.array(neigh).T
-        cols = np.insert(cols, 0, col)
-        rows = np.insert(rows, 0, row)
-        idxs = rows * 304 + cols
-        a_sub = a[:, idxs]
-        col_0 = a_sub[:, 0]
-        mask = col_0.nonzero()[0]
+    def fit_lh_large(self, a, cols, rows, mean):
+        idxs_sub = (rows * 304 + cols).astype(int)
+        total_event_sum = np.array(a.sum(axis=1)).flatten()
+        a_sub = a[:, idxs_sub]
+        sub_event_sum = np.array(a_sub.sum(axis=1)).flatten()
+        mask = (sub_event_sum == total_event_sum) & (sub_event_sum > 0)
         a_sub = a_sub[mask]
+        # Remove no active pixels
+        pixel_sum = np.array(a_sub.sum(axis=0)).flatten()
+        active_mask = pixel_sum > 0
+
+        active_idxs = np.where(active_mask)[0]
+        a_final = a_sub[:, active_idxs]
+
+        if a_final.nnz == 0 or a_final.shape[1] == 0:
+            return None
+
         def nll_mono(pars):
-            sum_energy = a_sub @ pars
+            sum_energy = a_final @ pars
             p = np.exp(- (sum_energy - mean)**2 / (2 * 20 **2))
             return -np.sum(np.log(p + 1e-10))
-        
-        m = Minuit(nll_mono, np.ones(7))
+        init = np.ones(len(active_idxs))
+        m = Minuit(nll_mono, init)
+        m.errordef = Minuit.LIKELIHOOD
         m.migrad()
-        t1 = time()
-        print(f"Fit time: {t1 - t0} seconds")
-        return m
 
-    
+        full_region_gain = np.full(len(idxs_sub), np.nan)
+        full_region_error = np.full(len(idxs_sub), np.nan)
+
+        if m.valid:
+            full_region_gain[active_idxs] = 1 / np.array(m.values)
+            full_region_error[active_idxs] = m.errors / np.array(m.values)**2
+        
+        return full_region_gain, full_region_error, idxs_sub
+
+
     def fit(self) -> CalibrationMatrix:
         """Perform the likelihood fit to determine the gain of each pixel.
         """
@@ -680,17 +694,54 @@ def fit(self) -> CalibrationMatrix:
         a = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape)
         # Calculate the mean to normalize the results
         MEAN = np.array(a.sum(axis=1)).flatten().mean()
-        
-        # Let's try to analyze a small region around the center
-        COL0, ROW0 = 152, 176
-        m = self._fit_lh_cluster(a, COL0, ROW0, MEAN)
-        # values = self.cal_matrix.values.copy()
-        # # values[rows, cols] = 1. / np.array(m.values)
-        # self.cal_matrix.values = values
-
-        print("Fit results:")
-        for i in range(7):
-            print(f"Pixel {i}: Gain = {1/m.values[i]:.2f} +/- {m.errors[i]/m.values[i]**2:.2f}")
+
+        values = self.cal_matrix.values.copy()
+        errors = self.cal_matrix.errors.copy()
+
+
+        NCOLS = 304
+        NROWS = 352
+        SIZE = 10
+        OVERLAP = 2
+        STRIDE = SIZE - OVERLAP
+        col_starts = np.arange(0, NCOLS - OVERLAP, STRIDE)
+        row_starts = np.arange(0, NROWS - OVERLAP, STRIDE)
+
+        weighted_gains = np.zeros((NROWS, NCOLS))
+        sum_weights = np.zeros((NROWS, NCOLS))
+
+        total_pixels = len(col_starts) * len(row_starts)
+        with tqdm(total=total_pixels, desc="Processing Pixels") as pbar:
+            for r_start in row_starts:
+                r_end = min(r_start + SIZE, NROWS)
+                actual_h = r_end - r_start
+                for c_start in col_starts:
+                    c_end = min(c_start + SIZE, NCOLS)
+                    actual_w = c_end - c_start
+                    pbar.update(actual_w * (actual_h if c_start == col_starts[0] else 0))
+                    cc, rr = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end))
+
+                    cols_block = cc.flatten()
+                    rows_block = rr.flatten()
+
+                    results = self.fit_lh_large(a, cols_block, rows_block, MEAN)
+                    if results is not None:
+                        gain, error, _ = results
+
+                        weights = 1 / (error**2 + 1e-10)
+                        mask = ~np.isnan(gain)
+
+                        np.add.at(weighted_gains, (rows_block[mask], cols_block[mask]), gain[mask] * weights[mask])
+                        np.add.at(sum_weights, (rows_block[mask], cols_block[mask]), weights[mask])
+
+
+        values = np.divide(weighted_gains, sum_weights, out=np.full_like(weighted_gains, np.nan), where=sum_weights > 0)
+        errors = np.full_like(weighted_gains, np.nan)
+        np.sqrt(sum_weights, out=errors, where=sum_weights > 0)
+        np.divide(1, errors, out=errors, where=sum_weights > 0)
+
+        self.cal_matrix.values = values
+        self.cal_matrix.errors = errors
 
         return self.cal_matrix
 
diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py
index 79e5dc7..9e072bb 100644
--- a/src/hexsample/tasks.py
+++ b/src/hexsample/tasks.py
@@ -958,7 +958,7 @@ def calibview(
         logger.info(f"  {key}: {value}")
     unit = CALIBRATION_UNITS.get(matrix.metadata["calibration_type"]).value
     # Calculate the quantiles of the values in the matrix to set the limits for the plots.
-    rel_error_mask = matrix.errors / matrix.values < rel_error
+    rel_error_mask = abs(matrix.errors / matrix.values) < rel_error
     hits_mask = matrix.entries >= min_hits
     mask = rel_error_mask & hits_mask
     if not np.any(mask):
@@ -996,6 +996,9 @@ def calibview(
         plt.colorbar(label=mc_unit)
         # Plot the distribution of the Monte Carlo truth values.
         mc_edges = np.linspace(np.nanmin(mc_vals), np.nanmax(mc_vals), 100)
+        # If Monte Carlo distribution is uniform, we need to modify the edges
+        if mc_edges[0] == mc_edges[-1]:
+            mc_edges = np.linspace(mc_edges[0] - mc_edges[0]*0.1, mc_edges[-1] + mc_edges[-1]*0.1, 100)
         mc_vals_hist = Histogram1d(mc_edges, label="MC Distribution", xlabel=mc_unit).fill(mc_vals)
         plt.figure("Distribution of Monte Carlo truth values")
         mc_vals_hist.plot(statistics=True)

From f9218f13ec42d7be02cfa173960e4149f4eb8482 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Wed, 6 May 2026 15:55:06 +0200
Subject: [PATCH 03/30] Draft.

---
 src/hexsample/calibration.py | 52 ++++++++++++++----------------------
 1 file changed, 20 insertions(+), 32 deletions(-)

diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index c651655..3c56bdc 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -683,8 +683,6 @@ def nll_mono(pars):
     def fit(self) -> CalibrationMatrix:
         """Perform the likelihood fit to determine the gain of each pixel.
         """
-        if self._event_count == 0:
-            raise ValueError("No events have been analyzed, cannot perform the fit.")
         # Create the sparse matrix
         nrows, ncols = self.cal_matrix.shape
         # Create the sparse matrix for the least squares fit. This object allows to store
@@ -698,41 +696,35 @@ def fit(self) -> CalibrationMatrix:
         values = self.cal_matrix.values.copy()
         errors = self.cal_matrix.errors.copy()
 
-
-        NCOLS = 304
-        NROWS = 352
+        ncols = 304
+        nrows = 352
         SIZE = 10
         OVERLAP = 2
         STRIDE = SIZE - OVERLAP
-        col_starts = np.arange(0, NCOLS - OVERLAP, STRIDE)
-        row_starts = np.arange(0, NROWS - OVERLAP, STRIDE)
+        col_starts = np.arange(0, ncols - OVERLAP, STRIDE)
+        row_starts = np.arange(0, nrows - OVERLAP, STRIDE)
 
-        weighted_gains = np.zeros((NROWS, NCOLS))
-        sum_weights = np.zeros((NROWS, NCOLS))
+        weighted_gains = np.zeros((nrows, ncols))
+        sum_weights = np.zeros((nrows, ncols))
 
-        total_pixels = len(col_starts) * len(row_starts)
-        with tqdm(total=total_pixels, desc="Processing Pixels") as pbar:
-            for r_start in row_starts:
-                r_end = min(r_start + SIZE, NROWS)
-                actual_h = r_end - r_start
-                for c_start in col_starts:
-                    c_end = min(c_start + SIZE, NCOLS)
-                    actual_w = c_end - c_start
-                    pbar.update(actual_w * (actual_h if c_start == col_starts[0] else 0))
-                    cc, rr = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end))
+        for r_start in row_starts:
+            r_end = min(r_start + SIZE, nrows)
+            for c_start in col_starts:
+                c_end = min(c_start + SIZE, ncols)
+                cc, rr = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end))
 
-                    cols_block = cc.flatten()
-                    rows_block = rr.flatten()
+                cols_block = cc.flatten()
+                rows_block = rr.flatten()
 
-                    results = self.fit_lh_large(a, cols_block, rows_block, MEAN)
-                    if results is not None:
-                        gain, error, _ = results
+                results = self.fit_lh_large(a, cols_block, rows_block, MEAN)
+                if results is not None:
+                    gain, error, _ = results
 
-                        weights = 1 / (error**2 + 1e-10)
-                        mask = ~np.isnan(gain)
+                    weights = 1 / (error**2 + 1e-10)
+                    mask = ~np.isnan(gain)
 
-                        np.add.at(weighted_gains, (rows_block[mask], cols_block[mask]), gain[mask] * weights[mask])
-                        np.add.at(sum_weights, (rows_block[mask], cols_block[mask]), weights[mask])
+                    np.add.at(weighted_gains, (rows_block[mask], cols_block[mask]), gain[mask] * weights[mask])
+                    np.add.at(sum_weights, (rows_block[mask], cols_block[mask]), weights[mask])
 
 
         values = np.divide(weighted_gains, sum_weights, out=np.full_like(weighted_gains, np.nan), where=sum_weights > 0)
@@ -744,10 +736,6 @@ def fit(self) -> CalibrationMatrix:
         self.cal_matrix.errors = errors
 
         return self.cal_matrix
-
-        
-
-
         
 
From 440dcc77cc822cb87b28872b61af2871519bf973 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Thu, 7 May 2026 14:54:53 +0200
Subject: [PATCH 04/30] Draft.

---
 src/hexsample/calibration.py | 282 ++++++++++++++++++++---------------
 src/hexsample/cli.py         |   6 +-
 src/hexsample/pipeline.py    |   5 +-
 src/hexsample/tasks.py       |  72 +--------
 4 files changed, 172 insertions(+), 193 deletions(-)

diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index 3c56bdc..cc988a7 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -22,22 +22,19 @@
 
 import pathlib
 from enum import Enum
-from typing import Tuple
-from time import time
-from tqdm import tqdm
 from itertools import product
+from typing import Tuple, Optional
+from tqdm import tqdm
 
 import h5py
 import numpy as np
 from aptapy.hist import Histogram3d
 from scipy.sparse import csr_matrix
-from scipy.sparse.linalg import lsmr
 from iminuit import Minuit
 
-from .hexagon import HexagonalGrid
 from .clustering import Cluster
 from .digi import DigiEventRectangular
-from .recon import DEFAULT_IONIZATION_POTENTIAL
+from .pdf import SpectrumPDF
 
 
 class CalibrationType(str, Enum):
@@ -614,16 +611,16 @@ class CalibrateGain(CalibrateBase):
         The energy of the monochromatic source, in eV.
     """
 
-    def __init__(self, num_cols: int, num_rows: int, energy: float) -> None:
+    def __init__(self, num_cols: int, num_rows: int, pdf: SpectrumPDF) -> None:
         """Class constructor.
         """
         super().__init__(num_cols, num_rows)
-        self._energy = energy
-
         self._event_count = 0
         self._pha = []
         self._coords = []
         self._event_rows = []
+        self._pdf = pdf
+        self._derivative = pdf.pdf.derivative(1)
 
     def analyze_cluster(self, cluster: Cluster) -> None:
         """Analyze the event cluster to update the calibration matrix.
@@ -644,139 +641,180 @@ def analyze_cluster(self, cluster: Cluster) -> None:
         self._event_count += 1
         self.cal_matrix.num_events += 1
 
-    def fit_lh_large(self, a, cols, rows, mean):
-        idxs_sub = (rows * 304 + cols).astype(int)
-        total_event_sum = np.array(a.sum(axis=1)).flatten()
-        a_sub = a[:, idxs_sub]
-        sub_event_sum = np.array(a_sub.sum(axis=1)).flatten()
-        mask = (sub_event_sum == total_event_sum) & (sub_event_sum > 0)
-        a_sub = a_sub[mask]
-        # Remove no active pixels
-        pixel_sum = np.array(a_sub.sum(axis=0)).flatten()
-        active_mask = pixel_sum > 0
+    def _cut_data(self, data: csr_matrix, cols: np.ndarray, rows: np.ndarray,
+                  event_sum: np.ndarray) -> Tuple[csr_matrix, np.ndarray]:
+        """Cut the data to select only the events that have all the charge contained
+        in the pixels of the subregion defined by the input column and row coordinates.
+        This is done to ensure that the fit is performed only on events that are fully
+        contained in the subregion.
 
-        active_idxs = np.where(active_mask)[0]
-        a_final = a_sub[:, active_idxs]
+        Arguments
+        ---------
+        data : csr_matrix
+            The sparse matrix containing the pha values for the events in the entire detector.
+        cols : np.ndarray
+            The column coordinates of the pixels in the subregion.
+        rows : np.ndarray
+            The row coordinates of the pixels in the subregion.
+        event_sum : np.ndarray
+            The total ADC count for each event.
 
-        if a_final.nnz == 0 or a_final.shape[1] == 0:
-            return None
+        Returns
+        -------
+        data_active_pixels : csr_matrix
+            The sparse matrix containing the pha values for the events in the subregion, with
+            only the active pixels.
+        mask : np.ndarray
+            The boolean mask indicating which pixels in the subregion are active.
+        """
+        # Calculate the index of the pixels in the flattened array to select the
+        # correct columns of the sparse matrix.
+        pixels_idxs = (rows * self.cal_matrix.shape[1] + cols).astype(int)
+        # Select all the events but only the columns corresponding to the pixels
+        # in the subregion defined by the indices.
+        data_region = data[:, pixels_idxs]
+        # Select only the events in the subregion where all the charge is contained
+        # in the pixels of the subregion. Add also another cut to select only the
+        # events with total charge above 0 ADC, just to be sure to remove empty events.
+        sub_region_event_sum = np.array(data_region.sum(axis=1)).flatten()
+        mask = (sub_region_event_sum == event_sum) & (sub_region_event_sum > 0)
+        data_good_events = data_region[mask]
+        # We want to remove the pixels in the subregion that never got hit, since
+        # only pixels with signal can be fitted. This operation ensures that the
+        # number of fit parameters is equal to the number of active pixels.
+        pixel_sum = np.array(data_good_events.sum(axis=0)).flatten()
+        active_mask = pixel_sum > 0
+        data_active_pixels = data_good_events[:, active_mask]
+        return data_active_pixels, active_mask
 
-        def nll_mono(pars):
-            sum_energy = a_final @ pars
-            p = np.exp(- (sum_energy - mean)**2 / (2 * 20 **2))
+    def _likelihood_fit(self, data: csr_matrix, conv_factor: float
+                        ) -> Optional[Tuple[np.ndarray, np.ndarray]]:
+        """Perform the likelihood fit for a region of the detector.
+
+        Returns
+        -------
+        gain : np.ndarray
+            The gain values for the pixels in the region, calculated from the fit.
+        error : np.ndarray
+            The error on the gain values for the pixels in the region, calculated from the fit.
+        """
+        # Define the negative log-likelihood function for the fit.
+        def nll(pars):
+            total_adc = data @ pars
+            p = self._pdf(total_adc * conv_factor)
             return -np.sum(np.log(p + 1e-10))
-        init = np.ones(len(active_idxs))
-        m = Minuit(nll_mono, init)
+
+        def nll_with_grad(pars):
+            # kept for compatibility/testing: returns (val, grad) for an array input
+            energy = (data @ pars) * conv_factor
+            p = self._pdf(energy)
+            dp = self._derivative(energy)
+            p_safe = np.maximum(p, 1e-12)
+            val = -np.sum(np.log(p_safe))
+            common_term = -(dp / p_safe) * conv_factor
+            grad = np.asarray(data.T @ common_term).ravel()
+            return val, grad
+
+        # Create scalar-callable wrappers that accept positional params as required by
+        # iminuit. These convert to numpy arrays internally and return the scalar NLL
+        # and a tuple gradient respectively.
+        def nll_scalar(*params):
+            pars = np.asarray(params, dtype=float)
+            return nll(pars)
+
+        def grad_scalar(*params):
+            pars = np.asarray(params, dtype=float)
+            # reuse the analytic gradient computation
+            _, grad = nll_with_grad(pars)
+            return tuple(float(g) for g in grad)
+
+        # Define the initial parameters for the fit.
+        init_pars = np.ones(data.shape[1])
+        # Initialize the Minuit minimizer with an explicit gradient callable.
+        # Pass starting values as positional args (unpack the numpy array).
+        m = Minuit(nll_scalar, *init_pars.tolist(), grad=grad_scalar)
+        m.limits = [(1e-10, None) for _ in range(len(init_pars))]
         m.errordef = Minuit.LIKELIHOOD
         m.migrad()
-
-        full_region_gain = np.full(len(idxs_sub), np.nan)
-        full_region_error = np.full(len(idxs_sub), np.nan)
-
+        # If the fit is successful, return the gain values and their errors.
         if m.valid:
-            full_region_gain[active_idxs] = 1 / np.array(m.values)
-            full_region_error[active_idxs] = m.errors / np.array(m.values)**2
-        
-        return full_region_gain, full_region_error, idxs_sub
-
+            gain = 1 / np.array(m.values)
+            error = m.errors / np.array(m.values)**2
+            return gain, error
+        return None
 
     def fit(self) -> CalibrationMatrix:
-        """Perform the likelihood fit to determine the gain of each pixel.
+        """Fit the collected events to determine the gain of each pixel.
+
+        Returns
+        -------
+        cal_matrix : CalibrationMatrix
+            Updated calibration matrix with the gain values calculated from the data.
         """
-        # Create the sparse matrix
+        # Create a sparse matrix where the rows correspond to the events and the columns
+        # correspond to the pixels. In each row, the non-zero entries correspond to the
+        # pha values of the pixels that are hit in the event. 
         nrows, ncols = self.cal_matrix.shape
-        # Create the sparse matrix for the least squares fit. This object allows to store
-        # and use efficiently the large and sparse matrix that we need for the fit.
         shape = (self._event_count, nrows * ncols)
-
-        a = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape)
-        # Calculate the mean to normalize the results
-        MEAN = np.array(a.sum(axis=1)).flatten().mean()
-
-        values = self.cal_matrix.values.copy()
-        errors = self.cal_matrix.errors.copy()
-
-        ncols = 304
-        nrows = 352
-        SIZE = 10
-        OVERLAP = 2
-        STRIDE = SIZE - OVERLAP
-        col_starts = np.arange(0, ncols - OVERLAP, STRIDE)
-        row_starts = np.arange(0, nrows - OVERLAP, STRIDE)
-
+        data = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape)
+        # Calculate the sum of the ADC counts in each event, which is used for the data
+        # cuts in the fit, and to calculate the mean ADC count in an event.
+        event_sum = np.array(data.sum(axis=1)).flatten()
+        # Calculate the mean of the ADC counts in an event.
+        conv_factor = self._pdf.loc() / event_sum.mean()
+        # Define the size of the blocks to fit contemporarily.
+        size = 10
+        # Calculate the starting column and row indices for the blocks. We are using an
+        # overlap of 2 pixels between the blocks to ensure that pixels on the edges are
+        # correctly calibrated.
+        col_starts = np.arange(0, ncols - 2, size - 2)
+        row_starts = np.arange(0, nrows - 2, size - 2)
+        block_indices = list(product(row_starts, col_starts))
+        # Create the arrays to store the weighted gain values and the sum of the weights
+        # for each pixel. We will use these arrays to calculate a weighted average of the
+        # results, since some pixels are fitted multiple times due to the overlap.
         weighted_gains = np.zeros((nrows, ncols))
         sum_weights = np.zeros((nrows, ncols))
-
-        for r_start in row_starts:
-            r_end = min(r_start + SIZE, nrows)
-            for c_start in col_starts:
-                c_end = min(c_start + SIZE, ncols)
-                cc, rr = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end))
-
-                cols_block = cc.flatten()
-                rows_block = rr.flatten()
-
-                results = self.fit_lh_large(a, cols_block, rows_block, MEAN)
-                if results is not None:
-                    gain, error, _ = results
-
-                    weights = 1 / (error**2 + 1e-10)
-                    mask = ~np.isnan(gain)
-
-                    np.add.at(weighted_gains, (rows_block[mask], cols_block[mask]), gain[mask] * weights[mask])
-                    np.add.at(sum_weights, (rows_block[mask], cols_block[mask]), weights[mask])
-
-
-        values = np.divide(weighted_gains, sum_weights, out=np.full_like(weighted_gains, np.nan), where=sum_weights > 0)
+        # Start the loop over the blocks
+        pbar = tqdm(block_indices, desc="Fitting chip subregions", miniters=5)
+        for r_start, c_start in pbar:
+            # Calculate the end row and column indices, taking into account the possibility of being at
+            # the end of the chip.
+            r_end = min(r_start + size, nrows)
+            c_end = min(c_start + size, ncols)
+            # Create a meshgrid to get all the coordinates of the pixels in the block.
+            cols, rows = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end))
+            cols = cols.flatten()
+            rows = rows.flatten()
+            # Cut the data to select only the events that have signal in the block.
+            block_data, mask = self._cut_data(data, cols, rows, event_sum)
+            # Check on the data: if there are no good events or no active pixels,
+            # we cannot perform the fit, so we return None.
+            if block_data.nnz == 0 or block_data.shape[1] == 0:
+                continue
+            # Fit the data for the block.
+            results = self._likelihood_fit(block_data, conv_factor)
+            # If the fit is not successful, continue to the next block.
+            if results is None:
+                continue
+            gain, error = results
+            weights = 1 / (error**2 + 1e-10)
+            # Use the mask of the active pixels to update the weighted gains and the sum
+            # of the weights matrices for the pixels in the block that have been fitted.
+            np.add.at(weighted_gains, (rows[mask], cols[mask]), gain * weights)
+            np.add.at(sum_weights, (rows[mask], cols[mask]), weights)
+        pbar.close()
+        # Calculate the final values and errors as the weighted average of the fit results.
+        values = np.divide(weighted_gains, sum_weights,
+                           out=np.full_like(weighted_gains, np.nan),
+                           where=sum_weights > 0)
         errors = np.full_like(weighted_gains, np.nan)
         np.sqrt(sum_weights, out=errors, where=sum_weights > 0)
         np.divide(1, errors, out=errors, where=sum_weights > 0)
-
+        # Write the results back to the calibration matrix.
         self.cal_matrix.values = values
         self.cal_matrix.errors = errors
-
         return self.cal_matrix
-        
-
-
-
-    # def fit(self) -> CalibrationMatrix:
-    #     """Perform the least squares fit to determine the gain of each pixel.
-    #     """
-    #     if self._event_count == 0:
-    #         raise ValueError("No events have been analyzed, cannot perform the fit.")
-    #     nrows, ncols = self.cal_matrix.shape
-    #     # Create the sparse matrix for the least squares fit. This object allows to store
-    #     # and use efficiently the large and sparse matrix that we need for the fit.
-    #     shape = (self._event_count, nrows * ncols)
-    #     a = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape)
-    #     # Create the vector of the expected number of electrons.
-    #     b = np.full(self._event_count, self._energy / DEFAULT_IONIZATION_POTENTIAL)
-    #     # Perform the fit
-    #     results = lsmr(a, b)
-    #     # Get the best-fit weight vector and reshape it to the shape of the calibration matrix.
-    #     weight = results[0].reshape((nrows, ncols))
-    #     signal_power = np.array(a.multiply(a).sum(axis=0)).reshape((nrows, ncols))
-    #     # Calculate the number of degrees of freedom and the mean squared error.
-    #     dof = self._event_count - (ncols * nrows)
-    #     mse = results[3]**2 / max(dof, 1)
-    #     # Calculate the uncertainty of the gain best-fit values.
-    #     sigma_w = np.sqrt(mse / (signal_power + 1e-15))
-    #     with np.errstate(divide='ignore', invalid='ignore'):
-    #         sigma_g_rel = sigma_w / np.abs(weight)
-    #     # Mask for the pixels that have a weight value close to zero (no events)
-    #     mask = np.abs(weight) > 1e-10
-    #     values = self.cal_matrix.values.copy()
-    #     entries = self.cal_matrix.entries
-    #     # Set the gain value for the pixels that pass the quality cut.
-    #     values[mask] = 1 / weight[mask]
-    #     # Set the entries to zero for the pixels that don't pass the quality cut.
-    #     entries[~mask] = 0
-    #     # Write back through the setter so updates persist on the shared object.
-    #     self.cal_matrix.values = values
-    #     self.cal_matrix.errors = np.where(mask, sigma_g_rel * values, self.cal_matrix.errors)
-    #     self.cal_matrix.entries = entries
-    #     return self.cal_matrix
 
 
 class CalibrateENC:
diff --git a/src/hexsample/cli.py b/src/hexsample/cli.py
index 9061df3..17dd143 100644
--- a/src/hexsample/cli.py
+++ b/src/hexsample/cli.py
@@ -29,6 +29,7 @@
     calibration,
     hexagon,
     logging_,
+    pdf,
     pipeline,
     readout,
     roi,
@@ -435,13 +436,12 @@ def add_calibrate_gain_options(self, parser: argparse.ArgumentParser) -> None:
         """Add an option group for the gain calibration properties.
         """
         defaults = tasks.CalibrationGainDefaults
-        parser.add_argument("energy", type=float, help="line energy in eV")
+        parser.add_argument("pdf", type=pdf.SpectrumPDF.from_file,
+                            help="path to the spectrum PDF file")
         CliArgumentParser.add_cal_noise_file(parser, default=None, required=True)
         CliArgumentParser.add_cal_pedestal_file(parser, default=None, required=True)
         CliArgumentParser.add_zero_sup_threshold(parser,
                             default=defaults.zero_sup_threshold)
-        CliArgumentParser.add_num_events(parser, default=defaults.num_events,
-                            intent="used for the gain calibration")
 
     def add_calibrate_eta_options(self, parser: argparse.ArgumentParser) -> None:
         """Add an option group for the eta function calibration properties.
diff --git a/src/hexsample/pipeline.py b/src/hexsample/pipeline.py
index 5181a18..f2cf0a5 100644
--- a/src/hexsample/pipeline.py
+++ b/src/hexsample/pipeline.py
@@ -115,13 +115,12 @@ def calibrate_gain(**kwargs) -> str:
     """Calibrate the gain of the chip.
     """
     input_file_path = kwargs["input_file"]
-    energy = kwargs["energy"]
-    num_events = kwargs.get("num_events", tasks.CalibrationGainDefaults.num_events)
+    pdf = kwargs["pdf"]
     noise_matrix = kwargs["noise"]
     pedestal_matrix = kwargs["pedestal"]
     zero_sup_threshold = kwargs.get("zero_sup_threshold",
                                     tasks.CalibrationGainDefaults.zero_sup_threshold)
-    args = input_file_path, energy, noise_matrix, pedestal_matrix, num_events, zero_sup_threshold
+    args = input_file_path, pdf, noise_matrix, pedestal_matrix, zero_sup_threshold
     return tasks.calibrate_gain(*args)
 
 
diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py
index 9e072bb..291d3bf 100644
--- a/src/hexsample/tasks.py
+++ b/src/hexsample/tasks.py
@@ -21,7 +21,6 @@
 """
 
 import inspect
-import os
 import pathlib
 from dataclasses import dataclass
 from enum import Enum
@@ -70,9 +69,10 @@
     HexagonalReadoutMode,
     HexagonalReadoutRectangular,
 )
+from .pdf import SpectrumPDF
 from .recon import ReconEvent
 from .sensor import Sensor
-from .source import DiskBeam, Line, Source
+from .source import Source
 from .xpol import chip_descriptor
 
 # Make room for the output data.
@@ -654,16 +654,14 @@ class CalibrationGainDefaults:
     definition in this Python module and the command-line interface.
     """
 
-    num_events: int = 200000
     zero_sup_threshold: int = 20
 
 
 def calibrate_gain(
         input_file_path: str,
-        energy: float,
+        pdf: SpectrumPDF,
         noise_matrix: CalibrationMatrix,
         pedestal_matrix: CalibrationMatrix,
-        num_events: int = CalibrationGainDefaults.num_events,
         zero_sup_threshold: int = CalibrationGainDefaults.zero_sup_threshold
         ) -> str:
     """Calibrate gain of the readout chip using the events from a digi file.
@@ -674,9 +672,8 @@ def calibrate_gain(
     input_file_path : str
         The path to the input file.
 
-    energy : float
-        The energy of the X-ray photons in eV. This is used to convert the charge collected in
-        each pixel to the number of electron, which is necessary for the gain calibration.
+    pdf : SpectrumPDF
+        The spectrum probability density function to use for the gain calibration.
 
     noise_matrix : CalibrationMatrix
         The calibration noise matrix to use for the gain calibration.
@@ -684,9 +681,6 @@ def calibrate_gain(
     pedestal_matrix : CalibrationMatrix
         The pedestal matrix to use for the gain calibration.
 
-    num_events : int
-        The number of events to simulate to correct the bias.
-
     zero_sup_threshold : int
         The zero-suppression threshold to use for the clustering in the gain calibration.
     """
@@ -700,7 +694,7 @@ def calibrate_gain(
         header["pitch"], noise_matrix, unit_gain_map, pedestal_matrix
     readout = create_readout(readout_mode, header, *args)
     # Initialize the gain matrix and run the calibration.
-    gain_calibration = CalibrateGain(header["num_cols"], header["num_rows"], energy)
+    gain_calibration = CalibrateGain(header["num_cols"], header["num_rows"], pdf)
     clustering = ClusteringNN(readout, zero_sup_threshold=zero_sup_threshold, num_neighbors=6,
                               pos_recon_algorithm="centroid")
     logger.info("Starting the event loop...")
@@ -712,61 +706,9 @@ def calibrate_gain(
         gain_calibration.analyze_cluster(cluster)
     logger.info("Calculating the gain matrix...")
     gain_matrix = gain_calibration.fit()
-    if not np.any(gain_matrix.entries > 0):
-        raise RuntimeError("No valid gain values found during the first step of calibration," \
-        "cannot proceed further. The possible reason could be a small number of events over " \
-        "the analyzed chip region.")
-    # Create the readout object for the simulation. We are using rectangular readout just because
-    # it's faster to simulate, and using a uniform gain matrix with the mean value of the first
-    # calibration to correct the bias in the gain matrix.
-    # To calculate the mean value, we are excluding the outliers by considering only the values
-    # between the 1st and the 99th percentile.
-    # gain_sim = CalibrationMatrix(header["num_cols"], header["num_rows"])
-    # vals = gain_matrix.values
-    # lower_bound, upper_bound = np.nanpercentile(vals, [1, 99])
-    # gain_sim.set_value(np.mean(vals[(vals > lower_bound) & (vals < upper_bound)]))
-    # simulation_readout = HexagonalReadoutRectangular(HexagonalLayout(header["layout"]),
-    #     header["num_cols"], header["num_rows"], header["pitch"],
-    #     enc=noise_matrix, gain=gain_sim, pedestal=pedestal_matrix)
-    # output = HEXSAMPLE_DATA / "_tmp_simulation_bias.h5"
-    # # Simulate events with the best-fit gain matrix to correct the bias.
-    # logger.info("Simulating file to correct the bias...")
-    # simulate(
-    #     source=Source(Line(energy), DiskBeam(radius=0.15)),
-    #     sensor=Sensor(),
-    #     readout=simulation_readout,
-    #     num_events=num_events,
-    #     output_file_path=output)
-    # tmp_input_file = digi_input_file_class("rectangular")(output)
-    # tmp_gain_calibration = CalibrateGain(header["num_cols"], header["num_rows"], energy)
-    # # Re-run the gain calibration on the simulated events to calculate the correction factor.
-    # logger.info("Starting the event loop for the simulated file...")
-    # for _, event in tqdm(enumerate(tmp_input_file)):
-    #     try:
-    #         cluster = clustering.run(event)
-    #     except IndexError:
-    #         continue
-    #     tmp_gain_calibration.analyze_cluster(cluster)
-    # logger.info("Calculating the gain matrix from the simulation...")
-    # tmp_gain_matrix = tmp_gain_calibration.fit()
-    # # Calculate the correction factor from the simulation.
-    # logger.info("Calculating the correction factor...")
-    # mask = tmp_gain_matrix.entries > 0
-    # # Calculate the residuals between the MC and calibrated gain matrices from the simulation
-    # # and calculate the mean residual to be used as a correction factor.
-    # residuals = (tmp_gain_matrix.values[mask] - gain_sim.values[mask]) / gain_sim.values[mask]
-    # # Exclude the outliers by considering only the values between the 1st and the 99th percentile.
-    # lower_bound, upper_bound = np.percentile(residuals, [1, 99])
-    # mean_residual = np.mean(residuals[(residuals > lower_bound) & (residuals < upper_bound)])
-    # # Apply the correction factor to the gain matrix and save it to a HDF5 file.
-    # mask_gain = gain_matrix.entries > 0
-    # gain_matrix.values[mask_gain] = gain_matrix.values[mask_gain] / (1 + mean_residual)
     output_file_path = input_file_path.replace(".h5", "_matrix_gain.h5")
-    # logger.info(f"Saving corrected gain matrix to {output_file_path}...")
+    logger.info(f"Saving gain matrix to {output_file_path}...")
     gain_matrix.to_hdf5(output_file_path, CalibrationType.GAIN, False)
-    # Close the input files.
-    # tmp_input_file.close()
-    # os.remove(output)
     input_file.close()
     logger.info("Done!")
     return output_file_path

From 4814bdee557ace646e8035c5672fd18558dce247 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Thu, 7 May 2026 17:06:43 +0200
Subject: [PATCH 05/30] New module pdf (with tests) to interpolate spectra.

---
 src/hexsample/pdf.py | 102 +++++++++++++++++++++++++++++++++++++++++++
 tests/test_pdf.py    |  62 ++++++++++++++++++++++++++
 2 files changed, 164 insertions(+)
 create mode 100644 src/hexsample/pdf.py
 create mode 100644 tests/test_pdf.py

diff --git a/src/hexsample/pdf.py b/src/hexsample/pdf.py
new file mode 100644
index 0000000..fc89282
--- /dev/null
+++ b/src/hexsample/pdf.py
@@ -0,0 +1,102 @@
+# Copyright (C) 2023--2026 the hexsample team.
+#
+# For the license terms see the file LICENSE, distributed along with this
+# software.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""Facilities to create and use probability density functions from spectra."""
+
+import numpy as np
+from scipy.interpolate import CubicSpline
+from scipy.stats import gaussian_kde
+
+
+class SpectrumPDF:
+
+    """Create a probability density function from a spectrum.
+
+    This class uses Gaussian kernel density estimation to create a smooth normalized
+    probability density function (PDF) from a set of values. The PDF is then interpolated
+    to allow for efficient evaluation.
+    """
+
+    def __init__(self) -> None:
+        """Class constructor
+        """
+        self.pdf = None
+
+    def fit(self, vals: np.ndarray) -> None:
+        """Fit the data to create the PDF.
+
+        Arguments
+        ---------
+        vals : np.ndarray
+            The values to fit the PDF from.
+        """
+        # Use Gaussian kernel density estimation to create a smooth PDF.
+        kde = gaussian_kde(vals)
+        # Create a grid of points for interpolation. We need to ensure that the grid exceeds
+        # the range of the data to avoid issues with extrapolation.
+        x_grid = np.linspace(min(vals)*0.9, max(vals)*1.1, 1000)
+        y_grid = kde(x_grid)
+        self.pdf = CubicSpline(x_grid, y_grid, extrapolate=True)
+
+    def mean(self) -> float:
+        """Return the mean of the PDF.
+        """
+        if self.pdf is None:
+            raise ValueError("The PDF is empty. Fit the PDF with data or load it from file.")
+        # The location of the PDF can be estimated as the mean of the distribution.
+        x_grid = np.linspace(min(self.pdf.x), max(self.pdf.x), 1000)
+        y_grid = self.pdf(x_grid)
+        area = np.trapezoid(y_grid, x_grid)
+        normalized_pdf = y_grid / area
+        mean = np.sum(x_grid * normalized_pdf) / np.sum(normalized_pdf)
+        return mean
+
+    def to_file(self, file_path: str) -> str:
+        """Save the PDF to a file as a numpy archive.
+        
+        Arguments
+        ---------
+        file_path : str
+            The path to the file where the PDF will be saved.
+        """
+        np.savez(file_path, x=self.pdf.x, c=self.pdf.c)
+        return file_path
+
+    @classmethod
+    def from_file(cls, file_path: str) -> "SpectrumPDF":
+        """Load the PDF from a numpy archive.
+        
+        Arguments
+        ---------
+        file_path : str
+            The path to the file from which the PDF will be loaded.
+        """
+        if not file_path.endswith(".npz"):
+            raise ValueError("The file must be a numpy archive with .npz extension.")
+        data = np.load(file_path)
+        instance = cls()
+        instance.pdf = CubicSpline.construct_fast(data["c"], data["x"], extrapolate=True)
+        return instance
+
+    def __call__(self, x: np.ndarray) -> np.ndarray:
+        """Evaluate the PDF at the given points.
+        """
+        if self.pdf is None:
+            raise ValueError("The PDF is empty. Fit the PDF with data or load it from file.")
+        return self.pdf(x)
diff --git a/tests/test_pdf.py b/tests/test_pdf.py
new file mode 100644
index 0000000..3f7217e
--- /dev/null
+++ b/tests/test_pdf.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2023--2026 the hexsample team.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+"""Test suite for hexsample.pdf
+"""
+
+import numpy as np
+from aptapy.plotting import plt
+
+from hexsample import rng
+from hexsample.pdf import SpectrumPDF
+
+rng.initialize()
+
+def test_unimodal_spectrum_pdf(size: int = 10000):
+    """Test the SpectrumPDF class with a unimodal distribution.
+    """
+    # Create a unimodal distribution.
+    vals = rng.generator.normal(loc=1.0, scale=0.1, size=size)
+    # Create the PDF and fit it to the data.
+    pdf = SpectrumPDF()
+    pdf.fit(vals)
+    # Check that the mean of the PDF is close to the mean of the data.
+    assert np.isclose(pdf.mean(), vals.mean(), atol=0.01)
+    # Plot the pdf
+    plt.figure("Unimodal distribution")
+    xx = np.linspace(vals.min()*0.8, vals.max()*1.2, 1000)
+    plt.plot(xx, pdf(xx), label="PDF")
+    plt.hist(vals, bins=50, density=True, alpha=0.5, label="Data")
+    plt.legend()
+
+
+def test_bimodal_spectrum_pdf(size: int = 100000):
+    """Test the SpectrumPDF class with a bimodal distribution.
+    """
+    # Extract values from a bimodal distribution.
+    vals = rng.generator.choice((1., 2.), p=[0.7, 0.3], size=size)
+    # Add some noise
+    vals += rng.generator.normal(scale=0.2, size=size)
+    # Create the PDF and fit it to the data.
+    pdf = SpectrumPDF()
+    pdf.fit(vals)
+    # Check that the mean of the PDF is close to the mean of the data.
+    assert np.isclose(pdf.mean(), vals.mean(), atol=0.01)
+    # Plot the pdf
+    plt.figure("Bimodal distribution")
+    xx = np.linspace(vals.min()*0.8, vals.max()*1.2, 1000)
+    plt.plot(xx, pdf(xx), label="PDF")
+    plt.hist(vals, bins=50, density=True, alpha=0.5, label="Data")
+    plt.legend()

From 5f26af17c748e898fc218bd737e0d1f7ceaa4ebd Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Thu, 7 May 2026 18:22:55 +0200
Subject: [PATCH 06/30] New calibspec command added.

---
 src/hexsample/cli.py      | 8 ++++++++
 src/hexsample/pipeline.py | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/src/hexsample/cli.py b/src/hexsample/cli.py
index 17dd143..d556f3a 100644
--- a/src/hexsample/cli.py
+++ b/src/hexsample/cli.py
@@ -154,6 +154,14 @@ def __init__(self) -> None:
         self.add_logging_level(synthesize)
         synthesize.set_defaults(runner=pipeline.synthesize_calibration_file)
 
+        # Fit a spectrum to generate a PDF for the gain calibration?
+        calibspec = subparsers.add_parser("calibspec",
+            help="generate a pdf from a spectrum to use in the gain calibration",
+            formatter_class=self._FORMATTER_CLASS)
+        self.add_input_file(calibspec)
+        self.add_logging_level(calibspec)
+        calibspec.set_defaults(runner=pipeline.calibspec)
+        
         # Inspect a calibration matrix?
         calibview = subparsers.add_parser("calibview",
             help="inspect a calibration matrix",
diff --git a/src/hexsample/pipeline.py b/src/hexsample/pipeline.py
index f2cf0a5..3a5a11f 100644
--- a/src/hexsample/pipeline.py
+++ b/src/hexsample/pipeline.py
@@ -69,6 +69,14 @@ def reconstruct(**kwargs) -> str:
     return tasks.reconstruct(*args, kwargs)
 
 
+def calibspec(**kwargs) -> str:
+    """Create a probability density function from a reconstructed spectrum
+    to use in the gain calibration.
+    """
+    input_file_path = kwargs["input_file"]
+    return tasks.calibspec(input_file_path)
+
+
 def calibrate_eta(**kwargs) -> None:
     """Calibrate the eta function using the events from a digi file.
     """

From aef6a7e125f52a333b18527e7455b95fad2a85a8 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Thu, 7 May 2026 18:23:15 +0200
Subject: [PATCH 07/30] New calibspec command added.

---
 src/hexsample/tasks.py | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py
index 291d3bf..ae73c35 100644
--- a/src/hexsample/tasks.py
+++ b/src/hexsample/tasks.py
@@ -62,6 +62,7 @@
 from .hexagon import HexagonalGrid, HexagonalLayout
 from .logging_ import logger
 from .mc import PhotonList
+from .pdf import SpectrumPDF
 from .readout import (
     AbstractReadout,
     HexagonalReadoutBase,
@@ -69,7 +70,6 @@
     HexagonalReadoutMode,
     HexagonalReadoutRectangular,
 )
-from .pdf import SpectrumPDF
 from .recon import ReconEvent
 from .sensor import Sensor
 from .source import Source
@@ -358,6 +358,31 @@ def reconstruct(
     return output_file_path
 
 
+def calibspec(
+        input_file_path: str
+        ) -> str:
+    """Create a probability density function from a reconstructed spectrum to use in
+    the gain calibration.
+
+    Arguments
+    ----------
+    input_file_path : str
+        The path to the input recon file.
+    """
+    name, args = current_call(1)
+    logger.info(f"Running {__name__}.{name} with arguments {args}...")
+    input_file = ReconInputFile(input_file_path)
+    energy = input_file.column("energy")
+    input_file.close()
+    pdf = SpectrumPDF()
+    logger.info("Fitting the PDF to the energy distribution...")
+    pdf.fit(energy)
+    output_file_path = input_file_path.replace(".h5", ".npz")
+    logger.info(f"Saving the PDF to {output_file_path}...")
+    pdf.to_file(output_file_path)
+    return output_file_path
+
+
 @dataclass(frozen=True)
 class CalibrationEtaDefaults:
 
@@ -803,7 +828,7 @@ def quicklook(input_file_path: str) -> None:
         The path to the input recon file.
     """
     # Open the input file
-    name, args = current_call()
+    name, args = current_call(1)
     logger.info(f"Running {__name__}.{name} with arguments {args}...")
     input_file = ReconInputFile(input_file_path)
     # Plotting the reconstructed energy and the true energy
@@ -890,7 +915,7 @@ def calibview(
         The upper quantile of the values in the matrix to be included in the statistics.
     """
     # pylint: disable=too-many-statements
-    name, args = current_call()
+    name, args = current_call(1)
     logger.info(f"Running {__name__}.{name} with arguments {args}...")
     logger.info("Matrix metadata:")
     # Log the metadata of the matrix.
@@ -940,7 +965,8 @@ def calibview(
         mc_edges = np.linspace(np.nanmin(mc_vals), np.nanmax(mc_vals), 100)
         # If Monte Carlo distribution is uniform, we need to modify the edges
         if mc_edges[0] == mc_edges[-1]:
-            mc_edges = np.linspace(mc_edges[0] - mc_edges[0]*0.1, mc_edges[-1] + mc_edges[-1]*0.1, 100)
+            val = mc_edges[0]
+            mc_edges = np.linspace(val*0.9, val*1.1, 100)
         mc_vals_hist = Histogram1d(mc_edges, label="MC Distribution", xlabel=mc_unit).fill(mc_vals)
         plt.figure("Distribution of Monte Carlo truth values")
         mc_vals_hist.plot(statistics=True)

From d2eb7367519e6670fc620880879f831f9017f1a8 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Thu, 7 May 2026 18:34:31 +0200
Subject: [PATCH 08/30] Minor.

---
 src/hexsample/pdf.py | 14 ++++++++++++++
 tests/test_pdf.py    | 15 +++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/src/hexsample/pdf.py b/src/hexsample/pdf.py
index fc89282..bee5baa 100644
--- a/src/hexsample/pdf.py
+++ b/src/hexsample/pdf.py
@@ -93,6 +93,20 @@ def from_file(cls, file_path: str) -> "SpectrumPDF":
         instance = cls()
         instance.pdf = CubicSpline.construct_fast(data["c"], data["x"], extrapolate=True)
         return instance
+    
+    def derivative(self, x: np.ndarray, order: int = 1) -> np.ndarray:
+        """Evaluate the derivative of the PDF at the given points.
+
+        Arguments
+        ---------
+        x : np.ndarray
+            The points at which to evaluate the derivative.
+        order : int, optional
+            The order of the derivative to evaluate (default is 1).
+        """
+        if self.pdf is None:
+            raise ValueError("The PDF is empty. Fit the PDF with data or load it from file.")
+        return self.pdf(x, nu=order)
 
     def __call__(self, x: np.ndarray) -> np.ndarray:
         """Evaluate the PDF at the given points.
diff --git a/tests/test_pdf.py b/tests/test_pdf.py
index 3f7217e..b6a12a6 100644
--- a/tests/test_pdf.py
+++ b/tests/test_pdf.py
@@ -60,3 +60,18 @@ def test_bimodal_spectrum_pdf(size: int = 100000):
     plt.plot(xx, pdf(xx), label="PDF")
     plt.hist(vals, bins=50, density=True, alpha=0.5, label="Data")
     plt.legend()
+
+def test_derivative(size: int = 100000):
+    """Test the derivative of the PDF.
+    """
+    # Create a unimodal distribution.
+    vals = rng.generator.normal(loc=1.0, scale=0.1, size=size)
+    # Create the PDF and fit it to the data.
+    pdf = SpectrumPDF()
+    pdf.fit(vals)
+    derivative = pdf.derivative
+    # Check that the derivative of the PDF is close to zero at the mean of the data.
+    plt.figure("Derivative of a gaussian PDF")
+    xx = np.linspace(vals.min()*0.8, vals.max()*1.2, 10000)
+    plt.plot(xx, derivative(xx))
+    plt.legend()

From e1d0fc949bb3318adde65de2626f4aa1ac2cc7be Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Thu, 7 May 2026 18:48:09 +0200
Subject: [PATCH 09/30] Minor.

---
 pyproject.toml               |  1 +
 src/hexsample/calibration.py | 53 ++++++++++++------------------------
 2 files changed, 18 insertions(+), 36 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 57db12f..4ae94c0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,6 +44,7 @@ dependencies = [
     "xraydb",
     "aptapy>=0.18.0",
     "scikit-image",
+    "iminuit",
   ]
 
 [project.optional-dependencies]
diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index cc988a7..dbaec59 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -23,14 +23,14 @@
 import pathlib
 from enum import Enum
 from itertools import product
-from typing import Tuple, Optional
-from tqdm import tqdm
+from typing import Optional, Tuple
 
 import h5py
 import numpy as np
 from aptapy.hist import Histogram3d
-from scipy.sparse import csr_matrix
 from iminuit import Minuit
+from scipy.sparse import csr_matrix
+from tqdm import tqdm
 
 from .clustering import Cluster
 from .digi import DigiEventRectangular
@@ -620,7 +620,7 @@ def __init__(self, num_cols: int, num_rows: int, pdf: SpectrumPDF) -> None:
         self._coords = []
         self._event_rows = []
         self._pdf = pdf
-        self._derivative = pdf.pdf.derivative(1)
+        self._pdf_derivative = pdf.derivative
 
     def analyze_cluster(self, cluster: Cluster) -> None:
         """Analyze the event cluster to update the calibration matrix.
@@ -703,36 +703,17 @@ def nll(pars):
             total_adc = data @ pars
             p = self._pdf(total_adc * conv_factor)
             return -np.sum(np.log(p + 1e-10))
-
-        def nll_with_grad(pars):
-            # kept for compatibility/testing: returns (val, grad) for an array input
-            energy = (data @ pars) * conv_factor
-            p = self._pdf(energy)
-            dp = self._derivative(energy)
-            p_safe = np.maximum(p, 1e-12)
-            val = -np.sum(np.log(p_safe))
-            common_term = -(dp / p_safe) * conv_factor
-            grad = np.asarray(data.T @ common_term).ravel()
-            return val, grad
-
-        # Create scalar-callable wrappers that accept positional params as required by
-        # iminuit. These convert to numpy arrays internally and return the scalar NLL
-        # and a tuple gradient respectively.
-        def nll_scalar(*params):
-            pars = np.asarray(params, dtype=float)
-            return nll(pars)
-
-        def grad_scalar(*params):
-            pars = np.asarray(params, dtype=float)
-            # reuse the analytic gradient computation
-            _, grad = nll_with_grad(pars)
-            return tuple(float(g) for g in grad)
-
+        # Define the gradient of the log-likelihood function for the fit.
+        def nll_grad(pars):
+            total_adc = data @ pars
+            p = self._pdf(total_adc * conv_factor)
+            dp = self._pdf_derivative(total_adc * conv_factor)
+            grad = -data.T @ (dp / (p + 1e-10)) * conv_factor
+            return np.asarray(grad).flatten()
         # Define the initial parameters for the fit.
         init_pars = np.ones(data.shape[1])
-        # Initialize the Minuit minimizer with an explicit gradient callable.
-        # Pass starting values as positional args (unpack the numpy array).
-        m = Minuit(nll_scalar, *init_pars.tolist(), grad=grad_scalar)
+        # Initialize the Minuit minimizer.
+        m = Minuit(nll, init_pars, grad=nll_grad)
         m.limits = [(1e-10, None) for _ in range(len(init_pars))]
         m.errordef = Minuit.LIKELIHOOD
         m.migrad()
@@ -753,7 +734,7 @@ def fit(self) -> CalibrationMatrix:
         """
         # Create a sparse matrix where the rows correspond to the events and the columns
         # correspond to the pixels. In each row, the non-zero entries correspond to the
-        # pha values of the pixels that are hit in the event. 
+        # pha values of the pixels that are hit in the event.
         nrows, ncols = self.cal_matrix.shape
         shape = (self._event_count, nrows * ncols)
         data = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape)
@@ -761,7 +742,7 @@ def fit(self) -> CalibrationMatrix:
         # cuts in the fit, and to calculate the mean ADC count in an event.
         event_sum = np.array(data.sum(axis=1)).flatten()
         # Calculate the mean of the ADC counts in an event.
-        conv_factor = self._pdf.loc() / event_sum.mean()
+        conv_factor = self._pdf.mean() / event_sum.mean()
         # Define the size of the blocks to fit contemporarily.
         size = 10
         # Calculate the starting column and row indices for the blocks. We are using an
@@ -778,8 +759,8 @@ def fit(self) -> CalibrationMatrix:
         # Start the loop over the blocks
         pbar = tqdm(block_indices, desc="Fitting chip subregions", miniters=5)
         for r_start, c_start in pbar:
-            # Calculate the end row and column indices, taking into account the possibility of being at
-            # the end of the chip.
+            # Calculate the end row and column indices, taking into account the possibility
+            # of being at the end of the chip.
             r_end = min(r_start + size, nrows)
             c_end = min(c_start + size, ncols)
             # Create a meshgrid to get all the coordinates of the pixels in the block.

From dc8b1649abbd70389f071e0da019b62ee18a7bc7 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Fri, 8 May 2026 09:50:27 +0200
Subject: [PATCH 10/30] Added plot method.

---
 src/hexsample/pdf.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/src/hexsample/pdf.py b/src/hexsample/pdf.py
index bee5baa..7a08cb3 100644
--- a/src/hexsample/pdf.py
+++ b/src/hexsample/pdf.py
@@ -19,7 +19,10 @@
 
 """Facilities to create and use probability density functions from spectra."""
 
+from typing import Tuple
+
 import numpy as np
+from aptapy.plotting import plt
 from scipy.interpolate import CubicSpline
 from scipy.stats import gaussian_kde
 
@@ -108,6 +111,27 @@ def derivative(self, x: np.ndarray, order: int = 1) -> np.ndarray:
             raise ValueError("The PDF is empty. Fit the PDF with data or load it from file.")
         return self.pdf(x, nu=order)
 
+    def plot(self, xlim: Tuple[float, float] = None, **kwargs) -> None:
+        """Plot the PDF in a given range.
+
+        Arguments
+        ---------
+        xlim : tuple[float, float], optional
+            The range of x values to plot (default is None, which means the fitting range
+            of the PDF).
+        """
+        plt.figure(kwargs.get("figname", "spectrum_pdf"))
+        if xlim is None:
+            xlim = (min(self.pdf.x), max(self.pdf.x))
+        x_grid = np.linspace(xlim[0], xlim[1], 1000)
+        y_grid = self.pdf(x_grid)
+        plt.plot(x_grid, y_grid, **kwargs)
+        plt.xlabel(kwargs.get("xlabel", "Energy [eV]"))
+        plt.ylabel(kwargs.get("ylabel", "PDF"))
+        plt.xlim(xlim)
+        plt.grid(visible=True, linestyle="--", alpha=0.5, color="gray")
+        plt.tight_layout()
+
     def __call__(self, x: np.ndarray) -> np.ndarray:
         """Evaluate the PDF at the given points.
         """

From c70248e0b6f13d6ab3ff6aab58b0b338c4266024 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Fri, 8 May 2026 11:24:30 +0200
Subject: [PATCH 11/30] Minor.

---
 src/hexsample/clustering.py | 2 ++
 src/hexsample/tasks.py      | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/hexsample/clustering.py b/src/hexsample/clustering.py
index 9fe2eda..268d1e1 100644
--- a/src/hexsample/clustering.py
+++ b/src/hexsample/clustering.py
@@ -323,6 +323,8 @@ def run(self, event) -> Cluster:
         # pylint: disable = invalid-name
         elif isinstance(event, DigiEventRectangular):
             seed_col, seed_row = event.highest_pixel()
+            if readout.is_at_border(seed_col, seed_row):
+                return None
             col = [seed_col]
             row = [seed_row]
             for _col, _row in readout.neighbors(seed_col, seed_row):
diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py
index ae73c35..d8265d9 100644
--- a/src/hexsample/tasks.py
+++ b/src/hexsample/tasks.py
@@ -728,6 +728,8 @@ def calibrate_gain(
             cluster = clustering.run(event)
         except IndexError:
             continue
+        if cluster is None:
+            continue
         gain_calibration.analyze_cluster(cluster)
     logger.info("Calculating the gain matrix...")
     gain_matrix = gain_calibration.fit()

From 98162722dbf49c38dcc22d5893b247e61aa34d8e Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Fri, 8 May 2026 15:50:41 +0200
Subject: [PATCH 12/30] Parallelization implemented for gain calibration.

---
 src/hexsample/calibration.py | 277 +++++++++++++++++++++--------------
 1 file changed, 163 insertions(+), 114 deletions(-)

diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index dbaec59..4037495 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -20,6 +20,7 @@
 """Calibration facilities.
 """
 
+import gc
 import pathlib
 from enum import Enum
 from itertools import product
@@ -29,7 +30,8 @@
 import numpy as np
 from aptapy.hist import Histogram3d
 from iminuit import Minuit
-from scipy.sparse import csr_matrix
+from joblib import Parallel, delayed
+from scipy.sparse import csc_matrix, csr_matrix
 from tqdm import tqdm
 
 from .clustering import Cluster
@@ -595,6 +597,129 @@ def fit(self) -> Tuple[CalibrationMatrix, CalibrationMatrix]:
         return self.noise_cal, self.pedestal_cal
 
 
+def _likelihood_fit(data: csr_matrix, conv_factor: float, pdf: SpectrumPDF,
+                    pdf_derivative: callable) -> Optional[Tuple[np.ndarray, np.ndarray]]:
+    """Perform the likelihood fit for a region of the detector.
+
+    Returns
+    -------
+    gain : np.ndarray
+        The gain values for the pixels in the region, calculated from the fit.
+    error : np.ndarray
+        The error on the gain values for the pixels in the region, calculated from the fit.
+    """
+    # Define the negative log-likelihood function for the fit.
+    def nll(pars):
+        total_adc = data @ pars
+        p = pdf(total_adc * conv_factor)
+        return -np.sum(np.log(p + 1e-10))
+    # Define the gradient of the log-likelihood function for the fit.
+    def nll_grad(pars):
+        total_adc = data @ pars
+        p = pdf(total_adc * conv_factor)
+        dp = pdf_derivative(total_adc * conv_factor)
+        grad = -data.T @ (dp / (p + 1e-10)) * conv_factor
+        return np.asarray(grad).flatten()
+    # Define the initial parameters for the fit.
+    init_pars = np.ones(data.shape[1])
+    # Initialize the Minuit minimizer.
+    m = Minuit(nll, init_pars, grad=nll_grad)
+    m.limits = [(1e-10, None) for _ in range(len(init_pars))]
+    m.errordef = Minuit.LIKELIHOOD
+    m.migrad()
+    # If the fit is successful, return the gain values and their errors, otherwise
+    # return None.
+    if m.valid:
+        gain = 1 / np.array(m.values)
+        error = m.errors / np.array(m.values)**2
+        return gain, error
+    return None
+
+
+def _cut_data(data: csc_matrix, ncols: int, cols: np.ndarray, rows: np.ndarray,
+                event_sum: np.ndarray) -> Tuple[csr_matrix, np.ndarray]:
+    """Cut the data to select only the events that have all the charge contained
+    in the pixels of the subregion defined by the input column and row coordinates.
+    This is done to ensure that the fit is performed only on events that are fully
+    contained in the subregion.
+
+    Arguments
+    ---------
+    data : csr_matrix
+        The sparse matrix containing the pha values for the events in the entire detector.
+    cols : np.ndarray
+        The column coordinates of the pixels in the subregion.
+    rows : np.ndarray
+        The row coordinates of the pixels in the subregion.
+    event_sum : np.ndarray
+        The total ADC count for each event.
+
+    Returns
+    -------
+    data_active_pixels : csr_matrix
+        The sparse matrix containing the pha values for the events in the subregion, with
+        only the active pixels.
+    mask : np.ndarray
+        The boolean mask indicating which pixels in the subregion are active.
+    """
+    # Calculate the index of the pixels in the flattened array to select the
+    # correct columns of the sparse matrix.
+    pixels_idxs = (rows * ncols + cols).astype(int)
+    # Select all the events but only the columns corresponding to the pixels
+    # in the subregion defined by the indices.
+    data_region = data[:, pixels_idxs]
+    # Re-convert to CSR format for efficient row slicing.
+    data_region = data_region.tocsr()
+    # Select only the events in the subregion where all the charge is contained
+    # in the pixels of the subregion. Add also another cut to select only the
+    # events with total charge above 0 ADC, just to be sure to remove empty events.
+    sub_region_event_sum = data_region.sum(axis=1).A1
+    mask = (sub_region_event_sum == event_sum) & (sub_region_event_sum > 0)
+    data_good_events = data_region[mask]
+    # We want to remove the pixels in the subregion that never got hit, since
+    # only pixels with signal can be fitted. This operation ensures that the
+    # number of fit parameters is equal to the number of active pixels.
+    pixel_sum = data_good_events.sum(axis=0).A1
+    active_mask = pixel_sum > 0
+    data_active_pixels = data_good_events[:, active_mask]
+    return data_active_pixels, active_mask
+
+
+def worker_fit_block(r_start: int, c_start: int, size: int, nrows: int, ncols: int,
+                     data: csc_matrix, event_sum: np.ndarray, conv_factor: float,
+                     pdf: SpectrumPDF, pdf_derivative: callable
+                     ) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]]:
+    """Method to perform the fit for a block of pixels in the detector.
+    This method is meant to be used as a function to be executed in parallel for different
+    blocks of pixels.
+
+    .. note:: This function must be defined outside the class to be picklable and usable
+            in parallel execution.
+    """
+    # Calculate the end row and column indices, taking into account the possibility
+    # of being at the end of the chip.
+    r_end = min(r_start + size, nrows)
+    c_end = min(c_start + size, ncols)
+    # Create a meshgrid to get all the coordinates of the pixels in the block.
+    cols, rows = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end))
+    cols = cols.flatten()
+    rows = rows.flatten()
+    # Cut the data to select only the events that have signal in the block.
+    data_fit, active_mask = _cut_data(data, ncols, cols, rows, event_sum)
+    # Check on the data: if there are no good events or no active pixels,
+    # we cannot perform the fit, so we return None.
+    if data_fit.nnz == 0 or data_fit.shape[1] == 0:
+        return None
+    # Fit the data of the block.
+    result = _likelihood_fit(data_fit, conv_factor, pdf, pdf_derivative)
+    # If the fit is not successful, return None and continue with the next block.
+    if result is None:
+        return None
+    # If the fit is successful, return the gain values and their errors and the
+    # coordinates of the active pixels.
+    return result[0], result[1], rows[active_mask], cols[active_mask]
+
+
 class CalibrateGain(CalibrateBase):
 
     """Calibrate the gain of the detector by analyzing the events in a DigiFile. This class takes a
@@ -630,10 +755,10 @@ def analyze_cluster(self, cluster: Cluster) -> None:
         rows = cluster.row
         # Update the arrays for the least squares fit.
         self._pha.extend(cluster.pha)
+        ncols = self.cal_matrix.shape[1]
         for col, row in zip(cols, rows):
             # Calculate the index of the pixel in the flattened array
-            i = row * self.cal_matrix.shape[1] + col
-            self._coords.append(i)
+            self._coords.append(row * ncols + col)
             self._event_rows.append(self._event_count)
             # Update the matrix with the number of events for each pixel
             self.cal_matrix.entries[row, col] += 1
@@ -641,89 +766,6 @@ def analyze_cluster(self, cluster: Cluster) -> None:
         self._event_count += 1
         self.cal_matrix.num_events += 1
 
-    def _cut_data(self, data: csr_matrix, cols: np.ndarray, rows: np.ndarray,
-                  event_sum: np.ndarray) -> Tuple[csr_matrix, np.ndarray]:
-        """Cut the data to select only the events that have all the charge contained
-        in the pixels of the subregion defined by the input column and row coordinates.
-        This is done to ensure that the fit is performed only on events that are fully
-        contained in the subregion.
-
-        Arguments
-        ---------
-        data : csr_matrix
-            The sparse matrix containing the pha values for the events in the entire detector.
-        cols : np.ndarray
-            The column coordinates of the pixels in the subregion.
-        rows : np.ndarray
-            The row coordinates of the pixels in the subregion.
-        event_sum : np.ndarray
-            The total ADC count for each event.
-
-        Returns
-        -------
-        data_active_pixels : csr_matrix
-            The sparse matrix containing the pha values for the events in the subregion, with
-            only the active pixels.
-        mask : np.ndarray
-            The boolean mask indicating which pixels in the subregion are active.
-        """
-        # Calculate the index of the pixels in the flattened array to select the
-        # correct columns of the sparse matrix.
-        pixels_idxs = (rows * self.cal_matrix.shape[1] + cols).astype(int)
-        # Select all the events but only the columns corresponding to the pixels
-        # in the subregion defined by the indices.
-        data_region = data[:, pixels_idxs]
-        # Select only the events in the subregion where all the charge is contained
-        # in the pixels of the subregion. Add also another cut to select only the
-        # events with total charge above 0 ADC, just to be sure to remove empty events.
-        sub_region_event_sum = np.array(data_region.sum(axis=1)).flatten()
-        mask = (sub_region_event_sum == event_sum) & (sub_region_event_sum > 0)
-        data_good_events = data_region[mask]
-        # We want to remove the pixels in the subregion that never got hit, since
-        # only pixels with signal can be fitted. This operation ensures that the
-        # number of fit parameters is equal to the number of active pixels.
-        pixel_sum = np.array(data_good_events.sum(axis=0)).flatten()
-        active_mask = pixel_sum > 0
-        data_active_pixels = data_good_events[:, active_mask]
-        return data_active_pixels, active_mask
-
-    def _likelihood_fit(self, data: csr_matrix, conv_factor: float
-                        ) -> Optional[Tuple[np.ndarray, np.ndarray]]:
-        """Perform the likelihood fit for a region of the detector.
-
-        Returns
-        -------
-        gain : np.ndarray
-            The gain values for the pixels in the region, calculated from the fit.
-        error : np.ndarray
-            The error on the gain values for the pixels in the region, calculated from the fit.
-        """
-        # Define the negative log-likelihood function for the fit.
-        def nll(pars):
-            total_adc = data @ pars
-            p = self._pdf(total_adc * conv_factor)
-            return -np.sum(np.log(p + 1e-10))
-        # Define the gradient of the log-likelihood function for the fit.
-        def nll_grad(pars):
-            total_adc = data @ pars
-            p = self._pdf(total_adc * conv_factor)
-            dp = self._pdf_derivative(total_adc * conv_factor)
-            grad = -data.T @ (dp / (p + 1e-10)) * conv_factor
-            return np.asarray(grad).flatten()
-        # Define the initial parameters for the fit.
-        init_pars = np.ones(data.shape[1])
-        # Initialize the Minuit minimizer.
-        m = Minuit(nll, init_pars, grad=nll_grad)
-        m.limits = [(1e-10, None) for _ in range(len(init_pars))]
-        m.errordef = Minuit.LIKELIHOOD
-        m.migrad()
-        # If the fit is successful, return the gain values and their errors.
-        if m.valid:
-            gain = 1 / np.array(m.values)
-            error = m.errors / np.array(m.values)**2
-            return gain, error
-        return None
-
     def fit(self) -> CalibrationMatrix:
         """Fit the collected events to determine the gain of each pixel.
 
@@ -737,14 +779,23 @@ def fit(self) -> CalibrationMatrix:
         # pha values of the pixels that are hit in the event.
         nrows, ncols = self.cal_matrix.shape
         shape = (self._event_count, nrows * ncols)
-        data = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape)
+        # Convert the lists to numpy array of int16 to save memory.
+        data_pha = np.array(self._pha, dtype=np.int16)
+        data_coords = np.array(self._coords, dtype=np.int32)
+        data_event_rows = np.array(self._event_rows, dtype=np.int32)
+        # Empty the lists to free memory.
+        self._pha = []
+        self._coords = []
+        self._event_rows = []
+        # Create the sparse matrix with the collected data.
+        data_csr = csr_matrix((data_pha, (data_event_rows, data_coords)), shape=shape)
         # Calculate the sum of the ADC counts in each event, which is used for the data
         # cuts in the fit, and to calculate the mean ADC count in an event.
-        event_sum = np.array(data.sum(axis=1)).flatten()
+        event_sum = data_csr.sum(axis=1).A1
         # Calculate the mean of the ADC counts in an event.
         conv_factor = self._pdf.mean() / event_sum.mean()
         # Define the size of the blocks to fit contemporarily.
-        size = 10
+        size = 6
         # Calculate the starting column and row indices for the blocks. We are using an
         # overlap of 2 pixels between the blocks to ensure that pixels on the edges are
         # correctly calibrated.
@@ -756,35 +807,33 @@ def fit(self) -> CalibrationMatrix:
         # results, since some pixels are fitted multiple times due to the overlap.
         weighted_gains = np.zeros((nrows, ncols))
         sum_weights = np.zeros((nrows, ncols))
-        # Start the loop over the blocks
-        pbar = tqdm(block_indices, desc="Fitting chip subregions", miniters=5)
-        for r_start, c_start in pbar:
-            # Calculate the end row and column indices, taking into account the possibility
-            # of being at the end of the chip.
-            r_end = min(r_start + size, nrows)
-            c_end = min(c_start + size, ncols)
-            # Create a meshgrid to get all the coordinates of the pixels in the block.
-            cols, rows = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end))
-            cols = cols.flatten()
-            rows = rows.flatten()
-            # Cut the data to select only the events that have signal in the block.
-            block_data, mask = self._cut_data(data, cols, rows, event_sum)
-            # Check on the data: if there are no good events or no active pixels,
-            # we cannot perform the fit, so we return None.
-            if block_data.nnz == 0 or block_data.shape[1] == 0:
-                continue
-            # Fit the data for the block.
-            results = self._likelihood_fit(block_data, conv_factor)
-            # If the fit is not successful, continue to the next block.
-            if results is None:
+        # Convert the CSR matrix to CSC format to optimize column slicing during data cuts.
+        data_csc = data_csr.tocsc()
+        # Free the memory used by the CSR matrix and temporary arrays.
+        del data_csr, data_pha, data_coords, data_event_rows
+        gc.collect()
+        # Start the parallel processing of the blocks.
+        args = (size, nrows, ncols, data_csc, event_sum, conv_factor,
+                self._pdf, self._pdf_derivative)
+        bar_format = "{desc}: {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]"
+        results = Parallel(n_jobs=-1, backend="loky")(
+            delayed(worker_fit_block)(
+                r_start, c_start, *args) 
+                for r_start, c_start in tqdm(block_indices, total=len(block_indices),
+                                             bar_format=bar_format)
+            )
+        # Now we need to aggregate the results from the blocks.
+        for _result in results:
+            # If the block is empty or the fit is not successful, move to the next block.
+            if _result is None:
                 continue
-            gain, error = results
+            # Unpack the results from the block fit.
+            gain, error, rows, cols = _result
             weights = 1 / (error**2 + 1e-10)
             # Use the mask of the active pixels to update the weighted gains and the sum
             # of the weights matrices for the pixels in the block that have been fitted.
-            np.add.at(weighted_gains, (rows[mask], cols[mask]), gain * weights)
-            np.add.at(sum_weights, (rows[mask], cols[mask]), weights)
-        pbar.close()
+            np.add.at(weighted_gains, (rows, cols), gain * weights)
+            np.add.at(sum_weights, (rows, cols), weights)
         # Calculate the final values and errors as the weighted average of the fit results.
         values = np.divide(weighted_gains, sum_weights,
                            out=np.full_like(weighted_gains, np.nan),

From 6b1eabfb9f9c26061e4ed2e8793282eaeeba8f6a Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Fri, 8 May 2026 15:52:37 +0200
Subject: [PATCH 13/30] Updating dependencies.

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 4ae94c0..726a8db 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,6 +45,7 @@ dependencies = [
     "aptapy>=0.18.0",
     "scikit-image",
     "iminuit",
+    "joblib",
   ]
 
 [project.optional-dependencies]

From 8e9ec8f88e050989fef9fef4ab7a8ac54e683f17 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Fri, 8 May 2026 15:52:47 +0200
Subject: [PATCH 14/30] Linting.

---
 src/hexsample/calibration.py | 2 +-
 src/hexsample/cli.py         | 2 +-
 src/hexsample/pdf.py         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index 4037495..ced1f5a 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -818,7 +818,7 @@ def fit(self) -> CalibrationMatrix:
         bar_format = "{desc}: {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]"
         results = Parallel(n_jobs=-1, backend="loky")(
             delayed(worker_fit_block)(
-                r_start, c_start, *args) 
+                r_start, c_start, *args)
                 for r_start, c_start in tqdm(block_indices, total=len(block_indices),
                                              bar_format=bar_format)
             )
diff --git a/src/hexsample/cli.py b/src/hexsample/cli.py
index d556f3a..9597541 100644
--- a/src/hexsample/cli.py
+++ b/src/hexsample/cli.py
@@ -161,7 +161,7 @@ def __init__(self) -> None:
         self.add_input_file(calibspec)
         self.add_logging_level(calibspec)
         calibspec.set_defaults(runner=pipeline.calibspec)
-        
+
         # Inspect a calibration matrix?
         calibview = subparsers.add_parser("calibview",
             help="inspect a calibration matrix",
diff --git a/src/hexsample/pdf.py b/src/hexsample/pdf.py
index 7a08cb3..02887fc 100644
--- a/src/hexsample/pdf.py
+++ b/src/hexsample/pdf.py
@@ -96,7 +96,7 @@ def from_file(cls, file_path: str) -> "SpectrumPDF":
         instance = cls()
         instance.pdf = CubicSpline.construct_fast(data["c"], data["x"], extrapolate=True)
         return instance
-    
+
     def derivative(self, x: np.ndarray, order: int = 1) -> np.ndarray:
         """Evaluate the derivative of the PDF at the given points.
 

From ad5909b04b0f3ccfff075ad53170c6992049f19c Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Fri, 8 May 2026 16:29:31 +0200
Subject: [PATCH 15/30] Minor.

---
 src/hexsample/calibration.py | 42 ++++++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index ced1f5a..69ba188 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -627,6 +627,15 @@ def nll_grad(pars):
     m.limits = [(1e-10, None) for _ in range(len(init_pars))]
     m.errordef = Minuit.LIKELIHOOD
     m.migrad()
+    # If the first fit is not successful, try to perform a second fit. We are not passing
+    # the gradient, because the fit should be more robust (but slower).
+    if not m.valid:
+        print("Retrying fit without gradient...")
+        m = Minuit(nll, init_pars)
+        m.limits = [(1e-10, None) for _ in range(len(init_pars))]
+        m.errordef = Minuit.LIKELIHOOD
+        m.migrad()
+        if m.valid: print("Fit successful without gradient.")
     # If the fit is successful, return the gain values and their errors, otherwise
     # return None.
     if m.valid:
@@ -685,16 +694,29 @@ def _cut_data(data: csc_matrix, ncols: int, cols: np.ndarray, rows: np.ndarray,
     return data_active_pixels, active_mask
 
 
-def worker_fit_block(r_start: int, c_start: int, size: int, nrows: int, ncols: int,
-                     data: csc_matrix, event_sum: np.ndarray, conv_factor: float,
-                     pdf: SpectrumPDF, pdf_derivative: callable
-                     ) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]]:
+def _fit_block(
+        r_start: int, c_start: int, size: int, nrows: int, ncols: int, data: csc_matrix,
+        event_sum: np.ndarray, conv_factor: float, pdf: SpectrumPDF, pdf_derivative: callable
+    ) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]]:
     """Method to perform the fit for a block of pixels in the detector.
     This method is meant to be used as a function to be executed in parallel for different
     blocks of pixels.
 
     .. note:: This function must be defined outside the class to be picklable and usable
             in parallel execution.
+
+    Returns
+    -------
+    gain : np.ndarray
+        The gain values for the pixels in the block, calculated from the fit.
+    error : np.ndarray
+        The error on the gain values for the pixels in the block, calculated from the fit.
+    rows : np.ndarray
+        The row coordinates of the active pixels in the block.
+    cols : np.ndarray
+        The column coordinates of the active pixels in the block.
+    num_events_per_pixel : np.ndarray
+        The number of events for each active pixel in the block.
     """
     # Calculate the end row and column indices, taking into account the possibility
     # of being at the end of the chip.
@@ -706,6 +728,7 @@ def worker_fit_block(r_start: int, c_start: int, size: int, nrows: int, ncols: i
     rows = rows.flatten()
     # Cut the data to select only the events that have signal in the block.
     data_fit, active_mask = _cut_data(data, ncols, cols, rows, event_sum)
+    num_events_per_pixel = np.asarray((data_fit > 0).sum(axis=0)).flatten()
     # Check on the data: if there are no good events or no active pixels,
     # we cannot perform the fit, so we return None.
     if data_fit.nnz == 0 or data_fit.shape[1] == 0:
@@ -717,7 +740,7 @@ def worker_fit_block(r_start: int, c_start: int, size: int, nrows: int, ncols: i
         return None
     # If the fit is successful, return the gain values and their errors and the
     # coordinates of the active pixels.
-    return result[0], result[1], rows[active_mask], cols[active_mask]
+    return result[0], result[1], rows[active_mask], cols[active_mask], num_events_per_pixel
 
 
 class CalibrateGain(CalibrateBase):
@@ -760,8 +783,6 @@ def analyze_cluster(self, cluster: Cluster) -> None:
             # Calculate the index of the pixel in the flattened array
             self._coords.append(row * ncols + col)
             self._event_rows.append(self._event_count)
-            # Update the matrix with the number of events for each pixel
-            self.cal_matrix.entries[row, col] += 1
         # Update the event count
         self._event_count += 1
         self.cal_matrix.num_events += 1
@@ -807,6 +828,7 @@ def fit(self) -> CalibrationMatrix:
         # results, since some pixels are fitted multiple times due to the overlap.
         weighted_gains = np.zeros((nrows, ncols))
         sum_weights = np.zeros((nrows, ncols))
+        entries = np.zeros((nrows, ncols), dtype=int)
         # Convert the CSR matrix to CSC format to optimize column slicing during data cuts.
         data_csc = data_csr.tocsc()
         # Free the memory used by the CSR matrix and temporary arrays.
@@ -817,7 +839,7 @@ def fit(self) -> CalibrationMatrix:
                 self._pdf, self._pdf_derivative)
         bar_format = "{desc}: {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]"
         results = Parallel(n_jobs=-1, backend="loky")(
-            delayed(worker_fit_block)(
+            delayed(_fit_block)(
                 r_start, c_start, *args)
                 for r_start, c_start in tqdm(block_indices, total=len(block_indices),
                                              bar_format=bar_format)
@@ -828,12 +850,13 @@ def fit(self) -> CalibrationMatrix:
             if _result is None:
                 continue
             # Unpack the results from the block fit.
-            gain, error, rows, cols = _result
+            gain, error, rows, cols, num_events_per_pixel = _result
             weights = 1 / (error**2 + 1e-10)
             # Use the mask of the active pixels to update the weighted gains and the sum
             # of the weights matrices for the pixels in the block that have been fitted.
             np.add.at(weighted_gains, (rows, cols), gain * weights)
             np.add.at(sum_weights, (rows, cols), weights)
+            np.maximum.at(entries, (rows, cols), num_events_per_pixel)
         # Calculate the final values and errors as the weighted average of the fit results.
         values = np.divide(weighted_gains, sum_weights,
                            out=np.full_like(weighted_gains, np.nan),
@@ -844,6 +867,7 @@ def fit(self) -> CalibrationMatrix:
         # Write the results back to the calibration matrix.
         self.cal_matrix.values = values
         self.cal_matrix.errors = errors
+        self.cal_matrix.entries = entries
         return self.cal_matrix
 
 
From 018175aa70d503b60d9f3b981d29476dbaa30cf1 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Fri, 8 May 2026 16:47:33 +0200
Subject: [PATCH 16/30] Minor.

---
 src/hexsample/calibration.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index 69ba188..95a17cd 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -296,6 +296,7 @@ def from_hdf5(cls, file_path: str) -> "CalibrationMatrix":
             attrs = dict(h5file.attrs)
             # Instantiate the object with the attributes loaded from the header.
             obj = cls(num_cols=attrs["num_cols"], num_rows=attrs["num_rows"])
+            obj.num_events = attrs.get(CalibrationMetadata.NUM_EVENTS.value)
             for key, val in attrs.items():
                 obj._metadata[key] = val
             # Load the matrix and the hits matrices from the HDF5 file.
@@ -630,12 +631,10 @@ def nll_grad(pars):
     # If the first fit is not successful, try to perform a second fit. We are not passing
     # the gradient, because the fit should be more robust (but slower).
     if not m.valid:
-        print("Retrying fit without gradient...")
         m = Minuit(nll, init_pars)
         m.limits = [(1e-10, None) for _ in range(len(init_pars))]
         m.errordef = Minuit.LIKELIHOOD
         m.migrad()
-        if m.valid: print("Fit successful without gradient.")
     # If the fit is successful, return the gain values and their errors, otherwise
     # return None.
     if m.valid:

From 3417ebf0d6ad0d2a688165f45e453607f1e136ed Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Sun, 10 May 2026 14:51:31 +0200
Subject: [PATCH 17/30] Refactoring gain matrix to equalization matrix.

---
 src/hexsample/caldb.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/hexsample/caldb.py b/src/hexsample/caldb.py
index 638d1ea..bf18e5d 100644
--- a/src/hexsample/caldb.py
+++ b/src/hexsample/caldb.py
@@ -60,7 +60,7 @@ def open_noise(cls, designator: str) -> CalibrationMatrix:
         return cls._open(CalibrationType.NOISE, designator)
 
     @classmethod
-    def open_gain(cls, designator: str) -> CalibrationMatrix:
-        """Open the gain calibration file for the given designation.
+    def open_equalization(cls, designator: str) -> CalibrationMatrix:
+        """Open the pixel equalization calibration file for the given designation.
         """
-        return cls._open(CalibrationType.GAIN, designator)
+        return cls._open(CalibrationType.EQUALIZATION, designator)

From 1b140084e7eede56f65af04911b90bc29f79f370 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Sun, 10 May 2026 15:13:55 +0200
Subject: [PATCH 18/30] Added new EQUALIZATION calibration type.

---
 src/hexsample/caldb.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/hexsample/caldb.py b/src/hexsample/caldb.py
index bf18e5d..fb509ab 100644
--- a/src/hexsample/caldb.py
+++ b/src/hexsample/caldb.py
@@ -59,8 +59,14 @@ def open_noise(cls, designator: str) -> CalibrationMatrix:
         """
         return cls._open(CalibrationType.NOISE, designator)
 
+    @classmethod
+    def open_gain(cls, designator: str) -> CalibrationMatrix:
+        """Open the gain calibration file for the given designation.
+        """
+        return cls._open(CalibrationType.GAIN, designator)
+
     @classmethod
     def open_equalization(cls, designator: str) -> CalibrationMatrix:
-        """Open the pixel equalization calibration file for the given designation.
+        """Open the equalization calibration file for the given designation.
         """
         return cls._open(CalibrationType.EQUALIZATION, designator)

From 57efdc394ee3e67e7bbf8ee426b8b87801bfbe24 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Sun, 10 May 2026 15:51:58 +0200
Subject: [PATCH 19/30] Added ADC counts column in Recon files.

---
 src/hexsample/fileio.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/hexsample/fileio.py b/src/hexsample/fileio.py
index 677e500..cb2c032 100644
--- a/src/hexsample/fileio.py
+++ b/src/hexsample/fileio.py
@@ -226,9 +226,10 @@ class ReconDescription(tables.IsDescription):
     livetime = tables.Int32Col(pos=2)
     roi_size = tables.Int32Col(pos=3)
     cluster_size = tables.Int8Col(pos=4)
-    energy = tables.Float32Col(pos=5)
-    posx = tables.Float32Col(pos=6)
-    posy = tables.Float32Col(pos=7)
+    adc = tables.Int32Col(pos=5)
+    energy = tables.Float32Col(pos=6)
+    posx = tables.Float32Col(pos=7)
+    posy = tables.Float32Col(pos=8)
 
 def _fill_recon_row(row: tables.tableextension.Row, event: ReconEvent) -> None:
     """Helper function to fill an output table row, given a ReconEvent object.
@@ -244,6 +245,7 @@ def _fill_recon_row(row: tables.tableextension.Row, event: ReconEvent) -> None:
     row["livetime"] = event.livetime
     #row["roi_size"] = event.roi_size
     row["cluster_size"] = event.cluster.size()
+    row["adc"] = event.adc()
     row["energy"] = event.energy()
     row["posx"], row["posy"] = event.position()
     row.append()

From 0a002dac86abfe9f72f5eee3f8d54a2bc8d6ba55 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Sun, 10 May 2026 15:52:59 +0200
Subject: [PATCH 20/30] Added adc method.

---
 src/hexsample/recon.py | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/src/hexsample/recon.py b/src/hexsample/recon.py
index 8b39d7e..1117036 100644
--- a/src/hexsample/recon.py
+++ b/src/hexsample/recon.py
@@ -23,12 +23,8 @@
 from dataclasses import dataclass
 from typing import Tuple
 
-import xraydb
-
 from .clustering import Cluster
 
-DEFAULT_IONIZATION_POTENTIAL = xraydb.ionization_potential("Si")
-
 @dataclass
 class ReconEventBase:
 
@@ -54,15 +50,15 @@ class ReconEventBase:
     livetime: int
     cluster: Cluster
 
-    def energy(self, ionization_potential: float = DEFAULT_IONIZATION_POTENTIAL) -> float:
-        """Return the energy of the event in eV.
+    def adc(self) -> int:
+        """Return the total ADC count for the event.
+        """
+        return self.cluster.pulse_height()
 
-        .. warning::
-           This is currently using the ionization energy of Silicon to do the
-           conversion, assuming a detector gain of 1. We will need to do some
-           bookkeeping, here, to make this work reliably.
+    def energy(self) -> float:
+        """Return the energy of the event in eV.
         """
-        return ionization_potential * self.cluster.pulse_height()
+        return self.cluster.energy()
 
     def position(self) -> Tuple[float, float]:
         """Return the reconstructed position of the event.
@@ -100,15 +96,15 @@ class ReconEvent:
     #roi_size: int
     cluster: Cluster
 
-    def energy(self, ionization_potential: float = DEFAULT_IONIZATION_POTENTIAL) -> float:
-        """Return the energy of the event in eV.
+    def adc(self) -> int:
+        """Return the total ADC counts for the event.
+        """
+        return self.cluster.pulse_height()
 
-        .. warning::
-           This is currently using the ionization energy of Silicon to do the
-           conversion, assuming a detector gain of 1. We will need to do some
-           bookkeeping, here, to make this work reliably.
+    def energy(self) -> float:
+        """Return the energy of the event in eV.
         """
-        return ionization_potential * self.cluster.pulse_height()
+        return self.cluster.energy()
 
     def position(self) -> Tuple[float, float]:
         """Return the reconstructed position of the event.

From b9c044353c11dde320548cad9f39cc1a3c505c58 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Mon, 11 May 2026 09:58:22 +0200
Subject: [PATCH 21/30] Refactoring with equaliaztion calibration matrix.

---
 src/hexsample/calibration.py | 142 ++++++++++++++++++++++++-----------
 src/hexsample/cli.py         |  46 ++++++++++--
 src/hexsample/clustering.py  |   8 +-
 src/hexsample/pipeline.py    |  31 +++++---
 src/hexsample/tasks.py       | 107 +++++++++++++++++++-------
 5 files changed, 248 insertions(+), 86 deletions(-)

diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index 95a17cd..3098515 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -28,6 +28,7 @@
 
 import h5py
 import numpy as np
+import xraydb
 from aptapy.hist import Histogram3d
 from iminuit import Minuit
 from joblib import Parallel, delayed
@@ -48,6 +49,7 @@ class CalibrationType(str, Enum):
     PEDESTAL = "pedestal"
     NOISE = "noise"
     GAIN = "gain"
+    EQUALIZATION = "equalization"
 
     @classmethod
     def values(cls) -> Tuple[str, ...]:
@@ -72,6 +74,9 @@ class CalibrationMetadata(str, Enum):
     VERSION = "version"
     CALIBRATION_TYPE = "calibration_type"
     IS_SYNTHETIC = "is_synthetic"
+    # This is to convert ADC counts to energy and it's stored only in the
+    # equalization matrix, and is measured in eV/ADC count.
+    ADC_TO_EV = "adc_to_ev"
 
 
 class CalibrationUnits(str, Enum):
@@ -82,14 +87,16 @@ class CalibrationUnits(str, Enum):
     ENC = "Electrons"
     NOISE = "ADC counts"
     PEDESTAL = "ADC counts"
-    GAIN = ""
+    GAIN = "ADC counts / electron"
+    EQUALIZATION = ""
 
 
 CALIBRATION_UNITS = {
     CalibrationType.ENC: CalibrationUnits.ENC,
     CalibrationType.NOISE: CalibrationUnits.NOISE,
     CalibrationType.PEDESTAL: CalibrationUnits.PEDESTAL,
-    CalibrationType.GAIN: CalibrationUnits.GAIN
+    CalibrationType.GAIN: CalibrationUnits.GAIN,
+    CalibrationType.EQUALIZATION: CalibrationUnits.EQUALIZATION
 }
 
 
@@ -123,7 +130,8 @@ def __init__(self, num_cols: int, num_rows: int) -> None:
         self._errors = np.full(self._shape, np.nan)
         # Other useful information for the metadata
         self.num_events = 0
-        self._metadata = {
+        self._cached = False
+        self._cached_metadata = {
             CalibrationMetadata.NUM_COLS: num_cols,
             CalibrationMetadata.NUM_ROWS: num_rows
             }
@@ -187,6 +195,8 @@ def errors(self, new_error: np.ndarray) -> None:
     def metadata(self) -> dict:
         """Return the metadata of the calibration matrix.
         """
+        if self._cached:
+            return self._cached_metadata
         mask = self._entries > 0
         # If there are no pixels with events, we can set the average, minimum and maximum number of
         # events to zero.
@@ -197,12 +207,13 @@ def metadata(self) -> dict:
         else:
             entries_avg = entries_min = entries_max = 0
         # Setting the metadata values.
-        self._metadata[CalibrationMetadata.NUM_EVENTS] = self.num_events
-        self._metadata[CalibrationMetadata.ENTRIES_AVG] = entries_avg
-        self._metadata[CalibrationMetadata.ENTRIES_MIN] = entries_min
-        self._metadata[CalibrationMetadata.ENTRIES_MAX] = entries_max
-        self._metadata[CalibrationMetadata.NUM_CALIBRATED_PIXELS] = int(mask.sum())
-        return self._metadata
+        self._cached_metadata[CalibrationMetadata.NUM_EVENTS] = self.num_events
+        self._cached_metadata[CalibrationMetadata.ENTRIES_AVG] = entries_avg
+        self._cached_metadata[CalibrationMetadata.ENTRIES_MIN] = entries_min
+        self._cached_metadata[CalibrationMetadata.ENTRIES_MAX] = entries_max
+        self._cached_metadata[CalibrationMetadata.NUM_CALIBRATED_PIXELS] = int(mask.sum())
+        self._cached = True
+        return self._cached_metadata
 
     def set_value(self, value: float) -> None:
         """Set a value for all the pixels in the calibration matrix.
@@ -269,9 +280,9 @@ def to_hdf5(self, file_path: str, calibration_type: CalibrationType, is_syntheti
             h5file.create_dataset(self.ERRORS, data=self.errors, dtype=np.float32,
                                   **compression_pars)
             # Update the header with the relevant information and metadata.
-            self._metadata[CalibrationMetadata.CALIBRATION_TYPE] = calibration_type.value
-            self._metadata[CalibrationMetadata.IS_SYNTHETIC] = is_synthetic
-            self._metadata[CalibrationMetadata.FILE_NAME] = pathlib.Path(file_path).stem
+            self._cached_metadata[CalibrationMetadata.CALIBRATION_TYPE] = calibration_type.value
+            self._cached_metadata[CalibrationMetadata.IS_SYNTHETIC] = is_synthetic
+            self._cached_metadata[CalibrationMetadata.FILE_NAME] = pathlib.Path(file_path).stem
             for key, val in self.metadata.items():
                 h5file.attrs[key] = val
         return file_path
@@ -298,7 +309,7 @@ def from_hdf5(cls, file_path: str) -> "CalibrationMatrix":
             obj = cls(num_cols=attrs["num_cols"], num_rows=attrs["num_rows"])
             obj.num_events = attrs.get(CalibrationMetadata.NUM_EVENTS.value)
             for key, val in attrs.items():
-                obj._metadata[key] = val
+                obj._cached_metadata[key] = val
             # Load the matrix and the hits matrices from the HDF5 file.
             obj._values = h5file[obj.VALUES][:]
             obj._entries = h5file[obj.ENTRIES][:]
@@ -320,10 +331,10 @@ def __call__(self, col: np.ndarray, row: np.ndarray) -> float:
     def __str__(self) -> str:
         """Return a string representation of the calibration matrix.
         """
-        if CalibrationMetadata.FILE_NAME in self._metadata:
-            return self._metadata[CalibrationMetadata.FILE_NAME]
-        return f"CalibrationMatrix(num_cols={self._metadata[CalibrationMetadata.NUM_COLS]}, " \
-                f"num_rows={self._metadata[CalibrationMetadata.NUM_ROWS]})"
+        if CalibrationMetadata.FILE_NAME in self._cached_metadata:
+            return self._cached_metadata[CalibrationMetadata.FILE_NAME]
+        return f"CalibrationMatrix(num_cols={self._cached_metadata[CalibrationMetadata.NUM_COLS]}, " \
+                f"num_rows={self._cached_metadata[CalibrationMetadata.NUM_ROWS]})"
 
 
 class CalibrateBase:
@@ -598,16 +609,22 @@ def fit(self) -> Tuple[CalibrationMatrix, CalibrationMatrix]:
         return self.noise_cal, self.pedestal_cal
 
 
+# The following methods are used for the equalization matrix calibration. They cannot be moved
+# inside the class because they are used as functions to be executed in parallel, and they
+# need to be defined at the top level of the module to be picklable by joblib.
+
+
 def _likelihood_fit(data: csr_matrix, conv_factor: float, pdf: SpectrumPDF,
                     pdf_derivative: callable) -> Optional[Tuple[np.ndarray, np.ndarray]]:
     """Perform the likelihood fit for a region of the detector.
 
     Returns
     -------
-    gain : np.ndarray
-        The gain values for the pixels in the region, calculated from the fit.
+    equalization : np.ndarray
+        The equalization values for the pixels in the region, calculated from the fit.
     error : np.ndarray
-        The error on the gain values for the pixels in the region, calculated from the fit.
+        The error on the equalization values for the pixels in the region, calculated
+        from the fit.
     """
     # Define the negative log-likelihood function for the fit.
     def nll(pars):
@@ -635,12 +652,12 @@ def nll_grad(pars):
         m.limits = [(1e-10, None) for _ in range(len(init_pars))]
         m.errordef = Minuit.LIKELIHOOD
         m.migrad()
-    # If the fit is successful, return the gain values and their errors, otherwise
-    # return None.
+    # If the fit is successful, return the pixel equalization values and their errors,
+    # otherwise return None.
     if m.valid:
-        gain = 1 / np.array(m.values)
+        equalization = 1 / np.array(m.values)
         error = m.errors / np.array(m.values)**2
-        return gain, error
+        return equalization, error
     return None
 
 
@@ -737,25 +754,28 @@ def _fit_block(
     # If the fit is not successful, return None and continue with the next block.
     if result is None:
         return None
-    # If the fit is successful, return the gain values and their errors and the
+    # If the fit is successful, return the equalization values and their errors and the
     # coordinates of the active pixels.
     return result[0], result[1], rows[active_mask], cols[active_mask], num_events_per_pixel
 
 
-class CalibrateGain(CalibrateBase):
+class CalibrateEqualization(CalibrateBase):
 
-    """Calibrate the gain of the detector by analyzing the events in a DigiFile. This class takes a
-    CalibrationMatrix object as input, and updates the matrix with the data from the file.
-
-    At the moment, the dataset used for this task should contain events from a monochromatic source
-    and with known energy.
+    """Calculate the equalization matrix for the detector readout. This matrix is used
+    to convert the PHA values to Pulse Invariant (PI). By the definition, the mean value
+    of the matrix is 1.0.
 
+    The calibration is peformed by analyzing events in a DigiFile.
+    
     Arguments
     ---------
-    cal_matrix : CalibrationMatrix
-        Calibration matrix to be updated with the gain values calculated from the data.
-    energy : float
-        The energy of the monochromatic source, in eV.
+    num_cols : int
+        The number of columns of the detector readout chip.
+    num_rows : int
+        The number of rows of the detector readout chip.
+    pdf : SpectrumPDF
+        The probability density function of the spectrum of the events in the dataset, which is
+        used for the likelihood fit to calculate the equalization values for the pixels.
     """
 
     def __init__(self, num_cols: int, num_rows: int, pdf: SpectrumPDF) -> None:
@@ -786,9 +806,18 @@ def analyze_cluster(self, cluster: Cluster) -> None:
         self._event_count += 1
         self.cal_matrix.num_events += 1
 
-    def fit(self) -> CalibrationMatrix:
+    def fit(self, size: int) -> CalibrationMatrix:
         """Fit the collected events to determine the gain of each pixel.
 
+        Arguments
+        ---------
+        size : int
+            The length of the square chip region to be fitted simultaneously. The optimal
+            value for this parameter is a trade-off between the number of active pixels in
+            the chip and the computational time of the fit, which increases significantly
+            with the number of pixels. For small active regions, a size of 6 can be a good
+            choice, while for the full chip calibration, 10 is a good value. 
+
         Returns
         -------
         cal_matrix : CalibrationMatrix
@@ -812,10 +841,8 @@ def fit(self) -> CalibrationMatrix:
         # Calculate the sum of the ADC counts in each event, which is used for the data
         # cuts in the fit, and to calculate the mean ADC count in an event.
         event_sum = data_csr.sum(axis=1).A1
-        # Calculate the mean of the ADC counts in an event.
-        conv_factor = self._pdf.mean() / event_sum.mean()
-        # Define the size of the blocks to fit contemporarily.
-        size = 6
+        # Calculate the conversion factor from ADC to eV.
+        adc_to_ev = self._pdf.mean() / event_sum.mean()
         # Calculate the starting column and row indices for the blocks. We are using an
         # overlap of 2 pixels between the blocks to ensure that pixels on the edges are
         # correctly calibrated.
@@ -834,7 +861,7 @@ def fit(self) -> CalibrationMatrix:
         del data_csr, data_pha, data_coords, data_event_rows
         gc.collect()
         # Start the parallel processing of the blocks.
-        args = (size, nrows, ncols, data_csc, event_sum, conv_factor,
+        args = (size, nrows, ncols, data_csc, event_sum, adc_to_ev,
                 self._pdf, self._pdf_derivative)
         bar_format = "{desc}: {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]"
         results = Parallel(n_jobs=-1, backend="loky")(
@@ -867,10 +894,41 @@ def fit(self) -> CalibrationMatrix:
         self.cal_matrix.values = values
         self.cal_matrix.errors = errors
         self.cal_matrix.entries = entries
+        self.cal_matrix._cached_metadata[CalibrationMetadata.ADC_TO_EV] = adc_to_ev
         return self.cal_matrix
 
 
+class CalibrateGain:
+
+    """Class for calibrating the gain of the readout chip.
+
+    This class provides methods to calculate the gain values based on the equalization
+    matrix and sensor material properties.
+    """
+
+    def __init__(self, equalization_matrix: CalibrationMatrix, material_symbol: str) -> None:
+        """Class constructor.
+        """
+        num_rows, num_cols = equalization_matrix.shape
+        self.cal_matrix = CalibrationMatrix(num_cols, num_rows)
+        self.equalization_matrix = equalization_matrix
+        self.material_symbol = material_symbol
+    
+    def fit(self) -> CalibrationMatrix:
+        """Calculate the gain values based on the equalization matrix and the sensor material
+        properties, and update the calibration matrix with the calculated values.
+        """
+        ionization_potential = xraydb.ionization_potential(self.material_symbol)
+        conv_factor = self.equalization_matrix.metadata[CalibrationMetadata.ADC_TO_EV] / ionization_potential
+        self.cal_matrix.values = self.equalization_matrix.values / conv_factor
+        self.cal_matrix.errors = self.equalization_matrix.errors / conv_factor
+        self.cal_matrix.entries = self.equalization_matrix.entries
+        self.cal_matrix.num_events = self.equalization_matrix.num_events
+        return self.cal_matrix
+        
+
 class CalibrateENC:
+
     """Class for calibrating the equivalent noise charge (ENC) of the readout chip.
 
     This class provides methods to calculate the ENC values based on the noise and gain
@@ -878,7 +936,7 @@ class CalibrateENC:
     """
 
     def __init__(self, noise_matrix: CalibrationMatrix, gain_matrix: CalibrationMatrix) -> None:
-        """Class constructor
+        """Class constructor.
         """
         if noise_matrix.shape != gain_matrix.shape:
             raise ValueError("Noise and gain matrices must have the same shape.")
diff --git a/src/hexsample/cli.py b/src/hexsample/cli.py
index 9597541..10c157d 100644
--- a/src/hexsample/cli.py
+++ b/src/hexsample/cli.py
@@ -130,6 +130,13 @@ def __init__(self) -> None:
         self.add_enc_calibration_options(enc)
         self.add_logging_level(enc)
         enc.set_defaults(runner=pipeline.calibrate_enc)
+        # Pixel equalization calibration
+        equalization = calibrate_subparsers.add_parser("equalization",
+                                                    help="calibrate the chip pixel equalization")
+        self.add_input_file(equalization)
+        self.add_calibrate_equalization_options(equalization)
+        self.add_logging_level(equalization)
+        equalization.set_defaults(runner=pipeline.calibrate_equalization)
         # Eta function calibration
         eta = calibrate_subparsers.add_parser("eta", help="calibrate the eta function")
         self.add_input_file(eta)
@@ -138,8 +145,7 @@ def __init__(self) -> None:
         eta.set_defaults(runner=pipeline.calibrate_eta)
         # Gain calibration
         gain = calibrate_subparsers.add_parser("gain", help="calibrate the chip gain")
-        self.add_input_file(gain)
-        self.add_calibrate_gain_options(gain)
+        self.add_gain_calibration_options(gain)
         self.add_logging_level(gain)
         gain.set_defaults(runner=pipeline.calibrate_gain)
         # Noise calibration
@@ -289,6 +295,16 @@ def add_cal_pedestal_file(parser: argparse.ArgumentParser, default: str,
                             "calibration data or name of a calibration file inside the " \
                             "caldb/pedestal folder.")
 
+    @staticmethod
+    def add_cal_equalization_file(parser: argparse.ArgumentParser, default: str,
+                                  required: bool = False) -> None:
+        """Add an option for the equalization calibration file.
+        """
+        parser.add_argument("--equalization", type=caldb.CalDB.open_equalization, default=default,
+                            required=required, help="path to a file containing the equalization " \
+                            "calibration data or name of a calibration file inside the " \
+                            "caldb/equalization folder.")
+
     @staticmethod
     def add_cal_gain_file(parser: argparse.ArgumentParser, default: str,
                           required: bool = False) -> None:
@@ -409,7 +425,8 @@ def add_recon_options(self, parser: argparse.ArgumentParser) -> None:
                            type=str, default="centroid", help="How to reconstruct position")
         CliArgumentParser.add_cal_noise_file(group, default=None, required=True)
         CliArgumentParser.add_cal_pedestal_file(group, default=None, required=True)
-        CliArgumentParser.add_cal_gain_file(group, default=None, required=True)
+        # CliArgumentParser.add_cal_gain_file(group, default=None, required=True)
+        CliArgumentParser.add_cal_equalization_file(group, default=None, required=True)
         group.add_argument("--eta_2pix_rad_sigma", default=defaults.eta_2pix_rad_sigma, type=float,
                            help="probit function sigma parameter for two pixel" \
                            "events eta reconstruction")
@@ -440,12 +457,14 @@ def add_calibrate_dark_options(self, parser: argparse.ArgumentParser) -> None:
                             help="number of events to be analyzed in a batch for the dark" \
                             " calibration")
 
-    def add_calibrate_gain_options(self, parser: argparse.ArgumentParser) -> None:
+    def add_calibrate_equalization_options(self, parser: argparse.ArgumentParser) -> None:
         """Add an option group for the gain calibration properties.
         """
-        defaults = tasks.CalibrationGainDefaults
+        defaults = tasks.CalibrationEqualizationDefaults
         parser.add_argument("pdf", type=pdf.SpectrumPDF.from_file,
                             help="path to the spectrum PDF file")
+        parser.add_argument("--size", type=int, default=defaults.size,
+                            help="length of the square region of the chip to fit simultaneously")
         CliArgumentParser.add_cal_noise_file(parser, default=None, required=True)
         CliArgumentParser.add_cal_pedestal_file(parser, default=None, required=True)
         CliArgumentParser.add_zero_sup_threshold(parser,
@@ -456,7 +475,7 @@ def add_calibrate_eta_options(self, parser: argparse.ArgumentParser) -> None:
         """
         CliArgumentParser.add_cal_noise_file(parser, default=None, required=True)
         CliArgumentParser.add_cal_pedestal_file(parser, default=None, required=True)
-        CliArgumentParser.add_cal_gain_file(parser, default=None, required=True)
+        CliArgumentParser.add_cal_equalization_file(parser, default=None, required=True)
         parser.add_argument("--num_bins", type=int,
                             default=tasks.CalibrationEtaDefaults.num_bins,
                             help="number of bins to be used in the eta function calibration")
@@ -474,6 +493,19 @@ def add_enc_calibration_options(self, parser: argparse.ArgumentParser) -> None:
                             help="directory where the generated ENC calibration file" \
                             " will be saved")
 
+    def add_gain_calibration_options(self, parser: argparse.ArgumentParser) -> None:
+        """Add an option group for the gain calibration properties.
+        """
+        defaults = tasks.CalibrationGainDefaults
+        CliArgumentParser.add_cal_equalization_file(parser, default=None, required=True)
+        parser.add_argument("--material_symbol", type=str,
+                            choices=sensor.material_symbols(),
+                            default=defaults.material_symbol,
+                            help="active sensor material")
+        parser.add_argument("--output_dir", type=str, default=defaults.output_dir,
+                            help="directory where the generated gain calibration file" \
+                            " will be saved")
+
     def add_synthesize_calibration_file_options(self, parser: argparse.ArgumentParser) -> None:
         """Add an option group to generate calibration files.
         """
@@ -505,7 +537,7 @@ def add_display_options(self, parser: argparse.ArgumentParser) -> None:
         default = tasks.DisplayDefaults
         CliArgumentParser.add_cal_noise_file(parser, default=default.noise_matrix)
         CliArgumentParser.add_cal_pedestal_file(parser, default=default.pedestal_matrix)
-        CliArgumentParser.add_cal_gain_file(parser, default=default.gain_matrix)
+        CliArgumentParser.add_cal_equalization_file(parser, default=default.equalization_matrix)
 
     def add_calibview_options(self, parser: argparse.ArgumentParser) -> None:
         """Add an option group for the calibration view properties.
diff --git a/src/hexsample/clustering.py b/src/hexsample/clustering.py
index 268d1e1..20f9517 100644
--- a/src/hexsample/clustering.py
+++ b/src/hexsample/clustering.py
@@ -43,6 +43,7 @@ class Cluster:
     col: np.ndarray
     row: np.ndarray
     pha: np.ndarray
+    adc_to_ev: float
     pos_recon_algorithm: str
     recon_pars: Optional[dict] = None
 
@@ -62,6 +63,11 @@ def pulse_height(self) -> float:
         """
         return self.pha.sum()
 
+    def energy(self) -> float:
+        """Return the energy of the cluster in eV.
+        """
+        return self.pulse_height() * self.adc_to_ev
+
     def centroid(self) -> Tuple[float, float]:
         """Return the cluster centroid.
         """
@@ -348,4 +354,4 @@ def run(self, event) -> Cluster:
         # Sort the arrays in decreasing order before applying the position suppression.
         pha, col, row = self.position_suppress(pha[mask], col[mask], row[mask])
         x, y = self.readout.pixel_to_world(col, row)
-        return Cluster(x, y, col, row, pha, self.pos_recon_algorithm, self.recon_pars)
+        return Cluster(x, y, col, row, pha, readout.gain.metadata["adc_to_ev"], self.pos_recon_algorithm, self.recon_pars)
diff --git a/src/hexsample/pipeline.py b/src/hexsample/pipeline.py
index 3a5a11f..3e66a91 100644
--- a/src/hexsample/pipeline.py
+++ b/src/hexsample/pipeline.py
@@ -50,7 +50,7 @@ def reconstruct(**kwargs) -> str:
     input_file_path = kwargs["input_file"]
     noise_matrix = kwargs["noise"]
     pedestal_matrix = kwargs["pedestal"]
-    gain_matrix = kwargs["gain"]
+    equalization_matrix = kwargs["equalization"]
     suffix = kwargs.get("suffix", defaults.suffix)
     zero_sup_threshold = kwargs.get("zero_sup_threshold", defaults.zero_sup_threshold)
     num_neighbors = kwargs.get("num_neighbors", defaults.num_neighbors)
@@ -64,7 +64,7 @@ def reconstruct(**kwargs) -> str:
     eta_3pix_theta_sigma = kwargs.get("eta_3pix_theta_sigma", defaults.eta_3pix_theta_sigma)
     recon_args = (eta_2pix_rad_sigma, eta_2pix_rad_pivot, eta_3pix_rad_offset, eta_3pix_rad_sigma,
                   eta_3pix_rad_pivot, eta_3pix_theta_sigma)
-    args = input_file_path, noise_matrix, pedestal_matrix, gain_matrix, suffix, \
+    args = input_file_path, noise_matrix, pedestal_matrix, equalization_matrix, suffix, \
             zero_sup_threshold,num_neighbors, max_neighbors, pos_recon_algorithm, *recon_args
     return tasks.reconstruct(*args, kwargs)
 
@@ -81,13 +81,13 @@ def calibrate_eta(**kwargs) -> None:
     """Calibrate the eta function using the events from a digi file.
     """
     input_file_path = kwargs["input_file"]
-    gain_matrix = kwargs["gain"]
+    equalization_matrix = kwargs["equalization"]
     noise_matrix = kwargs["noise"]
     pedestal_matrix = kwargs["pedestal"]
     num_bins = kwargs.get("num_bins", tasks.CalibrationEtaDefaults.num_bins)
     zero_sup_threshold = kwargs.get("zero_sup_threshold",
                                     tasks.CalibrationEtaDefaults.zero_sup_threshold)
-    args = input_file_path, noise_matrix, pedestal_matrix, gain_matrix, num_bins, \
+    args = input_file_path, noise_matrix, pedestal_matrix, equalization_matrix, num_bins, \
             zero_sup_threshold
     return tasks.calibrate_eta(*args)
 
@@ -119,16 +119,27 @@ def calibrate_enc(**kwargs) -> str:
     return tasks.calibrate_enc(*args)
 
 
-def calibrate_gain(**kwargs) -> str:
-    """Calibrate the gain of the chip.
+def calibrate_equalization(**kwargs) -> str:
+    """Calibrate the equalization of the chip.
     """
     input_file_path = kwargs["input_file"]
     pdf = kwargs["pdf"]
     noise_matrix = kwargs["noise"]
     pedestal_matrix = kwargs["pedestal"]
+    size = kwargs.get("size", tasks.CalibrationEqualizationDefaults.size)
     zero_sup_threshold = kwargs.get("zero_sup_threshold",
-                                    tasks.CalibrationGainDefaults.zero_sup_threshold)
-    args = input_file_path, pdf, noise_matrix, pedestal_matrix, zero_sup_threshold
+                                    tasks.CalibrationEqualizationDefaults.zero_sup_threshold)
+    args = input_file_path, pdf, noise_matrix, pedestal_matrix, size, zero_sup_threshold
+    return tasks.calibrate_equalization(*args)
+
+
+def calibrate_gain(**kwargs) -> str:
+    """Calibrate the gain of the chip.
+    """
+    equalization_matrix = kwargs["equalization"]
+    material_symbol = kwargs.get("material_symbol", tasks.CalibrationGainDefaults.material_symbol)
+    output_dir = kwargs.get("output_dir", tasks.CalibrationGainDefaults.output_dir)
+    args = equalization_matrix, material_symbol, output_dir
     return tasks.calibrate_gain(*args)
 
 
@@ -151,10 +162,10 @@ def display(**kwargs) -> None:
     """Display events from a digi or recon file.
     """
     input_file_path = kwargs["input_file"]
-    gain_matrix = kwargs["gain"]
+    equalization_matrix = kwargs["equalization"]
     noise_matrix = kwargs["noise"]
     pedestal_matrix = kwargs["pedestal"]
-    args = input_file_path, noise_matrix, pedestal_matrix, gain_matrix
+    args = input_file_path, noise_matrix, pedestal_matrix, equalization_matrix
     return tasks.display(*args)
 
 
diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py
index d8265d9..d3e34e1 100644
--- a/src/hexsample/tasks.py
+++ b/src/hexsample/tasks.py
@@ -34,9 +34,11 @@
 from . import rng
 from .analysis import create_histogram
 from .calibration import (
+    CalibrationMetadata,
     CALIBRATION_UNITS,
     CalibrateDark,
     CalibrateENC,
+    CalibrateEqualization,
     CalibrateGain,
     CalibrateNoise,
     CalibrationMatrix,
@@ -234,7 +236,7 @@ def reconstruct(
         input_file_path: str,
         noise_matrix: CalibrationMatrix,
         pedestal_matrix: CalibrationMatrix,
-        gain_matrix: CalibrationMatrix,
+        equalization_matrix: CalibrationMatrix,
         suffix: str = ReconstructionDefaults.suffix,
         zero_sup_threshold: int = ReconstructionDefaults.zero_sup_threshold,
         num_neighbors: int = ReconstructionDefaults.num_neighbors,
@@ -267,8 +269,8 @@ def reconstruct(
     pedestal_matrix : CalibrationMatrix
         The pedestal matrix to use for the reconstruction.
 
-    gain_matrix : CalibrationMatrix
-        The gain matrix to use for the reconstruction.
+    equalization_matrix : CalibrationMatrix
+        The equalization matrix to use for the reconstruction.
 
     suffix : str
         The suffix to append to the output file name.
@@ -308,7 +310,7 @@ def reconstruct(
     input_file, header, readout_mode = open_file(input_file_path)
     # Creating the readout object.
     args = HexagonalLayout(header["layout"]), header["num_cols"], header["num_rows"],\
-        header["pitch"], noise_matrix, gain_matrix, pedestal_matrix
+        header["pitch"], noise_matrix, equalization_matrix, pedestal_matrix
     readout = create_readout(readout_mode, header, *args)
     # Define the effective number of neighbors to be used for the clustering. If max_neighbors is
     # specified (i.e. different from -1), it has priority over num_neighbors. It is necessary to
@@ -400,7 +402,7 @@ def calibrate_eta(
         input_file_path: str,
         noise_matrix: CalibrationMatrix,
         pedestal_matrix: CalibrationMatrix,
-        gain_matrix: CalibrationMatrix,
+        equalization_matrix: CalibrationMatrix,
         num_bins: int = CalibrationEtaDefaults.num_bins,
         zero_sup_threshold: int = CalibrationEtaDefaults.zero_sup_threshold
         ) -> None:
@@ -417,8 +419,8 @@ def calibrate_eta(
     pedestal_matrix : CalibrationMatrix
         The pedestal calibration matrix to use for the analysis.
 
-    gain_matrix : CalibrationMatrix
-        The gain calibration matrix to use for the analysis.
+    equalization_matrix : CalibrationMatrix
+        The equalization calibration matrix to use for the analysis.
 
     num_bins : int
         The number of bins to be used in the calibration.
@@ -428,7 +430,7 @@ def calibrate_eta(
     """
     input_file, header, readout_mode = open_file(input_file_path)
     args = HexagonalLayout(header["layout"]), header["num_cols"], header["num_rows"],\
-        header["pitch"], noise_matrix, gain_matrix, pedestal_matrix
+        header["pitch"], noise_matrix, equalization_matrix, pedestal_matrix
     readout = create_readout(readout_mode, header, *args)
     clustering = ClusteringNN(readout, zero_sup_threshold, num_neighbors=6,
                               pos_recon_algorithm="centroid")
@@ -542,6 +544,10 @@ def synthesize_calibration_file(
     # Generate the calibration matrix with the appropriate size and values
     calibration_matrix = CalibrationMatrix(num_cols, num_rows)
     rms = mean * percent_rms / 100
+    if calibration_type == CalibrationType.EQUALIZATION:
+        calibration_matrix._cached_metadata[CalibrationMetadata.ADC_TO_EV] = mean
+        rms /= mean
+        mean = 1.
     logger.info(f"Generating {calibration_type.value} calibration matrix with "
                 f"mean {mean:g} and RMS {rms:g}...")
     calibration_matrix.values = rng.generator.normal(mean, scale=rms, size=(num_rows, num_cols))
@@ -663,7 +669,7 @@ def calibrate_enc(
     logger.info("Calculating the ENC matrix...")
     enc_matrix = enc_calibration.fit()
     noise_file_name = noise_matrix.metadata["file_name"]
-    enc_file_name = noise_file_name.replace("_matrix_noise", "_matrix_enc.h5")
+    enc_file_name = noise_file_name.replace("noise", "enc") + ".h5"
     output_file_path = pathlib.Path(output_dir) / enc_file_name
     logger.info(f"Saving to {output_file_path}...")
     enc_matrix.to_hdf5(output_file_path, CalibrationType.ENC, False)
@@ -672,24 +678,26 @@ def calibrate_enc(
 
 
 @dataclass(frozen=True)
-class CalibrationGainDefaults:
-    """Default parameters for the gain calibration task.
+class CalibrationEqualizationDefaults:
+    """Default parameters for the pixel equalization calibration task.
 
     This is a small helper dataclass to help ensure consistency between the main task
     definition in this Python module and the command-line interface.
     """
 
+    size: int = 10
     zero_sup_threshold: int = 20
 
 
-def calibrate_gain(
+def calibrate_equalization(
         input_file_path: str,
         pdf: SpectrumPDF,
         noise_matrix: CalibrationMatrix,
         pedestal_matrix: CalibrationMatrix,
-        zero_sup_threshold: int = CalibrationGainDefaults.zero_sup_threshold
+        size: int = CalibrationEqualizationDefaults.size,
+        zero_sup_threshold: int = CalibrationEqualizationDefaults.zero_sup_threshold
         ) -> str:
-    """Calibrate gain of the readout chip using the events from a digi file.
+    """Calibrate pixel equalization of the readout chip using the events from a digi file.
     The results are stored as a matrix in a HDF5 file.
 
     Arguments
@@ -705,21 +713,25 @@ def calibrate_gain(
 
     pedestal_matrix : CalibrationMatrix
         The pedestal matrix to use for the gain calibration.
+    
+    size : int
+        The length of the square region of the chip to fit simultaneously during the
+        calibration.
 
     zero_sup_threshold : int
         The zero-suppression threshold to use for the clustering in the gain calibration.
     """
     # Open the input file and extract the readout information.
     input_file, header, readout_mode = open_file(input_file_path)
-    # Define the arguments to create the readout object with unit gain, necessary for the
-    # calibration.
+    # Define the arguments to create the readout object with uniform pixel equalization,
+    # necessary for the calibration.
     unit_gain_map = CalibrationMatrix(header["num_cols"], header["num_rows"])
     unit_gain_map.set_value(1.)
     args = HexagonalLayout(header["layout"]), header["num_cols"], header["num_rows"],\
         header["pitch"], noise_matrix, unit_gain_map, pedestal_matrix
     readout = create_readout(readout_mode, header, *args)
     # Initialize the gain matrix and run the calibration.
-    gain_calibration = CalibrateGain(header["num_cols"], header["num_rows"], pdf)
+    gain_calibration = CalibrateEqualization(header["num_cols"], header["num_rows"], pdf)
     clustering = ClusteringNN(readout, zero_sup_threshold=zero_sup_threshold, num_neighbors=6,
                               pos_recon_algorithm="centroid")
     logger.info("Starting the event loop...")
@@ -732,11 +744,53 @@ def calibrate_gain(
             continue
         gain_calibration.analyze_cluster(cluster)
     logger.info("Calculating the gain matrix...")
+    equalization_matrix = gain_calibration.fit(size)
+    output_file_path = input_file_path.replace(".h5", "_matrix_equalization.h5")
+    logger.info(f"Saving equalization matrix to {output_file_path}...")
+    equalization_matrix.to_hdf5(output_file_path, CalibrationType.EQUALIZATION, False)
+    input_file.close()
+    logger.info("Done!")
+    return output_file_path
+
+
+@dataclass(frozen=True)
+class CalibrationGainDefaults:
+    """Default parameters for the gain calibration task.
+
+    This is a small helper dataclass to help ensure consistency between the main task
+    definition in this Python module and the command-line interface.
+    """
+
+    output_dir: Union[str, pathlib.Path] = HEXSAMPLE_DATA
+    material_symbol: str = "Si"
+
+
+def calibrate_gain(
+        equalization_matrix: CalibrationMatrix,
+        material_symbol: str = CalibrationGainDefaults.material_symbol,
+        output_dir: Union[str, pathlib.Path] = CalibrationGainDefaults.output_dir
+        ) -> str:
+    """Calibrate the gain of the readout chip using the equalization matrix and the
+    ionization potential of the sensor material. The results are stored as a matrix
+    in a HDF5 file.
+
+    Arguments
+    equalization_matrix : CalibrationMatrix
+        The equalization matrix to use for the gain calibration.
+    material_symbol : str
+        The symbol of the sensor material to use for the gain calibration, e.g. "Si" for
+        silicon.
+    """
+    name, args = current_call(num_backward_steps=1)
+    logger.info(f"Running {__name__}.{name} with arguments {args}...")
+    gain_calibration = CalibrateGain(equalization_matrix, material_symbol)
+    logger.info("Calculating the gain matrix...")
     gain_matrix = gain_calibration.fit()
-    output_file_path = input_file_path.replace(".h5", "_matrix_gain.h5")
-    logger.info(f"Saving gain matrix to {output_file_path}...")
+    equalization_file_name = equalization_matrix.metadata["file_name"]
+    gain_file_name = equalization_file_name.replace("equalization", "gain") + ".h5"
+    output_file_path = pathlib.Path(output_dir) / gain_file_name
+    logger.info(f"Saving to {output_file_path}...")
     gain_matrix.to_hdf5(output_file_path, CalibrationType.GAIN, False)
-    input_file.close()
     logger.info("Done!")
     return output_file_path
 
@@ -751,14 +805,14 @@ class DisplayDefaults:
 
     noise_matrix: Optional[CalibrationMatrix] = None
     pedestal_matrix: Optional[CalibrationMatrix] = None
-    gain_matrix: Optional[CalibrationMatrix] = None
+    equalization_matrix: Optional[CalibrationMatrix] = None
 
 
 def display(
         input_file_path: str,
         noise_matrix: Optional[CalibrationMatrix] = DisplayDefaults.noise_matrix,
         pedestal_matrix: Optional[CalibrationMatrix] = DisplayDefaults.pedestal_matrix,
-        gain_matrix: Optional[CalibrationMatrix] = DisplayDefaults.gain_matrix,
+        equalization_matrix: Optional[CalibrationMatrix] = DisplayDefaults.equalization_matrix,
         ) -> None:
     """Display events from a digi file.
 
@@ -773,16 +827,17 @@ def display(
     pedestal_matrix : CalibrationMatrix, optional
         The pedestal calibration matrix to use for the display.
 
-    gain_matrix : CalibrationMatrix, optional
-        The gain calibration matrix to use for the display.
+    equalization_matrix : CalibrationMatrix, optional
+        The equalization calibration matrix to use for the display.
     """
     # Open the input file and extract the header and the readout information.
     input_file, header, readout_mode = open_file(input_file_path)
-    cal_matrices = [noise_matrix, gain_matrix, pedestal_matrix]
+    cal_matrices = [noise_matrix, equalization_matrix, pedestal_matrix]
     missing = [matrix for matrix in cal_matrices if matrix is None]
     # Check if any of the calibration matrices is missing.
     if 0 < len(missing) < len(cal_matrices):
-        logger.warning(f"{len(missing)} calibration matrices are missing.")
+        logger.warning(f"{len(missing)} calibration matrices are missing to perform" \
+                       " event reconstruction.")
     # Initialize the correct type of event display based on the input matrices.
     grid_args = HexagonalLayout(header["layout"]), header["num_cols"], header["num_rows"], \
         header["pitch"]

From 091ba3f36aa80471913ea544e13d36502b0941a3 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Mon, 11 May 2026 10:09:45 +0200
Subject: [PATCH 22/30] Linting.

---
 src/hexsample/calibration.py | 36 ++++++++++++++++++++++++++++--------
 src/hexsample/clustering.py  |  3 ++-
 src/hexsample/recon.py       |  1 +
 src/hexsample/tasks.py       |  4 ++--
 4 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index 3098515..ec1ccd1 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -215,6 +215,18 @@ def metadata(self) -> dict:
         self._cached = True
         return self._cached_metadata
 
+    def update_metadata(self, key: CalibrationMetadata, value) -> None:
+        """Update the metadata of the calibration matrix with a new key-value pair.
+
+        Arguments
+        ---------
+        key : CalibrationMetadata
+            The key of the metadata to be updated.
+        value :
+            The value of the metadata to be updated.
+        """
+        self._cached_metadata[key] = value
+
     def set_value(self, value: float) -> None:
         """Set a value for all the pixels in the calibration matrix.
 
@@ -333,8 +345,9 @@ def __str__(self) -> str:
         """
         if CalibrationMetadata.FILE_NAME in self._cached_metadata:
             return self._cached_metadata[CalibrationMetadata.FILE_NAME]
-        return f"CalibrationMatrix(num_cols={self._cached_metadata[CalibrationMetadata.NUM_COLS]}, " \
-                f"num_rows={self._cached_metadata[CalibrationMetadata.NUM_ROWS]})"
+        return f"CalibrationMatrix( " \
+               f"num_cols={self._cached_metadata[CalibrationMetadata.NUM_COLS]}, " \
+               f"num_rows={self._cached_metadata[CalibrationMetadata.NUM_ROWS]})"
 
 
 class CalibrateBase:
@@ -894,7 +907,7 @@ def fit(self, size: int) -> CalibrationMatrix:
         self.cal_matrix.values = values
         self.cal_matrix.errors = errors
         self.cal_matrix.entries = entries
-        self.cal_matrix._cached_metadata[CalibrationMetadata.ADC_TO_EV] = adc_to_ev
+        self.cal_matrix.update_metadata(CalibrationMetadata.ADC_TO_EV, adc_to_ev)
         return self.cal_matrix
 
 
@@ -913,19 +926,26 @@ def __init__(self, equalization_matrix: CalibrationMatrix, material_symbol: str)
         self.cal_matrix = CalibrationMatrix(num_cols, num_rows)
         self.equalization_matrix = equalization_matrix
         self.material_symbol = material_symbol
-    
+
     def fit(self) -> CalibrationMatrix:
         """Calculate the gain values based on the equalization matrix and the sensor material
         properties, and update the calibration matrix with the calculated values.
         """
+        # Get the ionization potential of the sensor material and the conversion factor
+        # from ADC to eV from the metadata of the equalization matrix.
         ionization_potential = xraydb.ionization_potential(self.material_symbol)
-        conv_factor = self.equalization_matrix.metadata[CalibrationMetadata.ADC_TO_EV] / ionization_potential
-        self.cal_matrix.values = self.equalization_matrix.values / conv_factor
-        self.cal_matrix.errors = self.equalization_matrix.errors / conv_factor
+        adc_to_ev = self.equalization_matrix.metadata[CalibrationMetadata.ADC_TO_EV]
+        # Calculate the conversion factor from ADC to electrons.
+        adc_to_electrons = adc_to_ev / ionization_potential
+        # Calculate the gain values as the equalization values divided by the conversion
+        # factor and update the errors.
+        self.cal_matrix.values = self.equalization_matrix.values / adc_to_electrons
+        self.cal_matrix.errors = self.equalization_matrix.errors / adc_to_electrons
+        # Update some metadata for the gain matrix.
         self.cal_matrix.entries = self.equalization_matrix.entries
         self.cal_matrix.num_events = self.equalization_matrix.num_events
         return self.cal_matrix
-        
+
 
 class CalibrateENC:
 
diff --git a/src/hexsample/clustering.py b/src/hexsample/clustering.py
index 20f9517..62091fe 100644
--- a/src/hexsample/clustering.py
+++ b/src/hexsample/clustering.py
@@ -307,6 +307,7 @@ def run(self, event) -> Cluster:
            for speed using proper numpy array for the offset indexes.
         """
         readout = self.readout
+        adc_to_ev = readout.gain.metadata["adc_to_ev"]
         if isinstance(event, DigiEventCircular):
             # If the readout is circular, we want to take all the neirest neighbors.
             # Trailing -1 is bc the central px is already considered.
@@ -354,4 +355,4 @@ def run(self, event) -> Cluster:
         # Sort the arrays in decreasing order before applying the position suppression.
         pha, col, row = self.position_suppress(pha[mask], col[mask], row[mask])
         x, y = self.readout.pixel_to_world(col, row)
-        return Cluster(x, y, col, row, pha, readout.gain.metadata["adc_to_ev"], self.pos_recon_algorithm, self.recon_pars)
+        return Cluster(x, y, col, row, pha, adc_to_ev, self.pos_recon_algorithm, self.recon_pars)
diff --git a/src/hexsample/recon.py b/src/hexsample/recon.py
index 1117036..679fa75 100644
--- a/src/hexsample/recon.py
+++ b/src/hexsample/recon.py
@@ -25,6 +25,7 @@
 
 from .clustering import Cluster
 
+
 @dataclass
 class ReconEventBase:
 
diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py
index d3e34e1..9c6be60 100644
--- a/src/hexsample/tasks.py
+++ b/src/hexsample/tasks.py
@@ -34,7 +34,6 @@
 from . import rng
 from .analysis import create_histogram
 from .calibration import (
-    CalibrationMetadata,
     CALIBRATION_UNITS,
     CalibrateDark,
     CalibrateENC,
@@ -42,6 +41,7 @@
     CalibrateGain,
     CalibrateNoise,
     CalibrationMatrix,
+    CalibrationMetadata,
     CalibrationType,
 )
 from .clustering import ClusteringNN
@@ -545,7 +545,7 @@ def synthesize_calibration_file(
     calibration_matrix = CalibrationMatrix(num_cols, num_rows)
     rms = mean * percent_rms / 100
     if calibration_type == CalibrationType.EQUALIZATION:
-        calibration_matrix._cached_metadata[CalibrationMetadata.ADC_TO_EV] = mean
+        calibration_matrix.update_metadata(CalibrationMetadata.ADC_TO_EV, mean)
         rms /= mean
         mean = 1.
     logger.info(f"Generating {calibration_type.value} calibration matrix with "

From cae43b37f404fedce543e484ecab25ab21176327 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Mon, 11 May 2026 10:42:15 +0200
Subject: [PATCH 23/30] Release notes.

---
 docs/release_notes.rst       | 29 +++++++++++++++++++++++++++++
 src/hexsample/calibration.py | 16 ++++++++--------
 src/hexsample/cli.py         |  1 -
 src/hexsample/clustering.py  |  2 ++
 src/hexsample/tasks.py       | 10 +++++-----
 5 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index 8106125..e6c5c89 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -4,6 +4,35 @@ Release notes
 =============
 
 
+* Package dependencies updated: `joblib` and `iminuit` added.
+* New calibration matrix type `equalization` added in `caldb.py` to store the pixel equalization
+  factors.
+* New `CalibrateEqualization` class to calibrate the pixel equalization factors. This is
+  performed with a likelihood fit using the source spectrum probability density function (PDF)
+  and the observed energy of the events. Corresponding CLI command `calibgen equalization` added
+  to generate equalization calibration files from event files.
+* The `CalibrateGain` class has been refactored. Now it produces a gain calibration matrix from
+  a equalization calibration matrix, by using the ADC to eV conversion and the chip sensor
+  matieral. The gain matrix is expressed in ADC counts per electron. Corresponding CLI command
+  `calibgen gain` refactored to reflect this change.
+* Some CLI commands refactored to use the equalization calibration file instead of the gain
+  calibration file (`reconstruct`, `display`, `calibgen eta`).
+* New `pdf.py` module containing facilities to calculate the PDF of the source spectrum and store
+  it in a `.npz` file.
+* Added `calibspec` CLI command to create a source spectrum PDF file from a reconstructed event
+  file. The PDF is saved as a `.npz` file.
+* New `adc` method to calculate the total ADC counts of the event added to `ReconEvent`. Now a
+  reconstructed file contains both the ADC counts and the energy in eV of the events (if the 
+  conversion factor from ADC to eV is available in the equalization calibration file).
+* New `energy` method to calculate the total energy of the event added to `Cluster`. The
+  conversion factor from ADC to eV is passed by `ClusteringNN`.
+* Pull requests merged and issues closed:
+
+  - https://github.com/lucabaldini/hexsample/pull/132
+  - https://github.com/lucabaldini/hexsample/issues/129
+
+
+
 Version 0.16.0 (2026-05-05)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index ec1ccd1..dcdba27 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -46,10 +46,10 @@ class CalibrationType(str, Enum):
     """
 
     ENC = "enc"
-    PEDESTAL = "pedestal"
-    NOISE = "noise"
-    GAIN = "gain"
     EQUALIZATION = "equalization"
+    GAIN = "gain"
+    NOISE = "noise"
+    PEDESTAL = "pedestal"
 
     @classmethod
     def values(cls) -> Tuple[str, ...]:
@@ -85,18 +85,18 @@ class CalibrationUnits(str, Enum):
     """
 
     ENC = "Electrons"
+    EQUALIZATION = ""
+    GAIN = "ADC counts / electron"
     NOISE = "ADC counts"
     PEDESTAL = "ADC counts"
-    GAIN = "ADC counts / electron"
-    EQUALIZATION = ""
 
 
 CALIBRATION_UNITS = {
     CalibrationType.ENC: CalibrationUnits.ENC,
+    CalibrationType.EQUALIZATION: CalibrationUnits.EQUALIZATION,
+    CalibrationType.GAIN: CalibrationUnits.GAIN,
     CalibrationType.NOISE: CalibrationUnits.NOISE,
     CalibrationType.PEDESTAL: CalibrationUnits.PEDESTAL,
-    CalibrationType.GAIN: CalibrationUnits.GAIN,
-    CalibrationType.EQUALIZATION: CalibrationUnits.EQUALIZATION
 }
 
 
@@ -115,9 +115,9 @@ class CalibrationMatrix:
         The number of rows of the detector readout chip.
     """
 
-    VALUES = "values"
     ENTRIES = "entries"
     ERRORS = "errors"
+    VALUES = "values"
 
     def __init__(self, num_cols: int, num_rows: int) -> None:
         """Class constructor.
diff --git a/src/hexsample/cli.py b/src/hexsample/cli.py
index 10c157d..ef4898c 100644
--- a/src/hexsample/cli.py
+++ b/src/hexsample/cli.py
@@ -425,7 +425,6 @@ def add_recon_options(self, parser: argparse.ArgumentParser) -> None:
                            type=str, default="centroid", help="How to reconstruct position")
         CliArgumentParser.add_cal_noise_file(group, default=None, required=True)
         CliArgumentParser.add_cal_pedestal_file(group, default=None, required=True)
-        # CliArgumentParser.add_cal_gain_file(group, default=None, required=True)
         CliArgumentParser.add_cal_equalization_file(group, default=None, required=True)
         group.add_argument("--eta_2pix_rad_sigma", default=defaults.eta_2pix_rad_sigma, type=float,
                            help="probit function sigma parameter for two pixel" \
diff --git a/src/hexsample/clustering.py b/src/hexsample/clustering.py
index 62091fe..7aef0f1 100644
--- a/src/hexsample/clustering.py
+++ b/src/hexsample/clustering.py
@@ -312,6 +312,8 @@ def run(self, event) -> Cluster:
             # If the readout is circular, we want to take all the neirest neighbors.
             # Trailing -1 is bc the central px is already considered.
             self.num_neighbors = 6 #HexagonalReadoutCircular.NUM_PIXELS - 1
+            if readout.is_at_border(event.column, event.row):
+                return None
             col = [event.column]
             row = [event.row]
             adc_channel_order = [readout.adc_channel(event.column, event.row)]
diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py
index 9c6be60..1997f60 100644
--- a/src/hexsample/tasks.py
+++ b/src/hexsample/tasks.py
@@ -730,8 +730,8 @@ def calibrate_equalization(
     args = HexagonalLayout(header["layout"]), header["num_cols"], header["num_rows"],\
         header["pitch"], noise_matrix, unit_gain_map, pedestal_matrix
     readout = create_readout(readout_mode, header, *args)
-    # Initialize the gain matrix and run the calibration.
-    gain_calibration = CalibrateEqualization(header["num_cols"], header["num_rows"], pdf)
+    # Initialize the equalization matrix and run the calibration.
+    equalization_calibration = CalibrateEqualization(header["num_cols"], header["num_rows"], pdf)
     clustering = ClusteringNN(readout, zero_sup_threshold=zero_sup_threshold, num_neighbors=6,
                               pos_recon_algorithm="centroid")
     logger.info("Starting the event loop...")
@@ -742,9 +742,9 @@ def calibrate_equalization(
             continue
         if cluster is None:
             continue
-        gain_calibration.analyze_cluster(cluster)
-    logger.info("Calculating the gain matrix...")
-    equalization_matrix = gain_calibration.fit(size)
+        equalization_calibration.analyze_cluster(cluster)
+    logger.info("Calculating the equalization matrix...")
+    equalization_matrix = equalization_calibration.fit(size)
     output_file_path = input_file_path.replace(".h5", "_matrix_equalization.h5")
     logger.info(f"Saving equalization matrix to {output_file_path}...")
     equalization_matrix.to_hdf5(output_file_path, CalibrationType.EQUALIZATION, False)

From 9a909fdbd38138fdbf33bf24a74a7e096dfae671 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Mon, 11 May 2026 10:45:57 +0200
Subject: [PATCH 24/30] Release notes.

---
 docs/release_notes.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index e6c5c89..441db35 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -32,7 +32,6 @@ Release notes
   - https://github.com/lucabaldini/hexsample/issues/129
 
 
-
 Version 0.16.0 (2026-05-05)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

From ce5e52975cc25f4165d8d8545f17cd8a0da34d0e Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Tue, 12 May 2026 09:44:27 +0200
Subject: [PATCH 25/30] Reviewing.

---
 src/hexsample/clustering.py | 1 -
 tests/test_pdf.py           | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/hexsample/clustering.py b/src/hexsample/clustering.py
index 7aef0f1..06da5b3 100644
--- a/src/hexsample/clustering.py
+++ b/src/hexsample/clustering.py
@@ -267,7 +267,6 @@ def position_suppress(self, pha: np.ndarray, col: np.ndarray, row: np.ndarray
         idx = np.argsort(-out_pha)[:3]
         return out_pha[idx], out_col[idx], out_row[idx]
 
-
     def run(self, event: DigiEventRectangular) -> Cluster:
         """Workhorse method to be reimplemented by derived classes.
         """
diff --git a/tests/test_pdf.py b/tests/test_pdf.py
index b6a12a6..46e31d2 100644
--- a/tests/test_pdf.py
+++ b/tests/test_pdf.py
@@ -74,4 +74,3 @@ def test_derivative(size: int = 100000):
     plt.figure("Derivative of a gaussian PDF")
     xx = np.linspace(vals.min()*0.8, vals.max()*1.2, 10000)
     plt.plot(xx, derivative(xx))
-    plt.legend()

From e52deb5ec00111d20b8cb7643d78f7d6cc63e5f5 Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Tue, 12 May 2026 11:44:17 +0200
Subject: [PATCH 26/30] Review.

---
 docs/release_notes.rst       |  4 +--
 src/hexsample/calibration.py | 49 ++++++++++++++++++++++--------------
 src/hexsample/clustering.py  |  5 +++-
 src/hexsample/pdf.py         |  8 +++---
 src/hexsample/tasks.py       | 10 ++++----
 5 files changed, 46 insertions(+), 30 deletions(-)

diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index 5cfa444..298e549 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -12,8 +12,8 @@ Release notes
   and the observed energy of the events. Corresponding CLI command `calibgen equalization` added
   to generate equalization calibration files from event files.
 * The `CalibrateGain` class has been refactored. Now it produces a gain calibration matrix from
-  a equalization calibration matrix, by using the ADC to eV conversion and the chip sensor
-  matieral. The gain matrix is expressed in ADC counts per electron. Corresponding CLI command
+  an equalization calibration matrix, by using the ADC to eV conversion and the chip sensor
+  material. The gain matrix is expressed in ADC counts per electron. Corresponding CLI command
   `calibgen gain` refactored to reflect this change.
 * Some CLI commands refactored to use the equalization calibration file instead of the gain
   calibration file (`reconstruct`, `display`, `calibgen eta`).
diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index 9edfdf6..0d04741 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -135,7 +135,7 @@ def __init__(self, num_cols: int, num_rows: int) -> None:
         # Other useful information for the metadata
         self.num_events = 0
         self._cached = False
-        self._cached_metadata = {
+        self._metadata = {
             CalibrationMetadata.NUM_COLS: num_cols,
             CalibrationMetadata.NUM_ROWS: num_rows
             }
@@ -161,6 +161,8 @@ def values(self, new_matrix: np.ndarray) -> None:
             raise ValueError(f"Input matrix has shape {new_matrix.shape}, but expected shape is "
                              f"{self._shape}.")
         self._values = new_matrix
+        # Invalidate the cached metadata since the values of the matrix have changed.
+        self._cached = False
 
     @property
     def entries(self) -> np.ndarray:
@@ -178,6 +180,8 @@ def entries(self, new_entries: np.ndarray) -> None:
             raise ValueError(f"Input entries matrix has shape {new_entries.shape}, but expected "
                              f"shape is {self._shape}.")
         self._entries = new_entries
+        # Invalidate the cached metadata since the number of events has changed.
+        self._cached = False
 
     @property
     def errors(self) -> np.ndarray:
@@ -194,13 +198,15 @@ def errors(self, new_error: np.ndarray) -> None:
             raise ValueError(f"Input error matrix has shape {new_error.shape}, but expected shape "
                              f"is {self._shape}.")
         self._errors = new_error
+        # Invalidate the cached metadata since the error values have changed.
+        self._cached = False
 
     @property
     def metadata(self) -> dict:
         """Return the metadata of the calibration matrix.
         """
         if self._cached:
-            return self._cached_metadata
+            return self._metadata
         mask = self._entries > 0
         # If there are no pixels with events, we can set the average, minimum and maximum number of
         # events to zero.
@@ -211,13 +217,13 @@ def metadata(self) -> dict:
         else:
             entries_avg = entries_min = entries_max = 0
         # Setting the metadata values.
-        self._cached_metadata[CalibrationMetadata.NUM_EVENTS] = self.num_events
-        self._cached_metadata[CalibrationMetadata.ENTRIES_AVG] = entries_avg
-        self._cached_metadata[CalibrationMetadata.ENTRIES_MIN] = entries_min
-        self._cached_metadata[CalibrationMetadata.ENTRIES_MAX] = entries_max
-        self._cached_metadata[CalibrationMetadata.NUM_CALIBRATED_PIXELS] = int(mask.sum())
+        self._metadata[CalibrationMetadata.NUM_EVENTS] = self.num_events
+        self._metadata[CalibrationMetadata.ENTRIES_AVG] = entries_avg
+        self._metadata[CalibrationMetadata.ENTRIES_MIN] = entries_min
+        self._metadata[CalibrationMetadata.ENTRIES_MAX] = entries_max
+        self._metadata[CalibrationMetadata.NUM_CALIBRATED_PIXELS] = int(mask.sum())
         self._cached = True
-        return self._cached_metadata
+        return self._metadata
 
     def update_metadata(self, key: CalibrationMetadata, value) -> None:
         """Update the metadata of the calibration matrix with a new key-value pair.
@@ -229,7 +235,7 @@ def update_metadata(self, key: CalibrationMetadata, value) -> None:
         value :
             The value of the metadata to be updated.
         """
-        self._cached_metadata[key] = value
+        self._metadata[key] = value
 
     def set_value(self, value: float) -> None:
         """Set a value for all the pixels in the calibration matrix.
@@ -240,6 +246,7 @@ def set_value(self, value: float) -> None:
             The value to be set for all the pixels in the calibration matrix.
         """
         self._values = np.full(self._shape, value)
+        self._cached = False
 
     def fill(self, value: float, max_hits: int = 0) -> None:
         """Substitute the value of the pixels with less hits than or equal to a certain threshold
@@ -255,6 +262,7 @@ def fill(self, value: float, max_hits: int = 0) -> None:
             The maximum number of hits for a pixel to be considered for replacement.
         """
         self._values = np.where(self._entries <= max_hits, value, self._values)
+        self._cached = False
 
     def mean(self, min_hits: int = 1) -> float:
         """Return the mean value of the calibration matrix, calculated as the mean of the pixels
@@ -296,9 +304,9 @@ def to_hdf5(self, file_path: str, calibration_type: CalibrationType, is_syntheti
             h5file.create_dataset(self.ERRORS, data=self.errors, dtype=np.float32,
                                   **compression_pars)
             # Update the header with the relevant information and metadata.
-            self._cached_metadata[CalibrationMetadata.CALIBRATION_TYPE] = calibration_type.value
-            self._cached_metadata[CalibrationMetadata.IS_SYNTHETIC] = is_synthetic
-            self._cached_metadata[CalibrationMetadata.FILE_NAME] = pathlib.Path(file_path).stem
+            self._metadata[CalibrationMetadata.CALIBRATION_TYPE] = calibration_type.value
+            self._metadata[CalibrationMetadata.IS_SYNTHETIC] = is_synthetic
+            self._metadata[CalibrationMetadata.FILE_NAME] = pathlib.Path(file_path).stem
             for key, val in self.metadata.items():
                 h5file.attrs[key] = val
         return file_path
@@ -325,7 +333,7 @@ def from_hdf5(cls, file_path: str) -> "CalibrationMatrix":
             obj = cls(num_cols=attrs["num_cols"], num_rows=attrs["num_rows"])
             obj.num_events = attrs.get(CalibrationMetadata.NUM_EVENTS.value)
             for key, val in attrs.items():
-                obj._cached_metadata[key] = val
+                obj._metadata[key] = val
             # Load the matrix and the hits matrices from the HDF5 file.
             obj._values = h5file[obj.VALUES][:]
             obj._entries = h5file[obj.ENTRIES][:]
@@ -347,11 +355,11 @@ def __call__(self, col: np.ndarray, row: np.ndarray) -> float:
     def __str__(self) -> str:
         """Return a string representation of the calibration matrix.
         """
-        if CalibrationMetadata.FILE_NAME in self._cached_metadata:
-            return self._cached_metadata[CalibrationMetadata.FILE_NAME]
+        if CalibrationMetadata.FILE_NAME in self._metadata:
+            return self._metadata[CalibrationMetadata.FILE_NAME]
         return f"CalibrationMatrix( " \
-               f"num_cols={self._cached_metadata[CalibrationMetadata.NUM_COLS]}, " \
-               f"num_rows={self._cached_metadata[CalibrationMetadata.NUM_ROWS]})"
+               f"num_cols={self._metadata[CalibrationMetadata.NUM_COLS]}, " \
+               f"num_rows={self._metadata[CalibrationMetadata.NUM_ROWS]})"
 
 
 class CalibrateBase:
@@ -706,13 +714,16 @@ def _likelihood_fit(data: csr_matrix, conv_factor: float, pdf: SpectrumPDF,
     def nll(pars):
         total_adc = data @ pars
         p = pdf(total_adc * conv_factor)
-        return -np.sum(np.log(p + 1e-10))
+        # To avoid negative or zero values in the log, clip p to be larger than 1e-10.
+        p_clipped = np.clip(p, 1e-10, None)
+        return -np.sum(np.log(p_clipped))
     # Define the gradient of the log-likelihood function for the fit.
     def nll_grad(pars):
         total_adc = data @ pars
         p = pdf(total_adc * conv_factor)
+        p_clipped = np.clip(p, 1e-10, None)
         dp = pdf_derivative(total_adc * conv_factor)
-        grad = -data.T @ (dp / (p + 1e-10)) * conv_factor
+        grad = -data.T @ (dp / p_clipped) * conv_factor
         return np.asarray(grad).flatten()
     # Define the initial parameters for the fit.
     init_pars = np.ones(data.shape[1])
diff --git a/src/hexsample/clustering.py b/src/hexsample/clustering.py
index 06da5b3..a603282 100644
--- a/src/hexsample/clustering.py
+++ b/src/hexsample/clustering.py
@@ -298,7 +298,7 @@ class ClusteringNN(ClusteringBase):
     pos_recon_algorithm: str
     recon_pars: Optional[dict] = None
 
-    def run(self, event) -> Cluster:
+    def run(self, event) -> Optional[Cluster]:
         """Overladed method.
 
         .. warning::
@@ -306,6 +306,9 @@ def run(self, event) -> Cluster:
            for speed using proper numpy array for the offset indexes.
         """
         readout = self.readout
+        # Load the adc_to_ev conversion factor from the readout metadata of the
+        # equalization matrix. If the data is not present, or the wrong matrix type
+        # is passed, this will raise a KeyError.
         adc_to_ev = readout.gain.metadata["adc_to_ev"]
         if isinstance(event, DigiEventCircular):
             # If the readout is circular, we want to take all the neirest neighbors.
diff --git a/src/hexsample/pdf.py b/src/hexsample/pdf.py
index 02887fc..0ca6539 100644
--- a/src/hexsample/pdf.py
+++ b/src/hexsample/pdf.py
@@ -64,10 +64,10 @@ def mean(self) -> float:
             raise ValueError("The PDF is empty. Fit the PDF with data or load it from file.")
         # The location of the PDF can be estimated as the mean of the distribution.
         x_grid = np.linspace(min(self.pdf.x), max(self.pdf.x), 1000)
-        y_grid = self.pdf(x_grid)
+        # Clip the PDF to avoid negative values.
+        y_grid = np.maximum(0, self.pdf(x_grid))
         area = np.trapezoid(y_grid, x_grid)
-        normalized_pdf = y_grid / area
-        mean = np.sum(x_grid * normalized_pdf) / np.sum(normalized_pdf)
+        mean = np.trapezoid(x_grid * y_grid, x_grid) / area
         return mean
 
     def to_file(self, file_path: str) -> str:
@@ -78,6 +78,8 @@ def to_file(self, file_path: str) -> str:
         file_path : str
             The path to the file where the PDF will be saved.
         """
+        if self.pdf is None:
+            raise ValueError("The PDF is empty. Fit the PDF with data before saving it to file.")
         np.savez(file_path, x=self.pdf.x, c=self.pdf.c)
         return file_path
 
diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py
index 6b0ab44..c119d36 100644
--- a/src/hexsample/tasks.py
+++ b/src/hexsample/tasks.py
@@ -344,7 +344,7 @@ def reconstruct(
             cluster = clustering.run(event)
         except IndexError as e:
             logger.warning(f"Error reconstructing event with trigger ID {event.trigger_id}: {e}")
-        if cluster.size() in size:
+        if cluster is not None and (cluster.size() in size):
             # Need to pass the recon method and other stuff as argument to ReconEvent
             args = event.trigger_id, event.timestamp(), event.livetime, cluster
             recon_event = ReconEvent(*args)
@@ -443,7 +443,7 @@ def calibrate_eta(
         except IndexError:
             continue
         # Analyze only 2-pixel and 3-pixel events.
-        if cluster.size() == 2 or cluster.size() == 3:
+        if cluster is not None and (cluster.size() == 2 or cluster.size() == 3):
             mc_event = input_file.mc_event(i)
             size_list.append(cluster.size())
             # Calculate the photon position with respect to the most charged pixel
@@ -728,6 +728,7 @@ def calibrate_equalization(
     # necessary for the calibration.
     unit_gain_map = CalibrationMatrix(header["num_cols"], header["num_rows"])
     unit_gain_map.set_value(1.)
+    unit_gain_map.update_metadata(CalibrationMetadata.ADC_TO_EV, 1.)
     args = HexagonalLayout(header["layout"]), header["num_cols"], header["num_rows"],\
         header["pitch"], noise_matrix, unit_gain_map, pedestal_matrix
     readout = create_readout(readout_mode, header, *args)
@@ -741,9 +742,8 @@ def calibrate_equalization(
             cluster = clustering.run(event)
         except IndexError:
             continue
-        if cluster is None:
-            continue
-        equalization_calibration.analyze_cluster(cluster)
+        if cluster is not None:
+            equalization_calibration.analyze_cluster(cluster)
     logger.info("Calculating the equalization matrix...")
     equalization_matrix = equalization_calibration.fit(size)
     output_file_path = input_file_path.replace(".h5", "_matrix_equalization.h5")

From 527c88392460e925e4e5d4f42128a33c79fc997a Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Tue, 12 May 2026 11:45:12 +0200
Subject: [PATCH 27/30] Linting.

---
 src/hexsample/calibration.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py
index 0d04741..4213157 100644
--- a/src/hexsample/calibration.py
+++ b/src/hexsample/calibration.py
@@ -24,19 +24,17 @@
 import pathlib
 from enum import Enum
 from itertools import product
-from itertools import product
 from typing import Optional, Tuple
 
 import h5py
 import numpy as np
 import xraydb
 from aptapy.hist import Histogram3d
+from aptapy.models import Gaussian
 from iminuit import Minuit
 from joblib import Parallel, delayed
-from aptapy.models import Gaussian
 from scipy.sparse import csc_matrix, csr_matrix
 from tqdm import tqdm
-from tqdm import tqdm
 
 from .clustering import Cluster
 from .digi import DigiEventRectangular

From 3e04211ca63b84b7d00ef8e8191ea370d6cab10f Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Tue, 12 May 2026 11:52:29 +0200
Subject: [PATCH 28/30] Added uniform equalization file in CalDB.

---
 .../sim_xpol3_equalization-3p68_uniform_v001.h5 | Bin 0 -> 18816 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 caldb/equalization/sim_xpol3_equalization-3p68_uniform_v001.h5

diff --git a/caldb/equalization/sim_xpol3_equalization-3p68_uniform_v001.h5 b/caldb/equalization/sim_xpol3_equalization-3p68_uniform_v001.h5
new file mode 100644
index 0000000000000000000000000000000000000000..8c66c7854400bbcf097bc9d563b36b47e86e22ab
GIT binary patch
literal 18816
zcmeI4PfT1z9LMLeR9|7Ci`GK*;93=_!2|-SYvP|JmI4V8Y|*S2A1gd$&9V!-3oKr&
z%7NbWhF+{`jP=k2Z`H)udO}Yc5B`aVHtE5}G^rjGPj&wNfc?6^P)oG7ok@A~X67^V
zzVqFA58kiu?XmuYhZ^ect`jQxzNi&3y-S-bHXR)>HB_edQM=>We97h~Ys`)(8ii_W
zvGp-Ke)52+NZ5`ehkJX4Bx!ugZ8y<TYxaDqaQQ9eK!0z~u$pkn&N^=MRlCk?QOu>Y
zvtdz)Fjp#M<W5*9<O{M)>oY;%Rpy?!&GfYcc%n`B^lD7SsIBj?IhG@G<*TNq_`UMn
zQYkK)ebM}y-WOtr@O67l#0*$361wc!dDZB4PZTF-CnmC?9Yf7W_4CAdIFU`4Lb211
z=i4(aQWGifFm>%R8<aox4Em=I*nw?!<e8y^o`|epC03d%+p$FF{o2FWQdy?+Iv%O4
zzgbJ_dgsZtK-W3S&FbQ3y|T-+A*Vjz&FOoFTiy(*eS5jb@h$+@2Ao<{)zUD-p}sQ1
zp}yk`hx%55OH~sm>n<}K>T78-1CTE?9_mXn9O|0{ZjsUV7Q>;w(+r3D&M_S7^Y5sf
z?+T-@gW*tLis4Y-QHDc(CxQEk(f2jOp}sYSLw)f(D}ABkqP~9MeEa(VWll33>RV<w
z)OUvAP~XqMbu;>6yDR5Q9O_Fj9O@foIMlafIr3}N>Lad}8eHxia1J;JZczuyKVCZP
z$!nuu6Mt{^)Wyc8eq%)HH<_Pk6k>G5d-2WB%Z_utxOL#pkDKy(&>+w4FOL5)G_~vY
zia#4KztWu)p1$-s#9aUM*7<Uytd~t6TJ_!S$0K7hYAC}#J_Q8h`au6;9P&(ynM%9N
z)NQ1_l}no1ZEm?Z+WlLMyJ9aK$l-nNw+x5(xxX_U-M^BYy?2>CI<7LMT6&z}P+!1s
zsP7GiLwz3rm#QXC)~zxe>idP^P+#L7b0G4Cj*I#_fm>wsjW8VQn`b!G_b$VszR!T;
z;%Mg>4)sNBe4Tuu@lanY!=b(b;3%GjHf9+P^{wrz{M=|f)OU}a0L_Cq)b}KCTpTUU
zaHwyQ;ZWa)42Sx@0*>Ni=-(xVLw&IaD*dDJP+u3rp}yxWM|xZwjht|~bHF*^954>J
zo5cUa&9e<;&Y#Lu4P@^7wD0`pK&InNfY<rr){6@4xpi>kt^@ar__{i<{W#ind#=xK
zH_oX{-A>k;V!~wCxaHz#^!yVy?1H`DL=NwB-)A_y&;63&E`WevfwP_fCB)tbO%>@Q
z4)ygg9O}z39O`=;xYXa2nTk&s4)vX9IMlbZwQ|06T-4VI+#;jz1%^X?uQMF#`-tIC
z-#5Tf+zVy?$#AIezK1G(q47}P0K=ia8Q>_Mh5FuMIMjER;ZWa2hC_W#cEQuS=i+FO
zF&yekGaTwW#&D?bW8f(6h5r4(aH!AQU-|jcc&Kkb!=b)sEk}A>9F3fCxpTld;2bax
z(ED8FK2!GXdn-}-9*atzd^05~-&M)YP6cE6Y|&8W|DSzbNb0S9+*i<hG}ZbS^7C8v
zr|Wc7(71JCw>*A&d@LyCgK$nCO}&rvl&n=LDZ1-L_mzlgARRML)O~kix*-`A!@9u}
zO?sTMbT;!+AzjMkb3tigIyAlH-<k5EyqGMgA2Ws-oyLpG`tqZOR&{+(4-M)z)j_7s
zoGOPiv+_u<>I12=wbj0TBt$VY6_ls**{)zS`AFCFfkZHy%S_}8Q^8zEN2ltm(z1OY
z#FX6DIp7>{4mby#1I_{GfOEh(;2dxcI0u{q&Vm2JfhHX(5Ms$5jJ_{tBi_&;T4jVH
zQw)j=xzc1<%8V(rMFs*?tUx`#Z(;_%IMg$Eq*vCB9DYt^x`E;elt)FsZ8GtYiA**O
za_K1wU~Cn?Xp_AuexYI!8}XU?_4rOWC%<*YMrf2i`A?>#-jWVokeiMEQQhBuUrUgl
pds!pb+J7pO+g|^4`6~M-HRp*v>+`4BP&gh;XUd@tc5O9}zW~SVFWmqD

literal 0
HcmV?d00001


From 70e94fcff7b1783a5fc89cea4c48161be977605c Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Tue, 12 May 2026 11:53:54 +0200
Subject: [PATCH 29/30] Minor.

---
 scripts/populate_caldb.py | 4 ++++
 src/hexsample/tasks.py    | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/populate_caldb.py b/scripts/populate_caldb.py
index a95aa92..ebb5e04 100644
--- a/scripts/populate_caldb.py
+++ b/scripts/populate_caldb.py
@@ -23,11 +23,14 @@
 """
 
 import numpy as np
+from xraydb import ionization_potential
 
 from hexsample.caldb import CalDB, CalibrationType
 from hexsample.tasks import SynthesizeCalibrationDefaults, synthesize_calibration_file
 
+DEFAULT_IONIZATION_POTENTIAL = ionization_potential("Si")
 DEFAULT_GAIN = 1.
+DEFAULT_ADC_TO_EV = DEFAULT_IONIZATION_POTENTIAL / DEFAULT_GAIN
 RMS_VALS = (0, 10)
 ENC_VALS = np.array((20., 30., 40., 50., 75., 100.))
 PEDESTAL_VALS = (0, 1000)
@@ -62,6 +65,7 @@ def populate_caldb() -> None:
     generate_files(CalibrationType.ENC, ENC_VALS, RMS_VALS)
     generate_files(CalibrationType.NOISE, ENC_VALS * DEFAULT_GAIN, RMS_VALS)
     generate_files(CalibrationType.GAIN, [DEFAULT_GAIN], RMS_VALS)
+    generate_files(CalibrationType.EQUALIZATION, [DEFAULT_ADC_TO_EV], [0.]) 
     generate_files(CalibrationType.PEDESTAL, PEDESTAL_VALS, RMS_VALS)
 
 
diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py
index c119d36..45987d1 100644
--- a/src/hexsample/tasks.py
+++ b/src/hexsample/tasks.py
@@ -525,7 +525,7 @@ def synthesize_calibration_file(
     random_seed : int, optional
         The seed for the random number generator.
     """
-    name, args = current_call()
+    name, args = current_call(1)
     logger.info(f"Running {__name__}.{name} with arguments {args}...")
     # Initialize the random number generator with the given seed
     rng.initialize(seed=random_seed)

From 156594d72eb9e0c22915566dc4bda591c5fe7e2f Mon Sep 17 00:00:00 2001
From: Augusto Cattafesta <augustocattafesta@gmail.com>
Date: Tue, 12 May 2026 12:05:37 +0200
Subject: [PATCH 30/30] Import error solved.

---
 src/hexsample/pdf.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/hexsample/pdf.py b/src/hexsample/pdf.py
index 0ca6539..b40f10e 100644
--- a/src/hexsample/pdf.py
+++ b/src/hexsample/pdf.py
@@ -26,6 +26,11 @@
 from scipy.interpolate import CubicSpline
 from scipy.stats import gaussian_kde
 
+try:
+    from numpy import trapezoid as trapezoid
+except ImportError:
+    from numpy import trapz as trapezoid
+
 
 class SpectrumPDF:
 
@@ -66,8 +71,8 @@ def mean(self) -> float:
         x_grid = np.linspace(min(self.pdf.x), max(self.pdf.x), 1000)
         # Clip the PDF to avoid negative values.
         y_grid = np.maximum(0, self.pdf(x_grid))
-        area = np.trapezoid(y_grid, x_grid)
-        mean = np.trapezoid(x_grid * y_grid, x_grid) / area
+        area = trapezoid(y_grid, x_grid)
+        mean = trapezoid(x_grid * y_grid, x_grid) / area
         return mean
 
     def to_file(self, file_path: str) -> str: