From f17b5741525cfd038e858ba80b4e01a2f4e84a83 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Wed, 6 May 2026 11:13:04 +0200 Subject: [PATCH 01/30] Draft. --- src/hexsample/calibration.py | 118 ++++++++++++++++++++++++++--------- src/hexsample/tasks.py | 86 ++++++++++++------------- 2 files changed, 133 insertions(+), 71 deletions(-) diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index d70d04f..c46a1b5 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -23,13 +23,16 @@ import pathlib from enum import Enum from typing import Tuple +from time import time import h5py import numpy as np from aptapy.hist import Histogram3d from scipy.sparse import csr_matrix from scipy.sparse.linalg import lsmr +from iminuit import Minuit +from .hexagon import HexagonalGrid from .clustering import Cluster from .digi import DigiEventRectangular from .recon import DEFAULT_IONIZATION_POTENTIAL @@ -78,7 +81,7 @@ class CalibrationUnits(str, Enum): ENC = "Electrons" NOISE = "ADC counts" PEDESTAL = "ADC counts" - GAIN = "ADC counts / electron" + GAIN = "" CALIBRATION_UNITS = { @@ -639,44 +642,103 @@ def analyze_cluster(self, cluster: Cluster) -> None: self._event_count += 1 self.cal_matrix.num_events += 1 + def _fit_lh_cluster(self, a, col, row, mean): + t0 = time() + grid = HexagonalGrid() + neigh = grid.neighbors(col, row) + cols, rows = np.array(neigh).T + cols = np.insert(cols, 0, col) + rows = np.insert(rows, 0, row) + idxs = rows * 304 + cols + a_sub = a[:, idxs] + col_0 = a_sub[:, 0] + mask = col_0.nonzero()[0] + a_sub = a_sub[mask] + def nll_mono(pars): + sum_energy = a_sub @ pars + p = np.exp(- (sum_energy - mean)**2 / (2 * 20 **2)) + return -np.sum(np.log(p + 1e-10)) + + m = Minuit(nll_mono, np.ones(7)) + m.migrad() + t1 = time() + print(f"Fit time: {t1 - t0} seconds") + return m + + def fit(self) -> CalibrationMatrix: - """Perform the least squares fit to determine the gain of each pixel. + """Perform the likelihood fit to determine the gain of each pixel. """ if self._event_count == 0: raise ValueError("No events have been analyzed, cannot perform the fit.") + # Create the sparse matrix nrows, ncols = self.cal_matrix.shape # Create the sparse matrix for the least squares fit. This object allows to store # and use efficiently the large and sparse matrix that we need for the fit. shape = (self._event_count, nrows * ncols) + a = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape) - # Create the vector of the expected number of electrons. - b = np.full(self._event_count, self._energy / DEFAULT_IONIZATION_POTENTIAL) - # Perform the fit - results = lsmr(a, b) - # Get the best-fit weight vector and reshape it to the shape of the calibration matrix. - weight = results[0].reshape((nrows, ncols)) - signal_power = np.array(a.multiply(a).sum(axis=0)).reshape((nrows, ncols)) - # Calculate the number of degrees of freedom and the mean squared error. - dof = self._event_count - (ncols * nrows) - mse = results[3]**2 / max(dof, 1) - # Calculate the uncertainty of the gain best-fit values. - sigma_w = np.sqrt(mse / (signal_power + 1e-15)) - with np.errstate(divide='ignore', invalid='ignore'): - sigma_g_rel = sigma_w / np.abs(weight) - # Mask for the pixels that have a weight value close to zero (no events) - mask = np.abs(weight) > 1e-10 - values = self.cal_matrix.values.copy() - entries = self.cal_matrix.entries - # Set the gain value for the pixels that pass the quality cut. - values[mask] = 1 / weight[mask] - # Set the entries to zero for the pixels that don't pass the quality cut. - entries[~mask] = 0 - # Write back through the setter so updates persist on the shared object. - self.cal_matrix.values = values - self.cal_matrix.errors = np.where(mask, sigma_g_rel * values, self.cal_matrix.errors) - self.cal_matrix.entries = entries + # Calculate the mean to normalize the results + MEAN = np.array(a.sum(axis=1)).flatten().mean() + + # Let's try to analyze a small region around the center + COL0, ROW0 = 152, 176 + m = self._fit_lh_cluster(a, COL0, ROW0, MEAN) + # values = self.cal_matrix.values.copy() + # # values[rows, cols] = 1. / np.array(m.values) + # self.cal_matrix.values = values + + print("Fit results:") + for i in range(7): + print(f"Pixel {i}: Gain = {1/m.values[i]:.2f} +/- {m.errors[i]/m.values[i]**2:.2f}") + return self.cal_matrix + + + + + + + + # def fit(self) -> CalibrationMatrix: + # """Perform the least squares fit to determine the gain of each pixel. + # """ + # if self._event_count == 0: + # raise ValueError("No events have been analyzed, cannot perform the fit.") + # nrows, ncols = self.cal_matrix.shape + # # Create the sparse matrix for the least squares fit. This object allows to store + # # and use efficiently the large and sparse matrix that we need for the fit. + # shape = (self._event_count, nrows * ncols) + # a = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape) + # # Create the vector of the expected number of electrons. + # b = np.full(self._event_count, self._energy / DEFAULT_IONIZATION_POTENTIAL) + # # Perform the fit + # results = lsmr(a, b) + # # Get the best-fit weight vector and reshape it to the shape of the calibration matrix. + # weight = results[0].reshape((nrows, ncols)) + # signal_power = np.array(a.multiply(a).sum(axis=0)).reshape((nrows, ncols)) + # # Calculate the number of degrees of freedom and the mean squared error. + # dof = self._event_count - (ncols * nrows) + # mse = results[3]**2 / max(dof, 1) + # # Calculate the uncertainty of the gain best-fit values. + # sigma_w = np.sqrt(mse / (signal_power + 1e-15)) + # with np.errstate(divide='ignore', invalid='ignore'): + # sigma_g_rel = sigma_w / np.abs(weight) + # # Mask for the pixels that have a weight value close to zero (no events) + # mask = np.abs(weight) > 1e-10 + # values = self.cal_matrix.values.copy() + # entries = self.cal_matrix.entries + # # Set the gain value for the pixels that pass the quality cut. + # values[mask] = 1 / weight[mask] + # # Set the entries to zero for the pixels that don't pass the quality cut. + # entries[~mask] = 0 + # # Write back through the setter so updates persist on the shared object. + # self.cal_matrix.values = values + # self.cal_matrix.errors = np.where(mask, sigma_g_rel * values, self.cal_matrix.errors) + # self.cal_matrix.entries = entries + # return self.cal_matrix + class CalibrateENC: """Class for calibrating the equivalent noise charge (ENC) of the readout chip. diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py index 463686f..79e5dc7 100644 --- a/src/hexsample/tasks.py +++ b/src/hexsample/tasks.py @@ -721,52 +721,52 @@ def calibrate_gain( # calibration to correct the bias in the gain matrix. # To calculate the mean value, we are excluding the outliers by considering only the values # between the 1st and the 99th percentile. - gain_sim = CalibrationMatrix(header["num_cols"], header["num_rows"]) - vals = gain_matrix.values - lower_bound, upper_bound = np.nanpercentile(vals, [1, 99]) - gain_sim.set_value(np.mean(vals[(vals > lower_bound) & (vals < upper_bound)])) - simulation_readout = HexagonalReadoutRectangular(HexagonalLayout(header["layout"]), - header["num_cols"], header["num_rows"], header["pitch"], - enc=noise_matrix, gain=gain_sim, pedestal=pedestal_matrix) - output = HEXSAMPLE_DATA / "_tmp_simulation_bias.h5" - # Simulate events with the best-fit gain matrix to correct the bias. - logger.info("Simulating file to correct the bias...") - simulate( - source=Source(Line(energy), DiskBeam(radius=0.15)), - sensor=Sensor(), - readout=simulation_readout, - num_events=num_events, - output_file_path=output) - tmp_input_file = digi_input_file_class("rectangular")(output) - tmp_gain_calibration = CalibrateGain(header["num_cols"], header["num_rows"], energy) - # Re-run the gain calibration on the simulated events to calculate the correction factor. - logger.info("Starting the event loop for the simulated file...") - for _, event in tqdm(enumerate(tmp_input_file)): - try: - cluster = clustering.run(event) - except IndexError: - continue - tmp_gain_calibration.analyze_cluster(cluster) - logger.info("Calculating the gain matrix from the simulation...") - tmp_gain_matrix = tmp_gain_calibration.fit() - # Calculate the correction factor from the simulation. - logger.info("Calculating the correction factor...") - mask = tmp_gain_matrix.entries > 0 - # Calculate the residuals between the MC and calibrated gain matrices from the simulation - # and calculate the mean residual to be used as a correction factor. - residuals = (tmp_gain_matrix.values[mask] - gain_sim.values[mask]) / gain_sim.values[mask] - # Exclude the outliers by considering only the values between the 1st and the 99th percentile. - lower_bound, upper_bound = np.percentile(residuals, [1, 99]) - mean_residual = np.mean(residuals[(residuals > lower_bound) & (residuals < upper_bound)]) - # Apply the correction factor to the gain matrix and save it to a HDF5 file. - mask_gain = gain_matrix.entries > 0 - gain_matrix.values[mask_gain] = gain_matrix.values[mask_gain] / (1 + mean_residual) + # gain_sim = CalibrationMatrix(header["num_cols"], header["num_rows"]) + # vals = gain_matrix.values + # lower_bound, upper_bound = np.nanpercentile(vals, [1, 99]) + # gain_sim.set_value(np.mean(vals[(vals > lower_bound) & (vals < upper_bound)])) + # simulation_readout = HexagonalReadoutRectangular(HexagonalLayout(header["layout"]), + # header["num_cols"], header["num_rows"], header["pitch"], + # enc=noise_matrix, gain=gain_sim, pedestal=pedestal_matrix) + # output = HEXSAMPLE_DATA / "_tmp_simulation_bias.h5" + # # Simulate events with the best-fit gain matrix to correct the bias. + # logger.info("Simulating file to correct the bias...") + # simulate( + # source=Source(Line(energy), DiskBeam(radius=0.15)), + # sensor=Sensor(), + # readout=simulation_readout, + # num_events=num_events, + # output_file_path=output) + # tmp_input_file = digi_input_file_class("rectangular")(output) + # tmp_gain_calibration = CalibrateGain(header["num_cols"], header["num_rows"], energy) + # # Re-run the gain calibration on the simulated events to calculate the correction factor. + # logger.info("Starting the event loop for the simulated file...") + # for _, event in tqdm(enumerate(tmp_input_file)): + # try: + # cluster = clustering.run(event) + # except IndexError: + # continue + # tmp_gain_calibration.analyze_cluster(cluster) + # logger.info("Calculating the gain matrix from the simulation...") + # tmp_gain_matrix = tmp_gain_calibration.fit() + # # Calculate the correction factor from the simulation. + # logger.info("Calculating the correction factor...") + # mask = tmp_gain_matrix.entries > 0 + # # Calculate the residuals between the MC and calibrated gain matrices from the simulation + # # and calculate the mean residual to be used as a correction factor. + # residuals = (tmp_gain_matrix.values[mask] - gain_sim.values[mask]) / gain_sim.values[mask] + # # Exclude the outliers by considering only the values between the 1st and the 99th percentile. + # lower_bound, upper_bound = np.percentile(residuals, [1, 99]) + # mean_residual = np.mean(residuals[(residuals > lower_bound) & (residuals < upper_bound)]) + # # Apply the correction factor to the gain matrix and save it to a HDF5 file. + # mask_gain = gain_matrix.entries > 0 + # gain_matrix.values[mask_gain] = gain_matrix.values[mask_gain] / (1 + mean_residual) output_file_path = input_file_path.replace(".h5", "_matrix_gain.h5") - logger.info(f"Saving corrected gain matrix to {output_file_path}...") + # logger.info(f"Saving corrected gain matrix to {output_file_path}...") gain_matrix.to_hdf5(output_file_path, CalibrationType.GAIN, False) # Close the input files. - tmp_input_file.close() - os.remove(output) + # tmp_input_file.close() + # os.remove(output) input_file.close() logger.info("Done!") return output_file_path From 6756c6495c632e0d496257e5984dc2f76f3d71d2 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Wed, 6 May 2026 15:10:58 +0200 Subject: [PATCH 02/30] Draft. --- src/hexsample/calibration.py | 109 +++++++++++++++++++++++++---------- src/hexsample/tasks.py | 5 +- 2 files changed, 84 insertions(+), 30 deletions(-) diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index c46a1b5..c651655 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -24,6 +24,8 @@ from enum import Enum from typing import Tuple from time import time +from tqdm import tqdm +from itertools import product import h5py import numpy as np @@ -642,30 +644,42 @@ def analyze_cluster(self, cluster: Cluster) -> None: self._event_count += 1 self.cal_matrix.num_events += 1 - def _fit_lh_cluster(self, a, col, row, mean): - t0 = time() - grid = HexagonalGrid() - neigh = grid.neighbors(col, row) - cols, rows = np.array(neigh).T - cols = np.insert(cols, 0, col) - rows = np.insert(rows, 0, row) - idxs = rows * 304 + cols - a_sub = a[:, idxs] - col_0 = a_sub[:, 0] - mask = col_0.nonzero()[0] + def fit_lh_large(self, a, cols, rows, mean): + idxs_sub = (rows * 304 + cols).astype(int) + total_event_sum = np.array(a.sum(axis=1)).flatten() + a_sub = a[:, idxs_sub] + sub_event_sum = np.array(a_sub.sum(axis=1)).flatten() + mask = (sub_event_sum == total_event_sum) & (sub_event_sum > 0) a_sub = a_sub[mask] + # Remove no active pixels + pixel_sum = np.array(a_sub.sum(axis=0)).flatten() + active_mask = pixel_sum > 0 + + active_idxs = np.where(active_mask)[0] + a_final = a_sub[:, active_idxs] + + if a_final.nnz == 0 or a_final.shape[1] == 0: + return None + def nll_mono(pars): - sum_energy = a_sub @ pars + sum_energy = a_final @ pars p = np.exp(- (sum_energy - mean)**2 / (2 * 20 **2)) return -np.sum(np.log(p + 1e-10)) - - m = Minuit(nll_mono, np.ones(7)) + init = np.ones(len(active_idxs)) + m = Minuit(nll_mono, init) + m.errordef = Minuit.LIKELIHOOD m.migrad() - t1 = time() - print(f"Fit time: {t1 - t0} seconds") - return m - + full_region_gain = np.full(len(idxs_sub), np.nan) + full_region_error = np.full(len(idxs_sub), np.nan) + + if m.valid: + full_region_gain[active_idxs] = 1 / np.array(m.values) + full_region_error[active_idxs] = m.errors / np.array(m.values)**2 + + return full_region_gain, full_region_error, idxs_sub + + def fit(self) -> CalibrationMatrix: """Perform the likelihood fit to determine the gain of each pixel. """ @@ -680,17 +694,54 @@ def fit(self) -> CalibrationMatrix: a = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape) # Calculate the mean to normalize the results MEAN = np.array(a.sum(axis=1)).flatten().mean() - - # Let's try to analyze a small region around the center - COL0, ROW0 = 152, 176 - m = self._fit_lh_cluster(a, COL0, ROW0, MEAN) - # values = self.cal_matrix.values.copy() - # # values[rows, cols] = 1. / np.array(m.values) - # self.cal_matrix.values = values - - print("Fit results:") - for i in range(7): - print(f"Pixel {i}: Gain = {1/m.values[i]:.2f} +/- {m.errors[i]/m.values[i]**2:.2f}") + + values = self.cal_matrix.values.copy() + errors = self.cal_matrix.errors.copy() + + + NCOLS = 304 + NROWS = 352 + SIZE = 10 + OVERLAP = 2 + STRIDE = SIZE - OVERLAP + col_starts = np.arange(0, NCOLS - OVERLAP, STRIDE) + row_starts = np.arange(0, NROWS - OVERLAP, STRIDE) + + weighted_gains = np.zeros((NROWS, NCOLS)) + sum_weights = np.zeros((NROWS, NCOLS)) + + total_pixels = len(col_starts) * len(row_starts) + with tqdm(total=total_pixels, desc="Processing Pixels") as pbar: + for r_start in row_starts: + r_end = min(r_start + SIZE, NROWS) + actual_h = r_end - r_start + for c_start in col_starts: + c_end = min(c_start + SIZE, NCOLS) + actual_w = c_end - c_start + pbar.update(actual_w * (actual_h if c_start == col_starts[0] else 0)) + cc, rr = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end)) + + cols_block = cc.flatten() + rows_block = rr.flatten() + + results = self.fit_lh_large(a, cols_block, rows_block, MEAN) + if results is not None: + gain, error, _ = results + + weights = 1 / (error**2 + 1e-10) + mask = ~np.isnan(gain) + + np.add.at(weighted_gains, (rows_block[mask], cols_block[mask]), gain[mask] * weights[mask]) + np.add.at(sum_weights, (rows_block[mask], cols_block[mask]), weights[mask]) + + + values = np.divide(weighted_gains, sum_weights, out=np.full_like(weighted_gains, np.nan), where=sum_weights > 0) + errors = np.full_like(weighted_gains, np.nan) + np.sqrt(sum_weights, out=errors, where=sum_weights > 0) + np.divide(1, errors, out=errors, where=sum_weights > 0) + + self.cal_matrix.values = values + self.cal_matrix.errors = errors return self.cal_matrix diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py index 79e5dc7..9e072bb 100644 --- a/src/hexsample/tasks.py +++ b/src/hexsample/tasks.py @@ -958,7 +958,7 @@ def calibview( logger.info(f" {key}: {value}") unit = CALIBRATION_UNITS.get(matrix.metadata["calibration_type"]).value # Calculate the quantiles of the values in the matrix to set the limits for the plots. - rel_error_mask = matrix.errors / matrix.values < rel_error + rel_error_mask = abs(matrix.errors / matrix.values) < rel_error hits_mask = matrix.entries >= min_hits mask = rel_error_mask & hits_mask if not np.any(mask): @@ -996,6 +996,9 @@ def calibview( plt.colorbar(label=mc_unit) # Plot the distribution of the Monte Carlo truth values. mc_edges = np.linspace(np.nanmin(mc_vals), np.nanmax(mc_vals), 100) + # If Monte Carlo distribution is uniform, we need to modify the edges + if mc_edges[0] == mc_edges[-1]: + mc_edges = np.linspace(mc_edges[0] - mc_edges[0]*0.1, mc_edges[-1] + mc_edges[-1]*0.1, 100) mc_vals_hist = Histogram1d(mc_edges, label="MC Distribution", xlabel=mc_unit).fill(mc_vals) plt.figure("Distribution of Monte Carlo truth values") mc_vals_hist.plot(statistics=True) From f9218f13ec42d7be02cfa173960e4149f4eb8482 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Wed, 6 May 2026 15:55:06 +0200 Subject: [PATCH 03/30] Draft. --- src/hexsample/calibration.py | 52 ++++++++++++++---------------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index c651655..3c56bdc 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -683,8 +683,6 @@ def nll_mono(pars): def fit(self) -> CalibrationMatrix: """Perform the likelihood fit to determine the gain of each pixel. """ - if self._event_count == 0: - raise ValueError("No events have been analyzed, cannot perform the fit.") # Create the sparse matrix nrows, ncols = self.cal_matrix.shape # Create the sparse matrix for the least squares fit. This object allows to store @@ -698,41 +696,35 @@ def fit(self) -> CalibrationMatrix: values = self.cal_matrix.values.copy() errors = self.cal_matrix.errors.copy() - - NCOLS = 304 - NROWS = 352 + ncols = 304 + nrows = 352 SIZE = 10 OVERLAP = 2 STRIDE = SIZE - OVERLAP - col_starts = np.arange(0, NCOLS - OVERLAP, STRIDE) - row_starts = np.arange(0, NROWS - OVERLAP, STRIDE) + col_starts = np.arange(0, ncols - OVERLAP, STRIDE) + row_starts = np.arange(0, nrows - OVERLAP, STRIDE) - weighted_gains = np.zeros((NROWS, NCOLS)) - sum_weights = np.zeros((NROWS, NCOLS)) + weighted_gains = np.zeros((nrows, ncols)) + sum_weights = np.zeros((nrows, ncols)) - total_pixels = len(col_starts) * len(row_starts) - with tqdm(total=total_pixels, desc="Processing Pixels") as pbar: - for r_start in row_starts: - r_end = min(r_start + SIZE, NROWS) - actual_h = r_end - r_start - for c_start in col_starts: - c_end = min(c_start + SIZE, NCOLS) - actual_w = c_end - c_start - pbar.update(actual_w * (actual_h if c_start == col_starts[0] else 0)) - cc, rr = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end)) + for r_start in row_starts: + r_end = min(r_start + SIZE, nrows) + for c_start in col_starts: + c_end = min(c_start + SIZE, ncols) + cc, rr = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end)) - cols_block = cc.flatten() - rows_block = rr.flatten() + cols_block = cc.flatten() + rows_block = rr.flatten() - results = self.fit_lh_large(a, cols_block, rows_block, MEAN) - if results is not None: - gain, error, _ = results + results = self.fit_lh_large(a, cols_block, rows_block, MEAN) + if results is not None: + gain, error, _ = results - weights = 1 / (error**2 + 1e-10) - mask = ~np.isnan(gain) + weights = 1 / (error**2 + 1e-10) + mask = ~np.isnan(gain) - np.add.at(weighted_gains, (rows_block[mask], cols_block[mask]), gain[mask] * weights[mask]) - np.add.at(sum_weights, (rows_block[mask], cols_block[mask]), weights[mask]) + np.add.at(weighted_gains, (rows_block[mask], cols_block[mask]), gain[mask] * weights[mask]) + np.add.at(sum_weights, (rows_block[mask], cols_block[mask]), weights[mask]) values = np.divide(weighted_gains, sum_weights, out=np.full_like(weighted_gains, np.nan), where=sum_weights > 0) @@ -744,10 +736,6 @@ def fit(self) -> CalibrationMatrix: self.cal_matrix.errors = errors return self.cal_matrix - - - - From 440dcc77cc822cb87b28872b61af2871519bf973 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Thu, 7 May 2026 14:54:53 +0200 Subject: [PATCH 04/30] Draft. --- src/hexsample/calibration.py | 282 ++++++++++++++++++++--------------- src/hexsample/cli.py | 6 +- src/hexsample/pipeline.py | 5 +- src/hexsample/tasks.py | 72 +-------- 4 files changed, 172 insertions(+), 193 deletions(-) diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index 3c56bdc..cc988a7 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -22,22 +22,19 @@ import pathlib from enum import Enum -from typing import Tuple -from time import time -from tqdm import tqdm from itertools import product +from typing import Tuple, Optional +from tqdm import tqdm import h5py import numpy as np from aptapy.hist import Histogram3d from scipy.sparse import csr_matrix -from scipy.sparse.linalg import lsmr from iminuit import Minuit -from .hexagon import HexagonalGrid from .clustering import Cluster from .digi import DigiEventRectangular -from .recon import DEFAULT_IONIZATION_POTENTIAL +from .pdf import SpectrumPDF class CalibrationType(str, Enum): @@ -614,16 +611,16 @@ class CalibrateGain(CalibrateBase): The energy of the monochromatic source, in eV. """ - def __init__(self, num_cols: int, num_rows: int, energy: float) -> None: + def __init__(self, num_cols: int, num_rows: int, pdf: SpectrumPDF) -> None: """Class constructor. """ super().__init__(num_cols, num_rows) - self._energy = energy - self._event_count = 0 self._pha = [] self._coords = [] self._event_rows = [] + self._pdf = pdf + self._derivative = pdf.pdf.derivative(1) def analyze_cluster(self, cluster: Cluster) -> None: """Analyze the event cluster to update the calibration matrix. @@ -644,139 +641,180 @@ def analyze_cluster(self, cluster: Cluster) -> None: self._event_count += 1 self.cal_matrix.num_events += 1 - def fit_lh_large(self, a, cols, rows, mean): - idxs_sub = (rows * 304 + cols).astype(int) - total_event_sum = np.array(a.sum(axis=1)).flatten() - a_sub = a[:, idxs_sub] - sub_event_sum = np.array(a_sub.sum(axis=1)).flatten() - mask = (sub_event_sum == total_event_sum) & (sub_event_sum > 0) - a_sub = a_sub[mask] - # Remove no active pixels - pixel_sum = np.array(a_sub.sum(axis=0)).flatten() - active_mask = pixel_sum > 0 + def _cut_data(self, data: csr_matrix, cols: np.ndarray, rows: np.ndarray, + event_sum: np.ndarray) -> Tuple[csr_matrix, np.ndarray]: + """Cut the data to select only the events that have all the charge contained + in the pixels of the subregion defined by the input column and row coordinates. + This is done to ensure that the fit is performed only on events that are fully + contained in the subregion. - active_idxs = np.where(active_mask)[0] - a_final = a_sub[:, active_idxs] + Arguments + --------- + data : csr_matrix + The sparse matrix containing the pha values for the events in the entire detector. + cols : np.ndarray + The column coordinates of the pixels in the subregion. + rows : np.ndarray + The row coordinates of the pixels in the subregion. + event_sum : np.ndarray + The total ADC count for each event. - if a_final.nnz == 0 or a_final.shape[1] == 0: - return None + Returns + ------- + data_active_pixels : csr_matrix + The sparse matrix containing the pha values for the events in the subregion, with + only the active pixels. + mask : np.ndarray + The boolean mask indicating which pixels in the subregion are active. + """ + # Calculate the index of the pixels in the flattened array to select the + # correct columns of the sparse matrix. + pixels_idxs = (rows * self.cal_matrix.shape[1] + cols).astype(int) + # Select all the events but only the columns corresponding to the pixels + # in the subregion defined by the indices. + data_region = data[:, pixels_idxs] + # Select only the events in the subregion where all the charge is contained + # in the pixels of the subregion. Add also another cut to select only the + # events with total charge above 0 ADC, just to be sure to remove empty events. + sub_region_event_sum = np.array(data_region.sum(axis=1)).flatten() + mask = (sub_region_event_sum == event_sum) & (sub_region_event_sum > 0) + data_good_events = data_region[mask] + # We want to remove the pixels in the subregion that never got hit, since + # only pixels with signal can be fitted. This operation ensures that the + # number of fit parameters is equal to the number of active pixels. + pixel_sum = np.array(data_good_events.sum(axis=0)).flatten() + active_mask = pixel_sum > 0 + data_active_pixels = data_good_events[:, active_mask] + return data_active_pixels, active_mask - def nll_mono(pars): - sum_energy = a_final @ pars - p = np.exp(- (sum_energy - mean)**2 / (2 * 20 **2)) + def _likelihood_fit(self, data: csr_matrix, conv_factor: float + ) -> Optional[Tuple[np.ndarray, np.ndarray]]: + """Perform the likelihood fit for a region of the detector. + + Returns + ------- + gain : np.ndarray + The gain values for the pixels in the region, calculated from the fit. + error : np.ndarray + The error on the gain values for the pixels in the region, calculated from the fit. + """ + # Define the negative log-likelihood function for the fit. + def nll(pars): + total_adc = data @ pars + p = self._pdf(total_adc * conv_factor) return -np.sum(np.log(p + 1e-10)) - init = np.ones(len(active_idxs)) - m = Minuit(nll_mono, init) + + def nll_with_grad(pars): + # kept for compatibility/testing: returns (val, grad) for an array input + energy = (data @ pars) * conv_factor + p = self._pdf(energy) + dp = self._derivative(energy) + p_safe = np.maximum(p, 1e-12) + val = -np.sum(np.log(p_safe)) + common_term = -(dp / p_safe) * conv_factor + grad = np.asarray(data.T @ common_term).ravel() + return val, grad + + # Create scalar-callable wrappers that accept positional params as required by + # iminuit. These convert to numpy arrays internally and return the scalar NLL + # and a tuple gradient respectively. + def nll_scalar(*params): + pars = np.asarray(params, dtype=float) + return nll(pars) + + def grad_scalar(*params): + pars = np.asarray(params, dtype=float) + # reuse the analytic gradient computation + _, grad = nll_with_grad(pars) + return tuple(float(g) for g in grad) + + # Define the initial parameters for the fit. + init_pars = np.ones(data.shape[1]) + # Initialize the Minuit minimizer with an explicit gradient callable. + # Pass starting values as positional args (unpack the numpy array). + m = Minuit(nll_scalar, *init_pars.tolist(), grad=grad_scalar) + m.limits = [(1e-10, None) for _ in range(len(init_pars))] m.errordef = Minuit.LIKELIHOOD m.migrad() - - full_region_gain = np.full(len(idxs_sub), np.nan) - full_region_error = np.full(len(idxs_sub), np.nan) - + # If the fit is successful, return the gain values and their errors. if m.valid: - full_region_gain[active_idxs] = 1 / np.array(m.values) - full_region_error[active_idxs] = m.errors / np.array(m.values)**2 - - return full_region_gain, full_region_error, idxs_sub - + gain = 1 / np.array(m.values) + error = m.errors / np.array(m.values)**2 + return gain, error + return None def fit(self) -> CalibrationMatrix: - """Perform the likelihood fit to determine the gain of each pixel. + """Fit the collected events to determine the gain of each pixel. + + Returns + ------- + cal_matrix : CalibrationMatrix + Updated calibration matrix with the gain values calculated from the data. """ - # Create the sparse matrix + # Create a sparse matrix where the rows correspond to the events and the columns + # correspond to the pixels. In each row, the non-zero entries correspond to the + # pha values of the pixels that are hit in the event. nrows, ncols = self.cal_matrix.shape - # Create the sparse matrix for the least squares fit. This object allows to store - # and use efficiently the large and sparse matrix that we need for the fit. shape = (self._event_count, nrows * ncols) - - a = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape) - # Calculate the mean to normalize the results - MEAN = np.array(a.sum(axis=1)).flatten().mean() - - values = self.cal_matrix.values.copy() - errors = self.cal_matrix.errors.copy() - - ncols = 304 - nrows = 352 - SIZE = 10 - OVERLAP = 2 - STRIDE = SIZE - OVERLAP - col_starts = np.arange(0, ncols - OVERLAP, STRIDE) - row_starts = np.arange(0, nrows - OVERLAP, STRIDE) - + data = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape) + # Calculate the sum of the ADC counts in each event, which is used for the data + # cuts in the fit, and to calculate the mean ADC count in an event. + event_sum = np.array(data.sum(axis=1)).flatten() + # Calculate the mean of the ADC counts in an event. + conv_factor = self._pdf.loc() / event_sum.mean() + # Define the size of the blocks to fit contemporarily. + size = 10 + # Calculate the starting column and row indices for the blocks. We are using an + # overlap of 2 pixels between the blocks to ensure that pixels on the edges are + # correctly calibrated. + col_starts = np.arange(0, ncols - 2, size - 2) + row_starts = np.arange(0, nrows - 2, size - 2) + block_indices = list(product(row_starts, col_starts)) + # Create the arrays to store the weighted gain values and the sum of the weights + # for each pixel. We will use these arrays to calculate a weighted average of the + # results, since some pixels are fitted multiple times due to the overlap. weighted_gains = np.zeros((nrows, ncols)) sum_weights = np.zeros((nrows, ncols)) - - for r_start in row_starts: - r_end = min(r_start + SIZE, nrows) - for c_start in col_starts: - c_end = min(c_start + SIZE, ncols) - cc, rr = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end)) - - cols_block = cc.flatten() - rows_block = rr.flatten() - - results = self.fit_lh_large(a, cols_block, rows_block, MEAN) - if results is not None: - gain, error, _ = results - - weights = 1 / (error**2 + 1e-10) - mask = ~np.isnan(gain) - - np.add.at(weighted_gains, (rows_block[mask], cols_block[mask]), gain[mask] * weights[mask]) - np.add.at(sum_weights, (rows_block[mask], cols_block[mask]), weights[mask]) - - - values = np.divide(weighted_gains, sum_weights, out=np.full_like(weighted_gains, np.nan), where=sum_weights > 0) + # Start the loop over the blocks + pbar = tqdm(block_indices, desc="Fitting chip subregions", miniters=5) + for r_start, c_start in pbar: + # Calculate the end row and column indices, taking into account the possibility of being at + # the end of the chip. + r_end = min(r_start + size, nrows) + c_end = min(c_start + size, ncols) + # Create a meshgrid to get all the coordinates of the pixels in the block. + cols, rows = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end)) + cols = cols.flatten() + rows = rows.flatten() + # Cut the data to select only the events that have signal in the block. + block_data, mask = self._cut_data(data, cols, rows, event_sum) + # Check on the data: if there are no good events or no active pixels, + # we cannot perform the fit, so we return None. + if block_data.nnz == 0 or block_data.shape[1] == 0: + continue + # Fit the data for the block. + results = self._likelihood_fit(block_data, conv_factor) + # If the fit is not successful, continue to the next block. + if results is None: + continue + gain, error = results + weights = 1 / (error**2 + 1e-10) + # Use the mask of the active pixels to update the weighted gains and the sum + # of the weights matrices for the pixels in the block that have been fitted. + np.add.at(weighted_gains, (rows[mask], cols[mask]), gain * weights) + np.add.at(sum_weights, (rows[mask], cols[mask]), weights) + pbar.close() + # Calculate the final values and errors as the weighted average of the fit results. + values = np.divide(weighted_gains, sum_weights, + out=np.full_like(weighted_gains, np.nan), + where=sum_weights > 0) errors = np.full_like(weighted_gains, np.nan) np.sqrt(sum_weights, out=errors, where=sum_weights > 0) np.divide(1, errors, out=errors, where=sum_weights > 0) - + # Write the results back to the calibration matrix. self.cal_matrix.values = values self.cal_matrix.errors = errors - return self.cal_matrix - - - - - # def fit(self) -> CalibrationMatrix: - # """Perform the least squares fit to determine the gain of each pixel. - # """ - # if self._event_count == 0: - # raise ValueError("No events have been analyzed, cannot perform the fit.") - # nrows, ncols = self.cal_matrix.shape - # # Create the sparse matrix for the least squares fit. This object allows to store - # # and use efficiently the large and sparse matrix that we need for the fit. - # shape = (self._event_count, nrows * ncols) - # a = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape) - # # Create the vector of the expected number of electrons. - # b = np.full(self._event_count, self._energy / DEFAULT_IONIZATION_POTENTIAL) - # # Perform the fit - # results = lsmr(a, b) - # # Get the best-fit weight vector and reshape it to the shape of the calibration matrix. - # weight = results[0].reshape((nrows, ncols)) - # signal_power = np.array(a.multiply(a).sum(axis=0)).reshape((nrows, ncols)) - # # Calculate the number of degrees of freedom and the mean squared error. - # dof = self._event_count - (ncols * nrows) - # mse = results[3]**2 / max(dof, 1) - # # Calculate the uncertainty of the gain best-fit values. - # sigma_w = np.sqrt(mse / (signal_power + 1e-15)) - # with np.errstate(divide='ignore', invalid='ignore'): - # sigma_g_rel = sigma_w / np.abs(weight) - # # Mask for the pixels that have a weight value close to zero (no events) - # mask = np.abs(weight) > 1e-10 - # values = self.cal_matrix.values.copy() - # entries = self.cal_matrix.entries - # # Set the gain value for the pixels that pass the quality cut. - # values[mask] = 1 / weight[mask] - # # Set the entries to zero for the pixels that don't pass the quality cut. - # entries[~mask] = 0 - # # Write back through the setter so updates persist on the shared object. - # self.cal_matrix.values = values - # self.cal_matrix.errors = np.where(mask, sigma_g_rel * values, self.cal_matrix.errors) - # self.cal_matrix.entries = entries - # return self.cal_matrix class CalibrateENC: diff --git a/src/hexsample/cli.py b/src/hexsample/cli.py index 9061df3..17dd143 100644 --- a/src/hexsample/cli.py +++ b/src/hexsample/cli.py @@ -29,6 +29,7 @@ calibration, hexagon, logging_, + pdf, pipeline, readout, roi, @@ -435,13 +436,12 @@ def add_calibrate_gain_options(self, parser: argparse.ArgumentParser) -> None: """Add an option group for the gain calibration properties. """ defaults = tasks.CalibrationGainDefaults - parser.add_argument("energy", type=float, help="line energy in eV") + parser.add_argument("pdf", type=pdf.SpectrumPDF.from_file, + help="path to the spectrum PDF file") CliArgumentParser.add_cal_noise_file(parser, default=None, required=True) CliArgumentParser.add_cal_pedestal_file(parser, default=None, required=True) CliArgumentParser.add_zero_sup_threshold(parser, default=defaults.zero_sup_threshold) - CliArgumentParser.add_num_events(parser, default=defaults.num_events, - intent="used for the gain calibration") def add_calibrate_eta_options(self, parser: argparse.ArgumentParser) -> None: """Add an option group for the eta function calibration properties. diff --git a/src/hexsample/pipeline.py b/src/hexsample/pipeline.py index 5181a18..f2cf0a5 100644 --- a/src/hexsample/pipeline.py +++ b/src/hexsample/pipeline.py @@ -115,13 +115,12 @@ def calibrate_gain(**kwargs) -> str: """Calibrate the gain of the chip. """ input_file_path = kwargs["input_file"] - energy = kwargs["energy"] - num_events = kwargs.get("num_events", tasks.CalibrationGainDefaults.num_events) + pdf = kwargs["pdf"] noise_matrix = kwargs["noise"] pedestal_matrix = kwargs["pedestal"] zero_sup_threshold = kwargs.get("zero_sup_threshold", tasks.CalibrationGainDefaults.zero_sup_threshold) - args = input_file_path, energy, noise_matrix, pedestal_matrix, num_events, zero_sup_threshold + args = input_file_path, pdf, noise_matrix, pedestal_matrix, zero_sup_threshold return tasks.calibrate_gain(*args) diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py index 9e072bb..291d3bf 100644 --- a/src/hexsample/tasks.py +++ b/src/hexsample/tasks.py @@ -21,7 +21,6 @@ """ import inspect -import os import pathlib from dataclasses import dataclass from enum import Enum @@ -70,9 +69,10 @@ HexagonalReadoutMode, HexagonalReadoutRectangular, ) +from .pdf import SpectrumPDF from .recon import ReconEvent from .sensor import Sensor -from .source import DiskBeam, Line, Source +from .source import Source from .xpol import chip_descriptor # Make room for the output data. @@ -654,16 +654,14 @@ class CalibrationGainDefaults: definition in this Python module and the command-line interface. """ - num_events: int = 200000 zero_sup_threshold: int = 20 def calibrate_gain( input_file_path: str, - energy: float, + pdf: SpectrumPDF, noise_matrix: CalibrationMatrix, pedestal_matrix: CalibrationMatrix, - num_events: int = CalibrationGainDefaults.num_events, zero_sup_threshold: int = CalibrationGainDefaults.zero_sup_threshold ) -> str: """Calibrate gain of the readout chip using the events from a digi file. @@ -674,9 +672,8 @@ def calibrate_gain( input_file_path : str The path to the input file. - energy : float - The energy of the X-ray photons in eV. This is used to convert the charge collected in - each pixel to the number of electron, which is necessary for the gain calibration. + pdf : SpectrumPDF + The spectrum probability density function to use for the gain calibration. noise_matrix : CalibrationMatrix The calibration noise matrix to use for the gain calibration. @@ -684,9 +681,6 @@ def calibrate_gain( pedestal_matrix : CalibrationMatrix The pedestal matrix to use for the gain calibration. - num_events : int - The number of events to simulate to correct the bias. - zero_sup_threshold : int The zero-suppression threshold to use for the clustering in the gain calibration. """ @@ -700,7 +694,7 @@ def calibrate_gain( header["pitch"], noise_matrix, unit_gain_map, pedestal_matrix readout = create_readout(readout_mode, header, *args) # Initialize the gain matrix and run the calibration. - gain_calibration = CalibrateGain(header["num_cols"], header["num_rows"], energy) + gain_calibration = CalibrateGain(header["num_cols"], header["num_rows"], pdf) clustering = ClusteringNN(readout, zero_sup_threshold=zero_sup_threshold, num_neighbors=6, pos_recon_algorithm="centroid") logger.info("Starting the event loop...") @@ -712,61 +706,9 @@ def calibrate_gain( gain_calibration.analyze_cluster(cluster) logger.info("Calculating the gain matrix...") gain_matrix = gain_calibration.fit() - if not np.any(gain_matrix.entries > 0): - raise RuntimeError("No valid gain values found during the first step of calibration," \ - "cannot proceed further. The possible reason could be a small number of events over " \ - "the analyzed chip region.") - # Create the readout object for the simulation. We are using rectangular readout just because - # it's faster to simulate, and using a uniform gain matrix with the mean value of the first - # calibration to correct the bias in the gain matrix. - # To calculate the mean value, we are excluding the outliers by considering only the values - # between the 1st and the 99th percentile. - # gain_sim = CalibrationMatrix(header["num_cols"], header["num_rows"]) - # vals = gain_matrix.values - # lower_bound, upper_bound = np.nanpercentile(vals, [1, 99]) - # gain_sim.set_value(np.mean(vals[(vals > lower_bound) & (vals < upper_bound)])) - # simulation_readout = HexagonalReadoutRectangular(HexagonalLayout(header["layout"]), - # header["num_cols"], header["num_rows"], header["pitch"], - # enc=noise_matrix, gain=gain_sim, pedestal=pedestal_matrix) - # output = HEXSAMPLE_DATA / "_tmp_simulation_bias.h5" - # # Simulate events with the best-fit gain matrix to correct the bias. - # logger.info("Simulating file to correct the bias...") - # simulate( - # source=Source(Line(energy), DiskBeam(radius=0.15)), - # sensor=Sensor(), - # readout=simulation_readout, - # num_events=num_events, - # output_file_path=output) - # tmp_input_file = digi_input_file_class("rectangular")(output) - # tmp_gain_calibration = CalibrateGain(header["num_cols"], header["num_rows"], energy) - # # Re-run the gain calibration on the simulated events to calculate the correction factor. - # logger.info("Starting the event loop for the simulated file...") - # for _, event in tqdm(enumerate(tmp_input_file)): - # try: - # cluster = clustering.run(event) - # except IndexError: - # continue - # tmp_gain_calibration.analyze_cluster(cluster) - # logger.info("Calculating the gain matrix from the simulation...") - # tmp_gain_matrix = tmp_gain_calibration.fit() - # # Calculate the correction factor from the simulation. - # logger.info("Calculating the correction factor...") - # mask = tmp_gain_matrix.entries > 0 - # # Calculate the residuals between the MC and calibrated gain matrices from the simulation - # # and calculate the mean residual to be used as a correction factor. - # residuals = (tmp_gain_matrix.values[mask] - gain_sim.values[mask]) / gain_sim.values[mask] - # # Exclude the outliers by considering only the values between the 1st and the 99th percentile. - # lower_bound, upper_bound = np.percentile(residuals, [1, 99]) - # mean_residual = np.mean(residuals[(residuals > lower_bound) & (residuals < upper_bound)]) - # # Apply the correction factor to the gain matrix and save it to a HDF5 file. - # mask_gain = gain_matrix.entries > 0 - # gain_matrix.values[mask_gain] = gain_matrix.values[mask_gain] / (1 + mean_residual) output_file_path = input_file_path.replace(".h5", "_matrix_gain.h5") - # logger.info(f"Saving corrected gain matrix to {output_file_path}...") + logger.info(f"Saving gain matrix to {output_file_path}...") gain_matrix.to_hdf5(output_file_path, CalibrationType.GAIN, False) - # Close the input files. - # tmp_input_file.close() - # os.remove(output) input_file.close() logger.info("Done!") return output_file_path From 4814bdee557ace646e8035c5672fd18558dce247 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Thu, 7 May 2026 17:06:43 +0200 Subject: [PATCH 05/30] New module pdf (with tests) to interpolate spectra. --- src/hexsample/pdf.py | 102 +++++++++++++++++++++++++++++++++++++++++++ tests/test_pdf.py | 62 ++++++++++++++++++++++++++ 2 files changed, 164 insertions(+) create mode 100644 src/hexsample/pdf.py create mode 100644 tests/test_pdf.py diff --git a/src/hexsample/pdf.py b/src/hexsample/pdf.py new file mode 100644 index 0000000..fc89282 --- /dev/null +++ b/src/hexsample/pdf.py @@ -0,0 +1,102 @@ +# Copyright (C) 2023--2026 the hexsample team. +# +# For the license terms see the file LICENSE, distributed along with this +# software. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +"""Facilities to create and use probability density functions from spectra.""" + +import numpy as np +from scipy.interpolate import CubicSpline +from scipy.stats import gaussian_kde + + +class SpectrumPDF: + + """Create a probability density function from a spectrum. + + This class uses Gaussian kernel density estimation to create a smooth normalized + probability density function (PDF) from a set of values. The PDF is then interpolated + to allow for efficient evaluation. + """ + + def __init__(self) -> None: + """Class constructor + """ + self.pdf = None + + def fit(self, vals: np.ndarray) -> None: + """Fit the data to create the PDF. + + Arguments + --------- + vals : np.ndarray + The values to fit the PDF from. + """ + # Use Gaussian kernel density estimation to create a smooth PDF. + kde = gaussian_kde(vals) + # Create a grid of points for interpolation. We need to ensure that the grid exceeds + # the range of the data to avoid issues with extrapolation. + x_grid = np.linspace(min(vals)*0.9, max(vals)*1.1, 1000) + y_grid = kde(x_grid) + self.pdf = CubicSpline(x_grid, y_grid, extrapolate=True) + + def mean(self) -> float: + """Return the mean of the PDF. + """ + if self.pdf is None: + raise ValueError("The PDF is empty. Fit the PDF with data or load it from file.") + # The location of the PDF can be estimated as the mean of the distribution. + x_grid = np.linspace(min(self.pdf.x), max(self.pdf.x), 1000) + y_grid = self.pdf(x_grid) + area = np.trapezoid(y_grid, x_grid) + normalized_pdf = y_grid / area + mean = np.sum(x_grid * normalized_pdf) / np.sum(normalized_pdf) + return mean + + def to_file(self, file_path: str) -> str: + """Save the PDF to a file as a numpy archive. + + Arguments + --------- + file_path : str + The path to the file where the PDF will be saved. + """ + np.savez(file_path, x=self.pdf.x, c=self.pdf.c) + return file_path + + @classmethod + def from_file(cls, file_path: str) -> "SpectrumPDF": + """Load the PDF from a numpy archive. + + Arguments + --------- + file_path : str + The path to the file from which the PDF will be loaded. + """ + if not file_path.endswith(".npz"): + raise ValueError("The file must be a numpy archive with .npz extension.") + data = np.load(file_path) + instance = cls() + instance.pdf = CubicSpline.construct_fast(data["c"], data["x"], extrapolate=True) + return instance + + def __call__(self, x: np.ndarray) -> np.ndarray: + """Evaluate the PDF at the given points. + """ + if self.pdf is None: + raise ValueError("The PDF is empty. Fit the PDF with data or load it from file.") + return self.pdf(x) diff --git a/tests/test_pdf.py b/tests/test_pdf.py new file mode 100644 index 0000000..3f7217e --- /dev/null +++ b/tests/test_pdf.py @@ -0,0 +1,62 @@ +# Copyright (C) 2023--2026 the hexsample team. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +"""Test suite for hexsample.pdf +""" + +import numpy as np +from aptapy.plotting import plt + +from hexsample import rng +from hexsample.pdf import SpectrumPDF + +rng.initialize() + +def test_unimodal_spectrum_pdf(size: int = 10000): + """Test the SpectrumPDF class with a unimodal distribution. + """ + # Create a unimodal distribution. + vals = rng.generator.normal(loc=1.0, scale=0.1, size=size) + # Create the PDF and fit it to the data. + pdf = SpectrumPDF() + pdf.fit(vals) + # Check that the mean of the PDF is close to the mean of the data. + assert np.isclose(pdf.mean(), vals.mean(), atol=0.01) + # Plot the pdf + plt.figure("Unimodal distribution") + xx = np.linspace(vals.min()*0.8, vals.max()*1.2, 1000) + plt.plot(xx, pdf(xx), label="PDF") + plt.hist(vals, bins=50, density=True, alpha=0.5, label="Data") + plt.legend() + + +def test_bimodal_spectrum_pdf(size: int = 100000): + """Test the SpectrumPDF class with a bimodal distribution. + """ + # Extract values from a bimodal distribution. + vals = rng.generator.choice((1., 2.), p=[0.7, 0.3], size=size) + # Add some noise + vals += rng.generator.normal(scale=0.2, size=size) + # Create the PDF and fit it to the data. + pdf = SpectrumPDF() + pdf.fit(vals) + # Check that the mean of the PDF is close to the mean of the data. + assert np.isclose(pdf.mean(), vals.mean(), atol=0.01) + # Plot the pdf + plt.figure("Bimodal distribution") + xx = np.linspace(vals.min()*0.8, vals.max()*1.2, 1000) + plt.plot(xx, pdf(xx), label="PDF") + plt.hist(vals, bins=50, density=True, alpha=0.5, label="Data") + plt.legend() From 5f26af17c748e898fc218bd737e0d1f7ceaa4ebd Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Thu, 7 May 2026 18:22:55 +0200 Subject: [PATCH 06/30] New calibspec command added. --- src/hexsample/cli.py | 8 ++++++++ src/hexsample/pipeline.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/src/hexsample/cli.py b/src/hexsample/cli.py index 17dd143..d556f3a 100644 --- a/src/hexsample/cli.py +++ b/src/hexsample/cli.py @@ -154,6 +154,14 @@ def __init__(self) -> None: self.add_logging_level(synthesize) synthesize.set_defaults(runner=pipeline.synthesize_calibration_file) + # Fit a spectrum to generate a PDF for the gain calibration? + calibspec = subparsers.add_parser("calibspec", + help="generate a pdf from a spectrum to use in the gain calibration", + formatter_class=self._FORMATTER_CLASS) + self.add_input_file(calibspec) + self.add_logging_level(calibspec) + calibspec.set_defaults(runner=pipeline.calibspec) + # Inspect a calibration matrix? calibview = subparsers.add_parser("calibview", help="inspect a calibration matrix", diff --git a/src/hexsample/pipeline.py b/src/hexsample/pipeline.py index f2cf0a5..3a5a11f 100644 --- a/src/hexsample/pipeline.py +++ b/src/hexsample/pipeline.py @@ -69,6 +69,14 @@ def reconstruct(**kwargs) -> str: return tasks.reconstruct(*args, kwargs) +def calibspec(**kwargs) -> str: + """Create a probability density function from a reconstructed spectrum + to use in the gain calibration. + """ + input_file_path = kwargs["input_file"] + return tasks.calibspec(input_file_path) + + def calibrate_eta(**kwargs) -> None: """Calibrate the eta function using the events from a digi file. """ From aef6a7e125f52a333b18527e7455b95fad2a85a8 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Thu, 7 May 2026 18:23:15 +0200 Subject: [PATCH 07/30] New calibspec command added. --- src/hexsample/tasks.py | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py index 291d3bf..ae73c35 100644 --- a/src/hexsample/tasks.py +++ b/src/hexsample/tasks.py @@ -62,6 +62,7 @@ from .hexagon import HexagonalGrid, HexagonalLayout from .logging_ import logger from .mc import PhotonList +from .pdf import SpectrumPDF from .readout import ( AbstractReadout, HexagonalReadoutBase, @@ -69,7 +70,6 @@ HexagonalReadoutMode, HexagonalReadoutRectangular, ) -from .pdf import SpectrumPDF from .recon import ReconEvent from .sensor import Sensor from .source import Source @@ -358,6 +358,31 @@ def reconstruct( return output_file_path +def calibspec( + input_file_path: str + ) -> str: + """Create a probability density function from a reconstructed spectrum to use in + the gain calibration. + + Arguments + ---------- + input_file_path : str + The path to the input recon file. + """ + name, args = current_call(1) + logger.info(f"Running {__name__}.{name} with arguments {args}...") + input_file = ReconInputFile(input_file_path) + energy = input_file.column("energy") + input_file.close() + pdf = SpectrumPDF() + logger.info("Fitting the PDF to the energy distribution...") + pdf.fit(energy) + output_file_path = input_file_path.replace(".h5", ".npz") + logger.info(f"Saving the PDF to {output_file_path}...") + pdf.to_file(output_file_path) + return output_file_path + + @dataclass(frozen=True) class CalibrationEtaDefaults: @@ -803,7 +828,7 @@ def quicklook(input_file_path: str) -> None: The path to the input recon file. """ # Open the input file - name, args = current_call() + name, args = current_call(1) logger.info(f"Running {__name__}.{name} with arguments {args}...") input_file = ReconInputFile(input_file_path) # Plotting the reconstructed energy and the true energy @@ -890,7 +915,7 @@ def calibview( The upper quantile of the values in the matrix to be included in the statistics. """ # pylint: disable=too-many-statements - name, args = current_call() + name, args = current_call(1) logger.info(f"Running {__name__}.{name} with arguments {args}...") logger.info("Matrix metadata:") # Log the metadata of the matrix. @@ -940,7 +965,8 @@ def calibview( mc_edges = np.linspace(np.nanmin(mc_vals), np.nanmax(mc_vals), 100) # If Monte Carlo distribution is uniform, we need to modify the edges if mc_edges[0] == mc_edges[-1]: - mc_edges = np.linspace(mc_edges[0] - mc_edges[0]*0.1, mc_edges[-1] + mc_edges[-1]*0.1, 100) + val = mc_edges[0] + mc_edges = np.linspace(val*0.9, val*1.1, 100) mc_vals_hist = Histogram1d(mc_edges, label="MC Distribution", xlabel=mc_unit).fill(mc_vals) plt.figure("Distribution of Monte Carlo truth values") mc_vals_hist.plot(statistics=True) From d2eb7367519e6670fc620880879f831f9017f1a8 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Thu, 7 May 2026 18:34:31 +0200 Subject: [PATCH 08/30] Minor. --- src/hexsample/pdf.py | 14 ++++++++++++++ tests/test_pdf.py | 15 +++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/src/hexsample/pdf.py b/src/hexsample/pdf.py index fc89282..bee5baa 100644 --- a/src/hexsample/pdf.py +++ b/src/hexsample/pdf.py @@ -93,6 +93,20 @@ def from_file(cls, file_path: str) -> "SpectrumPDF": instance = cls() instance.pdf = CubicSpline.construct_fast(data["c"], data["x"], extrapolate=True) return instance + + def derivative(self, x: np.ndarray, order: int = 1) -> np.ndarray: + """Evaluate the derivative of the PDF at the given points. + + Arguments + --------- + x : np.ndarray + The points at which to evaluate the derivative. + order : int, optional + The order of the derivative to evaluate (default is 1). + """ + if self.pdf is None: + raise ValueError("The PDF is empty. Fit the PDF with data or load it from file.") + return self.pdf(x, nu=order) def __call__(self, x: np.ndarray) -> np.ndarray: """Evaluate the PDF at the given points. diff --git a/tests/test_pdf.py b/tests/test_pdf.py index 3f7217e..b6a12a6 100644 --- a/tests/test_pdf.py +++ b/tests/test_pdf.py @@ -60,3 +60,18 @@ def test_bimodal_spectrum_pdf(size: int = 100000): plt.plot(xx, pdf(xx), label="PDF") plt.hist(vals, bins=50, density=True, alpha=0.5, label="Data") plt.legend() + +def test_derivative(size: int = 100000): + """Test the derivative of the PDF. + """ + # Create a unimodal distribution. + vals = rng.generator.normal(loc=1.0, scale=0.1, size=size) + # Create the PDF and fit it to the data. + pdf = SpectrumPDF() + pdf.fit(vals) + derivative = pdf.derivative + # Check that the derivative of the PDF is close to zero at the mean of the data. + plt.figure("Derivative of a gaussian PDF") + xx = np.linspace(vals.min()*0.8, vals.max()*1.2, 10000) + plt.plot(xx, derivative(xx)) + plt.legend() From e1d0fc949bb3318adde65de2626f4aa1ac2cc7be Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Thu, 7 May 2026 18:48:09 +0200 Subject: [PATCH 09/30] Minor. --- pyproject.toml | 1 + src/hexsample/calibration.py | 53 ++++++++++++------------------------ 2 files changed, 18 insertions(+), 36 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 57db12f..4ae94c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "xraydb", "aptapy>=0.18.0", "scikit-image", + "iminuit", ] [project.optional-dependencies] diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index cc988a7..dbaec59 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -23,14 +23,14 @@ import pathlib from enum import Enum from itertools import product -from typing import Tuple, Optional -from tqdm import tqdm +from typing import Optional, Tuple import h5py import numpy as np from aptapy.hist import Histogram3d -from scipy.sparse import csr_matrix from iminuit import Minuit +from scipy.sparse import csr_matrix +from tqdm import tqdm from .clustering import Cluster from .digi import DigiEventRectangular @@ -620,7 +620,7 @@ def __init__(self, num_cols: int, num_rows: int, pdf: SpectrumPDF) -> None: self._coords = [] self._event_rows = [] self._pdf = pdf - self._derivative = pdf.pdf.derivative(1) + self._pdf_derivative = pdf.derivative def analyze_cluster(self, cluster: Cluster) -> None: """Analyze the event cluster to update the calibration matrix. @@ -703,36 +703,17 @@ def nll(pars): total_adc = data @ pars p = self._pdf(total_adc * conv_factor) return -np.sum(np.log(p + 1e-10)) - - def nll_with_grad(pars): - # kept for compatibility/testing: returns (val, grad) for an array input - energy = (data @ pars) * conv_factor - p = self._pdf(energy) - dp = self._derivative(energy) - p_safe = np.maximum(p, 1e-12) - val = -np.sum(np.log(p_safe)) - common_term = -(dp / p_safe) * conv_factor - grad = np.asarray(data.T @ common_term).ravel() - return val, grad - - # Create scalar-callable wrappers that accept positional params as required by - # iminuit. These convert to numpy arrays internally and return the scalar NLL - # and a tuple gradient respectively. - def nll_scalar(*params): - pars = np.asarray(params, dtype=float) - return nll(pars) - - def grad_scalar(*params): - pars = np.asarray(params, dtype=float) - # reuse the analytic gradient computation - _, grad = nll_with_grad(pars) - return tuple(float(g) for g in grad) - + # Define the gradient of the log-likelihood function for the fit. + def nll_grad(pars): + total_adc = data @ pars + p = self._pdf(total_adc * conv_factor) + dp = self._pdf_derivative(total_adc * conv_factor) + grad = -data.T @ (dp / (p + 1e-10)) * conv_factor + return np.asarray(grad).flatten() # Define the initial parameters for the fit. init_pars = np.ones(data.shape[1]) - # Initialize the Minuit minimizer with an explicit gradient callable. - # Pass starting values as positional args (unpack the numpy array). - m = Minuit(nll_scalar, *init_pars.tolist(), grad=grad_scalar) + # Initialize the Minuit minimizer. + m = Minuit(nll, init_pars, grad=nll_grad) m.limits = [(1e-10, None) for _ in range(len(init_pars))] m.errordef = Minuit.LIKELIHOOD m.migrad() @@ -753,7 +734,7 @@ def fit(self) -> CalibrationMatrix: """ # Create a sparse matrix where the rows correspond to the events and the columns # correspond to the pixels. In each row, the non-zero entries correspond to the - # pha values of the pixels that are hit in the event. + # pha values of the pixels that are hit in the event. nrows, ncols = self.cal_matrix.shape shape = (self._event_count, nrows * ncols) data = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape) @@ -761,7 +742,7 @@ def fit(self) -> CalibrationMatrix: # cuts in the fit, and to calculate the mean ADC count in an event. event_sum = np.array(data.sum(axis=1)).flatten() # Calculate the mean of the ADC counts in an event. - conv_factor = self._pdf.loc() / event_sum.mean() + conv_factor = self._pdf.mean() / event_sum.mean() # Define the size of the blocks to fit contemporarily. size = 10 # Calculate the starting column and row indices for the blocks. We are using an @@ -778,8 +759,8 @@ def fit(self) -> CalibrationMatrix: # Start the loop over the blocks pbar = tqdm(block_indices, desc="Fitting chip subregions", miniters=5) for r_start, c_start in pbar: - # Calculate the end row and column indices, taking into account the possibility of being at - # the end of the chip. + # Calculate the end row and column indices, taking into account the possibility + # of being at the end of the chip. r_end = min(r_start + size, nrows) c_end = min(c_start + size, ncols) # Create a meshgrid to get all the coordinates of the pixels in the block. From dc8b1649abbd70389f071e0da019b62ee18a7bc7 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Fri, 8 May 2026 09:50:27 +0200 Subject: [PATCH 10/30] Added plot method. --- src/hexsample/pdf.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/hexsample/pdf.py b/src/hexsample/pdf.py index bee5baa..7a08cb3 100644 --- a/src/hexsample/pdf.py +++ b/src/hexsample/pdf.py @@ -19,7 +19,10 @@ """Facilities to create and use probability density functions from spectra.""" +from typing import Tuple + import numpy as np +from aptapy.plotting import plt from scipy.interpolate import CubicSpline from scipy.stats import gaussian_kde @@ -108,6 +111,27 @@ def derivative(self, x: np.ndarray, order: int = 1) -> np.ndarray: raise ValueError("The PDF is empty. Fit the PDF with data or load it from file.") return self.pdf(x, nu=order) + def plot(self, xlim: Tuple[float, float] = None, **kwargs) -> None: + """Plot the PDF in a given range. + + Arguments + --------- + xlim : tuple[float, float], optional + The range of x values to plot (default is None, which means the fitting range + of the PDF). + """ + plt.figure(kwargs.get("figname", "spectrum_pdf")) + if xlim is None: + xlim = (min(self.pdf.x), max(self.pdf.x)) + x_grid = np.linspace(xlim[0], xlim[1], 1000) + y_grid = self.pdf(x_grid) + plt.plot(x_grid, y_grid, **kwargs) + plt.xlabel(kwargs.get("xlabel", "Energy [eV]")) + plt.ylabel(kwargs.get("ylabel", "PDF")) + plt.xlim(xlim) + plt.grid(visible=True, linestyle="--", alpha=0.5, color="gray") + plt.tight_layout() + def __call__(self, x: np.ndarray) -> np.ndarray: """Evaluate the PDF at the given points. """ From c70248e0b6f13d6ab3ff6aab58b0b338c4266024 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Fri, 8 May 2026 11:24:30 +0200 Subject: [PATCH 11/30] Minor. --- src/hexsample/clustering.py | 2 ++ src/hexsample/tasks.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/hexsample/clustering.py b/src/hexsample/clustering.py index 9fe2eda..268d1e1 100644 --- a/src/hexsample/clustering.py +++ b/src/hexsample/clustering.py @@ -323,6 +323,8 @@ def run(self, event) -> Cluster: # pylint: disable = invalid-name elif isinstance(event, DigiEventRectangular): seed_col, seed_row = event.highest_pixel() + if readout.is_at_border(seed_col, seed_row): + return None col = [seed_col] row = [seed_row] for _col, _row in readout.neighbors(seed_col, seed_row): diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py index ae73c35..d8265d9 100644 --- a/src/hexsample/tasks.py +++ b/src/hexsample/tasks.py @@ -728,6 +728,8 @@ def calibrate_gain( cluster = clustering.run(event) except IndexError: continue + if cluster is None: + continue gain_calibration.analyze_cluster(cluster) logger.info("Calculating the gain matrix...") gain_matrix = gain_calibration.fit() From 98162722dbf49c38dcc22d5893b247e61aa34d8e Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Fri, 8 May 2026 15:50:41 +0200 Subject: [PATCH 12/30] Parallelization implemented for gain calibration. --- src/hexsample/calibration.py | 277 +++++++++++++++++++++-------------- 1 file changed, 163 insertions(+), 114 deletions(-) diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index dbaec59..4037495 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -20,6 +20,7 @@ """Calibration facilities. """ +import gc import pathlib from enum import Enum from itertools import product @@ -29,7 +30,8 @@ import numpy as np from aptapy.hist import Histogram3d from iminuit import Minuit -from scipy.sparse import csr_matrix +from joblib import Parallel, delayed +from scipy.sparse import csc_matrix, csr_matrix from tqdm import tqdm from .clustering import Cluster @@ -595,6 +597,129 @@ def fit(self) -> Tuple[CalibrationMatrix, CalibrationMatrix]: return self.noise_cal, self.pedestal_cal +def _likelihood_fit(data: csr_matrix, conv_factor: float, pdf: SpectrumPDF, + pdf_derivative: callable) -> Optional[Tuple[np.ndarray, np.ndarray]]: + """Perform the likelihood fit for a region of the detector. + + Returns + ------- + gain : np.ndarray + The gain values for the pixels in the region, calculated from the fit. + error : np.ndarray + The error on the gain values for the pixels in the region, calculated from the fit. + """ + # Define the negative log-likelihood function for the fit. + def nll(pars): + total_adc = data @ pars + p = pdf(total_adc * conv_factor) + return -np.sum(np.log(p + 1e-10)) + # Define the gradient of the log-likelihood function for the fit. + def nll_grad(pars): + total_adc = data @ pars + p = pdf(total_adc * conv_factor) + dp = pdf_derivative(total_adc * conv_factor) + grad = -data.T @ (dp / (p + 1e-10)) * conv_factor + return np.asarray(grad).flatten() + # Define the initial parameters for the fit. + init_pars = np.ones(data.shape[1]) + # Initialize the Minuit minimizer. + m = Minuit(nll, init_pars, grad=nll_grad) + m.limits = [(1e-10, None) for _ in range(len(init_pars))] + m.errordef = Minuit.LIKELIHOOD + m.migrad() + # If the fit is successful, return the gain values and their errors, otherwise + # return None. + if m.valid: + gain = 1 / np.array(m.values) + error = m.errors / np.array(m.values)**2 + return gain, error + return None + + +def _cut_data(data: csc_matrix, ncols: int, cols: np.ndarray, rows: np.ndarray, + event_sum: np.ndarray) -> Tuple[csr_matrix, np.ndarray]: + """Cut the data to select only the events that have all the charge contained + in the pixels of the subregion defined by the input column and row coordinates. + This is done to ensure that the fit is performed only on events that are fully + contained in the subregion. + + Arguments + --------- + data : csr_matrix + The sparse matrix containing the pha values for the events in the entire detector. + cols : np.ndarray + The column coordinates of the pixels in the subregion. + rows : np.ndarray + The row coordinates of the pixels in the subregion. + event_sum : np.ndarray + The total ADC count for each event. + + Returns + ------- + data_active_pixels : csr_matrix + The sparse matrix containing the pha values for the events in the subregion, with + only the active pixels. + mask : np.ndarray + The boolean mask indicating which pixels in the subregion are active. + """ + # Calculate the index of the pixels in the flattened array to select the + # correct columns of the sparse matrix. + pixels_idxs = (rows * ncols + cols).astype(int) + # Select all the events but only the columns corresponding to the pixels + # in the subregion defined by the indices. + data_region = data[:, pixels_idxs] + # Re-convert to CSR format for efficient row slicing. + data_region = data_region.tocsr() + # Select only the events in the subregion where all the charge is contained + # in the pixels of the subregion. Add also another cut to select only the + # events with total charge above 0 ADC, just to be sure to remove empty events. + sub_region_event_sum = data_region.sum(axis=1).A1 + mask = (sub_region_event_sum == event_sum) & (sub_region_event_sum > 0) + data_good_events = data_region[mask] + # We want to remove the pixels in the subregion that never got hit, since + # only pixels with signal can be fitted. This operation ensures that the + # number of fit parameters is equal to the number of active pixels. + pixel_sum = data_good_events.sum(axis=0).A1 + active_mask = pixel_sum > 0 + data_active_pixels = data_good_events[:, active_mask] + return data_active_pixels, active_mask + + +def worker_fit_block(r_start: int, c_start: int, size: int, nrows: int, ncols: int, + data: csc_matrix, event_sum: np.ndarray, conv_factor: float, + pdf: SpectrumPDF, pdf_derivative: callable + ) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]]: + """Method to perform the fit for a block of pixels in the detector. + This method is meant to be used as a function to be executed in parallel for different + blocks of pixels. + + .. note:: This function must be defined outside the class to be picklable and usable + in parallel execution. + """ + # Calculate the end row and column indices, taking into account the possibility + # of being at the end of the chip. + r_end = min(r_start + size, nrows) + c_end = min(c_start + size, ncols) + # Create a meshgrid to get all the coordinates of the pixels in the block. + cols, rows = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end)) + cols = cols.flatten() + rows = rows.flatten() + # Cut the data to select only the events that have signal in the block. + data_fit, active_mask = _cut_data(data, ncols, cols, rows, event_sum) + # Check on the data: if there are no good events or no active pixels, + # we cannot perform the fit, so we return None. + if data_fit.nnz == 0 or data_fit.shape[1] == 0: + return None + # Fit the data of the block. + result = _likelihood_fit(data_fit, conv_factor, pdf, pdf_derivative) + # If the fit is not successful, return None and continue with the next block. + if result is None: + return None + # If the fit is successful, return the gain values and their errors and the + # coordinates of the active pixels. + return result[0], result[1], rows[active_mask], cols[active_mask] + + class CalibrateGain(CalibrateBase): """Calibrate the gain of the detector by analyzing the events in a DigiFile. This class takes a @@ -630,10 +755,10 @@ def analyze_cluster(self, cluster: Cluster) -> None: rows = cluster.row # Update the arrays for the least squares fit. self._pha.extend(cluster.pha) + ncols = self.cal_matrix.shape[1] for col, row in zip(cols, rows): # Calculate the index of the pixel in the flattened array - i = row * self.cal_matrix.shape[1] + col - self._coords.append(i) + self._coords.append(row * ncols + col) self._event_rows.append(self._event_count) # Update the matrix with the number of events for each pixel self.cal_matrix.entries[row, col] += 1 @@ -641,89 +766,6 @@ def analyze_cluster(self, cluster: Cluster) -> None: self._event_count += 1 self.cal_matrix.num_events += 1 - def _cut_data(self, data: csr_matrix, cols: np.ndarray, rows: np.ndarray, - event_sum: np.ndarray) -> Tuple[csr_matrix, np.ndarray]: - """Cut the data to select only the events that have all the charge contained - in the pixels of the subregion defined by the input column and row coordinates. - This is done to ensure that the fit is performed only on events that are fully - contained in the subregion. - - Arguments - --------- - data : csr_matrix - The sparse matrix containing the pha values for the events in the entire detector. - cols : np.ndarray - The column coordinates of the pixels in the subregion. - rows : np.ndarray - The row coordinates of the pixels in the subregion. - event_sum : np.ndarray - The total ADC count for each event. - - Returns - ------- - data_active_pixels : csr_matrix - The sparse matrix containing the pha values for the events in the subregion, with - only the active pixels. - mask : np.ndarray - The boolean mask indicating which pixels in the subregion are active. - """ - # Calculate the index of the pixels in the flattened array to select the - # correct columns of the sparse matrix. - pixels_idxs = (rows * self.cal_matrix.shape[1] + cols).astype(int) - # Select all the events but only the columns corresponding to the pixels - # in the subregion defined by the indices. - data_region = data[:, pixels_idxs] - # Select only the events in the subregion where all the charge is contained - # in the pixels of the subregion. Add also another cut to select only the - # events with total charge above 0 ADC, just to be sure to remove empty events. - sub_region_event_sum = np.array(data_region.sum(axis=1)).flatten() - mask = (sub_region_event_sum == event_sum) & (sub_region_event_sum > 0) - data_good_events = data_region[mask] - # We want to remove the pixels in the subregion that never got hit, since - # only pixels with signal can be fitted. This operation ensures that the - # number of fit parameters is equal to the number of active pixels. - pixel_sum = np.array(data_good_events.sum(axis=0)).flatten() - active_mask = pixel_sum > 0 - data_active_pixels = data_good_events[:, active_mask] - return data_active_pixels, active_mask - - def _likelihood_fit(self, data: csr_matrix, conv_factor: float - ) -> Optional[Tuple[np.ndarray, np.ndarray]]: - """Perform the likelihood fit for a region of the detector. - - Returns - ------- - gain : np.ndarray - The gain values for the pixels in the region, calculated from the fit. - error : np.ndarray - The error on the gain values for the pixels in the region, calculated from the fit. - """ - # Define the negative log-likelihood function for the fit. - def nll(pars): - total_adc = data @ pars - p = self._pdf(total_adc * conv_factor) - return -np.sum(np.log(p + 1e-10)) - # Define the gradient of the log-likelihood function for the fit. - def nll_grad(pars): - total_adc = data @ pars - p = self._pdf(total_adc * conv_factor) - dp = self._pdf_derivative(total_adc * conv_factor) - grad = -data.T @ (dp / (p + 1e-10)) * conv_factor - return np.asarray(grad).flatten() - # Define the initial parameters for the fit. - init_pars = np.ones(data.shape[1]) - # Initialize the Minuit minimizer. - m = Minuit(nll, init_pars, grad=nll_grad) - m.limits = [(1e-10, None) for _ in range(len(init_pars))] - m.errordef = Minuit.LIKELIHOOD - m.migrad() - # If the fit is successful, return the gain values and their errors. - if m.valid: - gain = 1 / np.array(m.values) - error = m.errors / np.array(m.values)**2 - return gain, error - return None - def fit(self) -> CalibrationMatrix: """Fit the collected events to determine the gain of each pixel. @@ -737,14 +779,23 @@ def fit(self) -> CalibrationMatrix: # pha values of the pixels that are hit in the event. nrows, ncols = self.cal_matrix.shape shape = (self._event_count, nrows * ncols) - data = csr_matrix((self._pha, (self._event_rows, self._coords)), shape=shape) + # Convert the lists to numpy array of int16 to save memory. + data_pha = np.array(self._pha, dtype=np.int16) + data_coords = np.array(self._coords, dtype=np.int32) + data_event_rows = np.array(self._event_rows, dtype=np.int32) + # Empty the lists to free memory. + self._pha = [] + self._coords = [] + self._event_rows = [] + # Create the sparse matrix with the collected data. + data_csr = csr_matrix((data_pha, (data_event_rows, data_coords)), shape=shape) # Calculate the sum of the ADC counts in each event, which is used for the data # cuts in the fit, and to calculate the mean ADC count in an event. - event_sum = np.array(data.sum(axis=1)).flatten() + event_sum = data_csr.sum(axis=1).A1 # Calculate the mean of the ADC counts in an event. conv_factor = self._pdf.mean() / event_sum.mean() # Define the size of the blocks to fit contemporarily. - size = 10 + size = 6 # Calculate the starting column and row indices for the blocks. We are using an # overlap of 2 pixels between the blocks to ensure that pixels on the edges are # correctly calibrated. @@ -756,35 +807,33 @@ def fit(self) -> CalibrationMatrix: # results, since some pixels are fitted multiple times due to the overlap. weighted_gains = np.zeros((nrows, ncols)) sum_weights = np.zeros((nrows, ncols)) - # Start the loop over the blocks - pbar = tqdm(block_indices, desc="Fitting chip subregions", miniters=5) - for r_start, c_start in pbar: - # Calculate the end row and column indices, taking into account the possibility - # of being at the end of the chip. - r_end = min(r_start + size, nrows) - c_end = min(c_start + size, ncols) - # Create a meshgrid to get all the coordinates of the pixels in the block. - cols, rows = np.meshgrid(np.arange(c_start, c_end), np.arange(r_start, r_end)) - cols = cols.flatten() - rows = rows.flatten() - # Cut the data to select only the events that have signal in the block. - block_data, mask = self._cut_data(data, cols, rows, event_sum) - # Check on the data: if there are no good events or no active pixels, - # we cannot perform the fit, so we return None. - if block_data.nnz == 0 or block_data.shape[1] == 0: - continue - # Fit the data for the block. - results = self._likelihood_fit(block_data, conv_factor) - # If the fit is not successful, continue to the next block. - if results is None: + # Convert the CSR matrix to CSC format to optimize column slicing during data cuts. + data_csc = data_csr.tocsc() + # Free the memory used by the CSR matrix and temporary arrays. + del data_csr, data_pha, data_coords, data_event_rows + gc.collect() + # Start the parallel processing of the blocks. + args = (size, nrows, ncols, data_csc, event_sum, conv_factor, + self._pdf, self._pdf_derivative) + bar_format = "{desc}: {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]" + results = Parallel(n_jobs=-1, backend="loky")( + delayed(worker_fit_block)( + r_start, c_start, *args) + for r_start, c_start in tqdm(block_indices, total=len(block_indices), + bar_format=bar_format) + ) + # Now we need to aggregate the results from the blocks. + for _result in results: + # If the block is empty or the fit is not successful, move to the next block. + if _result is None: continue - gain, error = results + # Unpack the results from the block fit. + gain, error, rows, cols = _result weights = 1 / (error**2 + 1e-10) # Use the mask of the active pixels to update the weighted gains and the sum # of the weights matrices for the pixels in the block that have been fitted. - np.add.at(weighted_gains, (rows[mask], cols[mask]), gain * weights) - np.add.at(sum_weights, (rows[mask], cols[mask]), weights) - pbar.close() + np.add.at(weighted_gains, (rows, cols), gain * weights) + np.add.at(sum_weights, (rows, cols), weights) # Calculate the final values and errors as the weighted average of the fit results. values = np.divide(weighted_gains, sum_weights, out=np.full_like(weighted_gains, np.nan), From 6b1eabfb9f9c26061e4ed2e8793282eaeeba8f6a Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Fri, 8 May 2026 15:52:37 +0200 Subject: [PATCH 13/30] Updating dependencies. --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 4ae94c0..726a8db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ dependencies = [ "aptapy>=0.18.0", "scikit-image", "iminuit", + "joblib", ] [project.optional-dependencies] From 8e9ec8f88e050989fef9fef4ab7a8ac54e683f17 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Fri, 8 May 2026 15:52:47 +0200 Subject: [PATCH 14/30] Linting. --- src/hexsample/calibration.py | 2 +- src/hexsample/cli.py | 2 +- src/hexsample/pdf.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index 4037495..ced1f5a 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -818,7 +818,7 @@ def fit(self) -> CalibrationMatrix: bar_format = "{desc}: {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]" results = Parallel(n_jobs=-1, backend="loky")( delayed(worker_fit_block)( - r_start, c_start, *args) + r_start, c_start, *args) for r_start, c_start in tqdm(block_indices, total=len(block_indices), bar_format=bar_format) ) diff --git a/src/hexsample/cli.py b/src/hexsample/cli.py index d556f3a..9597541 100644 --- a/src/hexsample/cli.py +++ b/src/hexsample/cli.py @@ -161,7 +161,7 @@ def __init__(self) -> None: self.add_input_file(calibspec) self.add_logging_level(calibspec) calibspec.set_defaults(runner=pipeline.calibspec) - + # Inspect a calibration matrix? calibview = subparsers.add_parser("calibview", help="inspect a calibration matrix", diff --git a/src/hexsample/pdf.py b/src/hexsample/pdf.py index 7a08cb3..02887fc 100644 --- a/src/hexsample/pdf.py +++ b/src/hexsample/pdf.py @@ -96,7 +96,7 @@ def from_file(cls, file_path: str) -> "SpectrumPDF": instance = cls() instance.pdf = CubicSpline.construct_fast(data["c"], data["x"], extrapolate=True) return instance - + def derivative(self, x: np.ndarray, order: int = 1) -> np.ndarray: """Evaluate the derivative of the PDF at the given points. From ad5909b04b0f3ccfff075ad53170c6992049f19c Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Fri, 8 May 2026 16:29:31 +0200 Subject: [PATCH 15/30] Minor. --- src/hexsample/calibration.py | 42 ++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index ced1f5a..69ba188 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -627,6 +627,15 @@ def nll_grad(pars): m.limits = [(1e-10, None) for _ in range(len(init_pars))] m.errordef = Minuit.LIKELIHOOD m.migrad() + # If the first fit is not successful, try to perform a second fit. We are not passing + # the gradient, because the fit should be more robust (but slower). + if not m.valid: + print("Retrying fit without gradient...") + m = Minuit(nll, init_pars) + m.limits = [(1e-10, None) for _ in range(len(init_pars))] + m.errordef = Minuit.LIKELIHOOD + m.migrad() + if m.valid: print("Fit successful without gradient.") # If the fit is successful, return the gain values and their errors, otherwise # return None. if m.valid: @@ -685,16 +694,29 @@ def _cut_data(data: csc_matrix, ncols: int, cols: np.ndarray, rows: np.ndarray, return data_active_pixels, active_mask -def worker_fit_block(r_start: int, c_start: int, size: int, nrows: int, ncols: int, - data: csc_matrix, event_sum: np.ndarray, conv_factor: float, - pdf: SpectrumPDF, pdf_derivative: callable - ) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]]: +def _fit_block( + r_start: int, c_start: int, size: int, nrows: int, ncols: int, data: csc_matrix, + event_sum: np.ndarray, conv_factor: float, pdf: SpectrumPDF, pdf_derivative: callable + ) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]]: """Method to perform the fit for a block of pixels in the detector. This method is meant to be used as a function to be executed in parallel for different blocks of pixels. .. note:: This function must be defined outside the class to be picklable and usable in parallel execution. + + Returns + ------- + gain : np.ndarray + The gain values for the pixels in the block, calculated from the fit. + error : np.ndarray + The error on the gain values for the pixels in the block, calculated from the fit. + rows : np.ndarray + The row coordinates of the active pixels in the block. + cols : np.ndarray + The column coordinates of the active pixels in the block. + num_events_per_pixel : np.ndarray + The number of events for each active pixel in the block. """ # Calculate the end row and column indices, taking into account the possibility # of being at the end of the chip. @@ -706,6 +728,7 @@ def worker_fit_block(r_start: int, c_start: int, size: int, nrows: int, ncols: i rows = rows.flatten() # Cut the data to select only the events that have signal in the block. data_fit, active_mask = _cut_data(data, ncols, cols, rows, event_sum) + num_events_per_pixel = np.asarray((data_fit > 0).sum(axis=0)).flatten() # Check on the data: if there are no good events or no active pixels, # we cannot perform the fit, so we return None. if data_fit.nnz == 0 or data_fit.shape[1] == 0: @@ -717,7 +740,7 @@ def worker_fit_block(r_start: int, c_start: int, size: int, nrows: int, ncols: i return None # If the fit is successful, return the gain values and their errors and the # coordinates of the active pixels. - return result[0], result[1], rows[active_mask], cols[active_mask] + return result[0], result[1], rows[active_mask], cols[active_mask], num_events_per_pixel class CalibrateGain(CalibrateBase): @@ -760,8 +783,6 @@ def analyze_cluster(self, cluster: Cluster) -> None: # Calculate the index of the pixel in the flattened array self._coords.append(row * ncols + col) self._event_rows.append(self._event_count) - # Update the matrix with the number of events for each pixel - self.cal_matrix.entries[row, col] += 1 # Update the event count self._event_count += 1 self.cal_matrix.num_events += 1 @@ -807,6 +828,7 @@ def fit(self) -> CalibrationMatrix: # results, since some pixels are fitted multiple times due to the overlap. weighted_gains = np.zeros((nrows, ncols)) sum_weights = np.zeros((nrows, ncols)) + entries = np.zeros((nrows, ncols), dtype=int) # Convert the CSR matrix to CSC format to optimize column slicing during data cuts. data_csc = data_csr.tocsc() # Free the memory used by the CSR matrix and temporary arrays. @@ -817,7 +839,7 @@ def fit(self) -> CalibrationMatrix: self._pdf, self._pdf_derivative) bar_format = "{desc}: {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]" results = Parallel(n_jobs=-1, backend="loky")( - delayed(worker_fit_block)( + delayed(_fit_block)( r_start, c_start, *args) for r_start, c_start in tqdm(block_indices, total=len(block_indices), bar_format=bar_format) @@ -828,12 +850,13 @@ def fit(self) -> CalibrationMatrix: if _result is None: continue # Unpack the results from the block fit. - gain, error, rows, cols = _result + gain, error, rows, cols, num_events_per_pixel = _result weights = 1 / (error**2 + 1e-10) # Use the mask of the active pixels to update the weighted gains and the sum # of the weights matrices for the pixels in the block that have been fitted. np.add.at(weighted_gains, (rows, cols), gain * weights) np.add.at(sum_weights, (rows, cols), weights) + np.maximum.at(entries, (rows, cols), num_events_per_pixel) # Calculate the final values and errors as the weighted average of the fit results. values = np.divide(weighted_gains, sum_weights, out=np.full_like(weighted_gains, np.nan), @@ -844,6 +867,7 @@ def fit(self) -> CalibrationMatrix: # Write the results back to the calibration matrix. self.cal_matrix.values = values self.cal_matrix.errors = errors + self.cal_matrix.entries = entries return self.cal_matrix From 018175aa70d503b60d9f3b981d29476dbaa30cf1 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Fri, 8 May 2026 16:47:33 +0200 Subject: [PATCH 16/30] Minor. --- src/hexsample/calibration.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index 69ba188..95a17cd 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -296,6 +296,7 @@ def from_hdf5(cls, file_path: str) -> "CalibrationMatrix": attrs = dict(h5file.attrs) # Instantiate the object with the attributes loaded from the header. obj = cls(num_cols=attrs["num_cols"], num_rows=attrs["num_rows"]) + obj.num_events = attrs.get(CalibrationMetadata.NUM_EVENTS.value) for key, val in attrs.items(): obj._metadata[key] = val # Load the matrix and the hits matrices from the HDF5 file. @@ -630,12 +631,10 @@ def nll_grad(pars): # If the first fit is not successful, try to perform a second fit. We are not passing # the gradient, because the fit should be more robust (but slower). if not m.valid: - print("Retrying fit without gradient...") m = Minuit(nll, init_pars) m.limits = [(1e-10, None) for _ in range(len(init_pars))] m.errordef = Minuit.LIKELIHOOD m.migrad() - if m.valid: print("Fit successful without gradient.") # If the fit is successful, return the gain values and their errors, otherwise # return None. if m.valid: From 3417ebf0d6ad0d2a688165f45e453607f1e136ed Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Sun, 10 May 2026 14:51:31 +0200 Subject: [PATCH 17/30] Refactoring gain matrix to equalization matrix. --- src/hexsample/caldb.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hexsample/caldb.py b/src/hexsample/caldb.py index 638d1ea..bf18e5d 100644 --- a/src/hexsample/caldb.py +++ b/src/hexsample/caldb.py @@ -60,7 +60,7 @@ def open_noise(cls, designator: str) -> CalibrationMatrix: return cls._open(CalibrationType.NOISE, designator) @classmethod - def open_gain(cls, designator: str) -> CalibrationMatrix: - """Open the gain calibration file for the given designation. + def open_equalization(cls, designator: str) -> CalibrationMatrix: + """Open the pixel equalization calibration file for the given designation. """ - return cls._open(CalibrationType.GAIN, designator) + return cls._open(CalibrationType.EQUALIZATION, designator) From 1b140084e7eede56f65af04911b90bc29f79f370 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Sun, 10 May 2026 15:13:55 +0200 Subject: [PATCH 18/30] Added new EQUALIZATION calibration type. --- src/hexsample/caldb.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/hexsample/caldb.py b/src/hexsample/caldb.py index bf18e5d..fb509ab 100644 --- a/src/hexsample/caldb.py +++ b/src/hexsample/caldb.py @@ -59,8 +59,14 @@ def open_noise(cls, designator: str) -> CalibrationMatrix: """ return cls._open(CalibrationType.NOISE, designator) + @classmethod + def open_gain(cls, designator: str) -> CalibrationMatrix: + """Open the gain calibration file for the given designation. + """ + return cls._open(CalibrationType.GAIN, designator) + @classmethod def open_equalization(cls, designator: str) -> CalibrationMatrix: - """Open the pixel equalization calibration file for the given designation. + """Open the equalization calibration file for the given designation. """ return cls._open(CalibrationType.EQUALIZATION, designator) From 57efdc394ee3e67e7bbf8ee426b8b87801bfbe24 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Sun, 10 May 2026 15:51:58 +0200 Subject: [PATCH 19/30] Added ADC counts column in Recon files. --- src/hexsample/fileio.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/hexsample/fileio.py b/src/hexsample/fileio.py index 677e500..cb2c032 100644 --- a/src/hexsample/fileio.py +++ b/src/hexsample/fileio.py @@ -226,9 +226,10 @@ class ReconDescription(tables.IsDescription): livetime = tables.Int32Col(pos=2) roi_size = tables.Int32Col(pos=3) cluster_size = tables.Int8Col(pos=4) - energy = tables.Float32Col(pos=5) - posx = tables.Float32Col(pos=6) - posy = tables.Float32Col(pos=7) + adc = tables.Int32Col(pos=5) + energy = tables.Float32Col(pos=6) + posx = tables.Float32Col(pos=7) + posy = tables.Float32Col(pos=8) def _fill_recon_row(row: tables.tableextension.Row, event: ReconEvent) -> None: """Helper function to fill an output table row, given a ReconEvent object. @@ -244,6 +245,7 @@ def _fill_recon_row(row: tables.tableextension.Row, event: ReconEvent) -> None: row["livetime"] = event.livetime #row["roi_size"] = event.roi_size row["cluster_size"] = event.cluster.size() + row["adc"] = event.adc() row["energy"] = event.energy() row["posx"], row["posy"] = event.position() row.append() From 0a002dac86abfe9f72f5eee3f8d54a2bc8d6ba55 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Sun, 10 May 2026 15:52:59 +0200 Subject: [PATCH 20/30] Added adc method. --- src/hexsample/recon.py | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/src/hexsample/recon.py b/src/hexsample/recon.py index 8b39d7e..1117036 100644 --- a/src/hexsample/recon.py +++ b/src/hexsample/recon.py @@ -23,12 +23,8 @@ from dataclasses import dataclass from typing import Tuple -import xraydb - from .clustering import Cluster -DEFAULT_IONIZATION_POTENTIAL = xraydb.ionization_potential("Si") - @dataclass class ReconEventBase: @@ -54,15 +50,15 @@ class ReconEventBase: livetime: int cluster: Cluster - def energy(self, ionization_potential: float = DEFAULT_IONIZATION_POTENTIAL) -> float: - """Return the energy of the event in eV. + def adc(self) -> int: + """Return the total ADC count for the event. + """ + return self.cluster.pulse_height() - .. warning:: - This is currently using the ionization energy of Silicon to do the - conversion, assuming a detector gain of 1. We will need to do some - bookkeeping, here, to make this work reliably. + def energy(self) -> float: + """Return the energy of the event in eV. """ - return ionization_potential * self.cluster.pulse_height() + return self.cluster.energy() def position(self) -> Tuple[float, float]: """Return the reconstructed position of the event. @@ -100,15 +96,15 @@ class ReconEvent: #roi_size: int cluster: Cluster - def energy(self, ionization_potential: float = DEFAULT_IONIZATION_POTENTIAL) -> float: - """Return the energy of the event in eV. + def adc(self) -> int: + """Return the total ADC counts for the event. + """ + return self.cluster.pulse_height() - .. warning:: - This is currently using the ionization energy of Silicon to do the - conversion, assuming a detector gain of 1. We will need to do some - bookkeeping, here, to make this work reliably. + def energy(self) -> float: + """Return the energy of the event in eV. """ - return ionization_potential * self.cluster.pulse_height() + return self.cluster.energy() def position(self) -> Tuple[float, float]: """Return the reconstructed position of the event. From b9c044353c11dde320548cad9f39cc1a3c505c58 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Mon, 11 May 2026 09:58:22 +0200 Subject: [PATCH 21/30] Refactoring with equaliaztion calibration matrix. --- src/hexsample/calibration.py | 142 ++++++++++++++++++++++++----------- src/hexsample/cli.py | 46 ++++++++++-- src/hexsample/clustering.py | 8 +- src/hexsample/pipeline.py | 31 +++++--- src/hexsample/tasks.py | 107 +++++++++++++++++++------- 5 files changed, 248 insertions(+), 86 deletions(-) diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index 95a17cd..3098515 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -28,6 +28,7 @@ import h5py import numpy as np +import xraydb from aptapy.hist import Histogram3d from iminuit import Minuit from joblib import Parallel, delayed @@ -48,6 +49,7 @@ class CalibrationType(str, Enum): PEDESTAL = "pedestal" NOISE = "noise" GAIN = "gain" + EQUALIZATION = "equalization" @classmethod def values(cls) -> Tuple[str, ...]: @@ -72,6 +74,9 @@ class CalibrationMetadata(str, Enum): VERSION = "version" CALIBRATION_TYPE = "calibration_type" IS_SYNTHETIC = "is_synthetic" + # This is to convert ADC counts to energy and it's stored only in the + # equalization matrix, and is measured in eV/ADC count. + ADC_TO_EV = "adc_to_ev" class CalibrationUnits(str, Enum): @@ -82,14 +87,16 @@ class CalibrationUnits(str, Enum): ENC = "Electrons" NOISE = "ADC counts" PEDESTAL = "ADC counts" - GAIN = "" + GAIN = "ADC counts / electron" + EQUALIZATION = "" CALIBRATION_UNITS = { CalibrationType.ENC: CalibrationUnits.ENC, CalibrationType.NOISE: CalibrationUnits.NOISE, CalibrationType.PEDESTAL: CalibrationUnits.PEDESTAL, - CalibrationType.GAIN: CalibrationUnits.GAIN + CalibrationType.GAIN: CalibrationUnits.GAIN, + CalibrationType.EQUALIZATION: CalibrationUnits.EQUALIZATION } @@ -123,7 +130,8 @@ def __init__(self, num_cols: int, num_rows: int) -> None: self._errors = np.full(self._shape, np.nan) # Other useful information for the metadata self.num_events = 0 - self._metadata = { + self._cached = False + self._cached_metadata = { CalibrationMetadata.NUM_COLS: num_cols, CalibrationMetadata.NUM_ROWS: num_rows } @@ -187,6 +195,8 @@ def errors(self, new_error: np.ndarray) -> None: def metadata(self) -> dict: """Return the metadata of the calibration matrix. """ + if self._cached: + return self._cached_metadata mask = self._entries > 0 # If there are no pixels with events, we can set the average, minimum and maximum number of # events to zero. @@ -197,12 +207,13 @@ def metadata(self) -> dict: else: entries_avg = entries_min = entries_max = 0 # Setting the metadata values. - self._metadata[CalibrationMetadata.NUM_EVENTS] = self.num_events - self._metadata[CalibrationMetadata.ENTRIES_AVG] = entries_avg - self._metadata[CalibrationMetadata.ENTRIES_MIN] = entries_min - self._metadata[CalibrationMetadata.ENTRIES_MAX] = entries_max - self._metadata[CalibrationMetadata.NUM_CALIBRATED_PIXELS] = int(mask.sum()) - return self._metadata + self._cached_metadata[CalibrationMetadata.NUM_EVENTS] = self.num_events + self._cached_metadata[CalibrationMetadata.ENTRIES_AVG] = entries_avg + self._cached_metadata[CalibrationMetadata.ENTRIES_MIN] = entries_min + self._cached_metadata[CalibrationMetadata.ENTRIES_MAX] = entries_max + self._cached_metadata[CalibrationMetadata.NUM_CALIBRATED_PIXELS] = int(mask.sum()) + self._cached = True + return self._cached_metadata def set_value(self, value: float) -> None: """Set a value for all the pixels in the calibration matrix. @@ -269,9 +280,9 @@ def to_hdf5(self, file_path: str, calibration_type: CalibrationType, is_syntheti h5file.create_dataset(self.ERRORS, data=self.errors, dtype=np.float32, **compression_pars) # Update the header with the relevant information and metadata. - self._metadata[CalibrationMetadata.CALIBRATION_TYPE] = calibration_type.value - self._metadata[CalibrationMetadata.IS_SYNTHETIC] = is_synthetic - self._metadata[CalibrationMetadata.FILE_NAME] = pathlib.Path(file_path).stem + self._cached_metadata[CalibrationMetadata.CALIBRATION_TYPE] = calibration_type.value + self._cached_metadata[CalibrationMetadata.IS_SYNTHETIC] = is_synthetic + self._cached_metadata[CalibrationMetadata.FILE_NAME] = pathlib.Path(file_path).stem for key, val in self.metadata.items(): h5file.attrs[key] = val return file_path @@ -298,7 +309,7 @@ def from_hdf5(cls, file_path: str) -> "CalibrationMatrix": obj = cls(num_cols=attrs["num_cols"], num_rows=attrs["num_rows"]) obj.num_events = attrs.get(CalibrationMetadata.NUM_EVENTS.value) for key, val in attrs.items(): - obj._metadata[key] = val + obj._cached_metadata[key] = val # Load the matrix and the hits matrices from the HDF5 file. obj._values = h5file[obj.VALUES][:] obj._entries = h5file[obj.ENTRIES][:] @@ -320,10 +331,10 @@ def __call__(self, col: np.ndarray, row: np.ndarray) -> float: def __str__(self) -> str: """Return a string representation of the calibration matrix. """ - if CalibrationMetadata.FILE_NAME in self._metadata: - return self._metadata[CalibrationMetadata.FILE_NAME] - return f"CalibrationMatrix(num_cols={self._metadata[CalibrationMetadata.NUM_COLS]}, " \ - f"num_rows={self._metadata[CalibrationMetadata.NUM_ROWS]})" + if CalibrationMetadata.FILE_NAME in self._cached_metadata: + return self._cached_metadata[CalibrationMetadata.FILE_NAME] + return f"CalibrationMatrix(num_cols={self._cached_metadata[CalibrationMetadata.NUM_COLS]}, " \ + f"num_rows={self._cached_metadata[CalibrationMetadata.NUM_ROWS]})" class CalibrateBase: @@ -598,16 +609,22 @@ def fit(self) -> Tuple[CalibrationMatrix, CalibrationMatrix]: return self.noise_cal, self.pedestal_cal +# The following methods are used for the equalization matrix calibration. They cannot be moved +# inside the class because they are used as functions to be executed in parallel, and they +# need to be defined at the top level of the module to be picklable by joblib. + + def _likelihood_fit(data: csr_matrix, conv_factor: float, pdf: SpectrumPDF, pdf_derivative: callable) -> Optional[Tuple[np.ndarray, np.ndarray]]: """Perform the likelihood fit for a region of the detector. Returns ------- - gain : np.ndarray - The gain values for the pixels in the region, calculated from the fit. + equalization : np.ndarray + The equalization values for the pixels in the region, calculated from the fit. error : np.ndarray - The error on the gain values for the pixels in the region, calculated from the fit. + The error on the equalization values for the pixels in the region, calculated + from the fit. """ # Define the negative log-likelihood function for the fit. def nll(pars): @@ -635,12 +652,12 @@ def nll_grad(pars): m.limits = [(1e-10, None) for _ in range(len(init_pars))] m.errordef = Minuit.LIKELIHOOD m.migrad() - # If the fit is successful, return the gain values and their errors, otherwise - # return None. + # If the fit is successful, return the pixel equalization values and their errors, + # otherwise return None. if m.valid: - gain = 1 / np.array(m.values) + equalization = 1 / np.array(m.values) error = m.errors / np.array(m.values)**2 - return gain, error + return equalization, error return None @@ -737,25 +754,28 @@ def _fit_block( # If the fit is not successful, return None and continue with the next block. if result is None: return None - # If the fit is successful, return the gain values and their errors and the + # If the fit is successful, return the equalization values and their errors and the # coordinates of the active pixels. return result[0], result[1], rows[active_mask], cols[active_mask], num_events_per_pixel -class CalibrateGain(CalibrateBase): +class CalibrateEqualization(CalibrateBase): - """Calibrate the gain of the detector by analyzing the events in a DigiFile. This class takes a - CalibrationMatrix object as input, and updates the matrix with the data from the file. - - At the moment, the dataset used for this task should contain events from a monochromatic source - and with known energy. + """Calculate the equalization matrix for the detector readout. This matrix is used + to convert the PHA values to Pulse Invariant (PI). By the definition, the mean value + of the matrix is 1.0. + The calibration is peformed by analyzing events in a DigiFile. + Arguments --------- - cal_matrix : CalibrationMatrix - Calibration matrix to be updated with the gain values calculated from the data. - energy : float - The energy of the monochromatic source, in eV. + num_cols : int + The number of columns of the detector readout chip. + num_rows : int + The number of rows of the detector readout chip. + pdf : SpectrumPDF + The probability density function of the spectrum of the events in the dataset, which is + used for the likelihood fit to calculate the equalization values for the pixels. """ def __init__(self, num_cols: int, num_rows: int, pdf: SpectrumPDF) -> None: @@ -786,9 +806,18 @@ def analyze_cluster(self, cluster: Cluster) -> None: self._event_count += 1 self.cal_matrix.num_events += 1 - def fit(self) -> CalibrationMatrix: + def fit(self, size: int) -> CalibrationMatrix: """Fit the collected events to determine the gain of each pixel. + Arguments + --------- + size : int + The length of the square chip region to be fitted simultaneously. The optimal + value for this parameter is a trade-off between the number of active pixels in + the chip and the computational time of the fit, which increases significantly + with the number of pixels. For small active regions, a size of 6 can be a good + choice, while for the full chip calibration, 10 is a good value. + Returns ------- cal_matrix : CalibrationMatrix @@ -812,10 +841,8 @@ def fit(self) -> CalibrationMatrix: # Calculate the sum of the ADC counts in each event, which is used for the data # cuts in the fit, and to calculate the mean ADC count in an event. event_sum = data_csr.sum(axis=1).A1 - # Calculate the mean of the ADC counts in an event. - conv_factor = self._pdf.mean() / event_sum.mean() - # Define the size of the blocks to fit contemporarily. - size = 6 + # Calculate the conversion factor from ADC to eV. + adc_to_ev = self._pdf.mean() / event_sum.mean() # Calculate the starting column and row indices for the blocks. We are using an # overlap of 2 pixels between the blocks to ensure that pixels on the edges are # correctly calibrated. @@ -834,7 +861,7 @@ def fit(self) -> CalibrationMatrix: del data_csr, data_pha, data_coords, data_event_rows gc.collect() # Start the parallel processing of the blocks. - args = (size, nrows, ncols, data_csc, event_sum, conv_factor, + args = (size, nrows, ncols, data_csc, event_sum, adc_to_ev, self._pdf, self._pdf_derivative) bar_format = "{desc}: {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]" results = Parallel(n_jobs=-1, backend="loky")( @@ -867,10 +894,41 @@ def fit(self) -> CalibrationMatrix: self.cal_matrix.values = values self.cal_matrix.errors = errors self.cal_matrix.entries = entries + self.cal_matrix._cached_metadata[CalibrationMetadata.ADC_TO_EV] = adc_to_ev return self.cal_matrix +class CalibrateGain: + + """Class for calibrating the gain of the readout chip. + + This class provides methods to calculate the gain values based on the equalization + matrix and sensor material properties. + """ + + def __init__(self, equalization_matrix: CalibrationMatrix, material_symbol: str) -> None: + """Class constructor. + """ + num_rows, num_cols = equalization_matrix.shape + self.cal_matrix = CalibrationMatrix(num_cols, num_rows) + self.equalization_matrix = equalization_matrix + self.material_symbol = material_symbol + + def fit(self) -> CalibrationMatrix: + """Calculate the gain values based on the equalization matrix and the sensor material + properties, and update the calibration matrix with the calculated values. + """ + ionization_potential = xraydb.ionization_potential(self.material_symbol) + conv_factor = self.equalization_matrix.metadata[CalibrationMetadata.ADC_TO_EV] / ionization_potential + self.cal_matrix.values = self.equalization_matrix.values / conv_factor + self.cal_matrix.errors = self.equalization_matrix.errors / conv_factor + self.cal_matrix.entries = self.equalization_matrix.entries + self.cal_matrix.num_events = self.equalization_matrix.num_events + return self.cal_matrix + + class CalibrateENC: + """Class for calibrating the equivalent noise charge (ENC) of the readout chip. This class provides methods to calculate the ENC values based on the noise and gain @@ -878,7 +936,7 @@ class CalibrateENC: """ def __init__(self, noise_matrix: CalibrationMatrix, gain_matrix: CalibrationMatrix) -> None: - """Class constructor + """Class constructor. """ if noise_matrix.shape != gain_matrix.shape: raise ValueError("Noise and gain matrices must have the same shape.") diff --git a/src/hexsample/cli.py b/src/hexsample/cli.py index 9597541..10c157d 100644 --- a/src/hexsample/cli.py +++ b/src/hexsample/cli.py @@ -130,6 +130,13 @@ def __init__(self) -> None: self.add_enc_calibration_options(enc) self.add_logging_level(enc) enc.set_defaults(runner=pipeline.calibrate_enc) + # Pixel equalization calibration + equalization = calibrate_subparsers.add_parser("equalization", + help="calibrate the chip pixel equalization") + self.add_input_file(equalization) + self.add_calibrate_equalization_options(equalization) + self.add_logging_level(equalization) + equalization.set_defaults(runner=pipeline.calibrate_equalization) # Eta function calibration eta = calibrate_subparsers.add_parser("eta", help="calibrate the eta function") self.add_input_file(eta) @@ -138,8 +145,7 @@ def __init__(self) -> None: eta.set_defaults(runner=pipeline.calibrate_eta) # Gain calibration gain = calibrate_subparsers.add_parser("gain", help="calibrate the chip gain") - self.add_input_file(gain) - self.add_calibrate_gain_options(gain) + self.add_gain_calibration_options(gain) self.add_logging_level(gain) gain.set_defaults(runner=pipeline.calibrate_gain) # Noise calibration @@ -289,6 +295,16 @@ def add_cal_pedestal_file(parser: argparse.ArgumentParser, default: str, "calibration data or name of a calibration file inside the " \ "caldb/pedestal folder.") + @staticmethod + def add_cal_equalization_file(parser: argparse.ArgumentParser, default: str, + required: bool = False) -> None: + """Add an option for the equalization calibration file. + """ + parser.add_argument("--equalization", type=caldb.CalDB.open_equalization, default=default, + required=required, help="path to a file containing the equalization " \ + "calibration data or name of a calibration file inside the " \ + "caldb/equalization folder.") + @staticmethod def add_cal_gain_file(parser: argparse.ArgumentParser, default: str, required: bool = False) -> None: @@ -409,7 +425,8 @@ def add_recon_options(self, parser: argparse.ArgumentParser) -> None: type=str, default="centroid", help="How to reconstruct position") CliArgumentParser.add_cal_noise_file(group, default=None, required=True) CliArgumentParser.add_cal_pedestal_file(group, default=None, required=True) - CliArgumentParser.add_cal_gain_file(group, default=None, required=True) + # CliArgumentParser.add_cal_gain_file(group, default=None, required=True) + CliArgumentParser.add_cal_equalization_file(group, default=None, required=True) group.add_argument("--eta_2pix_rad_sigma", default=defaults.eta_2pix_rad_sigma, type=float, help="probit function sigma parameter for two pixel" \ "events eta reconstruction") @@ -440,12 +457,14 @@ def add_calibrate_dark_options(self, parser: argparse.ArgumentParser) -> None: help="number of events to be analyzed in a batch for the dark" \ " calibration") - def add_calibrate_gain_options(self, parser: argparse.ArgumentParser) -> None: + def add_calibrate_equalization_options(self, parser: argparse.ArgumentParser) -> None: """Add an option group for the gain calibration properties. """ - defaults = tasks.CalibrationGainDefaults + defaults = tasks.CalibrationEqualizationDefaults parser.add_argument("pdf", type=pdf.SpectrumPDF.from_file, help="path to the spectrum PDF file") + parser.add_argument("--size", type=int, default=defaults.size, + help="length of the square region of the chip to fit simultaneously") CliArgumentParser.add_cal_noise_file(parser, default=None, required=True) CliArgumentParser.add_cal_pedestal_file(parser, default=None, required=True) CliArgumentParser.add_zero_sup_threshold(parser, @@ -456,7 +475,7 @@ def add_calibrate_eta_options(self, parser: argparse.ArgumentParser) -> None: """ CliArgumentParser.add_cal_noise_file(parser, default=None, required=True) CliArgumentParser.add_cal_pedestal_file(parser, default=None, required=True) - CliArgumentParser.add_cal_gain_file(parser, default=None, required=True) + CliArgumentParser.add_cal_equalization_file(parser, default=None, required=True) parser.add_argument("--num_bins", type=int, default=tasks.CalibrationEtaDefaults.num_bins, help="number of bins to be used in the eta function calibration") @@ -474,6 +493,19 @@ def add_enc_calibration_options(self, parser: argparse.ArgumentParser) -> None: help="directory where the generated ENC calibration file" \ " will be saved") + def add_gain_calibration_options(self, parser: argparse.ArgumentParser) -> None: + """Add an option group for the gain calibration properties. + """ + defaults = tasks.CalibrationGainDefaults + CliArgumentParser.add_cal_equalization_file(parser, default=None, required=True) + parser.add_argument("--material_symbol", type=str, + choices=sensor.material_symbols(), + default=defaults.material_symbol, + help="active sensor material") + parser.add_argument("--output_dir", type=str, default=defaults.output_dir, + help="directory where the generated gain calibration file" \ + " will be saved") + def add_synthesize_calibration_file_options(self, parser: argparse.ArgumentParser) -> None: """Add an option group to generate calibration files. """ @@ -505,7 +537,7 @@ def add_display_options(self, parser: argparse.ArgumentParser) -> None: default = tasks.DisplayDefaults CliArgumentParser.add_cal_noise_file(parser, default=default.noise_matrix) CliArgumentParser.add_cal_pedestal_file(parser, default=default.pedestal_matrix) - CliArgumentParser.add_cal_gain_file(parser, default=default.gain_matrix) + CliArgumentParser.add_cal_equalization_file(parser, default=default.equalization_matrix) def add_calibview_options(self, parser: argparse.ArgumentParser) -> None: """Add an option group for the calibration view properties. diff --git a/src/hexsample/clustering.py b/src/hexsample/clustering.py index 268d1e1..20f9517 100644 --- a/src/hexsample/clustering.py +++ b/src/hexsample/clustering.py @@ -43,6 +43,7 @@ class Cluster: col: np.ndarray row: np.ndarray pha: np.ndarray + adc_to_ev: float pos_recon_algorithm: str recon_pars: Optional[dict] = None @@ -62,6 +63,11 @@ def pulse_height(self) -> float: """ return self.pha.sum() + def energy(self) -> float: + """Return the energy of the cluster in eV. + """ + return self.pulse_height() * self.adc_to_ev + def centroid(self) -> Tuple[float, float]: """Return the cluster centroid. """ @@ -348,4 +354,4 @@ def run(self, event) -> Cluster: # Sort the arrays in decreasing order before applying the position suppression. pha, col, row = self.position_suppress(pha[mask], col[mask], row[mask]) x, y = self.readout.pixel_to_world(col, row) - return Cluster(x, y, col, row, pha, self.pos_recon_algorithm, self.recon_pars) + return Cluster(x, y, col, row, pha, readout.gain.metadata["adc_to_ev"], self.pos_recon_algorithm, self.recon_pars) diff --git a/src/hexsample/pipeline.py b/src/hexsample/pipeline.py index 3a5a11f..3e66a91 100644 --- a/src/hexsample/pipeline.py +++ b/src/hexsample/pipeline.py @@ -50,7 +50,7 @@ def reconstruct(**kwargs) -> str: input_file_path = kwargs["input_file"] noise_matrix = kwargs["noise"] pedestal_matrix = kwargs["pedestal"] - gain_matrix = kwargs["gain"] + equalization_matrix = kwargs["equalization"] suffix = kwargs.get("suffix", defaults.suffix) zero_sup_threshold = kwargs.get("zero_sup_threshold", defaults.zero_sup_threshold) num_neighbors = kwargs.get("num_neighbors", defaults.num_neighbors) @@ -64,7 +64,7 @@ def reconstruct(**kwargs) -> str: eta_3pix_theta_sigma = kwargs.get("eta_3pix_theta_sigma", defaults.eta_3pix_theta_sigma) recon_args = (eta_2pix_rad_sigma, eta_2pix_rad_pivot, eta_3pix_rad_offset, eta_3pix_rad_sigma, eta_3pix_rad_pivot, eta_3pix_theta_sigma) - args = input_file_path, noise_matrix, pedestal_matrix, gain_matrix, suffix, \ + args = input_file_path, noise_matrix, pedestal_matrix, equalization_matrix, suffix, \ zero_sup_threshold,num_neighbors, max_neighbors, pos_recon_algorithm, *recon_args return tasks.reconstruct(*args, kwargs) @@ -81,13 +81,13 @@ def calibrate_eta(**kwargs) -> None: """Calibrate the eta function using the events from a digi file. """ input_file_path = kwargs["input_file"] - gain_matrix = kwargs["gain"] + equalization_matrix = kwargs["equalization"] noise_matrix = kwargs["noise"] pedestal_matrix = kwargs["pedestal"] num_bins = kwargs.get("num_bins", tasks.CalibrationEtaDefaults.num_bins) zero_sup_threshold = kwargs.get("zero_sup_threshold", tasks.CalibrationEtaDefaults.zero_sup_threshold) - args = input_file_path, noise_matrix, pedestal_matrix, gain_matrix, num_bins, \ + args = input_file_path, noise_matrix, pedestal_matrix, equalization_matrix, num_bins, \ zero_sup_threshold return tasks.calibrate_eta(*args) @@ -119,16 +119,27 @@ def calibrate_enc(**kwargs) -> str: return tasks.calibrate_enc(*args) -def calibrate_gain(**kwargs) -> str: - """Calibrate the gain of the chip. +def calibrate_equalization(**kwargs) -> str: + """Calibrate the equalization of the chip. """ input_file_path = kwargs["input_file"] pdf = kwargs["pdf"] noise_matrix = kwargs["noise"] pedestal_matrix = kwargs["pedestal"] + size = kwargs.get("size", tasks.CalibrationEqualizationDefaults.size) zero_sup_threshold = kwargs.get("zero_sup_threshold", - tasks.CalibrationGainDefaults.zero_sup_threshold) - args = input_file_path, pdf, noise_matrix, pedestal_matrix, zero_sup_threshold + tasks.CalibrationEqualizationDefaults.zero_sup_threshold) + args = input_file_path, pdf, noise_matrix, pedestal_matrix, size, zero_sup_threshold + return tasks.calibrate_equalization(*args) + + +def calibrate_gain(**kwargs) -> str: + """Calibrate the gain of the chip. + """ + equalization_matrix = kwargs["equalization"] + material_symbol = kwargs.get("material_symbol", tasks.CalibrationGainDefaults.material_symbol) + output_dir = kwargs.get("output_dir", tasks.CalibrationGainDefaults.output_dir) + args = equalization_matrix, material_symbol, output_dir return tasks.calibrate_gain(*args) @@ -151,10 +162,10 @@ def display(**kwargs) -> None: """Display events from a digi or recon file. """ input_file_path = kwargs["input_file"] - gain_matrix = kwargs["gain"] + equalization_matrix = kwargs["equalization"] noise_matrix = kwargs["noise"] pedestal_matrix = kwargs["pedestal"] - args = input_file_path, noise_matrix, pedestal_matrix, gain_matrix + args = input_file_path, noise_matrix, pedestal_matrix, equalization_matrix return tasks.display(*args) diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py index d8265d9..d3e34e1 100644 --- a/src/hexsample/tasks.py +++ b/src/hexsample/tasks.py @@ -34,9 +34,11 @@ from . import rng from .analysis import create_histogram from .calibration import ( + CalibrationMetadata, CALIBRATION_UNITS, CalibrateDark, CalibrateENC, + CalibrateEqualization, CalibrateGain, CalibrateNoise, CalibrationMatrix, @@ -234,7 +236,7 @@ def reconstruct( input_file_path: str, noise_matrix: CalibrationMatrix, pedestal_matrix: CalibrationMatrix, - gain_matrix: CalibrationMatrix, + equalization_matrix: CalibrationMatrix, suffix: str = ReconstructionDefaults.suffix, zero_sup_threshold: int = ReconstructionDefaults.zero_sup_threshold, num_neighbors: int = ReconstructionDefaults.num_neighbors, @@ -267,8 +269,8 @@ def reconstruct( pedestal_matrix : CalibrationMatrix The pedestal matrix to use for the reconstruction. - gain_matrix : CalibrationMatrix - The gain matrix to use for the reconstruction. + equalization_matrix : CalibrationMatrix + The equalization matrix to use for the reconstruction. suffix : str The suffix to append to the output file name. @@ -308,7 +310,7 @@ def reconstruct( input_file, header, readout_mode = open_file(input_file_path) # Creating the readout object. args = HexagonalLayout(header["layout"]), header["num_cols"], header["num_rows"],\ - header["pitch"], noise_matrix, gain_matrix, pedestal_matrix + header["pitch"], noise_matrix, equalization_matrix, pedestal_matrix readout = create_readout(readout_mode, header, *args) # Define the effective number of neighbors to be used for the clustering. If max_neighbors is # specified (i.e. different from -1), it has priority over num_neighbors. It is necessary to @@ -400,7 +402,7 @@ def calibrate_eta( input_file_path: str, noise_matrix: CalibrationMatrix, pedestal_matrix: CalibrationMatrix, - gain_matrix: CalibrationMatrix, + equalization_matrix: CalibrationMatrix, num_bins: int = CalibrationEtaDefaults.num_bins, zero_sup_threshold: int = CalibrationEtaDefaults.zero_sup_threshold ) -> None: @@ -417,8 +419,8 @@ def calibrate_eta( pedestal_matrix : CalibrationMatrix The pedestal calibration matrix to use for the analysis. - gain_matrix : CalibrationMatrix - The gain calibration matrix to use for the analysis. + equalization_matrix : CalibrationMatrix + The equalization calibration matrix to use for the analysis. num_bins : int The number of bins to be used in the calibration. @@ -428,7 +430,7 @@ def calibrate_eta( """ input_file, header, readout_mode = open_file(input_file_path) args = HexagonalLayout(header["layout"]), header["num_cols"], header["num_rows"],\ - header["pitch"], noise_matrix, gain_matrix, pedestal_matrix + header["pitch"], noise_matrix, equalization_matrix, pedestal_matrix readout = create_readout(readout_mode, header, *args) clustering = ClusteringNN(readout, zero_sup_threshold, num_neighbors=6, pos_recon_algorithm="centroid") @@ -542,6 +544,10 @@ def synthesize_calibration_file( # Generate the calibration matrix with the appropriate size and values calibration_matrix = CalibrationMatrix(num_cols, num_rows) rms = mean * percent_rms / 100 + if calibration_type == CalibrationType.EQUALIZATION: + calibration_matrix._cached_metadata[CalibrationMetadata.ADC_TO_EV] = mean + rms /= mean + mean = 1. logger.info(f"Generating {calibration_type.value} calibration matrix with " f"mean {mean:g} and RMS {rms:g}...") calibration_matrix.values = rng.generator.normal(mean, scale=rms, size=(num_rows, num_cols)) @@ -663,7 +669,7 @@ def calibrate_enc( logger.info("Calculating the ENC matrix...") enc_matrix = enc_calibration.fit() noise_file_name = noise_matrix.metadata["file_name"] - enc_file_name = noise_file_name.replace("_matrix_noise", "_matrix_enc.h5") + enc_file_name = noise_file_name.replace("noise", "enc") + ".h5" output_file_path = pathlib.Path(output_dir) / enc_file_name logger.info(f"Saving to {output_file_path}...") enc_matrix.to_hdf5(output_file_path, CalibrationType.ENC, False) @@ -672,24 +678,26 @@ def calibrate_enc( @dataclass(frozen=True) -class CalibrationGainDefaults: - """Default parameters for the gain calibration task. +class CalibrationEqualizationDefaults: + """Default parameters for the pixel equalization calibration task. This is a small helper dataclass to help ensure consistency between the main task definition in this Python module and the command-line interface. """ + size: int = 10 zero_sup_threshold: int = 20 -def calibrate_gain( +def calibrate_equalization( input_file_path: str, pdf: SpectrumPDF, noise_matrix: CalibrationMatrix, pedestal_matrix: CalibrationMatrix, - zero_sup_threshold: int = CalibrationGainDefaults.zero_sup_threshold + size: int = CalibrationEqualizationDefaults.size, + zero_sup_threshold: int = CalibrationEqualizationDefaults.zero_sup_threshold ) -> str: - """Calibrate gain of the readout chip using the events from a digi file. + """Calibrate pixel equalization of the readout chip using the events from a digi file. The results are stored as a matrix in a HDF5 file. Arguments @@ -705,21 +713,25 @@ def calibrate_gain( pedestal_matrix : CalibrationMatrix The pedestal matrix to use for the gain calibration. + + size : int + The length of the square region of the chip to fit simultaneously during the + calibration. zero_sup_threshold : int The zero-suppression threshold to use for the clustering in the gain calibration. """ # Open the input file and extract the readout information. input_file, header, readout_mode = open_file(input_file_path) - # Define the arguments to create the readout object with unit gain, necessary for the - # calibration. + # Define the arguments to create the readout object with uniform pixel equalization, + # necessary for the calibration. unit_gain_map = CalibrationMatrix(header["num_cols"], header["num_rows"]) unit_gain_map.set_value(1.) args = HexagonalLayout(header["layout"]), header["num_cols"], header["num_rows"],\ header["pitch"], noise_matrix, unit_gain_map, pedestal_matrix readout = create_readout(readout_mode, header, *args) # Initialize the gain matrix and run the calibration. - gain_calibration = CalibrateGain(header["num_cols"], header["num_rows"], pdf) + gain_calibration = CalibrateEqualization(header["num_cols"], header["num_rows"], pdf) clustering = ClusteringNN(readout, zero_sup_threshold=zero_sup_threshold, num_neighbors=6, pos_recon_algorithm="centroid") logger.info("Starting the event loop...") @@ -732,11 +744,53 @@ def calibrate_gain( continue gain_calibration.analyze_cluster(cluster) logger.info("Calculating the gain matrix...") + equalization_matrix = gain_calibration.fit(size) + output_file_path = input_file_path.replace(".h5", "_matrix_equalization.h5") + logger.info(f"Saving equalization matrix to {output_file_path}...") + equalization_matrix.to_hdf5(output_file_path, CalibrationType.EQUALIZATION, False) + input_file.close() + logger.info("Done!") + return output_file_path + + +@dataclass(frozen=True) +class CalibrationGainDefaults: + """Default parameters for the gain calibration task. + + This is a small helper dataclass to help ensure consistency between the main task + definition in this Python module and the command-line interface. + """ + + output_dir: Union[str, pathlib.Path] = HEXSAMPLE_DATA + material_symbol: str = "Si" + + +def calibrate_gain( + equalization_matrix: CalibrationMatrix, + material_symbol: str = CalibrationGainDefaults.material_symbol, + output_dir: Union[str, pathlib.Path] = CalibrationGainDefaults.output_dir + ) -> str: + """Calibrate the gain of the readout chip using the equalization matrix and the + ionization potential of the sensor material. The results are stored as a matrix + in a HDF5 file. + + Arguments + equalization_matrix : CalibrationMatrix + The equalization matrix to use for the gain calibration. + material_symbol : str + The symbol of the sensor material to use for the gain calibration, e.g. "Si" for + silicon. + """ + name, args = current_call(num_backward_steps=1) + logger.info(f"Running {__name__}.{name} with arguments {args}...") + gain_calibration = CalibrateGain(equalization_matrix, material_symbol) + logger.info("Calculating the gain matrix...") gain_matrix = gain_calibration.fit() - output_file_path = input_file_path.replace(".h5", "_matrix_gain.h5") - logger.info(f"Saving gain matrix to {output_file_path}...") + equalization_file_name = equalization_matrix.metadata["file_name"] + gain_file_name = equalization_file_name.replace("equalization", "gain") + ".h5" + output_file_path = pathlib.Path(output_dir) / gain_file_name + logger.info(f"Saving to {output_file_path}...") gain_matrix.to_hdf5(output_file_path, CalibrationType.GAIN, False) - input_file.close() logger.info("Done!") return output_file_path @@ -751,14 +805,14 @@ class DisplayDefaults: noise_matrix: Optional[CalibrationMatrix] = None pedestal_matrix: Optional[CalibrationMatrix] = None - gain_matrix: Optional[CalibrationMatrix] = None + equalization_matrix: Optional[CalibrationMatrix] = None def display( input_file_path: str, noise_matrix: Optional[CalibrationMatrix] = DisplayDefaults.noise_matrix, pedestal_matrix: Optional[CalibrationMatrix] = DisplayDefaults.pedestal_matrix, - gain_matrix: Optional[CalibrationMatrix] = DisplayDefaults.gain_matrix, + equalization_matrix: Optional[CalibrationMatrix] = DisplayDefaults.equalization_matrix, ) -> None: """Display events from a digi file. @@ -773,16 +827,17 @@ def display( pedestal_matrix : CalibrationMatrix, optional The pedestal calibration matrix to use for the display. - gain_matrix : CalibrationMatrix, optional - The gain calibration matrix to use for the display. + equalization_matrix : CalibrationMatrix, optional + The equalization calibration matrix to use for the display. """ # Open the input file and extract the header and the readout information. input_file, header, readout_mode = open_file(input_file_path) - cal_matrices = [noise_matrix, gain_matrix, pedestal_matrix] + cal_matrices = [noise_matrix, equalization_matrix, pedestal_matrix] missing = [matrix for matrix in cal_matrices if matrix is None] # Check if any of the calibration matrices is missing. if 0 < len(missing) < len(cal_matrices): - logger.warning(f"{len(missing)} calibration matrices are missing.") + logger.warning(f"{len(missing)} calibration matrices are missing to perform" \ + " event reconstruction.") # Initialize the correct type of event display based on the input matrices. grid_args = HexagonalLayout(header["layout"]), header["num_cols"], header["num_rows"], \ header["pitch"] From 091ba3f36aa80471913ea544e13d36502b0941a3 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Mon, 11 May 2026 10:09:45 +0200 Subject: [PATCH 22/30] Linting. --- src/hexsample/calibration.py | 36 ++++++++++++++++++++++++++++-------- src/hexsample/clustering.py | 3 ++- src/hexsample/recon.py | 1 + src/hexsample/tasks.py | 4 ++-- 4 files changed, 33 insertions(+), 11 deletions(-) diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index 3098515..ec1ccd1 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -215,6 +215,18 @@ def metadata(self) -> dict: self._cached = True return self._cached_metadata + def update_metadata(self, key: CalibrationMetadata, value) -> None: + """Update the metadata of the calibration matrix with a new key-value pair. + + Arguments + --------- + key : CalibrationMetadata + The key of the metadata to be updated. + value : + The value of the metadata to be updated. + """ + self._cached_metadata[key] = value + def set_value(self, value: float) -> None: """Set a value for all the pixels in the calibration matrix. @@ -333,8 +345,9 @@ def __str__(self) -> str: """ if CalibrationMetadata.FILE_NAME in self._cached_metadata: return self._cached_metadata[CalibrationMetadata.FILE_NAME] - return f"CalibrationMatrix(num_cols={self._cached_metadata[CalibrationMetadata.NUM_COLS]}, " \ - f"num_rows={self._cached_metadata[CalibrationMetadata.NUM_ROWS]})" + return f"CalibrationMatrix( " \ + f"num_cols={self._cached_metadata[CalibrationMetadata.NUM_COLS]}, " \ + f"num_rows={self._cached_metadata[CalibrationMetadata.NUM_ROWS]})" class CalibrateBase: @@ -894,7 +907,7 @@ def fit(self, size: int) -> CalibrationMatrix: self.cal_matrix.values = values self.cal_matrix.errors = errors self.cal_matrix.entries = entries - self.cal_matrix._cached_metadata[CalibrationMetadata.ADC_TO_EV] = adc_to_ev + self.cal_matrix.update_metadata(CalibrationMetadata.ADC_TO_EV, adc_to_ev) return self.cal_matrix @@ -913,19 +926,26 @@ def __init__(self, equalization_matrix: CalibrationMatrix, material_symbol: str) self.cal_matrix = CalibrationMatrix(num_cols, num_rows) self.equalization_matrix = equalization_matrix self.material_symbol = material_symbol - + def fit(self) -> CalibrationMatrix: """Calculate the gain values based on the equalization matrix and the sensor material properties, and update the calibration matrix with the calculated values. """ + # Get the ionization potential of the sensor material and the conversion factor + # from ADC to eV from the metadata of the equalization matrix. ionization_potential = xraydb.ionization_potential(self.material_symbol) - conv_factor = self.equalization_matrix.metadata[CalibrationMetadata.ADC_TO_EV] / ionization_potential - self.cal_matrix.values = self.equalization_matrix.values / conv_factor - self.cal_matrix.errors = self.equalization_matrix.errors / conv_factor + adc_to_ev = self.equalization_matrix.metadata[CalibrationMetadata.ADC_TO_EV] + # Calculate the conversion factor from ADC to electrons. + adc_to_electrons = adc_to_ev / ionization_potential + # Calculate the gain values as the equalization values divided by the conversion + # factor and update the errors. + self.cal_matrix.values = self.equalization_matrix.values / adc_to_electrons + self.cal_matrix.errors = self.equalization_matrix.errors / adc_to_electrons + # Update some metadata for the gain matrix. self.cal_matrix.entries = self.equalization_matrix.entries self.cal_matrix.num_events = self.equalization_matrix.num_events return self.cal_matrix - + class CalibrateENC: diff --git a/src/hexsample/clustering.py b/src/hexsample/clustering.py index 20f9517..62091fe 100644 --- a/src/hexsample/clustering.py +++ b/src/hexsample/clustering.py @@ -307,6 +307,7 @@ def run(self, event) -> Cluster: for speed using proper numpy array for the offset indexes. """ readout = self.readout + adc_to_ev = readout.gain.metadata["adc_to_ev"] if isinstance(event, DigiEventCircular): # If the readout is circular, we want to take all the neirest neighbors. # Trailing -1 is bc the central px is already considered. @@ -354,4 +355,4 @@ def run(self, event) -> Cluster: # Sort the arrays in decreasing order before applying the position suppression. pha, col, row = self.position_suppress(pha[mask], col[mask], row[mask]) x, y = self.readout.pixel_to_world(col, row) - return Cluster(x, y, col, row, pha, readout.gain.metadata["adc_to_ev"], self.pos_recon_algorithm, self.recon_pars) + return Cluster(x, y, col, row, pha, adc_to_ev, self.pos_recon_algorithm, self.recon_pars) diff --git a/src/hexsample/recon.py b/src/hexsample/recon.py index 1117036..679fa75 100644 --- a/src/hexsample/recon.py +++ b/src/hexsample/recon.py @@ -25,6 +25,7 @@ from .clustering import Cluster + @dataclass class ReconEventBase: diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py index d3e34e1..9c6be60 100644 --- a/src/hexsample/tasks.py +++ b/src/hexsample/tasks.py @@ -34,7 +34,6 @@ from . import rng from .analysis import create_histogram from .calibration import ( - CalibrationMetadata, CALIBRATION_UNITS, CalibrateDark, CalibrateENC, @@ -42,6 +41,7 @@ CalibrateGain, CalibrateNoise, CalibrationMatrix, + CalibrationMetadata, CalibrationType, ) from .clustering import ClusteringNN @@ -545,7 +545,7 @@ def synthesize_calibration_file( calibration_matrix = CalibrationMatrix(num_cols, num_rows) rms = mean * percent_rms / 100 if calibration_type == CalibrationType.EQUALIZATION: - calibration_matrix._cached_metadata[CalibrationMetadata.ADC_TO_EV] = mean + calibration_matrix.update_metadata(CalibrationMetadata.ADC_TO_EV, mean) rms /= mean mean = 1. logger.info(f"Generating {calibration_type.value} calibration matrix with " From cae43b37f404fedce543e484ecab25ab21176327 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Mon, 11 May 2026 10:42:15 +0200 Subject: [PATCH 23/30] Release notes. --- docs/release_notes.rst | 29 +++++++++++++++++++++++++++++ src/hexsample/calibration.py | 16 ++++++++-------- src/hexsample/cli.py | 1 - src/hexsample/clustering.py | 2 ++ src/hexsample/tasks.py | 10 +++++----- 5 files changed, 44 insertions(+), 14 deletions(-) diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 8106125..e6c5c89 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -4,6 +4,35 @@ Release notes ============= +* Package dependencies updated: `joblib` and `iminuit` added. +* New calibration matrix type `equalization` added in `caldb.py` to store the pixel equalization + factors. +* New `CalibrateEqualization` class to calibrate the pixel equalization factors. This is + performed with a likelihood fit using the source spectrum probability density function (PDF) + and the observed energy of the events. Corresponding CLI command `calibgen equalization` added + to generate equalization calibration files from event files. +* The `CalibrateGain` class has been refactored. Now it produces a gain calibration matrix from + a equalization calibration matrix, by using the ADC to eV conversion and the chip sensor + matieral. The gain matrix is expressed in ADC counts per electron. Corresponding CLI command + `calibgen gain` refactored to reflect this change. +* Some CLI commands refactored to use the equalization calibration file instead of the gain + calibration file (`reconstruct`, `display`, `calibgen eta`). +* New `pdf.py` module containing facilities to calculate the PDF of the source spectrum and store + it in a `.npz` file. +* Added `calibspec` CLI command to create a source spectrum PDF file from a reconstructed event + file. The PDF is saved as a `.npz` file. +* New `adc` method to calculate the total ADC counts of the event added to `ReconEvent`. Now a + reconstructed file contains both the ADC counts and the energy in eV of the events (if the + conversion factor from ADC to eV is available in the equalization calibration file). +* New `energy` method to calculate the total energy of the event added to `Cluster`. The + conversion factor from ADC to eV is passed by `ClusteringNN`. +* Pull requests merged and issues closed: + + - https://github.com/lucabaldini/hexsample/pull/132 + - https://github.com/lucabaldini/hexsample/issues/129 + + + Version 0.16.0 (2026-05-05) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index ec1ccd1..dcdba27 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -46,10 +46,10 @@ class CalibrationType(str, Enum): """ ENC = "enc" - PEDESTAL = "pedestal" - NOISE = "noise" - GAIN = "gain" EQUALIZATION = "equalization" + GAIN = "gain" + NOISE = "noise" + PEDESTAL = "pedestal" @classmethod def values(cls) -> Tuple[str, ...]: @@ -85,18 +85,18 @@ class CalibrationUnits(str, Enum): """ ENC = "Electrons" + EQUALIZATION = "" + GAIN = "ADC counts / electron" NOISE = "ADC counts" PEDESTAL = "ADC counts" - GAIN = "ADC counts / electron" - EQUALIZATION = "" CALIBRATION_UNITS = { CalibrationType.ENC: CalibrationUnits.ENC, + CalibrationType.EQUALIZATION: CalibrationUnits.EQUALIZATION, + CalibrationType.GAIN: CalibrationUnits.GAIN, CalibrationType.NOISE: CalibrationUnits.NOISE, CalibrationType.PEDESTAL: CalibrationUnits.PEDESTAL, - CalibrationType.GAIN: CalibrationUnits.GAIN, - CalibrationType.EQUALIZATION: CalibrationUnits.EQUALIZATION } @@ -115,9 +115,9 @@ class CalibrationMatrix: The number of rows of the detector readout chip. """ - VALUES = "values" ENTRIES = "entries" ERRORS = "errors" + VALUES = "values" def __init__(self, num_cols: int, num_rows: int) -> None: """Class constructor. diff --git a/src/hexsample/cli.py b/src/hexsample/cli.py index 10c157d..ef4898c 100644 --- a/src/hexsample/cli.py +++ b/src/hexsample/cli.py @@ -425,7 +425,6 @@ def add_recon_options(self, parser: argparse.ArgumentParser) -> None: type=str, default="centroid", help="How to reconstruct position") CliArgumentParser.add_cal_noise_file(group, default=None, required=True) CliArgumentParser.add_cal_pedestal_file(group, default=None, required=True) - # CliArgumentParser.add_cal_gain_file(group, default=None, required=True) CliArgumentParser.add_cal_equalization_file(group, default=None, required=True) group.add_argument("--eta_2pix_rad_sigma", default=defaults.eta_2pix_rad_sigma, type=float, help="probit function sigma parameter for two pixel" \ diff --git a/src/hexsample/clustering.py b/src/hexsample/clustering.py index 62091fe..7aef0f1 100644 --- a/src/hexsample/clustering.py +++ b/src/hexsample/clustering.py @@ -312,6 +312,8 @@ def run(self, event) -> Cluster: # If the readout is circular, we want to take all the neirest neighbors. # Trailing -1 is bc the central px is already considered. self.num_neighbors = 6 #HexagonalReadoutCircular.NUM_PIXELS - 1 + if readout.is_at_border(event.column, event.row): + return None col = [event.column] row = [event.row] adc_channel_order = [readout.adc_channel(event.column, event.row)] diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py index 9c6be60..1997f60 100644 --- a/src/hexsample/tasks.py +++ b/src/hexsample/tasks.py @@ -730,8 +730,8 @@ def calibrate_equalization( args = HexagonalLayout(header["layout"]), header["num_cols"], header["num_rows"],\ header["pitch"], noise_matrix, unit_gain_map, pedestal_matrix readout = create_readout(readout_mode, header, *args) - # Initialize the gain matrix and run the calibration. - gain_calibration = CalibrateEqualization(header["num_cols"], header["num_rows"], pdf) + # Initialize the equalization matrix and run the calibration. + equalization_calibration = CalibrateEqualization(header["num_cols"], header["num_rows"], pdf) clustering = ClusteringNN(readout, zero_sup_threshold=zero_sup_threshold, num_neighbors=6, pos_recon_algorithm="centroid") logger.info("Starting the event loop...") @@ -742,9 +742,9 @@ def calibrate_equalization( continue if cluster is None: continue - gain_calibration.analyze_cluster(cluster) - logger.info("Calculating the gain matrix...") - equalization_matrix = gain_calibration.fit(size) + equalization_calibration.analyze_cluster(cluster) + logger.info("Calculating the equalization matrix...") + equalization_matrix = equalization_calibration.fit(size) output_file_path = input_file_path.replace(".h5", "_matrix_equalization.h5") logger.info(f"Saving equalization matrix to {output_file_path}...") equalization_matrix.to_hdf5(output_file_path, CalibrationType.EQUALIZATION, False) From 9a909fdbd38138fdbf33bf24a74a7e096dfae671 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Mon, 11 May 2026 10:45:57 +0200 Subject: [PATCH 24/30] Release notes. --- docs/release_notes.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/release_notes.rst b/docs/release_notes.rst index e6c5c89..441db35 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -32,7 +32,6 @@ Release notes - https://github.com/lucabaldini/hexsample/issues/129 - Version 0.16.0 (2026-05-05) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ From ce5e52975cc25f4165d8d8545f17cd8a0da34d0e Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Tue, 12 May 2026 09:44:27 +0200 Subject: [PATCH 25/30] Reviewing. --- src/hexsample/clustering.py | 1 - tests/test_pdf.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/hexsample/clustering.py b/src/hexsample/clustering.py index 7aef0f1..06da5b3 100644 --- a/src/hexsample/clustering.py +++ b/src/hexsample/clustering.py @@ -267,7 +267,6 @@ def position_suppress(self, pha: np.ndarray, col: np.ndarray, row: np.ndarray idx = np.argsort(-out_pha)[:3] return out_pha[idx], out_col[idx], out_row[idx] - def run(self, event: DigiEventRectangular) -> Cluster: """Workhorse method to be reimplemented by derived classes. """ diff --git a/tests/test_pdf.py b/tests/test_pdf.py index b6a12a6..46e31d2 100644 --- a/tests/test_pdf.py +++ b/tests/test_pdf.py @@ -74,4 +74,3 @@ def test_derivative(size: int = 100000): plt.figure("Derivative of a gaussian PDF") xx = np.linspace(vals.min()*0.8, vals.max()*1.2, 10000) plt.plot(xx, derivative(xx)) - plt.legend() From e52deb5ec00111d20b8cb7643d78f7d6cc63e5f5 Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Tue, 12 May 2026 11:44:17 +0200 Subject: [PATCH 26/30] Review. --- docs/release_notes.rst | 4 +-- src/hexsample/calibration.py | 49 ++++++++++++++++++++++-------------- src/hexsample/clustering.py | 5 +++- src/hexsample/pdf.py | 8 +++--- src/hexsample/tasks.py | 10 ++++---- 5 files changed, 46 insertions(+), 30 deletions(-) diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 5cfa444..298e549 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -12,8 +12,8 @@ Release notes and the observed energy of the events. Corresponding CLI command `calibgen equalization` added to generate equalization calibration files from event files. * The `CalibrateGain` class has been refactored. Now it produces a gain calibration matrix from - a equalization calibration matrix, by using the ADC to eV conversion and the chip sensor - matieral. The gain matrix is expressed in ADC counts per electron. Corresponding CLI command + an equalization calibration matrix, by using the ADC to eV conversion and the chip sensor + material. The gain matrix is expressed in ADC counts per electron. Corresponding CLI command `calibgen gain` refactored to reflect this change. * Some CLI commands refactored to use the equalization calibration file instead of the gain calibration file (`reconstruct`, `display`, `calibgen eta`). diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index 9edfdf6..0d04741 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -135,7 +135,7 @@ def __init__(self, num_cols: int, num_rows: int) -> None: # Other useful information for the metadata self.num_events = 0 self._cached = False - self._cached_metadata = { + self._metadata = { CalibrationMetadata.NUM_COLS: num_cols, CalibrationMetadata.NUM_ROWS: num_rows } @@ -161,6 +161,8 @@ def values(self, new_matrix: np.ndarray) -> None: raise ValueError(f"Input matrix has shape {new_matrix.shape}, but expected shape is " f"{self._shape}.") self._values = new_matrix + # Invalidate the cached metadata since the values of the matrix have changed. + self._cached = False @property def entries(self) -> np.ndarray: @@ -178,6 +180,8 @@ def entries(self, new_entries: np.ndarray) -> None: raise ValueError(f"Input entries matrix has shape {new_entries.shape}, but expected " f"shape is {self._shape}.") self._entries = new_entries + # Invalidate the cached metadata since the number of events has changed. + self._cached = False @property def errors(self) -> np.ndarray: @@ -194,13 +198,15 @@ def errors(self, new_error: np.ndarray) -> None: raise ValueError(f"Input error matrix has shape {new_error.shape}, but expected shape " f"is {self._shape}.") self._errors = new_error + # Invalidate the cached metadata since the error values have changed. + self._cached = False @property def metadata(self) -> dict: """Return the metadata of the calibration matrix. """ if self._cached: - return self._cached_metadata + return self._metadata mask = self._entries > 0 # If there are no pixels with events, we can set the average, minimum and maximum number of # events to zero. @@ -211,13 +217,13 @@ def metadata(self) -> dict: else: entries_avg = entries_min = entries_max = 0 # Setting the metadata values. - self._cached_metadata[CalibrationMetadata.NUM_EVENTS] = self.num_events - self._cached_metadata[CalibrationMetadata.ENTRIES_AVG] = entries_avg - self._cached_metadata[CalibrationMetadata.ENTRIES_MIN] = entries_min - self._cached_metadata[CalibrationMetadata.ENTRIES_MAX] = entries_max - self._cached_metadata[CalibrationMetadata.NUM_CALIBRATED_PIXELS] = int(mask.sum()) + self._metadata[CalibrationMetadata.NUM_EVENTS] = self.num_events + self._metadata[CalibrationMetadata.ENTRIES_AVG] = entries_avg + self._metadata[CalibrationMetadata.ENTRIES_MIN] = entries_min + self._metadata[CalibrationMetadata.ENTRIES_MAX] = entries_max + self._metadata[CalibrationMetadata.NUM_CALIBRATED_PIXELS] = int(mask.sum()) self._cached = True - return self._cached_metadata + return self._metadata def update_metadata(self, key: CalibrationMetadata, value) -> None: """Update the metadata of the calibration matrix with a new key-value pair. @@ -229,7 +235,7 @@ def update_metadata(self, key: CalibrationMetadata, value) -> None: value : The value of the metadata to be updated. """ - self._cached_metadata[key] = value + self._metadata[key] = value def set_value(self, value: float) -> None: """Set a value for all the pixels in the calibration matrix. @@ -240,6 +246,7 @@ def set_value(self, value: float) -> None: The value to be set for all the pixels in the calibration matrix. """ self._values = np.full(self._shape, value) + self._cached = False def fill(self, value: float, max_hits: int = 0) -> None: """Substitute the value of the pixels with less hits than or equal to a certain threshold @@ -255,6 +262,7 @@ def fill(self, value: float, max_hits: int = 0) -> None: The maximum number of hits for a pixel to be considered for replacement. """ self._values = np.where(self._entries <= max_hits, value, self._values) + self._cached = False def mean(self, min_hits: int = 1) -> float: """Return the mean value of the calibration matrix, calculated as the mean of the pixels @@ -296,9 +304,9 @@ def to_hdf5(self, file_path: str, calibration_type: CalibrationType, is_syntheti h5file.create_dataset(self.ERRORS, data=self.errors, dtype=np.float32, **compression_pars) # Update the header with the relevant information and metadata. - self._cached_metadata[CalibrationMetadata.CALIBRATION_TYPE] = calibration_type.value - self._cached_metadata[CalibrationMetadata.IS_SYNTHETIC] = is_synthetic - self._cached_metadata[CalibrationMetadata.FILE_NAME] = pathlib.Path(file_path).stem + self._metadata[CalibrationMetadata.CALIBRATION_TYPE] = calibration_type.value + self._metadata[CalibrationMetadata.IS_SYNTHETIC] = is_synthetic + self._metadata[CalibrationMetadata.FILE_NAME] = pathlib.Path(file_path).stem for key, val in self.metadata.items(): h5file.attrs[key] = val return file_path @@ -325,7 +333,7 @@ def from_hdf5(cls, file_path: str) -> "CalibrationMatrix": obj = cls(num_cols=attrs["num_cols"], num_rows=attrs["num_rows"]) obj.num_events = attrs.get(CalibrationMetadata.NUM_EVENTS.value) for key, val in attrs.items(): - obj._cached_metadata[key] = val + obj._metadata[key] = val # Load the matrix and the hits matrices from the HDF5 file. obj._values = h5file[obj.VALUES][:] obj._entries = h5file[obj.ENTRIES][:] @@ -347,11 +355,11 @@ def __call__(self, col: np.ndarray, row: np.ndarray) -> float: def __str__(self) -> str: """Return a string representation of the calibration matrix. """ - if CalibrationMetadata.FILE_NAME in self._cached_metadata: - return self._cached_metadata[CalibrationMetadata.FILE_NAME] + if CalibrationMetadata.FILE_NAME in self._metadata: + return self._metadata[CalibrationMetadata.FILE_NAME] return f"CalibrationMatrix( " \ - f"num_cols={self._cached_metadata[CalibrationMetadata.NUM_COLS]}, " \ - f"num_rows={self._cached_metadata[CalibrationMetadata.NUM_ROWS]})" + f"num_cols={self._metadata[CalibrationMetadata.NUM_COLS]}, " \ + f"num_rows={self._metadata[CalibrationMetadata.NUM_ROWS]})" class CalibrateBase: @@ -706,13 +714,16 @@ def _likelihood_fit(data: csr_matrix, conv_factor: float, pdf: SpectrumPDF, def nll(pars): total_adc = data @ pars p = pdf(total_adc * conv_factor) - return -np.sum(np.log(p + 1e-10)) + # To avoid negative or zero values in the log, clip p to be larger than 1e-10. + p_clipped = np.clip(p, 1e-10, None) + return -np.sum(np.log(p_clipped)) # Define the gradient of the log-likelihood function for the fit. def nll_grad(pars): total_adc = data @ pars p = pdf(total_adc * conv_factor) + p_clipped = np.clip(p, 1e-10, None) dp = pdf_derivative(total_adc * conv_factor) - grad = -data.T @ (dp / (p + 1e-10)) * conv_factor + grad = -data.T @ (dp / p_clipped) * conv_factor return np.asarray(grad).flatten() # Define the initial parameters for the fit. init_pars = np.ones(data.shape[1]) diff --git a/src/hexsample/clustering.py b/src/hexsample/clustering.py index 06da5b3..a603282 100644 --- a/src/hexsample/clustering.py +++ b/src/hexsample/clustering.py @@ -298,7 +298,7 @@ class ClusteringNN(ClusteringBase): pos_recon_algorithm: str recon_pars: Optional[dict] = None - def run(self, event) -> Cluster: + def run(self, event) -> Optional[Cluster]: """Overladed method. .. warning:: @@ -306,6 +306,9 @@ def run(self, event) -> Cluster: for speed using proper numpy array for the offset indexes. """ readout = self.readout + # Load the adc_to_ev conversion factor from the readout metadata of the + # equalization matrix. If the data is not present, or the wrong matrix type + # is passed, this will raise a KeyError. adc_to_ev = readout.gain.metadata["adc_to_ev"] if isinstance(event, DigiEventCircular): # If the readout is circular, we want to take all the neirest neighbors. diff --git a/src/hexsample/pdf.py b/src/hexsample/pdf.py index 02887fc..0ca6539 100644 --- a/src/hexsample/pdf.py +++ b/src/hexsample/pdf.py @@ -64,10 +64,10 @@ def mean(self) -> float: raise ValueError("The PDF is empty. Fit the PDF with data or load it from file.") # The location of the PDF can be estimated as the mean of the distribution. x_grid = np.linspace(min(self.pdf.x), max(self.pdf.x), 1000) - y_grid = self.pdf(x_grid) + # Clip the PDF to avoid negative values. + y_grid = np.maximum(0, self.pdf(x_grid)) area = np.trapezoid(y_grid, x_grid) - normalized_pdf = y_grid / area - mean = np.sum(x_grid * normalized_pdf) / np.sum(normalized_pdf) + mean = np.trapezoid(x_grid * y_grid, x_grid) / area return mean def to_file(self, file_path: str) -> str: @@ -78,6 +78,8 @@ def to_file(self, file_path: str) -> str: file_path : str The path to the file where the PDF will be saved. """ + if self.pdf is None: + raise ValueError("The PDF is empty. Fit the PDF with data before saving it to file.") np.savez(file_path, x=self.pdf.x, c=self.pdf.c) return file_path diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py index 6b0ab44..c119d36 100644 --- a/src/hexsample/tasks.py +++ b/src/hexsample/tasks.py @@ -344,7 +344,7 @@ def reconstruct( cluster = clustering.run(event) except IndexError as e: logger.warning(f"Error reconstructing event with trigger ID {event.trigger_id}: {e}") - if cluster.size() in size: + if cluster is not None and (cluster.size() in size): # Need to pass the recon method and other stuff as argument to ReconEvent args = event.trigger_id, event.timestamp(), event.livetime, cluster recon_event = ReconEvent(*args) @@ -443,7 +443,7 @@ def calibrate_eta( except IndexError: continue # Analyze only 2-pixel and 3-pixel events. - if cluster.size() == 2 or cluster.size() == 3: + if cluster is not None and (cluster.size() == 2 or cluster.size() == 3): mc_event = input_file.mc_event(i) size_list.append(cluster.size()) # Calculate the photon position with respect to the most charged pixel @@ -728,6 +728,7 @@ def calibrate_equalization( # necessary for the calibration. unit_gain_map = CalibrationMatrix(header["num_cols"], header["num_rows"]) unit_gain_map.set_value(1.) + unit_gain_map.update_metadata(CalibrationMetadata.ADC_TO_EV, 1.) args = HexagonalLayout(header["layout"]), header["num_cols"], header["num_rows"],\ header["pitch"], noise_matrix, unit_gain_map, pedestal_matrix readout = create_readout(readout_mode, header, *args) @@ -741,9 +742,8 @@ def calibrate_equalization( cluster = clustering.run(event) except IndexError: continue - if cluster is None: - continue - equalization_calibration.analyze_cluster(cluster) + if cluster is not None: + equalization_calibration.analyze_cluster(cluster) logger.info("Calculating the equalization matrix...") equalization_matrix = equalization_calibration.fit(size) output_file_path = input_file_path.replace(".h5", "_matrix_equalization.h5") From 527c88392460e925e4e5d4f42128a33c79fc997a Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Tue, 12 May 2026 11:45:12 +0200 Subject: [PATCH 27/30] Linting. --- src/hexsample/calibration.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/hexsample/calibration.py b/src/hexsample/calibration.py index 0d04741..4213157 100644 --- a/src/hexsample/calibration.py +++ b/src/hexsample/calibration.py @@ -24,19 +24,17 @@ import pathlib from enum import Enum from itertools import product -from itertools import product from typing import Optional, Tuple import h5py import numpy as np import xraydb from aptapy.hist import Histogram3d +from aptapy.models import Gaussian from iminuit import Minuit from joblib import Parallel, delayed -from aptapy.models import Gaussian from scipy.sparse import csc_matrix, csr_matrix from tqdm import tqdm -from tqdm import tqdm from .clustering import Cluster from .digi import DigiEventRectangular From 3e04211ca63b84b7d00ef8e8191ea370d6cab10f Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Tue, 12 May 2026 11:52:29 +0200 Subject: [PATCH 28/30] Added uniform equalization file in CalDB. --- .../sim_xpol3_equalization-3p68_uniform_v001.h5 | Bin 0 -> 18816 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 caldb/equalization/sim_xpol3_equalization-3p68_uniform_v001.h5 diff --git a/caldb/equalization/sim_xpol3_equalization-3p68_uniform_v001.h5 b/caldb/equalization/sim_xpol3_equalization-3p68_uniform_v001.h5 new file mode 100644 index 0000000000000000000000000000000000000000..8c66c7854400bbcf097bc9d563b36b47e86e22ab GIT binary patch literal 18816 zcmeI4PfT1z9LMLeR9|7Ci`GK*;93=_!2|-SYvP|JmI4V8Y|*S2A1gd$&9V!-3oKr& z%7NbWhF+{`jP=k2Z`H)udO}Yc5B`aVHtE5}G^rjGPj&wNfc?6^P)oG7ok@A~X67^V zzVqFA58kiu?XmuYhZ^ect`jQxzNi&3y-S-bHXR)>HB_edQM=>We97h~Ys`)(8ii_W zvGp-Ke)52+NZ5`ehkJX4Bx!ugZ8yY zvtdz)Fjp#MoY;%Rpy?!&GfYcc%n`B^lD7SsIBj?IhG@G<*TNq_`UMn zQYkK)ebM}y-WOtr@O67l#0*$361wc!dDZB4PZTF-CnmC?9Yf7W_4CAdIFU`4Lb211 z=i4(aQWGifFm>%R8n<}K>T78-1CTE?9_mXn9O|0{ZjsUV7Q>;w(+r3D&M_S7^Y5sf z?+T-@gW*tLis4Y-QHDc(CxQEk(f2jOp}sYSLw)f(D}ABkqP~9MeEa(VWll33>RV6yDR5Q9O_Fj9O@foIMlafIr3}N>Lad}8eHxia1J;JZczuyKVCZP z$!nuu6Mt{^)Wyc8eq%)HH<_Pk6k>G5d-2WB%Z_utxOL#pkDKy(&>+w4FOL5)G_~vY zia#4KztWu)p1$-s#9aUM*7CI<7LMT6&z}P+!1s zsP7GiLwz3rm#QXC)~zxe>idP^P+#L7b0G4Cj*I#_fm>wsjW8VQn`b!G_b$VszR!T; z;%Mg>4)sNBe4Tuu@lanY!=b(b;3%GjHf9+P^{wrz{M=|f)OU}a0L_Cq)b}KCTpTUU zaHwyQ;ZWa)42Sx@0*>Ni=-(xVLw&IaD*dDJP+u3rp}yxWM|xZwjht|~bHF*^954>J zo5cUa&9e<;&Y#Lu4P@^7wD0`pK&InNfYkt^@ar__{i<{W#ind#=xK zH_oX{-A>k;V!~wCxaHz#^!yVy?1H`DL=NwB-)A_y&;63&E`WevfwP_fCB)tbO%>@Q z4)ygg9O}z39O`=;xYXa2nTk&s4)vX9IMlbZwQ|06T-4VI+#;jz1%^X?uQMF#`-tIC z-#5Tf+zVy?$#AIezK1G(q47}P0K=ia8Q>_Mh5FuMIMjER;ZWa2hC_W#cEQuS=i+FO zF&yekGaTwW#&D?bW8f(6h5r4(aH!AQU-|jcc&Kkb!=b)sEk}A>9F3fCxpTld;2bax z(ED8FK2!GXdn-}-9*atzd^05~-&M)YP6cE6Y|&8W|DSzbNb0S9+*i*A&d@LyCgK$nCO}&rvl&n=LDZ1-L_mzlgARRML)O~kix*-`A!@9u} zO?sTMbT;!+AzjMkb3tigIyAlH-I12=wbj0TBt$VY6_ls**{)zS`AFCFfkZHy%S_}8Q^8zEN2ltm(z1OY z#FX6DIp7>{4mby#1I_{GfOEh(;2dxcI0u{q&Vm2JfhHX(5Ms$5jJ_{tBi_&;T4jVH zQw)j=xzc1<%8V(rMFs*?tUx`#Z(;_%IMg$Eq*vCB9DYt^x`E;elt)FsZ8GtYiA**O za_K1wU~Cn?Xp_AuexYI!8}XU?_4rOWC%<*YMrf2i`A?>#-jWVokeiMEQQhBuUrUgl pds!pb+J7pO+g|^4`6~M-HRp*v>+`4BP&gh;XUd@tc5O9}zW~SVFWmqD literal 0 HcmV?d00001 From 70e94fcff7b1783a5fc89cea4c48161be977605c Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Tue, 12 May 2026 11:53:54 +0200 Subject: [PATCH 29/30] Minor. --- scripts/populate_caldb.py | 4 ++++ src/hexsample/tasks.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/populate_caldb.py b/scripts/populate_caldb.py index a95aa92..ebb5e04 100644 --- a/scripts/populate_caldb.py +++ b/scripts/populate_caldb.py @@ -23,11 +23,14 @@ """ import numpy as np +from xraydb import ionization_potential from hexsample.caldb import CalDB, CalibrationType from hexsample.tasks import SynthesizeCalibrationDefaults, synthesize_calibration_file +DEFAULT_IONIZATION_POTENTIAL = ionization_potential("Si") DEFAULT_GAIN = 1. +DEFAULT_ADC_TO_EV = DEFAULT_IONIZATION_POTENTIAL / DEFAULT_GAIN RMS_VALS = (0, 10) ENC_VALS = np.array((20., 30., 40., 50., 75., 100.)) PEDESTAL_VALS = (0, 1000) @@ -62,6 +65,7 @@ def populate_caldb() -> None: generate_files(CalibrationType.ENC, ENC_VALS, RMS_VALS) generate_files(CalibrationType.NOISE, ENC_VALS * DEFAULT_GAIN, RMS_VALS) generate_files(CalibrationType.GAIN, [DEFAULT_GAIN], RMS_VALS) + generate_files(CalibrationType.EQUALIZATION, [DEFAULT_ADC_TO_EV], [0.]) generate_files(CalibrationType.PEDESTAL, PEDESTAL_VALS, RMS_VALS) diff --git a/src/hexsample/tasks.py b/src/hexsample/tasks.py index c119d36..45987d1 100644 --- a/src/hexsample/tasks.py +++ b/src/hexsample/tasks.py @@ -525,7 +525,7 @@ def synthesize_calibration_file( random_seed : int, optional The seed for the random number generator. """ - name, args = current_call() + name, args = current_call(1) logger.info(f"Running {__name__}.{name} with arguments {args}...") # Initialize the random number generator with the given seed rng.initialize(seed=random_seed) From 156594d72eb9e0c22915566dc4bda591c5fe7e2f Mon Sep 17 00:00:00 2001 From: Augusto Cattafesta Date: Tue, 12 May 2026 12:05:37 +0200 Subject: [PATCH 30/30] Import error solved. --- src/hexsample/pdf.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/hexsample/pdf.py b/src/hexsample/pdf.py index 0ca6539..b40f10e 100644 --- a/src/hexsample/pdf.py +++ b/src/hexsample/pdf.py @@ -26,6 +26,11 @@ from scipy.interpolate import CubicSpline from scipy.stats import gaussian_kde +try: + from numpy import trapezoid as trapezoid +except ImportError: + from numpy import trapz as trapezoid + class SpectrumPDF: @@ -66,8 +71,8 @@ def mean(self) -> float: x_grid = np.linspace(min(self.pdf.x), max(self.pdf.x), 1000) # Clip the PDF to avoid negative values. y_grid = np.maximum(0, self.pdf(x_grid)) - area = np.trapezoid(y_grid, x_grid) - mean = np.trapezoid(x_grid * y_grid, x_grid) / area + area = trapezoid(y_grid, x_grid) + mean = trapezoid(x_grid * y_grid, x_grid) / area return mean def to_file(self, file_path: str) -> str: