From 72eaaf34bebb6b630fc7be7f350d97db61a09798 Mon Sep 17 00:00:00 2001 From: LebombJames Date: Sat, 5 Jul 2025 02:56:48 +0100 Subject: [PATCH 1/9] - Allow passing a URL to an XDF - Typing and documentation fixes --- neurokit2/data/read_xdf.py | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py index b8c7ded2ca..fdcbf41884 100644 --- a/neurokit2/data/read_xdf.py +++ b/neurokit2/data/read_xdf.py @@ -1,9 +1,19 @@ # -*- coding: utf-8 -*- import numpy as np import pandas as pd - - -def read_xdf(filename, upsample=2, fillmissing=None): +import urllib +import io +import requests +from typing import TypedDict + +class ReadXDFInfo(TypedDict): + sampling_rates_original: list[float] + sampling_rates_effective: list[float] + sampling_rate: int + datetime: str + data: list[pd.DataFrame] + +def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = None) -> tuple[pd.DataFrame, ReadXDFInfo]: """**Read and tidy an XDF file** Reads and tidies an XDF file with multiple streams into a Pandas DataFrame. @@ -21,7 +31,7 @@ def read_xdf(filename, upsample=2, fillmissing=None): Parameters ---------- filename : str - Path (with the extension) of an XDF file (e.g., ``"data.xdf"``). + Path (with the extension) or URL pointing to an XDF file (e.g., ``"data.xdf"``). upsample : float Factor by which to upsample the data. Default is 2, which means that the data will be resampled to 2 times the highest sampling rate. You can increase that to further reduce @@ -35,9 +45,9 @@ def read_xdf(filename, upsample=2, fillmissing=None): Returns ---------- - df : DataFrame, dict - The BITalino file as a pandas dataframe if one device was read, or a dictionary - of pandas dataframes (one dataframe per device) if multiple devices are read. + df : DataFrame + The device's BITalino file as a pandas dataframe. If multiple devices are read, + each device's BITalino file will be merged into one dataframe. info : dict The metadata information containing the sampling rate(s). @@ -63,7 +73,11 @@ def read_xdf(filename, upsample=2, fillmissing=None): ) # Load file - # TODO: would be nice to be able to stream a file from URL + # if filename is a URL, stream bytes from file + if urllib.parse.urlparse(filename).scheme != "": + req = requests.get(filename, stream=True) + req.raw.decode_content = True + filename = io.BytesIO(req.content) streams, header = pyxdf.load_xdf(filename) # Get smaller time stamp to later use as offset (zero point) From e0bad5051dba3b6a184c42b0abc58057e4b8734b Mon Sep 17 00:00:00 2001 From: LebombJames Date: Sat, 5 Jul 2025 11:59:15 +0100 Subject: [PATCH 2/9] Run linting and address tests --- neurokit2/data/read_xdf.py | 54 ++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py index fdcbf41884..bf6df5005a 100644 --- a/neurokit2/data/read_xdf.py +++ b/neurokit2/data/read_xdf.py @@ -1,10 +1,13 @@ # -*- coding: utf-8 -*- +import io +import urllib + +from typing import TypedDict + import numpy as np import pandas as pd -import urllib -import io import requests -from typing import TypedDict + class ReadXDFInfo(TypedDict): sampling_rates_original: list[float] @@ -13,7 +16,10 @@ class ReadXDFInfo(TypedDict): datetime: str data: list[pd.DataFrame] -def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = None) -> tuple[pd.DataFrame, ReadXDFInfo]: + +def read_xdf( + filename: str, upsample: float = 2.0, fillmissing: float | None = None +) -> tuple[pd.DataFrame, ReadXDFInfo]: """**Read and tidy an XDF file** Reads and tidies an XDF file with multiple streams into a Pandas DataFrame. @@ -63,6 +69,7 @@ def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = N # data, info = nk.read_xdf("data.xdf") # sampling_rate = info["sampling_rate"] + """ try: import pyxdf @@ -75,9 +82,16 @@ def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = N # Load file # if filename is a URL, stream bytes from file if urllib.parse.urlparse(filename).scheme != "": - req = requests.get(filename, stream=True) + try: + req = requests.get(filename, stream=True, timeout=10) + except requests.exceptions.Timeout: + print("The request timed out!") + except requests.exceptions.RequestException as e: + print("An error occurred:", e) + req.raw.decode_content = True filename = io.BytesIO(req.content) + streams, header = pyxdf.load_xdf(filename) # Get smaller time stamp to later use as offset (zero point) @@ -97,22 +111,16 @@ def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = N if stream["info"]["type"][0] == "GYRO": dat = dat.rename(columns={"X": "GYRO_X", "Y": "GYRO_Y", "Z": "GYRO_Z"}) # Compute movement - dat["GYRO"] = np.sqrt( - dat["GYRO_X"] ** 2 + dat["GYRO_Y"] ** 2 + dat["GYRO_Z"] ** 2 - ) + dat["GYRO"] = np.sqrt(dat["GYRO_X"] ** 2 + dat["GYRO_Y"] ** 2 + dat["GYRO_Z"] ** 2) if stream["info"]["type"][0] == "ACC": dat = dat.rename(columns={"X": "ACC_X", "Y": "ACC_Y", "Z": "ACC_Z"}) # Compute acceleration - dat["ACC"] = np.sqrt( - dat["ACC_X"] ** 2 + dat["ACC_Y"] ** 2 + dat["ACC_Z"] ** 2 - ) + dat["ACC"] = np.sqrt(dat["ACC_X"] ** 2 + dat["ACC_Y"] ** 2 + dat["ACC_Z"] ** 2) # Muse - PPG data has three channels: ambient, infrared, red if stream["info"]["type"][0] == "PPG": - dat = dat.rename( - columns={"PPG1": "LUX", "PPG2": "PPG", "PPG3": "RED", "IR": "PPG"} - ) + dat = dat.rename(columns={"PPG1": "LUX", "PPG2": "PPG", "PPG3": "RED", "IR": "PPG"}) # Zeros suggest interruptions, better to replace with NaNs (I think?) dat["PPG"] = dat["PPG"].replace(0, value=np.nan) dat["LUX"] = dat["LUX"].replace(0, value=np.nan) @@ -125,12 +133,8 @@ def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = N # Store metadata info = { - "sampling_rates_original": [ - float(s["info"]["nominal_srate"][0]) for s in streams - ], - "sampling_rates_effective": [ - float(s["info"]["effective_srate"]) for s in streams - ], + "sampling_rates_original": [float(s["info"]["nominal_srate"][0]) for s in streams], + "sampling_rates_effective": [float(s["info"]["effective_srate"]) for s in streams], "datetime": header["info"]["datetime"][0], "data": dfs, } @@ -151,14 +155,8 @@ def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = N fillmissing = int(info["sampling_rate"] * fillmissing) # Create new index with evenly spaced timestamps - idx = pd.date_range( - df.index.min(), df.index.max(), freq=str(1000 / info["sampling_rate"]) + "ms" - ) + idx = pd.date_range(df.index.min(), df.index.max(), freq=str(1000 / info["sampling_rate"]) + "ms") # https://stackoverflow.com/questions/47148446/pandas-resample-interpolate-is-producing-nans - df = ( - df.reindex(df.index.union(idx)) - .interpolate(method="index", limit=fillmissing) - .reindex(idx) - ) + df = df.reindex(df.index.union(idx)).interpolate(method="index", limit=fillmissing).reindex(idx) return df, info From 5200454ef199a906dd528c3004087551fd82935e Mon Sep 17 00:00:00 2001 From: Sam Marine <77904738+LebombJames@users.noreply.github.com> Date: Sat, 5 Jul 2025 12:17:54 +0100 Subject: [PATCH 3/9] Update neurokit2/data/read_xdf.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- neurokit2/data/read_xdf.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py index bf6df5005a..9ede5c4bb2 100644 --- a/neurokit2/data/read_xdf.py +++ b/neurokit2/data/read_xdf.py @@ -84,13 +84,12 @@ def read_xdf( if urllib.parse.urlparse(filename).scheme != "": try: req = requests.get(filename, stream=True, timeout=10) - except requests.exceptions.Timeout: - print("The request timed out!") - except requests.exceptions.RequestException as e: - print("An error occurred:", e) + req.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx) - req.raw.decode_content = True - filename = io.BytesIO(req.content) + req.raw.decode_content = True + filename = io.BytesIO(req.content) + except requests.exceptions.RequestException as e: + raise IOError(f"Failed to read XDF file from URL: {filename}") from e streams, header = pyxdf.load_xdf(filename) From 034ffc74f3f263a71825726c11a2e27cbf1e7b57 Mon Sep 17 00:00:00 2001 From: LebombJames Date: Sat, 5 Jul 2025 12:31:22 +0100 Subject: [PATCH 4/9] Improve tests results further --- neurokit2/data/read_xdf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py index 9ede5c4bb2..5899f5ec32 100644 --- a/neurokit2/data/read_xdf.py +++ b/neurokit2/data/read_xdf.py @@ -73,11 +73,11 @@ def read_xdf( """ try: import pyxdf - except ImportError: + except ImportError as e: raise ImportError( "The 'pyxdf' module is required for this function to run. ", "Please install it first (`pip install pyxdf`).", - ) + ) from e # Load file # if filename is a URL, stream bytes from file From c4b6e8ed0daae83e6158ed2a4d75712803840782 Mon Sep 17 00:00:00 2001 From: Sam Marine <77904738+LebombJames@users.noreply.github.com> Date: Sat, 5 Jul 2025 12:33:13 +0100 Subject: [PATCH 5/9] Update neurokit2/data/read_xdf.py This seems like copy-paste from `read-bitalino` Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- neurokit2/data/read_xdf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py index 5899f5ec32..c29887bf41 100644 --- a/neurokit2/data/read_xdf.py +++ b/neurokit2/data/read_xdf.py @@ -52,8 +52,8 @@ def read_xdf( Returns ---------- df : DataFrame - The device's BITalino file as a pandas dataframe. If multiple devices are read, - each device's BITalino file will be merged into one dataframe. + The XDF data as a pandas dataframe. If multiple streams are read, + they will be merged into one dataframe. info : dict The metadata information containing the sampling rate(s). From 02d29d288914a0fd392806ac6381238307cb72f4 Mon Sep 17 00:00:00 2001 From: LebombJames Date: Sat, 5 Jul 2025 16:05:53 +0100 Subject: [PATCH 6/9] Updated docstring with interpolation details --- neurokit2/data/read_xdf.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py index c29887bf41..815b8118f4 100644 --- a/neurokit2/data/read_xdf.py +++ b/neurokit2/data/read_xdf.py @@ -27,7 +27,11 @@ def read_xdf( Note that, as XDF can store streams with different sampling rates and different time stamps, **the function will resample all streams to 2 times (default) the highest sampling rate** (to - minimize aliasing). The final sampling rate can be found in the ``info`` dictionary. + minimize aliasing) and then interpolate based on an evenly spaced index. While this is generally safe, it + may produce unexpected results, particularly if the original stream has large gaps in its time series. + For more discussion, see `here `_. + + The final upsampled sampling rate can be found in the ``info`` dictionary. .. note:: @@ -39,9 +43,10 @@ def read_xdf( filename : str Path (with the extension) or URL pointing to an XDF file (e.g., ``"data.xdf"``). upsample : float - Factor by which to upsample the data. Default is 2, which means that the data will be + Factor by which to upsample the data. Default is 2.0, which means that the data will be resampled to 2 times the highest sampling rate. You can increase that to further reduce - edge-distortion, especially for high frequency signals like EEG. + edge-distortion, especially for high frequency signals like EEG. ``1.0`` disables upsampling + (but not interpolation). fillmissing : float The maximum duration in seconds of missing data to fill. ``None`` (default) will interpolate all missing values and prevent issues with NaNs. However, it might be important From 56caaed53cd34f7696638b5d1811047ba982ef9f Mon Sep 17 00:00:00 2001 From: LebombJames Date: Sat, 5 Jul 2025 17:21:28 +0100 Subject: [PATCH 7/9] Use a generator comprehension to further improve test score --- neurokit2/data/read_xdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py index 815b8118f4..8e849cc5a7 100644 --- a/neurokit2/data/read_xdf.py +++ b/neurokit2/data/read_xdf.py @@ -99,7 +99,7 @@ def read_xdf( streams, header = pyxdf.load_xdf(filename) # Get smaller time stamp to later use as offset (zero point) - min_ts = min([min(s["time_stamps"]) for s in streams]) + min_ts = min(min(s["time_stamps"]) for s in streams) # Loop through all the streams and convert to dataframes dfs = [] From d1edf58ed481004fa892a205cb8db87332269269 Mon Sep 17 00:00:00 2001 From: DominiqueMakowski Date: Mon, 7 Jul 2025 09:01:43 +0100 Subject: [PATCH 8/9] simplify return type --- neurokit2/data/read_xdf.py | 40 +++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py index 8e849cc5a7..a420a20759 100644 --- a/neurokit2/data/read_xdf.py +++ b/neurokit2/data/read_xdf.py @@ -9,17 +9,9 @@ import requests -class ReadXDFInfo(TypedDict): - sampling_rates_original: list[float] - sampling_rates_effective: list[float] - sampling_rate: int - datetime: str - data: list[pd.DataFrame] - - def read_xdf( filename: str, upsample: float = 2.0, fillmissing: float | None = None -) -> tuple[pd.DataFrame, ReadXDFInfo]: +) -> tuple[pd.DataFrame, dict]: """**Read and tidy an XDF file** Reads and tidies an XDF file with multiple streams into a Pandas DataFrame. @@ -115,16 +107,22 @@ def read_xdf( if stream["info"]["type"][0] == "GYRO": dat = dat.rename(columns={"X": "GYRO_X", "Y": "GYRO_Y", "Z": "GYRO_Z"}) # Compute movement - dat["GYRO"] = np.sqrt(dat["GYRO_X"] ** 2 + dat["GYRO_Y"] ** 2 + dat["GYRO_Z"] ** 2) + dat["GYRO"] = np.sqrt( + dat["GYRO_X"] ** 2 + dat["GYRO_Y"] ** 2 + dat["GYRO_Z"] ** 2 + ) if stream["info"]["type"][0] == "ACC": dat = dat.rename(columns={"X": "ACC_X", "Y": "ACC_Y", "Z": "ACC_Z"}) # Compute acceleration - dat["ACC"] = np.sqrt(dat["ACC_X"] ** 2 + dat["ACC_Y"] ** 2 + dat["ACC_Z"] ** 2) + dat["ACC"] = np.sqrt( + dat["ACC_X"] ** 2 + dat["ACC_Y"] ** 2 + dat["ACC_Z"] ** 2 + ) # Muse - PPG data has three channels: ambient, infrared, red if stream["info"]["type"][0] == "PPG": - dat = dat.rename(columns={"PPG1": "LUX", "PPG2": "PPG", "PPG3": "RED", "IR": "PPG"}) + dat = dat.rename( + columns={"PPG1": "LUX", "PPG2": "PPG", "PPG3": "RED", "IR": "PPG"} + ) # Zeros suggest interruptions, better to replace with NaNs (I think?) dat["PPG"] = dat["PPG"].replace(0, value=np.nan) dat["LUX"] = dat["LUX"].replace(0, value=np.nan) @@ -137,8 +135,12 @@ def read_xdf( # Store metadata info = { - "sampling_rates_original": [float(s["info"]["nominal_srate"][0]) for s in streams], - "sampling_rates_effective": [float(s["info"]["effective_srate"]) for s in streams], + "sampling_rates_original": [ + float(s["info"]["nominal_srate"][0]) for s in streams + ], + "sampling_rates_effective": [ + float(s["info"]["effective_srate"]) for s in streams + ], "datetime": header["info"]["datetime"][0], "data": dfs, } @@ -159,8 +161,14 @@ def read_xdf( fillmissing = int(info["sampling_rate"] * fillmissing) # Create new index with evenly spaced timestamps - idx = pd.date_range(df.index.min(), df.index.max(), freq=str(1000 / info["sampling_rate"]) + "ms") + idx = pd.date_range( + df.index.min(), df.index.max(), freq=str(1000 / info["sampling_rate"]) + "ms" + ) # https://stackoverflow.com/questions/47148446/pandas-resample-interpolate-is-producing-nans - df = df.reindex(df.index.union(idx)).interpolate(method="index", limit=fillmissing).reindex(idx) + df = ( + df.reindex(df.index.union(idx)) + .interpolate(method="index", limit=fillmissing) + .reindex(idx) + ) return df, info From fb6676fbc8a8c0a78df53434671ac043f7f8e7a1 Mon Sep 17 00:00:00 2001 From: DominiqueMakowski Date: Mon, 7 Jul 2025 09:04:22 +0100 Subject: [PATCH 9/9] Update read_xdf.py --- neurokit2/data/read_xdf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py index a420a20759..a2deb40448 100644 --- a/neurokit2/data/read_xdf.py +++ b/neurokit2/data/read_xdf.py @@ -2,8 +2,6 @@ import io import urllib -from typing import TypedDict - import numpy as np import pandas as pd import requests