From 72eaaf34bebb6b630fc7be7f350d97db61a09798 Mon Sep 17 00:00:00 2001
From: LebombJames <sam.marine418@gmail.com>
Date: Sat, 5 Jul 2025 02:56:48 +0100
Subject: [PATCH 1/9] Allow passing a URL to an XDF file; typing and
 documentation fixes

---
 neurokit2/data/read_xdf.py | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py
index b8c7ded2ca..fdcbf41884 100644
--- a/neurokit2/data/read_xdf.py
+++ b/neurokit2/data/read_xdf.py
@@ -1,9 +1,19 @@
 # -*- coding: utf-8 -*-
 import numpy as np
 import pandas as pd
-
-
-def read_xdf(filename, upsample=2, fillmissing=None):
+import urllib
+import io
+import requests
+from typing import TypedDict
+
+class ReadXDFInfo(TypedDict):
+    sampling_rates_original: list[float]
+    sampling_rates_effective: list[float]
+    sampling_rate: int
+    datetime: str
+    data: list[pd.DataFrame]
+
+def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = None) -> tuple[pd.DataFrame, ReadXDFInfo]:
     """**Read and tidy an XDF file**
 
     Reads and tidies an XDF file with multiple streams into a Pandas DataFrame.
@@ -21,7 +31,7 @@ def read_xdf(filename, upsample=2, fillmissing=None):
     Parameters
     ----------
     filename :  str
-        Path (with the extension) of an XDF file (e.g., ``"data.xdf"``).
+        Path (with the extension) or URL pointing to an XDF file (e.g., ``"data.xdf"``).
     upsample : float
         Factor by which to upsample the data. Default is 2, which means that the data will be
         resampled to 2 times the highest sampling rate. You can increase that to further reduce
@@ -35,9 +45,9 @@ def read_xdf(filename, upsample=2, fillmissing=None):
 
     Returns
     ----------
-    df : DataFrame, dict
-        The BITalino file as a pandas dataframe if one device was read, or a dictionary
-        of pandas dataframes (one dataframe per device) if multiple devices are read.
+    df : DataFrame
+        The device's BITalino file as a pandas dataframe. If multiple devices are read,
+        each device's BITalino file will be merged into one dataframe.
     info : dict
         The metadata information containing the sampling rate(s).
 
@@ -63,7 +73,11 @@ def read_xdf(filename, upsample=2, fillmissing=None):
         )
 
     # Load file
-    # TODO: would be nice to be able to stream a file from URL
+    # if filename is a URL, stream bytes from file
+    if urllib.parse.urlparse(filename).scheme != "":
+        req = requests.get(filename, stream=True)
+        req.raw.decode_content = True
+        filename = io.BytesIO(req.content)
     streams, header = pyxdf.load_xdf(filename)
 
     # Get smaller time stamp to later use as offset (zero point)

From e0bad5051dba3b6a184c42b0abc58057e4b8734b Mon Sep 17 00:00:00 2001
From: LebombJames <sam.marine418@gmail.com>
Date: Sat, 5 Jul 2025 11:59:15 +0100
Subject: [PATCH 2/9] Run linting and address tests

---
 neurokit2/data/read_xdf.py | 54 ++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 28 deletions(-)

diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py
index fdcbf41884..bf6df5005a 100644
--- a/neurokit2/data/read_xdf.py
+++ b/neurokit2/data/read_xdf.py
@@ -1,10 +1,13 @@
 # -*- coding: utf-8 -*-
+import io
+import urllib
+
+from typing import TypedDict
+
 import numpy as np
 import pandas as pd
-import urllib
-import io
 import requests
-from typing import TypedDict
+
 
 class ReadXDFInfo(TypedDict):
     sampling_rates_original: list[float]
@@ -13,7 +16,10 @@ class ReadXDFInfo(TypedDict):
     datetime: str
     data: list[pd.DataFrame]
 
-def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = None) -> tuple[pd.DataFrame, ReadXDFInfo]:
+
+def read_xdf(
+    filename: str, upsample: float = 2.0, fillmissing: float | None = None
+) -> tuple[pd.DataFrame, ReadXDFInfo]:
     """**Read and tidy an XDF file**
 
     Reads and tidies an XDF file with multiple streams into a Pandas DataFrame.
@@ -63,6 +69,7 @@ def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = N
 
       # data, info = nk.read_xdf("data.xdf")
       # sampling_rate = info["sampling_rate"]
+
     """
     try:
         import pyxdf
@@ -75,9 +82,16 @@ def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = N
     # Load file
     # if filename is a URL, stream bytes from file
     if urllib.parse.urlparse(filename).scheme != "":
-        req = requests.get(filename, stream=True)
+        try:
+            req = requests.get(filename, stream=True, timeout=10)
+        except requests.exceptions.Timeout:
+            print("The request timed out!")
+        except requests.exceptions.RequestException as e:
+            print("An error occurred:", e)
+
         req.raw.decode_content = True
         filename = io.BytesIO(req.content)
+
     streams, header = pyxdf.load_xdf(filename)
 
     # Get smaller time stamp to later use as offset (zero point)
@@ -97,22 +111,16 @@ def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = N
             if stream["info"]["type"][0] == "GYRO":
                 dat = dat.rename(columns={"X": "GYRO_X", "Y": "GYRO_Y", "Z": "GYRO_Z"})
                 # Compute movement
-                dat["GYRO"] = np.sqrt(
-                    dat["GYRO_X"] ** 2 + dat["GYRO_Y"] ** 2 + dat["GYRO_Z"] ** 2
-                )
+                dat["GYRO"] = np.sqrt(dat["GYRO_X"] ** 2 + dat["GYRO_Y"] ** 2 + dat["GYRO_Z"] ** 2)
 
             if stream["info"]["type"][0] == "ACC":
                 dat = dat.rename(columns={"X": "ACC_X", "Y": "ACC_Y", "Z": "ACC_Z"})
                 # Compute acceleration
-                dat["ACC"] = np.sqrt(
-                    dat["ACC_X"] ** 2 + dat["ACC_Y"] ** 2 + dat["ACC_Z"] ** 2
-                )
+                dat["ACC"] = np.sqrt(dat["ACC_X"] ** 2 + dat["ACC_Y"] ** 2 + dat["ACC_Z"] ** 2)
 
             # Muse - PPG data has three channels: ambient, infrared, red
             if stream["info"]["type"][0] == "PPG":
-                dat = dat.rename(
-                    columns={"PPG1": "LUX", "PPG2": "PPG", "PPG3": "RED", "IR": "PPG"}
-                )
+                dat = dat.rename(columns={"PPG1": "LUX", "PPG2": "PPG", "PPG3": "RED", "IR": "PPG"})
                 # Zeros suggest interruptions, better to replace with NaNs (I think?)
                 dat["PPG"] = dat["PPG"].replace(0, value=np.nan)
                 dat["LUX"] = dat["LUX"].replace(0, value=np.nan)
@@ -125,12 +133,8 @@ def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = N
 
     # Store metadata
     info = {
-        "sampling_rates_original": [
-            float(s["info"]["nominal_srate"][0]) for s in streams
-        ],
-        "sampling_rates_effective": [
-            float(s["info"]["effective_srate"]) for s in streams
-        ],
+        "sampling_rates_original": [float(s["info"]["nominal_srate"][0]) for s in streams],
+        "sampling_rates_effective": [float(s["info"]["effective_srate"]) for s in streams],
         "datetime": header["info"]["datetime"][0],
         "data": dfs,
     }
@@ -151,14 +155,8 @@ def read_xdf(filename: str, upsample: float = 2.0, fillmissing: float | None = N
         fillmissing = int(info["sampling_rate"] * fillmissing)
 
     # Create new index with evenly spaced timestamps
-    idx = pd.date_range(
-        df.index.min(), df.index.max(), freq=str(1000 / info["sampling_rate"]) + "ms"
-    )
+    idx = pd.date_range(df.index.min(), df.index.max(), freq=str(1000 / info["sampling_rate"]) + "ms")
     # https://stackoverflow.com/questions/47148446/pandas-resample-interpolate-is-producing-nans
-    df = (
-        df.reindex(df.index.union(idx))
-        .interpolate(method="index", limit=fillmissing)
-        .reindex(idx)
-    )
+    df = df.reindex(df.index.union(idx)).interpolate(method="index", limit=fillmissing).reindex(idx)
 
     return df, info

From 5200454ef199a906dd528c3004087551fd82935e Mon Sep 17 00:00:00 2001
From: Sam Marine <77904738+LebombJames@users.noreply.github.com>
Date: Sat, 5 Jul 2025 12:17:54 +0100
Subject: [PATCH 3/9] Update neurokit2/data/read_xdf.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 neurokit2/data/read_xdf.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py
index bf6df5005a..9ede5c4bb2 100644
--- a/neurokit2/data/read_xdf.py
+++ b/neurokit2/data/read_xdf.py
@@ -84,13 +84,12 @@ def read_xdf(
     if urllib.parse.urlparse(filename).scheme != "":
         try:
             req = requests.get(filename, stream=True, timeout=10)
-        except requests.exceptions.Timeout:
-            print("The request timed out!")
-        except requests.exceptions.RequestException as e:
-            print("An error occurred:", e)
+            req.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
 
-        req.raw.decode_content = True
-        filename = io.BytesIO(req.content)
+            req.raw.decode_content = True
+            filename = io.BytesIO(req.content)
+        except requests.exceptions.RequestException as e:
+            raise IOError(f"Failed to read XDF file from URL: {filename}") from e
 
     streams, header = pyxdf.load_xdf(filename)
 

From 034ffc74f3f263a71825726c11a2e27cbf1e7b57 Mon Sep 17 00:00:00 2001
From: LebombJames <sam.marine418@gmail.com>
Date: Sat, 5 Jul 2025 12:31:22 +0100
Subject: [PATCH 4/9] Improve test results further

---
 neurokit2/data/read_xdf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py
index 9ede5c4bb2..5899f5ec32 100644
--- a/neurokit2/data/read_xdf.py
+++ b/neurokit2/data/read_xdf.py
@@ -73,11 +73,11 @@ def read_xdf(
     """
     try:
         import pyxdf
-    except ImportError:
+    except ImportError as e:
         raise ImportError(
             "The 'pyxdf' module is required for this function to run. ",
             "Please install it first (`pip install pyxdf`).",
-        )
+        ) from e
 
     # Load file
     # if filename is a URL, stream bytes from file

From c4b6e8ed0daae83e6158ed2a4d75712803840782 Mon Sep 17 00:00:00 2001
From: Sam Marine <77904738+LebombJames@users.noreply.github.com>
Date: Sat, 5 Jul 2025 12:33:13 +0100
Subject: [PATCH 5/9] Update neurokit2/data/read_xdf.py

This seems like copy-paste from `read-bitalino`

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 neurokit2/data/read_xdf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py
index 5899f5ec32..c29887bf41 100644
--- a/neurokit2/data/read_xdf.py
+++ b/neurokit2/data/read_xdf.py
@@ -52,8 +52,8 @@ def read_xdf(
     Returns
     ----------
     df : DataFrame
-        The device's BITalino file as a pandas dataframe. If multiple devices are read,
-        each device's BITalino file will be merged into one dataframe.
+        The XDF data as a pandas dataframe. If multiple streams are read,
+        they will be merged into one dataframe.
     info : dict
         The metadata information containing the sampling rate(s).
 

From 02d29d288914a0fd392806ac6381238307cb72f4 Mon Sep 17 00:00:00 2001
From: LebombJames <sam.marine418@gmail.com>
Date: Sat, 5 Jul 2025 16:05:53 +0100
Subject: [PATCH 6/9] Updated docstring with interpolation details

---
 neurokit2/data/read_xdf.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py
index c29887bf41..815b8118f4 100644
--- a/neurokit2/data/read_xdf.py
+++ b/neurokit2/data/read_xdf.py
@@ -27,7 +27,11 @@ def read_xdf(
 
     Note that, as XDF can store streams with different sampling rates and different time stamps,
     **the function will resample all streams to 2 times (default) the highest sampling rate** (to
-    minimize aliasing). The final sampling rate can be found in the ``info`` dictionary.
+    minimize aliasing) and then interpolate based on an evenly spaced index. While this is generally safe, it
+    may produce unexpected results, particularly if the original stream has large gaps in its time series.
+    For more discussion, see `here <https://github.com/xdf-modules/pyxdf/pull/1>`_.
+
+    The final upsampled sampling rate can be found in the ``info`` dictionary.
 
     .. note::
 
@@ -39,9 +43,10 @@ def read_xdf(
     filename :  str
         Path (with the extension) or URL pointing to an XDF file (e.g., ``"data.xdf"``).
     upsample : float
-        Factor by which to upsample the data. Default is 2, which means that the data will be
+        Factor by which to upsample the data. Default is 2.0, which means that the data will be
         resampled to 2 times the highest sampling rate. You can increase that to further reduce
-        edge-distortion, especially for high frequency signals like EEG.
+        edge-distortion, especially for high frequency signals like EEG. ``1.0`` disables upsampling
+        (but not interpolation).
     fillmissing : float
         The maximum duration in seconds of missing data to fill. ``None`` (default) will
         interpolate all missing values and prevent issues with NaNs. However, it might be important

From 56caaed53cd34f7696638b5d1811047ba982ef9f Mon Sep 17 00:00:00 2001
From: LebombJames <sam.marine418@gmail.com>
Date: Sat, 5 Jul 2025 17:21:28 +0100
Subject: [PATCH 7/9] Use a generator expression to further improve test
 score

---
 neurokit2/data/read_xdf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py
index 815b8118f4..8e849cc5a7 100644
--- a/neurokit2/data/read_xdf.py
+++ b/neurokit2/data/read_xdf.py
@@ -99,7 +99,7 @@ def read_xdf(
     streams, header = pyxdf.load_xdf(filename)
 
     # Get smaller time stamp to later use as offset (zero point)
-    min_ts = min([min(s["time_stamps"]) for s in streams])
+    min_ts = min(min(s["time_stamps"]) for s in streams)
 
     # Loop through all the streams and convert to dataframes
     dfs = []

From d1edf58ed481004fa892a205cb8db87332269269 Mon Sep 17 00:00:00 2001
From: DominiqueMakowski <dom.mak19@gmail.com>
Date: Mon, 7 Jul 2025 09:01:43 +0100
Subject: [PATCH 8/9] Simplify return type

---
 neurokit2/data/read_xdf.py | 40 +++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py
index 8e849cc5a7..a420a20759 100644
--- a/neurokit2/data/read_xdf.py
+++ b/neurokit2/data/read_xdf.py
@@ -9,17 +9,9 @@
 import requests
 
 
-class ReadXDFInfo(TypedDict):
-    sampling_rates_original: list[float]
-    sampling_rates_effective: list[float]
-    sampling_rate: int
-    datetime: str
-    data: list[pd.DataFrame]
-
-
 def read_xdf(
     filename: str, upsample: float = 2.0, fillmissing: float | None = None
-) -> tuple[pd.DataFrame, ReadXDFInfo]:
+) -> tuple[pd.DataFrame, dict]:
     """**Read and tidy an XDF file**
 
     Reads and tidies an XDF file with multiple streams into a Pandas DataFrame.
@@ -115,16 +107,22 @@ def read_xdf(
             if stream["info"]["type"][0] == "GYRO":
                 dat = dat.rename(columns={"X": "GYRO_X", "Y": "GYRO_Y", "Z": "GYRO_Z"})
                 # Compute movement
-                dat["GYRO"] = np.sqrt(dat["GYRO_X"] ** 2 + dat["GYRO_Y"] ** 2 + dat["GYRO_Z"] ** 2)
+                dat["GYRO"] = np.sqrt(
+                    dat["GYRO_X"] ** 2 + dat["GYRO_Y"] ** 2 + dat["GYRO_Z"] ** 2
+                )
 
             if stream["info"]["type"][0] == "ACC":
                 dat = dat.rename(columns={"X": "ACC_X", "Y": "ACC_Y", "Z": "ACC_Z"})
                 # Compute acceleration
-                dat["ACC"] = np.sqrt(dat["ACC_X"] ** 2 + dat["ACC_Y"] ** 2 + dat["ACC_Z"] ** 2)
+                dat["ACC"] = np.sqrt(
+                    dat["ACC_X"] ** 2 + dat["ACC_Y"] ** 2 + dat["ACC_Z"] ** 2
+                )
 
             # Muse - PPG data has three channels: ambient, infrared, red
             if stream["info"]["type"][0] == "PPG":
-                dat = dat.rename(columns={"PPG1": "LUX", "PPG2": "PPG", "PPG3": "RED", "IR": "PPG"})
+                dat = dat.rename(
+                    columns={"PPG1": "LUX", "PPG2": "PPG", "PPG3": "RED", "IR": "PPG"}
+                )
                 # Zeros suggest interruptions, better to replace with NaNs (I think?)
                 dat["PPG"] = dat["PPG"].replace(0, value=np.nan)
                 dat["LUX"] = dat["LUX"].replace(0, value=np.nan)
@@ -137,8 +135,12 @@ def read_xdf(
 
     # Store metadata
     info = {
-        "sampling_rates_original": [float(s["info"]["nominal_srate"][0]) for s in streams],
-        "sampling_rates_effective": [float(s["info"]["effective_srate"]) for s in streams],
+        "sampling_rates_original": [
+            float(s["info"]["nominal_srate"][0]) for s in streams
+        ],
+        "sampling_rates_effective": [
+            float(s["info"]["effective_srate"]) for s in streams
+        ],
         "datetime": header["info"]["datetime"][0],
         "data": dfs,
     }
@@ -159,8 +161,14 @@ def read_xdf(
         fillmissing = int(info["sampling_rate"] * fillmissing)
 
     # Create new index with evenly spaced timestamps
-    idx = pd.date_range(df.index.min(), df.index.max(), freq=str(1000 / info["sampling_rate"]) + "ms")
+    idx = pd.date_range(
+        df.index.min(), df.index.max(), freq=str(1000 / info["sampling_rate"]) + "ms"
+    )
     # https://stackoverflow.com/questions/47148446/pandas-resample-interpolate-is-producing-nans
-    df = df.reindex(df.index.union(idx)).interpolate(method="index", limit=fillmissing).reindex(idx)
+    df = (
+        df.reindex(df.index.union(idx))
+        .interpolate(method="index", limit=fillmissing)
+        .reindex(idx)
+    )
 
     return df, info

From fb6676fbc8a8c0a78df53434671ac043f7f8e7a1 Mon Sep 17 00:00:00 2001
From: DominiqueMakowski <dom.mak19@gmail.com>
Date: Mon, 7 Jul 2025 09:04:22 +0100
Subject: [PATCH 9/9] Remove unused TypedDict import from read_xdf.py

---
 neurokit2/data/read_xdf.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py
index a420a20759..a2deb40448 100644
--- a/neurokit2/data/read_xdf.py
+++ b/neurokit2/data/read_xdf.py
@@ -2,8 +2,6 @@
 import io
 import urllib
 
-from typing import TypedDict
-
 import numpy as np
 import pandas as pd
 import requests