diff --git a/neurokit2/data/read_xdf.py b/neurokit2/data/read_xdf.py index b8c7ded2ca..a2deb40448 100644 --- a/neurokit2/data/read_xdf.py +++ b/neurokit2/data/read_xdf.py @@ -1,9 +1,15 @@ # -*- coding: utf-8 -*- +import io +import urllib + import numpy as np import pandas as pd +import requests -def read_xdf(filename, upsample=2, fillmissing=None): +def read_xdf( + filename: str, upsample: float = 2.0, fillmissing: float | None = None +) -> tuple[pd.DataFrame, dict]: """**Read and tidy an XDF file** Reads and tidies an XDF file with multiple streams into a Pandas DataFrame. @@ -11,7 +17,11 @@ def read_xdf(filename, upsample=2, fillmissing=None): Note that, as XDF can store streams with different sampling rates and different time stamps, **the function will resample all streams to 2 times (default) the highest sampling rate** (to - minimize aliasing). The final sampling rate can be found in the ``info`` dictionary. + minimize aliasing) and then interpolate based on an evenly spaced index. While this is generally safe, it + may produce unexpected results, particularly if the original stream has large gaps in its time series. + For more discussion, see `here `_. + + The final upsampled sampling rate can be found in the ``info`` dictionary. .. note:: @@ -21,11 +31,12 @@ def read_xdf(filename, upsample=2, fillmissing=None): Parameters ---------- filename : str - Path (with the extension) of an XDF file (e.g., ``"data.xdf"``). + Path (with the extension) or URL pointing to an XDF file (e.g., ``"data.xdf"``). upsample : float - Factor by which to upsample the data. Default is 2, which means that the data will be + Factor by which to upsample the data. Default is 2.0, which means that the data will be resampled to 2 times the highest sampling rate. You can increase that to further reduce - edge-distortion, especially for high frequency signals like EEG. + edge-distortion, especially for high frequency signals like EEG. ``1.0`` disables upsampling + (but not interpolation). fillmissing : float The maximum duration in seconds of missing data to fill. ``None`` (default) will interpolate all missing values and prevent issues with NaNs. However, it might be important @@ -35,9 +46,9 @@ def read_xdf(filename, upsample=2, fillmissing=None): Returns ---------- - df : DataFrame, dict - The BITalino file as a pandas dataframe if one device was read, or a dictionary - of pandas dataframes (one dataframe per device) if multiple devices are read. + df : DataFrame + The XDF data as a pandas dataframe. If multiple streams are read, + they will be merged into one dataframe. info : dict The metadata information containing the sampling rate(s). @@ -53,21 +64,32 @@ def read_xdf(filename, upsample=2, fillmissing=None): # data, info = nk.read_xdf("data.xdf") # sampling_rate = info["sampling_rate"] + """ try: import pyxdf - except ImportError: + except ImportError as e: raise ImportError( "The 'pyxdf' module is required for this function to run. ", "Please install it first (`pip install pyxdf`).", - ) + ) from e # Load file - # TODO: would be nice to be able to stream a file from URL + # if filename is a URL, stream bytes from file + if urllib.parse.urlparse(filename).scheme != "": + try: + req = requests.get(filename, stream=True, timeout=10) + req.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx) + + req.raw.decode_content = True + filename = io.BytesIO(req.content) + except requests.exceptions.RequestException as e: + raise IOError(f"Failed to read XDF file from URL: {filename}") from e + streams, header = pyxdf.load_xdf(filename) # Get smaller time stamp to later use as offset (zero point) - min_ts = min([min(s["time_stamps"]) for s in streams]) + min_ts = min(min(s["time_stamps"]) for s in streams) # Loop through all the streams and convert to dataframes dfs = []