import logging
import os
from pathlib import Path
from typing import Optional, Union
import pandas as pd
from pyhealth.datasets import BaseDataset
logger = logging.getLogger(__name__)
[docs]class DREAMTDataset(BaseDataset):
"""
Base Dataset for Real-time sleep stage EstimAtion using Multisensor wearable Technology (DREAMT)
Dataset accepts current versions of DREAMT (1.0.0, 1.0.1, 2.0.0, 2.1.0), available at:
https://physionet.org/content/dreamt/
DREAMT includes wrist-based wearable and polysomnography (PSG) sleep data from 100 participants
recruited from the Duke University Health System (DUHS) Sleep Disorder Lab. This includes
wearable signals, PSG signals, sleep labels, and clinical data related to sleep health and disorders.
Citations:
---------
When using this dataset, please cite:
Wang, K., Yang, J., Shetty, A., & Dunn, J. (2025). DREAMT: Dataset for Real-time sleep stage EstimAtion
using Multisensor wearable Technology (version 2.1.0). PhysioNet. RRID:SCR_007345.
https://doi.org/10.13026/7r9r-7r24
Will Ke Wang, Jiamu Yang, Leeor Hershkovich, Hayoung Jeong, Bill Chen, Karnika Singh, Ali R Roghanizad,
Md Mobashir Hasan Shandhi, Andrew R Spector, Jessilyn Dunn. (2024). Proceedings of the fifth
Conference on Health, Inference, and Learning, PMLR 248:380-396.
Goldberger, A., Amaral, L., Glass, L., Hausdorff, J., Ivanov, P. C., Mark, R., ... & Stanley, H. E. (2000).
PhysioBank, PhysioToolkit, and PhysioNet: Components of a new research resource for complex
physiologic signals. Circulation [Online]. 101 (23), pp. e215–e220. RRID:SCR_007345.
Note:
---------
The dataset follows the file and folder structure of the downloaded dataset version: it looks for
participant_info.csv and the data folders under root, so root should point to the downloaded version
directory, for example: root = ".../dreamt/1.0.0/" or ".../dreamt/2.0.0/"
Args:
root: root directory containing the dataset files
dataset_name: optional name of dataset, defaults to "dreamt_sleep"
config_path: optional configuration file, defaults to "dreamt.yaml"
Attributes:
root: root directory containing the dataset files
dataset_name: name of dataset
config_path: path to configuration file
Examples:
>>> from pyhealth.datasets import DREAMTDataset
>>> dataset = DREAMTDataset(root = "/path/to/dreamt/data/version")
>>> dataset.stats()
>>>
>>> # Get all patient ids
>>> unique_patients = dataset.unique_patient_ids
>>> print(f"There are {len(unique_patients)} patients")
>>>
>>> # Get single patient data
>>> patient = dataset.get_patient("S002")
>>> print(f"Patient has {len(patient.data_source)} event")
>>>
>>> # Get event
>>> event = patient.get_events(event_type="dreamt_sleep")
>>>
>>> # Get Apnea-Hypopnea Index (AHI)
>>> ahi = event[0].ahi
>>> print(f"AHI is {ahi}")
>>>
>>> # Get 64Hz sleep file path
>>> file_path = event[0].file_64hz
>>> print(f"64Hz sleep file path: {file_path}")
"""
def __init__(
    self,
    root: str,
    dataset_name: Optional[str] = None,
    config_path: Optional[str] = None,
) -> None:
    """
    Initializes the dataset from a DREAMT version directory

    Args:
        root: root directory containing the dataset files
        dataset_name: optional name of dataset, defaults to "dreamt_sleep"
        config_path: optional configuration file, defaults to the
            "configs/dreamt.yaml" file located next to this module

    Note:
        If "dreamt-metadata.csv" is not found under root, prepare_metadata(root)
        is called before the base dataset is initialized.
    """
    # Fall back to the config file stored next to this module.
    if config_path is None:
        logger.info("No config provided, using default config")
        config_path = Path(__file__).parent.joinpath("configs", "dreamt.yaml")

    # The metadata file must be handled before the base class is initialized;
    # presumably the base class reads it via the config -- TODO confirm.
    metadata_file = Path(root).joinpath("dreamt-metadata.csv")
    metadata_present = os.path.exists(metadata_file)
    if not metadata_present:
        logger.info(f"{metadata_file} does not exist")
        self.prepare_metadata(root)

    super().__init__(
        root=root,
        tables=["dreamt_sleep"],
        dataset_name="dreamt_sleep" if not dataset_name else dataset_name,
        config_path=config_path,
    )
def get_patient_file(self, patient_id: str, root: str, file_path: str) -> Optional[Path]:
    """
    Returns file path of 64Hz and 100Hz data for a patient, or None if no file found

    Args:
        patient_id: patient identifier
        root: root directory containing the dataset files
        file_path: name of the data folder under root. "data_64Hz" and "data"
            hold "<patient_id>_whole_df.csv"; "data_100Hz" holds
            "<patient_id>_PSG_df.csv"

    Returns:
        file: path to file location, or None if the folder name is not
            recognized or the file does not exist
    """
    # Data folder name -> suffix of the per-patient CSV stored in that folder.
    file_suffixes = {
        "data_64Hz": "whole_df",
        "data": "whole_df",
        "data_100Hz": "PSG_df",
    }

    suffix = file_suffixes.get(file_path)
    if suffix is None:
        # An unrecognized folder has no known file naming scheme; report it as
        # "no file" instead of failing on a path that was never built.
        logger.info(f"{file_path} is not a recognized data folder")
        return None

    file = Path(root) / f"{file_path}" / f"{patient_id}_{suffix}.csv"
    if not os.path.exists(file):
        logger.info(f"{file} not found")
        return None
    return file