Source code for pyhealth.datasets.eicu

import logging
from pathlib import Path
from typing import List, Optional

from .base_dataset import BaseDataset

logger = logging.getLogger(__name__)


[docs]class eICUDataset(BaseDataset): """ A dataset class for handling eICU data. The eICU dataset is a large dataset of de-identified health records of ICU patients. The dataset is available at https://eicu-crd.mit.edu/. The basic information is stored in the following tables: - patient: defines a patient (uniquepid), a hospital admission (patienthealthsystemstayid), and an ICU stay (patientunitstayid) in the database. - hospital: contains information about a hospital (e.g., region). Note that in eICU, a patient can have multiple hospital admissions and each hospital admission can have multiple ICU stays. The data in eICU is centered around the ICU stay and all timestamps are relative to the ICU admission time. We further support the following tables: - diagnosis: contains ICD diagnoses (ICD9CM and ICD10CM code) and diagnosis information for patients - treatment: contains treatment information for patients. - medication: contains medication related order entries for patients. - lab: contains laboratory measurements for patients - physicalexam: contains all physical exams conducted for patients. - admissiondx: table contains the primary diagnosis for admission to the ICU per the APACHE scoring criteria. Attributes: root (str): The root directory where the dataset is stored. tables (List[str]): A list of tables to be included in the dataset. dataset_name (Optional[str]): The name of the dataset. config_path (Optional[str]): The path to the configuration file. Examples: >>> from pyhealth.datasets import eICUDataset >>> dataset = eICUDataset( ... root="/path/to/eicu-crd/2.0", ... tables=["diagnosis", "medication", "treatment"], ... ) >>> dataset.stats() >>> patient = dataset.get_patient("patient_id") """ def __init__( self, root: str, tables: List[str], dataset_name: Optional[str] = None, config_path: Optional[str] = None, **kwargs ) -> None: """ Initializes the eICUDataset with the specified parameters. Args: root (str): The root directory where the dataset is stored. tables (List[str]): A list of additional tables to include. dataset_name (Optional[str]): The name of the dataset. Defaults to "eicu". config_path (Optional[str]): The path to the configuration file. If not provided, a default config is used. """ if config_path is None: logger.info("No config path provided, using default config") config_path = Path(__file__).parent / "configs" / "eicu.yaml" # Default table is patient which contains basic patient/stay info default_tables = ["patient"] tables = default_tables + tables super().__init__( root=root, tables=tables, dataset_name=dataset_name or "eicu", config_path=config_path, **kwargs ) return