Source code for pyhealth.tasks.length_of_stay_prediction

from pyhealth.data import Patient


def categorize_los(days: int):
    """Maps a length of stay in days onto one of 10 ordinal categories.

    The buckets are:

    - ``0``: fewer than 1 day
    - ``1`` to ``7``: one bucket per day of the first week (the value of
      ``days`` itself is returned)
    - ``8``: more than 7 days and at most 14 days
    - ``9``: more than 14 days

    Args:
        days: int, length of stay in days

    Returns:
        category: int, category of length of stay
    """
    # Guard clauses run from the shortest bucket upward, so each check
    # only needs the upper bound of its bucket.
    if days < 1:
        return 0
    if days <= 7:
        # within the first week the day count is its own category
        return days
    if days <= 14:
        return 8
    return 9
def length_of_stay_prediction_mimic3_fn(patient: Patient):
    """Builds length-of-stay prediction samples for a single MIMIC-III patient.

    Length of stay prediction aims at predicting the length of stay (in days)
    of the current hospital visit based on the clinical information from the
    visit (e.g., conditions and procedures).

    Every visit of the patient is turned into one sample, except visits that
    have no code in at least one of the ``DIAGNOSES_ICD``, ``PROCEDURES_ICD``
    or ``PRESCRIPTIONS`` tables, which are skipped.

    Args:
        patient: a Patient object.

    Returns:
        samples: a list of samples, each sample is a dict with the keys
            ``visit_id``, ``patient_id``, ``conditions``, ``procedures``,
            ``drugs`` and ``label``. The three code entries are each wrapped
            in a one-element list; ``label`` is the category returned by
            ``categorize_los``.

    Note that we define the task as a multi-class classification task.

    Examples:
        >>> from pyhealth.datasets import MIMIC3Dataset
        >>> mimic3_base = MIMIC3Dataset(
        ...     root="/srv/local/data/physionet.org/files/mimiciii/1.4",
        ...     tables=["DIAGNOSES_ICD", "PROCEDURES_ICD", "PRESCRIPTIONS"],
        ...     code_mapping={"ICD9CM": "CCSCM"},
        ... )
        >>> from pyhealth.tasks import length_of_stay_prediction_mimic3_fn
        >>> mimic3_sample = mimic3_base.set_task(length_of_stay_prediction_mimic3_fn)
        >>> mimic3_sample.samples[0]  # values are illustrative
        {'visit_id': '130744', 'patient_id': '103', 'conditions': [[...]], 'procedures': [[...]], 'drugs': [[...]], 'label': 4}
    """
    samples = []
    for visit in patient:
        codes = {
            "conditions": visit.get_code_list(table="DIAGNOSES_ICD"),
            "procedures": visit.get_code_list(table="PROCEDURES_ICD"),
            "drugs": visit.get_code_list(table="PRESCRIPTIONS"),
        }
        # exclude: visits without condition, procedure, or drug code
        if 0 in (
            len(codes["conditions"]),
            len(codes["procedures"]),
            len(codes["drugs"]),
        ):
            continue

        # the label is derived from the .days of (discharge - encounter)
        stay = visit.discharge_time - visit.encounter_time
        label = categorize_los(stay.days)

        # TODO: should also exclude visit with age < 18
        sample = {
            "visit_id": visit.visit_id,
            "patient_id": patient.patient_id,
            # each code list is wrapped in an outer single-element list
            "conditions": [codes["conditions"]],
            "procedures": [codes["procedures"]],
            "drugs": [codes["drugs"]],
            "label": label,
        }
        samples.append(sample)
    # no cohort selection
    return samples
def length_of_stay_prediction_mimic4_fn(patient: Patient):
    """Builds length-of-stay prediction samples for a single MIMIC-IV patient.

    Length of stay prediction aims at predicting the length of stay (in days)
    of the current hospital visit based on the clinical information from the
    visit (e.g., conditions and procedures).

    Every visit of the patient is turned into one sample, except visits that
    have no code in at least one of the ``diagnoses_icd``, ``procedures_icd``
    or ``prescriptions`` tables, which are skipped.

    Args:
        patient: a Patient object.

    Returns:
        samples: a list of samples, each sample is a dict with the keys
            ``visit_id``, ``patient_id``, ``conditions``, ``procedures``,
            ``drugs`` and ``label``. The three code entries are each wrapped
            in a one-element list; ``label`` is the category returned by
            ``categorize_los``.

    Note that we define the task as a multi-class classification task.

    Examples:
        >>> from pyhealth.datasets import MIMIC4Dataset
        >>> mimic4_base = MIMIC4Dataset(
        ...     root="/srv/local/data/physionet.org/files/mimiciv/2.0/hosp",
        ...     tables=["diagnoses_icd", "procedures_icd", "prescriptions"],
        ...     code_mapping={"ICD10PROC": "CCSPROC"},
        ... )
        >>> from pyhealth.tasks import length_of_stay_prediction_mimic4_fn
        >>> mimic4_sample = mimic4_base.set_task(length_of_stay_prediction_mimic4_fn)
        >>> mimic4_sample.samples[0]  # values are illustrative
        {'visit_id': '130744', 'patient_id': '103', 'conditions': [[...]], 'procedures': [[...]], 'drugs': [[...]], 'label': 2}
    """
    samples = []
    for visit in patient:
        codes = {
            "conditions": visit.get_code_list(table="diagnoses_icd"),
            "procedures": visit.get_code_list(table="procedures_icd"),
            "drugs": visit.get_code_list(table="prescriptions"),
        }
        # exclude: visits without condition, procedure, or drug code
        if 0 in (
            len(codes["conditions"]),
            len(codes["procedures"]),
            len(codes["drugs"]),
        ):
            continue

        # the label is derived from the .days of (discharge - encounter)
        stay = visit.discharge_time - visit.encounter_time
        label = categorize_los(stay.days)

        # TODO: should also exclude visit with age < 18
        sample = {
            "visit_id": visit.visit_id,
            "patient_id": patient.patient_id,
            # each code list is wrapped in an outer single-element list
            "conditions": [codes["conditions"]],
            "procedures": [codes["procedures"]],
            "drugs": [codes["drugs"]],
            "label": label,
        }
        samples.append(sample)
    # no cohort selection
    return samples
def length_of_stay_prediction_eicu_fn(patient: Patient):
    """Builds length-of-stay prediction samples for a single eICU patient.

    Length of stay prediction aims at predicting the length of stay (in days)
    of the current hospital visit based on the clinical information from the
    visit (e.g., conditions and procedures).

    Every visit of the patient is turned into one sample, except visits that
    have no code in at least one of the ``diagnosis``, ``physicalExam`` or
    ``medication`` tables, which are skipped. Codes from ``physicalExam`` are
    stored under the ``procedures`` key.

    Args:
        patient: a Patient object.

    Returns:
        samples: a list of samples, each sample is a dict with the keys
            ``visit_id``, ``patient_id``, ``conditions``, ``procedures``,
            ``drugs`` and ``label``. The three code entries are each wrapped
            in a one-element list; ``label`` is the category returned by
            ``categorize_los``.

    Note that we define the task as a multi-class classification task.

    Examples:
        >>> from pyhealth.datasets import eICUDataset
        >>> eicu_base = eICUDataset(
        ...     root="/srv/local/data/physionet.org/files/eicu-crd/2.0",
        ...     tables=["diagnosis", "medication", "physicalExam"],
        ...     code_mapping={},
        ...     dev=True
        ... )
        >>> from pyhealth.tasks import length_of_stay_prediction_eicu_fn
        >>> eicu_sample = eicu_base.set_task(length_of_stay_prediction_eicu_fn)
        >>> eicu_sample.samples[0]  # values are illustrative
        {'visit_id': '130744', 'patient_id': '103', 'conditions': [[...]], 'procedures': [[...]], 'drugs': [[...]], 'label': 5}
    """
    samples = []
    for visit in patient:
        codes = {
            "conditions": visit.get_code_list(table="diagnosis"),
            "procedures": visit.get_code_list(table="physicalExam"),
            "drugs": visit.get_code_list(table="medication"),
        }
        # exclude: visits without condition, procedure, or drug code
        if 0 in (
            len(codes["conditions"]),
            len(codes["procedures"]),
            len(codes["drugs"]),
        ):
            continue

        # the label is derived from the .days of (discharge - encounter)
        stay = visit.discharge_time - visit.encounter_time
        label = categorize_los(stay.days)

        # TODO: should also exclude visit with age < 18
        sample = {
            "visit_id": visit.visit_id,
            "patient_id": patient.patient_id,
            # each code list is wrapped in an outer single-element list
            "conditions": [codes["conditions"]],
            "procedures": [codes["procedures"]],
            "drugs": [codes["drugs"]],
            "label": label,
        }
        samples.append(sample)
    # no cohort selection
    return samples
def length_of_stay_prediction_omop_fn(patient: Patient):
    """Builds length-of-stay prediction samples for a single OMOP patient.

    Length of stay prediction aims at predicting the length of stay (in days)
    of the current hospital visit based on the clinical information from the
    visit (e.g., conditions and procedures).

    Every visit of the patient is turned into one sample, except visits that
    have no code in at least one of the ``condition_occurrence``,
    ``procedure_occurrence`` or ``drug_exposure`` tables, which are skipped.

    Args:
        patient: a Patient object.

    Returns:
        samples: a list of samples, each sample is a dict with the keys
            ``visit_id``, ``patient_id``, ``conditions``, ``procedures``,
            ``drugs`` and ``label``. The three code entries are each wrapped
            in a one-element list; ``label`` is the category returned by
            ``categorize_los``.

    Note that we define the task as a multi-class classification task.

    Examples:
        >>> from pyhealth.datasets import OMOPDataset
        >>> omop_base = OMOPDataset(
        ...     root="https://storage.googleapis.com/pyhealth/synpuf1k_omop_cdm_5.2.2",
        ...     tables=["condition_occurrence", "procedure_occurrence", "drug_exposure"],
        ...     code_mapping={},
        ... )
        >>> from pyhealth.tasks import length_of_stay_prediction_omop_fn
        >>> omop_sample = omop_base.set_task(length_of_stay_prediction_omop_fn)
        >>> omop_sample.samples[0]  # values are illustrative
        {'visit_id': '130744', 'patient_id': '103', 'conditions': [[...]], 'procedures': [[...]], 'drugs': [[...]], 'label': 7}
    """
    samples = []
    for visit in patient:
        codes = {
            "conditions": visit.get_code_list(table="condition_occurrence"),
            "procedures": visit.get_code_list(table="procedure_occurrence"),
            "drugs": visit.get_code_list(table="drug_exposure"),
        }
        # exclude: visits without condition, procedure, or drug code
        if 0 in (
            len(codes["conditions"]),
            len(codes["procedures"]),
            len(codes["drugs"]),
        ):
            continue

        # the label is derived from the .days of (discharge - encounter)
        stay = visit.discharge_time - visit.encounter_time
        label = categorize_los(stay.days)

        # TODO: should also exclude visit with age < 18
        sample = {
            "visit_id": visit.visit_id,
            "patient_id": patient.patient_id,
            # each code list is wrapped in an outer single-element list
            "conditions": [codes["conditions"]],
            "procedures": [codes["procedures"]],
            "drugs": [codes["drugs"]],
            "label": label,
        }
        samples.append(sample)
    # no cohort selection
    return samples
if __name__ == "__main__":
    # Manual smoke run: build each dataset with dev=True, apply the matching
    # task function, and print the resulting statistics and available keys.
    # NOTE: the dataset roots below are hard-coded local paths.

    def _report(base, task_fn):
        """Applies task_fn to base, then prints its stats and available keys."""
        task_samples = base.set_task(task_fn=task_fn)
        task_samples.stat()
        print(task_samples.available_keys)

    # MIMIC-III
    from pyhealth.datasets import MIMIC3Dataset

    _report(
        MIMIC3Dataset(
            root="/srv/local/data/physionet.org/files/mimiciii/1.4",
            tables=["DIAGNOSES_ICD", "PROCEDURES_ICD", "PRESCRIPTIONS"],
            dev=True,
            code_mapping={"ICD9CM": "CCSCM", "NDC": "ATC"},
            refresh_cache=False,
        ),
        length_of_stay_prediction_mimic3_fn,
    )

    # MIMIC-IV
    from pyhealth.datasets import MIMIC4Dataset

    _report(
        MIMIC4Dataset(
            root="/srv/local/data/physionet.org/files/mimiciv/2.0/hosp",
            tables=["diagnoses_icd", "procedures_icd", "prescriptions"],
            dev=True,
            code_mapping={"NDC": "ATC"},
            refresh_cache=False,
        ),
        length_of_stay_prediction_mimic4_fn,
    )

    # eICU
    from pyhealth.datasets import eICUDataset

    _report(
        eICUDataset(
            root="/srv/local/data/physionet.org/files/eicu-crd/2.0",
            tables=["diagnosis", "medication", "physicalExam"],
            dev=True,
            refresh_cache=False,
        ),
        length_of_stay_prediction_eicu_fn,
    )

    # OMOP
    from pyhealth.datasets import OMOPDataset

    _report(
        OMOPDataset(
            root="/srv/local/data/zw12/pyhealth/raw_data/synpuf1k_omop_cdm_5.2.2",
            tables=["condition_occurrence", "procedure_occurrence", "drug_exposure"],
            dev=True,
            refresh_cache=False,
        ),
        length_of_stay_prediction_omop_fn,
    )