Source code for pyhealth.medcode.cross_map

import logging
import os
from collections import defaultdict
from typing import List, Optional, Dict
from urllib.error import HTTPError

import pyhealth.medcode as medcode
from pyhealth.medcode.utils import MODULE_CACHE_PATH, download_and_read_csv
from pyhealth.utils import load_pickle, save_pickle

logger = logging.getLogger(__name__)


class CrossMap:
    """Contains mapping between two medical code systems.

    `CrossMap` is a base class for all possible mappings. It will be
    initialized with two specific medical code systems with
    `CrossMap.load(source_vocabulary, target_vocabulary)`.

    Args:
        source_vocabulary: source medical code system. Must also be the name
            of an attribute of `pyhealth.medcode` that can be called with no
            arguments.
        target_vocabulary: target medical code system. Same requirement as
            `source_vocabulary`.
        refresh_cache: whether to ignore an existing cached pickle and
            rebuild the mapping from the CSV file. Default is False.

    Attributes:
        s_vocab: name of the source code system.
        t_vocab: name of the target code system.
        mapping: dict-like object from a source code to the list of target
            codes it maps to.
        s_class: instance of the source vocabulary class.
        t_class: instance of the target vocabulary class.
    """

    def __init__(
        self,
        source_vocabulary: str,
        target_vocabulary: str,
        refresh_cache: bool = False,
    ):
        self.s_vocab = source_vocabulary
        self.t_vocab = target_vocabulary

        # load mapping
        pickle_filename = f"{self.s_vocab}_to_{self.t_vocab}.pkl"
        pickle_filepath = os.path.join(MODULE_CACHE_PATH, pickle_filename)
        if os.path.exists(pickle_filepath) and (not refresh_cache):
            self.mapping = load_pickle(pickle_filepath)
            # Log only once the load has actually succeeded.
            logger.debug(
                f"Loaded {self.s_vocab}->{self.t_vocab} mapping "
                f"from {pickle_filepath}"
            )
        else:
            logger.debug(f"Processing {self.s_vocab}->{self.t_vocab} mapping...")
            local_filename = f"{self.s_vocab}_to_{self.t_vocab}.csv"
            try:
                df = download_and_read_csv(local_filename, refresh_cache)
            except HTTPError:
                # No file under the "source_to_target" name: retry with the
                # reversed name. Columns are still looked up by vocabulary
                # name below, so the direction of the mapping is unchanged.
                local_filename = f"{self.t_vocab}_to_{self.s_vocab}.csv"
                df = download_and_read_csv(local_filename, refresh_cache)
            self.mapping = defaultdict(list)
            for _, row in df.iterrows():
                self.mapping[row[self.s_vocab]].append(row[self.t_vocab])
            save_pickle(self.mapping, pickle_filepath)
            # Log only once the cache file has actually been written.
            logger.debug(
                f"Saved {self.s_vocab}->{self.t_vocab} mapping "
                f"to {pickle_filepath}"
            )

        # load source and target vocabulary classes
        self.s_class = getattr(medcode, source_vocabulary)()
        self.t_class = getattr(medcode, target_vocabulary)()

    def __repr__(self):
        return (
            f"CrossMap(source_vocabulary={self.s_vocab}, "
            f"source_class={self.s_class} "
            f"target_vocabulary={self.t_vocab}, "
            f"target_class={self.t_class})"
        )

    @classmethod
    def load(
        cls,
        source_vocabulary: str,
        target_vocabulary: str,
        refresh_cache: bool = False,
    ) -> "CrossMap":
        """Initializes the mapping between two medical code systems.

        Args:
            source_vocabulary: source medical code system.
            target_vocabulary: target medical code system.
            refresh_cache: whether to refresh the cache. Default is False.

        Returns:
            A new instance of `cls` built from the given arguments.

        Examples:
            >>> from pyhealth.medcode import CrossMap
            >>> mapping = CrossMap("ICD9CM", "CCSCM")
            >>> mapping.map("428.0")
            ['108']
            >>> mapping = CrossMap.load("NDC", "ATC")
            >>> mapping.map("00527051210", target_kwargs={"level": 3})
            ['A11C']
        """
        return cls(source_vocabulary, target_vocabulary, refresh_cache)

    def map(
        self,
        source_code: str,
        source_kwargs: Optional[Dict] = None,
        target_kwargs: Optional[Dict] = None,
    ) -> List[str]:
        """Maps a source code to a list of target codes.

        Args:
            source_code: source code.
            source_kwargs: dict of additional keyword arguments for the
                source code. Will be unpacked into `self.s_class.convert()`.
                Default is None, which is treated as an empty dict.
            target_kwargs: dict of additional keyword arguments for the
                target code. Will be unpacked into `self.t_class.convert()`.
                Default is None, which is treated as an empty dict.

        Returns:
            A list of target codes. The list is empty when the source code
            has no entry in the mapping.
        """
        if source_kwargs is None:
            source_kwargs = {}
        if target_kwargs is None:
            target_kwargs = {}
        source_code = self.s_class.standardize(source_code)
        source_code = self.s_class.convert(source_code, **source_kwargs)
        # Use .get() rather than indexing: indexing a defaultdict would insert
        # an empty list for every unknown code and silently grow the mapping.
        target_codes = self.mapping.get(source_code, [])
        return [self.t_class.convert(c, **target_kwargs) for c in target_codes]