Source code for pyhealth.metrics.fairness_utils.group

import numpy as np

"""
Notation:
    - Protected group: P
    - Unprotected group: U
"""

def disparate_impact(
    sensitive_attributes: np.ndarray,
    y_pred: np.ndarray,
    favorable_outcome: int = 1,
    allow_zero_division: bool = False,
    epsilon: float = 1e-8,
) -> float:
    """Computes the disparate impact between the protected and unprotected group.

    disparate_impact = P(y_pred = favorable_outcome | P) / P(y_pred = favorable_outcome | U)

    Args:
        sensitive_attributes: Sensitive attributes of shape (n_samples,) where 1 is the
            protected group and 0 is the unprotected group.
        y_pred: Predicted target values of shape (n_samples,).
        favorable_outcome: Label value which is considered favorable (i.e. "positive").
        allow_zero_division: If True, use epsilon instead of 0 in the denominator if the
            denominator is 0. Otherwise, raise a ValueError.
        epsilon: Small constant used in place of a zero denominator when
            allow_zero_division is True.

    Returns:
        The disparate impact between the protected and unprotected group.
    """
    # Rate of favorable predictions in the unprotected group (sensitive == 0).
    p_fav_unpr = np.sum(y_pred[sensitive_attributes == 0] == favorable_outcome) / len(
        y_pred[sensitive_attributes == 0]
    )
    # Rate of favorable predictions in the protected group (sensitive == 1).
    p_fav_prot = np.sum(y_pred[sensitive_attributes == 1] == favorable_outcome) / len(
        y_pred[sensitive_attributes == 1]
    )

    if p_fav_unpr == 0:
        if allow_zero_division:
            p_fav_unpr = epsilon
        else:
            raise ValueError(
                "Unprotected group has no instances with a favorable outcome. "
                "Disparate impact is undefined."
            )

    disparate_impact_value = p_fav_prot / p_fav_unpr
    return disparate_impact_value
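As an illustration of how disparate_impact behaves, here is a minimal usage sketch. The inputs below are made-up example data, not part of the library; it assumes the function above is in scope.

# Hypothetical example: first two samples are protected (1), last two unprotected (0).
sensitive = np.array([1, 1, 0, 0])
preds = np.array([1, 0, 1, 1])  # favorable outcome = 1
# P(fav | P) = 1/2, P(fav | U) = 2/2, so disparate_impact = 0.5 / 1.0 = 0.5
print(disparate_impact(sensitive, preds))  # 0.5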
def statistical_parity_difference(
    sensitive_attributes: np.ndarray,
    y_pred: np.ndarray,
    favorable_outcome: int = 1,
) -> float:
    """Computes the statistical parity difference between the protected and unprotected group.

    statistical_parity_difference = P(y_pred = favorable_outcome | P) - P(y_pred = favorable_outcome | U)

    Args:
        sensitive_attributes: Sensitive attributes of shape (n_samples,) where 1 is the
            protected group and 0 is the unprotected group.
        y_pred: Predicted target values of shape (n_samples,).
        favorable_outcome: Label value which is considered favorable (i.e. "positive").

    Returns:
        The statistical parity difference between the protected and unprotected group.
    """
    # Rate of favorable predictions in the unprotected group (sensitive == 0).
    p_fav_unpr = np.sum(y_pred[sensitive_attributes == 0] == favorable_outcome) / len(
        y_pred[sensitive_attributes == 0]
    )
    # Rate of favorable predictions in the protected group (sensitive == 1).
    p_fav_prot = np.sum(y_pred[sensitive_attributes == 1] == favorable_outcome) / len(
        y_pred[sensitive_attributes == 1]
    )

    statistical_parity_difference_value = p_fav_prot - p_fav_unpr
    return statistical_parity_difference_value
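A matching sketch for statistical_parity_difference, using the same hypothetical inputs as above (example data only, not from the library):

sensitive = np.array([1, 1, 0, 0])
preds = np.array([1, 0, 1, 1])
# P(fav | P) - P(fav | U) = 0.5 - 1.0 = -0.5; a negative value means the
# protected group receives the favorable outcome less often.
print(statistical_parity_difference(sensitive, preds))  # -0.5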