Source code for pyhealth.metrics.fairness_utils.group
import numpy as np
"""
Notation:
- Protected group: P
- Unprotected group: U
"""
def disparate_impact(
    sensitive_attributes: np.ndarray,
    y_pred: np.ndarray,
    favorable_outcome: int = 1,
    allow_zero_division: bool = False,
    epsilon: float = 1e-8,
) -> float:
    """
    Computes the disparate impact between the protected and unprotected group.

    disparate_impact = P(y_pred = favorable_outcome | P) / P(y_pred = favorable_outcome | U)

    Args:
        sensitive_attributes: Sensitive attributes of shape (n_samples,) where 1 is the protected group and 0 is the unprotected group.
        y_pred: Predicted target values of shape (n_samples,).
        favorable_outcome: Label value which is considered favorable (i.e. "positive").
        allow_zero_division: If True, use epsilon instead of 0 in the denominator if the denominator is 0. Otherwise, raise a ValueError.
        epsilon: Value substituted for a zero denominator when allow_zero_division is True.

    Returns:
        The disparate impact between the protected and unprotected group.

    Raises:
        ValueError: If either group contains no samples, or if the unprotected
            group has no favorable predictions and allow_zero_division is False.
    """
    # Coerce to arrays so plain Python sequences work with the boolean masks below.
    sensitive_attributes = np.asarray(sensitive_attributes)
    y_pred = np.asarray(y_pred)

    unprotected_pred = y_pred[sensitive_attributes == 0]
    protected_pred = y_pred[sensitive_attributes == 1]

    # An empty group would yield 0/0 = nan, which slips past the zero check
    # further down and silently propagates; fail loudly instead.
    if unprotected_pred.size == 0:
        raise ValueError("Unprotected group has no samples. Disparate impact is undefined.")
    if protected_pred.size == 0:
        raise ValueError("Protected group has no samples. Disparate impact is undefined.")

    p_fav_unpr = np.sum(unprotected_pred == favorable_outcome) / len(unprotected_pred)
    p_fav_prot = np.sum(protected_pred == favorable_outcome) / len(protected_pred)

    if p_fav_unpr == 0:
        if allow_zero_division:
            p_fav_unpr = epsilon
        else:
            raise ValueError("Unprotected group has no instances with a favorable outcome. Disparate impact is undefined.")

    disparate_impact_value = p_fav_prot / p_fav_unpr
    return disparate_impact_value
def statistical_parity_difference(
    sensitive_attributes: np.ndarray,
    y_pred: np.ndarray,
    favorable_outcome: int = 1,
) -> float:
    """
    Computes the statistical parity difference between the protected and unprotected group.

    statistical_parity_difference = P(y_pred = favorable_outcome | P) - P(y_pred = favorable_outcome | U)

    Args:
        sensitive_attributes: Sensitive attributes of shape (n_samples,) where 1 is the protected group and 0 is the unprotected group.
        y_pred: Predicted target values of shape (n_samples,).
        favorable_outcome: Label value which is considered favorable (i.e. "positive").

    Returns:
        The statistical parity difference between the protected and unprotected group.

    Raises:
        ValueError: If either the protected or the unprotected group contains no samples.
    """
    # Coerce to arrays so plain Python sequences work with the boolean masks below.
    sensitive_attributes = np.asarray(sensitive_attributes)
    y_pred = np.asarray(y_pred)

    unprotected_pred = y_pred[sensitive_attributes == 0]
    protected_pred = y_pred[sensitive_attributes == 1]

    # An empty group would make its rate 0/0 = nan and the result silently nan;
    # the metric is undefined in that case, so report it explicitly.
    if unprotected_pred.size == 0:
        raise ValueError("Unprotected group has no samples. Statistical parity difference is undefined.")
    if protected_pred.size == 0:
        raise ValueError("Protected group has no samples. Statistical parity difference is undefined.")

    p_fav_unpr = np.sum(unprotected_pred == favorable_outcome) / len(unprotected_pred)
    p_fav_prot = np.sum(protected_pred == favorable_outcome) / len(protected_pred)

    statistical_parity_difference_value = p_fav_prot - p_fav_unpr
    return statistical_parity_difference_value