Source code for wefe.debias.half_sibling_regression

"""Half Sibling Regression WEFE implementation."""

from copy import deepcopy
from typing import Dict, List, Optional

import numpy as np
from tqdm import tqdm

from wefe.debias.base_debias import BaseDebias
from wefe.preprocessing import get_embeddings_from_tuples
from wefe.utils import check_is_fitted
from wefe.word_embedding_model import WordEmbeddingModel


class HalfSiblingRegression(BaseDebias):
    r"""Half Sibling Regression (HSR) debias method.

    This method learns spurious gender information via causal inference by
    exploiting the statistical dependency between gender-biased word vectors
    and gender-definition word vectors. The learned spurious gender
    information is then subtracted from the gender-biased word vectors:

    .. math::

        V_n' := V_n - G

    where :math:`V_n'` are the debiased word vectors, :math:`V_n` are the
    non gender-definition word vectors and :math:`G` is the approximated
    gender information.

    :math:`G` is obtained by predicting the non gender-definition word vectors
    (:math:`V_n`) from the gender-definition word vectors (:math:`V_d`):

    .. math::

        G := E[V_n|V_d]

    The prediction is done by a Ridge Regression following the next steps:

    1. Compute the weight matrix of a Ridge Regression using two sets of words

    .. math::

        W = ((V_d)^T V_d + \alpha I)^{-1} (V_d)^TV_n

    2. Compute the gender information:

    .. math::

        G = V_d W

    3. Subtract gender information from non gender-definition words:

    .. math::

        V_n' = V_n - G

    This method is binary because it only allows 2 classes of the same bias
    criterion, such as male or female.
    For a multiclass debias (such as for Latinos, Asians and Whites), it is
    recommended to visit MulticlassHardDebias class.

    .. warning::

        This method requires three times the memory of the model when a copy
        of the model is made and two times the memory of the model if not.
        Make sure this much memory is available.

    Examples
    --------
    The following example shows how to execute Half Sibling Regression Debias
    method that reduces bias in a word embedding model:

    >>> from wefe.debias.half_sibling_regression import HalfSiblingRegression
    >>> from wefe.utils import load_test_model
    >>> from wefe.datasets import fetch_debiaswe
    >>>
    >>> # load the model (in this case, the test model included in wefe)
    >>> model = load_test_model()
    >>> # load gender specific words, in this case the ones included in wefe
    >>> debiaswe_wordsets = fetch_debiaswe()
    >>> gender_specific = debiaswe_wordsets["gender_specific"]
    >>>
    >>> # instance and fit the method
    >>> hsr = HalfSiblingRegression().fit(
    ...     model=model, definitional_words=gender_specific
    ... )
    >>> # execute the debias on the words not included in the gender definition set
    >>> debiased_model = hsr.transform(model = model)
    Copy argument is True. Transform will attempt to create a copy of the original
    model. This may fail due to lack of memory.
    Model copy created successfully.
    >>>
    >>>
    >>> # if you want the debias over a specific set of words you can
    >>> # include them in the target parameter
    >>> debiased_model = hsr.transform(
    ...     model=model, target=["doctor", "nurse", "programmer"]
    ... )
    Copy argument is True. Transform will attempt to create a copy of the original
    model. This may fail due to lack of memory.
    Model copy created successfully.
    >>>
    >>> # if you want to exclude a set of words from the debias process
    >>> # you can include them in the ignore parameter
    >>> debiased_model = hsr.transform(
    ...     model=model, ignore=["dress", "beard", "niece", "nephew"]
    ... )
    Copy argument is True. Transform will attempt to create a copy of the original
    model. This may fail due to lack of memory.
    Model copy created successfully.

    References
    ----------
    | [1]: Yang, Zekun and Juan Feng: A causal inference method for reducing
    |      gender bias in word embedding relations.
    |      In Proceedings of the AAAI Conference on Artificial Intelligence,
    |      volume 34, pages 9434–9441, 2020
    | [2]: https://github.com/KunkunYang/GenderBiasHSR
    | [3]: Bernhard Schölkopf, David W. Hogg, Dun Wang,
    |      Daniel Foreman-Mackey, Dominik Janzing, Carl-Johann Simon-Gabriel,
    |      and Jonas Peters.
    |      Modeling confounding by half-sibling regression.
    |      Proceedings of the National Academy of Sciences, 113(27):7391–7398, 2016

    """

    name = "Half Sibling Regression"
    short_name = "HSR"

    def __init__(
        self,
        verbose: bool = False,
        criterion_name: Optional[str] = None,
    ) -> None:
        """Initialize a Half Sibling Regression Debias instance.

        Parameters
        ----------
        verbose : bool, optional
            True will print informative messages about the debiasing process,
            by default False.
        criterion_name : Optional[str], optional
            The name of the criterion for which the debias is being executed,
            e.g., 'Gender'. It is used to build the name of the model returned
            by transform, by default None.

        Raises
        ------
        TypeError
            If verbose is not a bool.
        ValueError
            If criterion_name is neither None nor a str.

        """
        # check verbose
        if not isinstance(verbose, bool):
            raise TypeError(f"verbose should be a bool, got {verbose}.")

        self.verbose = verbose

        if criterion_name is None or isinstance(criterion_name, str):
            self.criterion_name_ = criterion_name
        else:
            raise ValueError(f"criterion_name should be str, got: {criterion_name}")

    def _get_bias_vectors(
        self, model: WordEmbeddingModel, bias_definitional_words: List[str]
    ) -> np.ndarray:
        """Stack the embeddings of the definitional words found in the model.

        Words that are not in the model are skipped. The result has one row
        per word that was found.
        """
        vectors = [model[word] for word in bias_definitional_words if word in model]
        return np.asarray(vectors)

    def _get_non_bias_dict(
        self, model: WordEmbeddingModel, non_bias: List[str]
    ) -> Dict[str, np.ndarray]:
        """Return the word -> embedding mapping of the non-bias words."""
        dictionary = get_embeddings_from_tuples(
            model=model, sets=[non_bias], sets_name="non_bias", normalize=False
        )
        return dictionary[0]

    def _compute_weigth_matrix(
        self, bias_vectors: np.ndarray, non_bias_vectors: np.ndarray, alpha: float
    ) -> np.ndarray:
        """Compute the Ridge Regression weights W = (Vd^T Vd + alpha I)^-1 Vd^T Vn.

        Both matrices are expected with one column per word (as passed by
        fit).
        """
        a = bias_vectors.T @ bias_vectors + alpha * np.eye(bias_vectors.shape[1])
        b = bias_vectors.T @ non_bias_vectors
        # Solve the linear system instead of forming the explicit inverse:
        # same result, cheaper and numerically more stable.
        weight_matrix = np.linalg.solve(a, b)
        return weight_matrix

    def _compute_bias_information(
        self, bias_vectors: np.ndarray, weight_matrix: np.ndarray
    ) -> np.ndarray:
        """Compute the approximated bias information G = Vd W."""
        bias_information = bias_vectors @ weight_matrix
        return bias_information

    def _subtract_bias_information(
        self, non_bias_vectors: np.ndarray, bias_information: np.ndarray
    ) -> np.ndarray:
        """Return the non-bias vectors with the bias information removed."""
        debiased_vectors = non_bias_vectors - bias_information
        return debiased_vectors

    def _get_indexes(
        self,
        model: WordEmbeddingModel,
        target: List[str],
        non_bias: List[str],
    ) -> List[int]:
        """Return the positions in non_bias of the usable target words.

        A target word is usable when it is present both in the model and in
        non_bias; any other word is skipped. Positions are returned in the
        order of target.
        """
        # Build the word -> position map once so each lookup is O(1) instead
        # of a linear `list.index` scan per target word.
        positions: Dict[str, int] = {}
        for index, word in enumerate(non_bias):
            positions.setdefault(word, index)
        return [
            positions[word]
            for word in target
            if word in model and word in positions
        ]

    def fit(
        self,
        model: WordEmbeddingModel,
        definitional_words: List[str],
        alpha: float = 60,
    ) -> BaseDebias:
        """Compute the weight matrix and the bias information.

        Parameters
        ----------
        model: WordEmbeddingModel
            The word embedding model to debias.
        definitional_words: List[str]
            List of strings. This list contains words that embody bias
            information by definition.
        alpha: float
            Ridge Regression constant. By default 60.

        Returns
        -------
        BaseDebias
            The debias method fitted.

        Raises
        ------
        ValueError
            If none of the definitional words is present in the model.

        """
        bias_vectors = self._get_bias_vectors(model, definitional_words)
        if bias_vectors.size == 0:
            # Without at least one definitional embedding the regression is
            # undefined; fail with a clear message before touching any state.
            raise ValueError(
                "None of the definitional words were found in the model; "
                "the bias information cannot be computed."
            )

        self.bias_definitional_words = definitional_words
        self.non_bias = list(
            set(model.vocab.keys()) - set(self.bias_definitional_words)
        )
        self.alpha = alpha

        # Work with one column per word from here on.
        bias_definitional_words_vectors = bias_vectors.T
        self.non_bias_dict = self._get_non_bias_dict(model, self.non_bias)

        # ------------------------------------------------------------------------------
        # Compute the weight matrix.
        if self.verbose:
            print("Computing the weight matrix.")
        weigth_matrix = self._compute_weigth_matrix(
            bias_definitional_words_vectors,
            np.asarray(list(self.non_bias_dict.values())).T,
            alpha=self.alpha,
        )

        # ------------------------------------------------------------------------------
        # Compute the approximated bias information. Its columns follow the
        # iteration order of self.non_bias_dict.
        if self.verbose:
            print("Computing bias information")
        self.bias_information = self._compute_bias_information(
            bias_definitional_words_vectors, weigth_matrix
        )

        return self

    def transform(
        self,
        model: WordEmbeddingModel,
        target: Optional[List[str]] = None,
        ignore: Optional[List[str]] = None,
        copy: bool = True,
    ) -> WordEmbeddingModel:
        """Subtract the gender information from vectors.

        The fitted state of the instance is not modified, so transform can be
        called several times (with different target / ignore sets) after a
        single fit.

        Parameters
        ----------
        model : WordEmbeddingModel
            The word embedding model to mitigate.
        target : Optional[List[str]], optional
            If a set of words is specified in target, the debias method will be
            performed only on the word embeddings of this set. If `None` is
            provided, the debias will be performed on all non gender specific
            words (except those specified in ignore). Target words must not be
            included in the gender specific set; target words that are not in
            the model or not among the fitted non gender specific words are
            skipped. By default `None`.
        ignore : Optional[List[str]], optional
            If target is `None` and a set of words is specified in ignore, the
            debias method will perform the debias in all non gender specific
            words except those specified in this set, by default `None`.
        copy : bool, optional
            If `True`, the debias will be performed on a copy of the model.
            If `False`, the debias will be applied on the same model delivered,
            causing its vectors to mutate.
            **WARNING:** Setting copy with `True` requires RAM at least 2x of
            the size of the model, otherwise the execution of the debias may
            raise to `MemoryError`, by default True.

        Returns
        -------
        WordEmbeddingModel
            The debiased embedding model.

        """
        # check if the following attributes exist in the object.
        check_is_fitted(
            self,
            [
                "bias_definitional_words",
                "non_bias",
                "alpha",
                "non_bias_dict",
                "bias_information",
            ],
        )

        if self.verbose:
            print(f"Executing Half Sibling Debias on {model.name}")

        # -------------------------------------------------------------------
        # Copy
        if copy:
            print(
                "Copy argument is True. Transform will attempt to create a copy "
                "of the original model. This may fail due to lack of memory."
            )
            model = deepcopy(model)
            print("Model copy created successfully.")
        else:
            print(
                "copy argument is False. The execution of this method will mutate "
                "the original model."
            )

        # -------------------------------------------------------------------
        # Subtract bias information from vectors:
        if self.verbose:
            print("Subtracting bias information.")

        non_bias_words = list(self.non_bias_dict.keys())

        # The result is kept in a local mapping: overwriting
        # self.non_bias_dict would corrupt the fitted state and make later
        # transform calls debias already-debiased (or misaligned) vectors.
        debiased: Dict[str, np.ndarray] = {}

        # if target or ignore are specified the debias is applied only in the
        # columns corresponding to those words embeddings
        if target or ignore:
            if target:
                candidates = target
            else:
                ignored = set(ignore)
                candidates = [word for word in non_bias_words if word not in ignored]

            # Keep only the words that have a fitted column and exist in the
            # model, so words, indexes and vectors stay aligned.
            selected = [
                word
                for word in candidates
                if word in self.non_bias_dict and word in model
            ]

            if selected:
                indexes = self._get_indexes(model, selected, non_bias_words)
                bias_info = self.bias_information[:, indexes]
                vectors = np.asarray(
                    [self.non_bias_dict[word] for word in selected]
                ).T
                debiased_vectors = self._subtract_bias_information(
                    vectors, bias_info
                ).T
                debiased = dict(zip(selected, debiased_vectors))

        # if target and ignores are not provided the debias is applied to
        # all non bias vectors
        else:
            vectors = np.asarray(list(self.non_bias_dict.values())).T
            debiased_vectors = self._subtract_bias_information(
                vectors, self.bias_information
            ).T
            debiased = dict(zip(non_bias_words, debiased_vectors))

        if self.verbose:
            print("Updating debiased vectors")

        # -------------------------------------------------------------------
        # update the model with new vectors
        for word, vector in tqdm(debiased.items()):
            model.update(word, vector.astype(model.wv.vectors.dtype))

        # -------------------------------------------------------------------
        # Rename the resulting model
        if self.criterion_name_ is None:
            new_model_name = f"{model.name}_debiased"
        else:
            new_model_name = f"{model.name}_{self.criterion_name_}_debiased"
        model.name = new_model_name

        if self.verbose:
            print("Done!")

        return model