Source code for wefe.metrics.MAC

"""Mean Average Cosine Similarity (MAC) implementation."""
from typing import Any, Callable, Dict, List, Union

import numpy as np
from scipy.spatial import distance
from wefe.metrics.base_metric import BaseMetric
from wefe.preprocessing import get_embeddings_from_query
from wefe.query import Query
from wefe.word_embedding_model import WordEmbeddingModel


class MAC(BaseMetric):
    """Mean Average Cosine Similarity (MAC).

    The general steps of the test are as follows [1].

    1. Embed all target and attribute words.
    2. For each target set:

       * For each word embedding in the target set:

         * For each attribute set:

           * Calculate the cosine similarity of the target embedding and
             each attribute embedding of the set.
           * Calculate the mean of the cosine similarities and save it in
             an array.

    3. Average all the mean cosine similarities and return the calculated
       score.

    The closer the value is to 1, the less biased the query will be.

    References
    ----------
    | [1]: Thomas Manzini, Lim Yao Chong, Alan W Black, and Yulia Tsvetkov.
    |      Black is to Criminal as Caucasian is to Police: Detecting and
    |      Removing Multiclass Bias in Word Embeddings. In Proceedings of
    |      the 2019 Conference of the North American Chapter of the
    |      Association for Computational Linguistics: Human Language
    |      Technologies, Volume 1 (Long and Short Papers), pages 615–621,
    |      Minneapolis, Minnesota, June 2019. Association for Computational
    |      Linguistics.
    | [2]: https://github.com/TManzini/DebiasMulticlassWordEmbedding/blob/master/Debiasing/evalBias.py
    """

    metric_template = ("n", "n")
    metric_name = "Mean Average Cosine Similarity"
    metric_short_name = "MAC"

    def _calc_s(self, t: np.ndarray, A_j: np.ndarray) -> np.number:
        """Calculate the mean cosine similarity of a target embedding and an
        attribute set.

        Parameters
        ----------
        t : np.ndarray
            A target embedding.
        A_j : np.ndarray
            An attribute embedding set.

        Returns
        -------
        float
            The mean cosine similarity between the target embedding and the
            attribute set.
        """
        # note: scipy's distance.cosine returns the cosine distance
        # (1 - cosine similarity), as in the reference implementation [2].
        return np.mean([distance.cosine(t, a_i) for a_i in A_j])

    def _calc_mac(self, T, A):
        # dict that will store the s scores by target word and attribute set.
        targets_eval = {}
        # list that will store all the s scores.
        targets_eval_scores = []

        # T_i -> current target set
        # t_i -> current target embedding
        # A_j -> current attribute set

        # for each target set
        for T_i_name, T_i_vecs in T.items():
            targets_eval[T_i_name] = {}
            # for each embedding in the current target set
            for t_i_word, t_i_vec in T_i_vecs.items():
                targets_eval[T_i_name][t_i_word] = {}
                # for each attribute set
                for A_j_name, A_j_vecs in A.items():
                    # calculate the s score
                    score = self._calc_s(t_i_vec, A_j_vecs.values())
                    # add the score to the variables that will store it
                    targets_eval[T_i_name][t_i_word][A_j_name] = score
                    targets_eval_scores.append(score)

        # obtain MAC by taking the mean over the s scores in targets_eval_scores.
        mac_score = np.mean(np.array(targets_eval_scores))

        return mac_score, targets_eval
    def run_query(
        self,
        query: Query,
        model: WordEmbeddingModel,
        lost_vocabulary_threshold: float = 0.2,
        preprocessors: List[Dict[str, Union[str, bool, Callable]]] = [{}],
        strategy: str = "first",
        normalize: bool = False,
        warn_not_found_words: bool = False,
        *args: Any,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Calculate the MAC metric over the provided parameters.

        Parameters
        ----------
        query : Query
            A Query object that contains the target and attribute word sets
            to be tested.

        model : WordEmbeddingModel
            A word embedding model.

        lost_vocabulary_threshold : float, optional
            Specifies the proportional limit of words that any set of the
            query is allowed to lose when transforming its words into
            embeddings. If any set loses proportionally more words than this
            limit, the result values will be np.nan, by default 0.2.

        preprocessors : List[Dict[str, Union[str, bool, Callable]]]
            A list with preprocessor options.

            A ``preprocessor`` is a dictionary that specifies what
            processing(s) are performed on each word before it is looked up
            in the model vocabulary. For example, the ``preprocessor``
            ``{'lowercase': True, 'strip_accents': True}`` lowercases each
            word and removes its accents before searching for it in the
            model vocabulary. Note that an empty dictionary ``{}`` indicates
            that no preprocessing is done.

            The possible options for a preprocessor are:

            * ``lowercase``: ``bool``. Indicates that the words are
              transformed to lowercase.
            * ``uppercase``: ``bool``. Indicates that the words are
              transformed to uppercase.
            * ``titlecase``: ``bool``. Indicates that the words are
              transformed to titlecase.
            * ``strip_accents``: ``bool``, ``{'ascii', 'unicode'}``:
              Specifies that the accents of the words are eliminated. The
              stripping type can be specified. True uses 'unicode' by
              default.
            * ``preprocessor``: ``Callable``. It receives a function that
              operates on each word. In the case of specifying a function,
              it overrides the default preprocessor (i.e., the previous
              options stop working).

            A list of preprocessor options allows you to search for several
            variants of the words in the model. For example, the
            preprocessors ``[{}, {"lowercase": True, "strip_accents": True}]``
            first search for the original words in the vocabulary of the
            model. For those words that are not found, ``{"lowercase": True,
            "strip_accents": True}`` is applied, and the transformed words
            are then searched in the model vocabulary.

        strategy : str, optional
            The strategy indicates how the preprocessed words will be used:
            'first' will include only the first transformed word found;
            'all' will include all transformed words found, by default
            "first".

        normalize : bool, optional
            True indicates that embeddings will be normalized, by default
            False.

        warn_not_found_words : bool, optional
            Specifies if the function will warn (in the logger) the words
            that were not found in the model's vocabulary, by default False.

        Returns
        -------
        Dict[str, Any]
            A dictionary with the query name, the resulting score of the
            metric, and a dictionary with the mean cosine distance of each
            target word with respect to each attribute set.

        Examples
        --------
        >>> from wefe.metrics import MAC
        >>> from wefe.query import Query
        >>> from wefe.utils import load_test_model
        >>>
        >>> query = Query(
        ...     target_sets=[
        ...         ["female", "woman", "girl", "sister", "she", "her", "hers",
        ...          "daughter"],
        ...         ["male", "man", "boy", "brother", "he", "him", "his", "son"],
        ...     ],
        ...     attribute_sets=[
        ...         ["home", "parents", "children", "family", "cousins",
        ...          "marriage", "wedding", "relatives"],
        ...         ["executive", "management", "professional", "corporation",
        ...          "salary", "office", "business", "career"],
        ...     ],
        ...     target_sets_names=["Female terms", "Male Terms"],
        ...     attribute_sets_names=["Family", "Careers"],
        ... )
        >>>
        >>> # load the model (in this case, the test model included in wefe)
        >>> model = load_test_model()
        >>>
        >>> # instance the metric and run the query
        >>> MAC().run_query(query, model)  # doctest: +SKIP
        {'query_name': 'Female terms and Male Terms wrt Family and Careers',
         'result': 0.8416415235615204,
         'mac': 0.8416415235615204,
         'targets_eval': {
             'Female terms': {
                 'female': {'Family': 0.9185737599618733, 'Careers': 0.916069650076679},
                 'woman': {'Family': 0.752434104681015, 'Careers': 0.9377805145923048},
                 'girl': {'Family': 0.707457959651947, 'Careers': 0.9867974997032434},
                 'sister': {'Family': 0.5973392464220524, 'Careers': 0.9482253392925486},
                 'she': {'Family': 0.7872791914269328, 'Careers': 0.9161583095556125},
                 'her': {'Family': 0.7883057091385126, 'Careers': 0.9237247597193345},
                 'hers': {'Family': 0.7385367527604103, 'Careers': 0.9480051446007565},
                 'daughter': {'Family': 0.5472579970955849, 'Careers': 0.9277344475267455}},
             'Male Terms': {
                 'male': {'Family': 0.8735092766582966, 'Careers': 0.9468009045813233},
                 'man': {'Family': 0.8249392118304968, 'Careers': 0.9350165261421353},
                 'boy': {'Family': 0.7106057899072766, 'Careers': 0.9879048476286698},
                 'brother': {'Family': 0.6280269809067249, 'Careers': 0.9477180293761194},
                 'he': {'Family': 0.8693044614046812, 'Careers': 0.8771287016716087},
                 'him': {'Family': 0.8230192996561527, 'Careers': 0.888683641096577},
                 'his': {'Family': 0.8876195731572807, 'Careers': 0.8920885202242061},
                 'son': {'Family': 0.5764635019004345, 'Careers': 0.9220191016211174}}}}
        """
        # check the types of the provided arguments (only the defaults).
        self._check_input(query, model, locals())

        # transform query word sets into embeddings
        embeddings = get_embeddings_from_query(
            model=model,
            query=query,
            lost_vocabulary_threshold=lost_vocabulary_threshold,
            preprocessors=preprocessors,
            strategy=strategy,
            normalize=normalize,
            warn_not_found_words=warn_not_found_words,
        )

        # if any set has fewer words than the allowed limit,
        # return the default value (nan)
        if embeddings is None:
            return {
                "query_name": query.query_name,
                "result": np.nan,
                "mac": np.nan,
                "targets_eval": None,
            }

        # get the target and attribute sets transformed into embeddings.
        target_sets, attribute_sets = embeddings

        mac, targets_eval = self._calc_mac(target_sets, attribute_sets)

        return {
            "query_name": query.query_name,
            "result": mac,
            "mac": mac,
            "targets_eval": targets_eval,
        }
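For intuition, here is a minimal sketch (not part of the module above) that reproduces the arithmetic of ``_calc_s`` and ``_calc_mac`` by hand on made-up 2-dimensional vectors. It assumes only numpy and scipy; note that ``scipy.spatial.distance.cosine`` returns the cosine distance (1 - cosine similarity), which is the quantity the implementation averages, as in the reference implementation [2].

import numpy as np
from scipy.spatial import distance

# toy embeddings: one target set with two words, one attribute set with two words
T = {"t1": np.array([1.0, 0.0]), "t2": np.array([0.0, 1.0])}
A = {"a1": np.array([1.0, 1.0]), "a2": np.array([1.0, 0.0])}

# s(t, A): mean cosine distance between one target embedding and the attribute set
s_scores = [
    np.mean([distance.cosine(t, a) for a in A.values()]) for t in T.values()
]
# s(t1, A) = (0.2929 + 0.0) / 2 ≈ 0.1464
# s(t2, A) = (0.2929 + 1.0) / 2 ≈ 0.6464

# MAC: the mean over all s scores
mac = np.mean(s_scores)
print(round(mac, 4))  # 0.3964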
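And a short usage sketch of ``run_query`` with the vocabulary fallback described in the ``preprocessors`` documentation. The word sets here are illustrative, and the snippet assumes the test model bundled with wefe (as in the docstring example): words not found verbatim in the vocabulary are retried lowercased.

from wefe.metrics import MAC
from wefe.query import Query
from wefe.utils import load_test_model

model = load_test_model()  # the test model included in wefe

query = Query(
    target_sets=[["She", "Her"], ["He", "Him"]],
    attribute_sets=[["home", "family"], ["office", "career"]],
    target_sets_names=["Female terms", "Male terms"],
    attribute_sets_names=["Family", "Careers"],
)

# look the words up as-is first; retry any not-found words lowercased
result = MAC().run_query(query, model, preprocessors=[{}, {"lowercase": True}])

print(result["query_name"])  # e.g. 'Female terms and Male terms wrt Family and Careers'
print(result["mac"])         # overall MAC score (same value as result["result"])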