"""Mean Average Cosine Similarity (MAC) implementation."""
from typing import Any, Callable, Dict, List, Union
import numpy as np
from scipy.spatial import distance
from wefe.metrics.base_metric import BaseMetric
from wefe.preprocessing import get_embeddings_from_query
from wefe.query import Query
from wefe.word_embedding_model import WordEmbeddingModel
class MAC(BaseMetric):
"""Mean Average Cosine Similarity (MAC).
The general steps of the test are as follows [1].
1. Embed all target and attribute words.
2. For each target set:
* For each word embedding in the target set:
* For each attribute set:
* Calculate the cosine similarity of the target embedding and
each attribute embedding of the set.
* Calculate the mean of the cosine similarities and store it in an array.
3. Average all the mean cosine similarities and return the calculated score.
The closer the value is to 1, the less biased the query will be.
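For illustration, here is a by-hand computation of a single s score on toy
2-dimensional vectors (illustrative values only). Note that, as in the code
below, each cosine value comes from ``scipy.spatial.distance.cosine``, which
returns the cosine distance ``1 - cos(u, v)`` rather than the similarity
itself:

>>> import numpy as np
>>> from scipy.spatial import distance
>>> t = np.array([1.0, 0.0])  # a target embedding
>>> A_j = [np.array([1.0, 0.0]), np.array([0.0, 1.0])]  # an attribute set
>>> s = np.mean([distance.cosine(t, a) for a in A_j])  # (0.0 + 1.0) / 2
>>> float(s)
0.5

The MAC score is then the mean over all such s scores.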
References
----------
| [1]: Thomas Manzini, Lim Yao Chong, Alan W Black, and Yulia Tsvetkov.
Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass
Bias in Word Embeddings.
In Proceedings of the 2019 Conference of the North American Chapter of the
Association for Computational Linguistics:
Human Language Technologies, Volume 1 (Long and Short Papers), pages 615–621,
Minneapolis, Minnesota, June 2019. Association for Computational Linguistics.
| [2]: https://github.com/TManzini/DebiasMulticlassWordEmbedding/blob/master/Debiasing/evalBias.py
"""
metric_template = ("n", "n")
metric_name = "Mean Average Cosine Similarity"
metric_short_name = "MAC"
def _calc_s(self, t: np.ndarray, A_j: np.ndarray) -> np.number:
"""Calculate the mean cos similarity of a target embedding and a attribute set.
Parameters
----------
t : np.ndarray
A target embedding
A_j : np.ndarray
An attribute embedding set.
Returns
-------
float
The calculated mean cosine similarity between the target embedding and
the attribute set.
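
Examples
--------
A toy call on 2-dimensional vectors (illustrative values; orthogonal
vectors give 1.0 here, since ``scipy.spatial.distance.cosine`` returns
the cosine distance ``1 - cos(u, v)``):

>>> import numpy as np
>>> float(MAC()._calc_s(np.array([1.0, 0.0]), [np.array([0.0, 1.0])]))
1.0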
"""
return np.mean([distance.cosine(t, a_i) for a_i in A_j])
def _calc_mac(self, T, A):
"""Calculate the MAC score and the s scores of each target word.

T and A map each target/attribute set name to a ``{word: embedding}`` dict.
"""
# dict that will store the s scores by target set, target word and attribute set.
targets_eval = {}
# list that will store all the s scores.
targets_eval_scores = []
# T_i -> Current target set
# t_i -> Current target embedding
# A_j -> Current attribute set
# for each target set
for T_i_name, T_i_vecs in T.items():
targets_eval[T_i_name] = {}
# for each embedding in the current target set
for t_i_word, t_i_vec in T_i_vecs.items():
targets_eval[T_i_name][t_i_word] = {}
# for each attribute set
for A_j_name, A_j_vecs in A.items():
# calculate s score
score = self._calc_s(t_i_vec, A_j_vecs.values())
# add the score to the variables that will store it
targets_eval[T_i_name][t_i_word][A_j_name] = score
targets_eval_scores.append(score)
# obtain mac by calculating the mean over the s scores in targets_eval_scores.
mac_score = np.mean(np.array(targets_eval_scores))
return mac_score, targets_eval
def run_query(
self,
query: Query,
model: WordEmbeddingModel,
lost_vocabulary_threshold: float = 0.2,
preprocessors: List[Dict[str, Union[str, bool, Callable]]] = [{}],
strategy: str = "first",
normalize: bool = False,
warn_not_found_words: bool = False,
*args: Any,
**kwargs: Any
) -> Dict[str, Any]:
"""Calculate the MAC metric over the provided parameters.
Parameters
----------
query : Query
A Query object that contains the target and attribute word sets
to be tested.
model : WordEmbeddingModel
A word embedding model.
preprocessors : List[Dict[str, Union[str, bool, Callable]]]
A list with preprocessor options.
A ``preprocessor`` is a dictionary that specifies what processing(s) are
performed on each word before it is looked up in the model vocabulary.
For example, the ``preprocessor``
``{'lowercase': True, 'strip_accents': True}`` allows you to lowercase
and remove accents from each word before searching for them in the
model vocabulary. Note that an empty dictionary ``{}`` indicates that no
preprocessing is done.
The possible options for a preprocessor are:
* ``lowercase``: ``bool``. Indicates that the words are transformed to
lowercase.
* ``uppercase``: ``bool``. Indicates that the words are transformed to
uppercase.
* ``titlecase``: ``bool``. Indicates that the words are transformed to
titlecase.
* ``strip_accents``: ``bool``, ``{'ascii', 'unicode'}``: Specifies that
the accents of the words are stripped. The stripping type can be
specified; ``True`` uses ``'unicode'`` by default.
* ``preprocessor``: ``Callable``. It receives a function that operates
on each word. In the case of specifying a function, it overrides the
default preprocessor (i.e., the previous options stop working).
A list of preprocessor options allows you to search for several
variants of the words in the model. For example, the preprocessors
``[{}, {"lowercase": True, "strip_accents": True}]`` first search for
the original words in the vocabulary of the model (``{}``). For those
words that are not found, ``{"lowercase": True, "strip_accents": True}``
is applied, and the transformed words are then searched in the model
vocabulary.
strategy : str, optional
The strategy indicates how the preprocessed words will be used: 'first'
will include only the first transformed word found, while 'all' will
include all transformed words found. By default, "first".
normalize : bool, optional
True indicates that the embeddings will be normalized, by default False.
warn_not_found_words : bool, optional
Specifies if the function will warn (in the logger)
the words that were not found in the model's vocabulary, by default False.
Returns
-------
Dict[str, Any]
A dictionary with the query name, the resulting score of the metric,
and a dictionary with the distances of each attribute word
with respect to the target sets means.
Examples
--------
>>> from wefe.metrics import MAC
>>> from wefe.query import Query
>>> from wefe.utils import load_test_model
>>>
>>> query = Query(
... target_sets=[
... ["female", "woman", "girl", "sister", "she", "her", "hers",
... "daughter"],
... ["male", "man", "boy", "brother", "he", "him", "his", "son"],
... ],
... attribute_sets=[
... ["home", "parents", "children", "family", "cousins", "marriage",
... "wedding", "relatives",
... ],
... ["executive", "management", "professional", "corporation", "salary",
... "office", "business", "career",
... ],
... ],
... target_sets_names=["Female terms", "Male Terms"],
... attribute_sets_names=["Family", "Careers"],
... )
>>>
>>> # load the model (in this case, the test model included in wefe)
>>> model = load_test_model()
>>>
>>> # instance the metric and run the query
>>> MAC().run_query(query, model) # doctest: +SKIP
{'query_name': 'Female terms and Male Terms wrt Family and Careers',
'result': 0.8416415235615204,
'mac': 0.8416415235615204,
'targets_eval': {'Female terms': {'female': {'Family': 0.9185737599618733,
'Careers': 0.916069650076679},
'woman': {'Family': 0.752434104681015, 'Careers': 0.9377805145923048},
'girl': {'Family': 0.707457959651947, 'Careers': 0.9867974997032434},
'sister': {'Family': 0.5973392464220524, 'Careers': 0.9482253392925486},
'she': {'Family': 0.7872791914269328, 'Careers': 0.9161583095556125},
'her': {'Family': 0.7883057091385126, 'Careers': 0.9237247597193345},
'hers': {'Family': 0.7385367527604103, 'Careers': 0.9480051446007565},
'daughter': {'Family': 0.5472579970955849, 'Careers': 0.9277344475267455}},
'Male Terms': {'male': {'Family': 0.8735092766582966,
'Careers': 0.9468009045813233},
'man': {'Family': 0.8249392118304968, 'Careers': 0.9350165261421353},
'boy': {'Family': 0.7106057899072766, 'Careers': 0.9879048476286698},
'brother': {'Family': 0.6280269809067249, 'Careers': 0.9477180293761194},
'he': {'Family': 0.8693044614046812, 'Careers': 0.8771287016716087},
'him': {'Family': 0.8230192996561527, 'Careers': 0.888683641096577},
'his': {'Family': 0.8876195731572807, 'Careers': 0.8920885202242061},
'son': {'Family': 0.5764635019004345, 'Careers': 0.9220191016211174}}}}
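
If some query words may not match the casing of the model's vocabulary,
a second preprocessor can be passed as a fallback (same query and model
as above; output omitted):

>>> MAC().run_query(
...     query, model, preprocessors=[{}, {"lowercase": True, "strip_accents": True}]
... ) # doctest: +SKIP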
"""
# check the types of the provided arguments (only the defaults).
self._check_input(query, model, locals())
# transform query word sets into embeddings
embeddings = get_embeddings_from_query(
model=model,
query=query,
lost_vocabulary_threshold=lost_vocabulary_threshold,
preprocessors=preprocessors,
strategy=strategy,
normalize=normalize,
warn_not_found_words=warn_not_found_words,
)
# if any word set lost more words than lost_vocabulary_threshold allows,
# return the default value (nan)
if embeddings is None:
return {
"query_name": query.query_name,
"result": np.nan,
"mac": np.nan,
"targets_eval": None,
}
# get the targets and attribute sets transformed into embeddings.
target_sets, attribute_sets = embeddings
mac, targets_eval = self._calc_mac(target_sets, attribute_sets)
return {
"query_name": query.query_name,
"result": mac,
"mac": mac,
"targets_eval": targets_eval,
}