Source code for ynlu.sdk.evaluation.intent_topk_accuracy_score
from typing import List, Dict
def single__intent_topk_accuracy_score(
    intent_prediction: List[Dict[str, str]],
    y_true: List[str],
    k: int = 1,
) -> float:
    """Compute the accuracy of a single utterance with multi-intents.

    Accuracy of a single utterance is defined as the proportion of
    correctly predicted labels to the total number (predicted and true)
    of distinct labels, i.e. the Jaccard index between the set of top-k
    predicted intents and the set of true intents. It can be formulated as

    .. math::

        \\text{Accuracy of single utterance}=\\frac{|\\text{pred}_i \\cap \\text{true}_i|}{|\\text{pred}_i \\cup \\text{true}_i|}

    Args:
        intent_prediction (a list of dictionaries):
            A sorted intent prediction (by score) of a single utterance.
            Only the ``"intent"`` key of each dictionary is read here.
        y_true (a list of strings):
            The corresponding true intents of that utterance.
            Note that there can be more than one intent.
        k (an integer):
            The number of leading predictions taken into account when
            computing the accuracy.

    Returns:
        accuracy score (a float):
            Accuracy of a single utterance given the top k predictions.
            If both the top-k predictions and ``y_true`` are empty the
            two sets agree exactly, so ``1.0`` is returned instead of
            dividing by zero.

    Examples:
        >>> intent_prediction, _ = model.predict("I like apple.")
        >>> print(intent_prediction)
        [
            {"intent": "blabla", "score": 0.7},
            {"intent": "ohoh", "score": 0.2},
            {"intent": "preference", "score": 0.1},
        ]
        >>> accuracy = single__intent_topk_accuracy_score(
        ...     intent_prediction=intent_prediction,
        ...     y_true=["preference", "ohoh", "YY"],
        ...     k=2,
        ... )
        >>> print(accuracy)
        0.25
    """  # noqa
    # Duplicates must not be counted twice, so both sides are compared as sets.
    top_k_intents = {pred["intent"] for pred in intent_prediction[:k]}
    true_intents = set(y_true)

    all_intents = true_intents | top_k_intents
    if not all_intents:
        # Nothing expected and nothing predicted: a perfect (empty) match.
        return 1.0
    return len(true_intents & top_k_intents) / len(all_intents)
def intent_topk_accuracy_score(
    intent_predictions: List[List[Dict[str, str]]],
    y_trues: List[List[str]],
    k: int = 1,
) -> float:
    """Compute the accuracy of all utterances with multi-intents.

    Please take a look at function **single__intent_topk_accuracy_score** first.
    This function is JUST a batch version of that. It sends every
    (prediction, true labels) pair to that function, then collects and
    averages the outputs.

    .. math::

        \\text{Accuracy of all utterances}=\\frac{1}{n}\\sum_{i=1}^{n}\\frac{|\\text{pred}_i \\cap \\text{true}_i|}{|\\text{pred}_i \\cup \\text{true}_i|}

    Args:
        intent_predictions (a list of lists of dictionaries):
            One sorted intent prediction (by score) per utterance.
        y_trues (a list of lists of strings):
            The true intents of each utterance, in the same order as
            ``intent_predictions``.
        k (an integer):
            The number of leading predictions per utterance taken into
            account when computing the accuracy.

    Returns:
        accuracy score (a float):
            The mean of the per-utterance accuracies. An empty batch
            yields ``0.0`` rather than a division by zero.

    Raises:
        ValueError: If ``intent_predictions`` and ``y_trues`` do not have
            the same length.
    """  # noqa
    n_utterances = len(intent_predictions)
    if n_utterances != len(y_trues):
        raise ValueError(
            "Intent predictions and labels must have same amount!!!",
        )
    if n_utterances == 0:
        # There is nothing to average over; avoid ZeroDivisionError.
        return 0.0

    total_score = sum(
        single__intent_topk_accuracy_score(
            intent_prediction=y_pred,
            y_true=y_true,
            k=k,
        )
        for y_pred, y_true in zip(intent_predictions, y_trues)
    )
    return total_score / n_utterances