# Source code for wsknn.evaluate.metrics

from typing import Dict, List, Tuple

import numpy as np
from wsknn.model.wsknn import WSKNN
from wsknn.evaluate.scores.scores import mrr_func, precision_func, recall_func
from wsknn.utils.errors import TooShortSessionException


def score_model(sessions: List,
                trained_model: WSKNN,
                k=0,
                skip_short_sessions=True,
                calc_mrr: bool = True,
                calc_precision: bool = True,
                calc_recall: bool = True,
                sliding_window: bool = False) -> Dict:
    """
    Function gets Precision@k, Recall@k and MRR@k.

    Parameters
    ----------
    sessions : List of sessions
        >>> [
        ...     [
        ...         [ sequence_of_items ],
        ...         [ sequence_of_timestamps ],
        ...         [ [OPTIONAL] sequence_of_event_type ]
        ...     ],
        ... ]

    trained_model : WSKNN
        The trained WSKNN model. Its ``n_of_recommendations`` attribute is overwritten in place when a non-zero
        ``k`` different from the current value is passed.

    k : int, default=0
        Number of top recommendations. A session must have at least ``k + 1`` items to be scored. Default is 0 and
        when it is set, then ``k`` is equal to the number of recommendations from a trained model. If ``k``
        differs from the model's number of recommendations then the latter is adjusted to ``k``.

    skip_short_sessions : bool, default=True
        Should the algorithm skip short sessions when calculating metrics or should it raise an error?

    calc_mrr : bool, default = True
        Should **MRR** be calculated?

    calc_precision : bool, default = True
        Should **precision** be calculated?

    calc_recall : bool, default = True
        Should **recall** be calculated?

    sliding_window : bool, default = False
        When True, every session is evaluated at several split points instead of a single one.

    Returns
    -------
    scores : Dict
        ``{'MRR': float, 'Precision': float, 'Recall': float}``. A metric that was switched off, or for which no
        session could be scored, is reported as ``nan``.

    Raises
    ------
    TooShortSessionException
        If ``skip_short_sessions`` is False and a session has ``k`` or fewer items.
    """
    mrrs = []
    precisions = []
    recalls = []

    k, trained_model = _set_number_of_recommendations(k, trained_model)

    for session in sessions:
        # Session is too short to make any valuable scoring (or an error is raised by the helper)
        if _should_skip_short_session(len(session[0]), k, skip_short_sessions):
            continue

        eval_items, predictions = _prepare_metrics_data(session, trained_model, sliding_window)

        for scored_recommendations, evaluation_items in zip(predictions, eval_items):
            # We are not interested in the weights, only in the recommended items
            recommendations = [x[0] for x in scored_recommendations]

            if calc_mrr:
                mrrs.append(mrr_func(recommendations, evaluation_items))

            if calc_precision:
                precisions.append(precision_func(recommendations, evaluation_items))

            if calc_recall:
                recalls.append(recall_func(recommendations, evaluation_items))

    # np.mean of an empty list returns nan but also emits RuntimeWarnings; return nan directly instead
    nan = float('nan')
    scores = {
        'MRR': float(np.mean(mrrs)) if mrrs else nan,
        'Precision': float(np.mean(precisions)) if precisions else nan,
        'Recall': float(np.mean(recalls)) if recalls else nan
    }

    return scores


def get_mean_reciprocal_rank(sessions: List,
                             trained_model: WSKNN,
                             k=0,
                             skip_short_sessions=True,
                             sliding_window=False) -> float:
    """
    The function calculates the mean reciprocal rank of a top ``k`` recommendations. Given session must be longer
    than ``k`` events.

    Parameters
    ----------
    sessions : List
        >>> [
        ...     [
        ...         [ sequence_of_items ],
        ...         [ sequence_of_timestamps ],
        ...         [ [OPTIONAL] sequence_of_event_type ]
        ...     ],
        ... ]

    trained_model : WSKNN
        The trained WSKNN model. Its ``n_of_recommendations`` attribute is overwritten in place when a non-zero
        ``k`` different from the current value is passed.

    k : int, default=0
        Number of top recommendations. A session must have at least ``k + 1`` items to calculate **MRR**. Default
        is 0 and when it is set, then ``k`` is equal to the number of recommendations from a trained model.
        If ``k`` differs from the model's number of recommendations then the latter is adjusted to ``k``.

    skip_short_sessions : bool, default=True
        Should the algorithm skip short sessions when calculating **MRR** or should it raise an error?

    sliding_window : bool, default = False
        When True, every session is evaluated at several split points instead of a single one.

    Returns
    -------
    mrr : float
        Mean Reciprocal Rank: the average of the partial **MRR** scores over all evaluated splits. ``nan`` is
        returned when nothing could be scored (no sessions, or every session was skipped).

    Raises
    ------
    TooShortSessionException
        If ``skip_short_sessions`` is False and a session has ``k`` or fewer items.
    """
    mrrs = []

    k, trained_model = _set_number_of_recommendations(k, trained_model)

    for session in sessions:
        # Session is too short to make any valuable scoring (or an error is raised by the helper)
        if _should_skip_short_session(len(session[0]), k, skip_short_sessions):
            continue

        eval_items, predictions = _prepare_metrics_data(session, trained_model, sliding_window)

        for scored_recommendations, evaluation_items in zip(predictions, eval_items):
            # We are not interested in the weights, only in the recommended items
            recommendations = [x[0] for x in scored_recommendations]
            mrrs.append(mrr_func(recommendations, evaluation_items))

    # np.mean of an empty list returns nan but also emits RuntimeWarnings; return nan directly instead
    if not mrrs:
        return float('nan')

    return float(np.mean(mrrs))


def get_precision(sessions: List,
                  trained_model: WSKNN,
                  k=0,
                  skip_short_sessions=True,
                  sliding_window=False) -> float:
    """
    The function calculates the precision score of a top ``k`` recommendations. Given session must be longer than
    ``k`` events.

    Parameters
    ----------
    sessions : List
        >>> [
        ...     [
        ...         [ sequence_of_items ],
        ...         [ sequence_of_timestamps ],
        ...         [ [OPTIONAL] sequence_of_event_type ]
        ...     ],
        ... ]

    trained_model : WSKNN
        The trained WSKNN model. Its ``n_of_recommendations`` attribute is overwritten in place when a non-zero
        ``k`` different from the current value is passed.

    k : int, default=0
        Number of top recommendations. A session must have at least ``k + 1`` items to calculate **precision**.
        Default is 0 and when it is set, then ``k`` is equal to the number of recommendations from a trained
        model. If ``k`` differs from the model's number of recommendations then the latter is adjusted to ``k``.

    skip_short_sessions : bool, default=True
        Should the algorithm skip short sessions when calculating **precision** or should it raise an error?

    sliding_window : bool, default = False
        When True, every session is evaluated at several split points instead of a single one.

    Returns
    -------
    precision : float
        Precision: the average of the partial **precision** scores over all evaluated splits. ``nan`` is
        returned when nothing could be scored (no sessions, or every session was skipped).

    Raises
    ------
    TooShortSessionException
        If ``skip_short_sessions`` is False and a session has ``k`` or fewer items.

    Notes
    -----
    Precision is defined as ``(no of recommendations that are relevant) / (number of items recommended)``.
    """
    precisions = []

    k, trained_model = _set_number_of_recommendations(k, trained_model)

    for session in sessions:
        # Session is too short to make any valuable scoring (or an error is raised by the helper)
        if _should_skip_short_session(len(session[0]), k, skip_short_sessions):
            continue

        eval_items, predictions = _prepare_metrics_data(session, trained_model, sliding_window)

        for scored_recommendations, evaluation_items in zip(predictions, eval_items):
            # We are not interested in the weights, only in the recommended items
            recommendations = [x[0] for x in scored_recommendations]
            precisions.append(precision_func(recommendations, evaluation_items))

    # np.mean of an empty list returns nan but also emits RuntimeWarnings; return nan directly instead
    if not precisions:
        return float('nan')

    return float(np.mean(precisions))


def get_recall(sessions: List,
               trained_model: WSKNN,
               k=0,
               skip_short_sessions=True,
               sliding_window=False) -> float:
    """
    The function calculates the **recall** score of a top ``k`` recommendations. Given session must be longer than
    ``k`` events.

    Parameters
    ----------
    sessions : List
        >>> [
        ...     [
        ...         [ sequence_of_items ],
        ...         [ sequence_of_timestamps ],
        ...         [ [OPTIONAL] sequence_of_event_type ]
        ...     ],
        ... ]

    trained_model : WSKNN
        The trained WSKNN model. Its ``n_of_recommendations`` attribute is overwritten in place when a non-zero
        ``k`` different from the current value is passed.

    k : int, default=0
        Number of top recommendations. A session must have at least ``k + 1`` items to calculate **recall**.
        Default is 0 and when it is set, then ``k`` is equal to the number of recommendations from a trained
        model. If ``k`` differs from the model's number of recommendations then the latter is adjusted to ``k``.

    skip_short_sessions : bool, default=True
        Should the algorithm skip short sessions when calculating **recall** or should it raise an error?

    sliding_window : bool, default = False
        When True, every session is evaluated at several split points instead of a single one.

    Returns
    -------
    recall : float
        The average of the partial Recall scores over all evaluated splits. ``nan`` is returned when nothing
        could be scored (no sessions, or every session was skipped).

    Raises
    ------
    TooShortSessionException
        If ``skip_short_sessions`` is False and a session has ``k`` or fewer items.

    Notes
    -----
    Recall is defined as (no of recommendations that are relevant) / (all relevant items for a user).
    """
    recalls = []

    k, trained_model = _set_number_of_recommendations(k, trained_model)

    for session in sessions:
        # Session is too short to make any valuable scoring (or an error is raised by the helper)
        if _should_skip_short_session(len(session[0]), k, skip_short_sessions):
            continue

        eval_items, predictions = _prepare_metrics_data(session, trained_model, sliding_window)

        for scored_recommendations, evaluation_items in zip(predictions, eval_items):
            # We are not interested in the weights, only in the recommended items
            recommendations = [x[0] for x in scored_recommendations]
            recalls.append(recall_func(recommendations, evaluation_items))

    # np.mean of an empty list returns nan but also emits RuntimeWarnings; return nan directly instead
    if not recalls:
        return float('nan')

    return float(np.mean(recalls))


def _prepare_metrics_data(session, trained_model, sliding_window):
    """
    Build the evaluation inputs for a single session.

    This is a thin wrapper that forwards its arguments to ``_get_test_eval_preds``.

    Parameters
    ----------
    session : Any
              Array or list with a session.

    trained_model : WSKNN
                    Model used to make predictions.

    sliding_window : bool
                     Forwarded unchanged; selects between a single split of the session and several splits.

    Returns
    -------
    : Tuple[List, List]
        (relevant items, recommended items)
    """
    relevant, recommended = _get_test_eval_preds(session, trained_model, sliding_window)
    return relevant, recommended


def _get_test_eval_preds(session, trained_model: WSKNN, sliding_window: bool):
    """
    Split a session into a history part and a relevant-items part, and collect the model's recommendations
    for each history part.

    Parameters
    ----------
    session : Any
              Array or list with a session. ``session[0]`` is the sequence of items; every sequence in the session
              is cut at the same position to build the history passed to the model.

    trained_model : WSKNN
                    Model to make predictions. Its ``n_of_recommendations`` attribute (``k``) sets the split points.

    sliding_window : bool
                     If False, a single split at position ``k`` is used: the first ``k`` events form the history
                     and the remaining items are the relevant ones. If True, the split position moves over
                     ``range(len(session[0]) - k, len(session[0]))``, so the relevant part shrinks from ``k``
                     items down to one item.

    Returns
    -------
    : Tuple[List, List]
        (relevant items, recommended items), both with one entry per split point.
    """
    n_events = len(session[0])
    k = trained_model.n_of_recommendations

    # A single cut at k, or one cut per position in the trailing window of k events
    if sliding_window:
        split_points = range(n_events - k, n_events)
    else:
        split_points = (k,)

    relevant_per_split = []
    recommended_per_split = []

    for split in split_points:
        history = [sequence[:split] for sequence in session]
        recommended_per_split.append(trained_model.recommend(history))
        relevant_per_split.append(session[0][split:])

    return relevant_per_split, recommended_per_split


def _set_number_of_recommendations(k: int, wsknn_model: WSKNN) -> Tuple[int, WSKNN]:
    """
    Reconcile the requested number of recommendations with the model's setting.

    When ``k`` is 0 the model's own ``n_of_recommendations`` is used. Otherwise, if ``k`` differs from the model's
    value, the model's ``n_of_recommendations`` attribute is overwritten with ``k`` (the passed object is
    modified in place).

    Parameters
    ----------
    k : int
        Requested number of recommendations; 0 means "use the model's value".

    wsknn_model : WSKNN
                  Trained wsknn model.

    Returns
    -------
    : Tuple[int, WSKNN]
        Effective number of recommendations, and the same model object that was passed in.
    """
    model_k = wsknn_model.n_of_recommendations

    # Nothing requested: fall back to whatever the model already uses
    if k == 0:
        return model_k, wsknn_model

    if k != model_k:
        wsknn_model.n_of_recommendations = k

    return k, wsknn_model


def _should_skip_short_session(s_length: int, k: int, skip_short: bool) -> bool:
    """
    Decide whether a session is too short to be scored.

    A session counts as too short when its length is less than or equal to ``k``.

    Parameters
    ----------
    s_length : int
               The length of a session.

    k : int
        The threshold length; callers in this module pass the number of recommendations.

    skip_short : bool
                 If True, a too-short session is reported by returning True. If False, an error is raised
                 for a too-short session.

    Returns
    -------
    : bool
        True, if session is too short and should be skipped. False, if the session is long enough.

    Raises
    ------
    TooShortSessionException : Raised if session length is <= k and if skip_short parameter is set to False.
    """
    too_short = s_length <= k

    if not too_short:
        return False

    if not skip_short:
        raise TooShortSessionException(s_length, k)

    return True