from typing import Dict, Union
from wsknn.preprocessing.structure.item import Items
from wsknn.preprocessing.structure.session import Sessions
from wsknn.model.wsknn import WSKNN
def fit(sessions: Union[Dict, Sessions],
        items: Union[Dict, Items, None] = None,
        number_of_recommendations: int = 5,
        number_of_neighbors: int = 10,
        sampling_strategy: str = 'common_items',
        sample_size: int = 1000,
        weighting_func: str = 'linear',
        ranking_strategy: str = 'linear',
        return_events_from_session: bool = True,
        required_sampling_event: Union[int, str, None] = None,
        required_sampling_event_index: Union[int, None] = None,
        sampling_str_event_weights_index: Union[int, None] = None,
        recommend_any: bool = False) -> WSKNN:
    """
    Creates a WSKNN model with the given settings and fits it on the input
    session-items and item-sessions maps.

    Parameters
    ----------
    sessions : Dict or Sessions
        A ``Sessions`` object is replaced by its
        ``session_items_actions_map`` attribute before fitting. The expected
        dictionary structure is:

        >>> sessions = {
        ...     session_id: (
        ...         [ sequence_of_items ],
        ...         [ sequence_of_timestamps ],
        ...         [ [OPTIONAL] sequence_of_event_types ],
        ...         [ [OPTIONAL] sequence_of_event_weights]
        ...     )
        ... }

    items : Dict or Items, optional
        If not provided then item-sessions map is created from the `sessions`
        parameter. An ``Items`` object is replaced by its
        ``item_sessions_map`` attribute before fitting. The expected
        dictionary structure is:

        >>> items = {
        ...     item_id: (
        ...         [ sequence_of_sessions ],
        ...         [ sequence_of_the_first_session_timestamps ]
        ...     )
        ... }

    number_of_recommendations : int, default=5
        The number of recommended items.

    number_of_neighbors : int, default=10
        The number of the closest sessions to choose the items from.

    sampling_strategy : str, default='common_items'
        How to filter the initial sample of sessions. Available strategies
        are:

        - 'common_items': sample sessions with the same items as the input
          session,
        - 'recent': sample the most recent sessions,
        - 'random': get a random sample of sessions,
        - 'weighted_events': select sessions based on the specific weights
          assigned to events.

    sample_size : int, default=1000
        How many sessions from the model are sampled to make
        a recommendation.

    weighting_func : str, default='linear'
        The similarity measurement between sessions. Available options:
        'linear', 'log' and 'quadratic'.

    ranking_strategy : str, default='linear'
        How we calculate an item rank (based on its position in a session
        sequence). Available options are: 'inv', 'linear', 'log',
        'quadratic'.

    return_events_from_session : bool, default = True
        Should algorithm return the same events as in session if this is
        only neighbor?

    required_sampling_event : int or str, default = None
        Set this parameter to the event name if sessions with it must be
        included in the neighbors selection. For example, this event may be
        a "purchase".

    required_sampling_event_index : int, default = None
        If the `required_sampling_event` parameter is filled then you must
        pass an index of a row with event names.

    sampling_str_event_weights_index : int, default = None
        If `sampling_strategy` is set to `weighted_events` then you must
        pass an index of a row with event weights.

    recommend_any : bool, default = False
        If recommender returns less than number of recommendations items
        then return random items.

    Returns
    -------
    wsknn : WSKNN
        The trained Weighted session-based K-nn model.

    Examples
    --------
    >>> input_sessions = {
    ...     'session_x': (
    ...         ['a', 'b', 'c'],
    ...         [10001, 10002, 10004],
    ...         ['view', 'click', 'click']
    ...     )
    ... }
    >>> input_items = {
    ...     'a': (
    ...         ['session_x'],
    ...         [10001]
    ...     ),
    ...     'b': (
    ...         ['session_x'],
    ...         [10001]
    ...     ),
    ...     'c': (
    ...         ['session_x'],
    ...         [10001]
    ...     ),
    ... }
    >>> fitted_model = fit(input_sessions, input_items)
    """
    # Unwrap the helper containers so the model receives the underlying maps.
    if isinstance(sessions, Sessions):
        sessions = sessions.session_items_actions_map

    # `isinstance(None, Items)` is False, so a missing `items` passes through
    # unchanged as None without a separate None check.
    if isinstance(items, Items):
        items = items.item_sessions_map

    # Set model parameters
    wsknn = WSKNN(
        number_of_recommendations=number_of_recommendations,
        number_of_neighbors=number_of_neighbors,
        sampling_strategy=sampling_strategy,
        sample_size=sample_size,
        weighting_func=weighting_func,
        ranking_strategy=ranking_strategy,
        return_events_from_session=return_events_from_session,
        required_sampling_event=required_sampling_event,
        required_sampling_event_index=required_sampling_event_index,
        # The model's keyword is named differently from this function's
        # parameter (`sampling_str_event_weights_index`).
        sampling_event_weights_index=sampling_str_event_weights_index,
        recommend_any=recommend_any,
    )

    # Fit sessions and items
    wsknn.fit(sessions, items)
    return wsknn