Source code for wsknn.fit_transform

from typing import Dict, Union

from wsknn.preprocessing.structure.item import Items
from wsknn.preprocessing.structure.session import Sessions
from wsknn.model.wsknn import WSKNN


def fit(sessions: Union[Dict, Sessions],
        items: Union[Dict, Items] = None,
        number_of_recommendations: int = 5,
        number_of_neighbors: int = 10,
        sampling_strategy: str = 'common_items',
        sample_size: int = 1000,
        weighting_func: str = 'linear',
        ranking_strategy: str = 'linear',
        return_events_from_session: bool = True,
        required_sampling_event: Union[int, str] = None,
        required_sampling_event_index: int = None,
        sampling_str_event_weights_index: int = None,
        recommend_any: bool = False):
    """
    Build a WSKNN model with the given settings and fit it on the
    session-items and item-sessions maps.

    Parameters
    ----------
    sessions : Dict or Sessions
        Session-items map. A ``Sessions`` object is replaced by its
        ``session_items_actions_map`` attribute before fitting.

        >>> sessions = {
        ...     session_id: (
        ...         [ sequence_of_items ],
        ...         [ sequence_of_timestamps ],
        ...         [ [OPTIONAL] sequence_of_event_types ],
        ...         [ [OPTIONAL] sequence_of_event_weights]
        ...     )
        ... }

    items : Dict or Items, optional
        Item-sessions map. An ``Items`` object is replaced by its
        ``item_sessions_map`` attribute before fitting. If not provided
        then the item-sessions map is created from the `sessions`
        parameter.

        >>> items = {
        ...     item_id: (
        ...         [ sequence_of_sessions ],
        ...         [ sequence_of_the_first_session_timestamps ]
        ...     )
        ... }

    number_of_recommendations : int, default=5
        The number of recommended items.

    number_of_neighbors : int, default=10
        The number of the closest sessions to choose the items from.

    sampling_strategy : str, default='common_items'
        How to filter the initial sample of sessions. Available
        strategies are:

        - 'common_items': sample sessions with the same items as
          the input session,
        - 'recent': sample the most recent sessions,
        - 'random': get a random sample of sessions,
        - 'weighted_events': select sessions based on the specific
          weights assigned to events.

    sample_size : int, default=1000
        How many sessions from the model are sampled to make
        a recommendation.

    weighting_func : str, default='linear'
        The similarity measurement between sessions. Available options:
        'linear', 'log' and 'quadratic'.

    ranking_strategy : str, default='linear'
        How an item rank is calculated (based on its position in
        a session sequence). Available options are: 'inv', 'linear',
        'log', 'quadratic'.

    return_events_from_session : bool, default = True
        Should the algorithm return the same events as in the session
        if this is the only neighbor?

    required_sampling_event : int or str, default = None
        Set this parameter to the event name if sessions with it must be
        included in the neighbors selection. For example, this event may
        be a "purchase".

    required_sampling_event_index : int, default = None
        If the `required_sampling_event` parameter is filled then you
        must pass an index of a row with event names.

    sampling_str_event_weights_index : int, default = None
        If `sampling_strategy` is set to `weighted_events` then you must
        pass an index of a row with event weights. Forwarded to the
        model as ``sampling_event_weights_index``.

    recommend_any : bool, default = False
        If the recommender returns fewer items than the number of
        recommendations then return random items.

    Returns
    -------
    wsknn : WSKNN
        The trained Weighted session-based K-nn model.

    Examples
    --------
    >>> input_sessions = {
    ...     'session_x': (
    ...         ['a', 'b', 'c'],
    ...         [10001, 10002, 10004],
    ...         ['view', 'click', 'click']
    ...     )
    ... }
    >>> input_items = {
    ...     'a': (
    ...         ['session_x'],
    ...         [10001]
    ...     ),
    ...     'b': (
    ...         ['session_x'],
    ...         [10001]
    ...     ),
    ...     'c': (
    ...         ['session_x'],
    ...         [10001]
    ...     ),
    ... }
    >>> fitted_model = fit(input_sessions, input_items)
    """
    # Unwrap the helper containers so the model always receives plain maps.
    session_map = (
        sessions.session_items_actions_map
        if isinstance(sessions, Sessions)
        else sessions
    )

    item_map = items
    if items is not None and isinstance(items, Items):
        item_map = items.item_sessions_map

    # Collect the model settings; note the keyword rename for the
    # event-weights index expected by WSKNN.
    model_settings = dict(
        number_of_recommendations=number_of_recommendations,
        number_of_neighbors=number_of_neighbors,
        sampling_strategy=sampling_strategy,
        sample_size=sample_size,
        weighting_func=weighting_func,
        ranking_strategy=ranking_strategy,
        return_events_from_session=return_events_from_session,
        required_sampling_event=required_sampling_event,
        required_sampling_event_index=required_sampling_event_index,
        sampling_event_weights_index=sampling_str_event_weights_index,
        recommend_any=recommend_any,
    )
    model = WSKNN(**model_settings)

    # Fit on the (possibly missing) item map; the model gets None as-is.
    model.fit(session_map, item_map)
    return model