Source code for wsknn.utils.transform

import gzip
import json
import pickle


def _df_to_mapping(df, main_col, time_col, index_col=None, action_col=None, weights_col=None):
    """
    Transforms dataframe to the dictionary mapping.

    Parameters
    ----------
    df : DataFrame
        Session-items or item-sessions map as pandas DataFrame object.

    main_col :
        Column with mapped items or sessions.

    time_col :
        Column with timestamps.

    index_col : optional
        Column with key values, if not provided then ``DataFrame.index`` is used.

    action_col : optional
        Column with event actions.

    weights_col : optional
        Column with event weights.

    Returns
    -------
    : Dict
    """
    sessions_map = {}

    if index_col is not None:
        indexes = df[index_col].values
    else:
        indexes = df.index

    # Prepare values
    cols = [main_col, time_col]
    if action_col:
        cols.append(action_col)
    if weights_col:
        cols.append(weights_col)

    vals = df[cols].values

    # Parse
    for _no, _id in enumerate(indexes):
        row = vals[_no].tolist()
        sessions_map[_id] = row

    return sessions_map


[docs]def dataframe_to_item_sessions_map(df, main_col, time_col, index_col=None): """ Function transforms given item sessions dataframe to dictionary used by the WSKNN model. Parameters ---------- df : DataFrame Session-items or item-sessions map as pandas DataFrame object. main_col : Column with mapped items or sessions. time_col : Column with timestamps. index_col : optional Column with key values, if not provided then ``DataFrame.index`` is used. Returns ------- : Dict """ item_sessions_map = _df_to_mapping( df=df, main_col=main_col, time_col=time_col, index_col=index_col ) return item_sessions_map
[docs]def dataframe_to_session_items_map(df, main_col, time_col, index_col=None, action_col=None, weights_col=None): """ Function transforms given session items dataframe to dictionary used by the WSKNN model. Parameters ---------- df : DataFrame Session-items or item-sessions map as pandas DataFrame object. main_col : Column with mapped items or sessions. time_col : Column with timestamps. index_col : optional Column with key values, if not provided then ``DataFrame.index`` is used. action_col : optional Column with event actions. weights_col : optional Column with event weights. Returns ------- : Dict """ session_items_map = _df_to_mapping( df=df, main_col=main_col, time_col=time_col, index_col=index_col, action_col=action_col, weights_col=weights_col ) return session_items_map
[docs]def load_pickled(filename: str) -> dict: """ The function loads pickled items / sessions object. Parameters ---------- filename : str Returns ------- pickled_object : dict """ with open(filename, 'rb') as stored_data: pickled_object = pickle.load(stored_data) return pickled_object
[docs]def load_jsonl(filename: str) -> dict: """ Function loads data stored in JSON Lines. Parameters ---------- filename : str Path to the file. Returns ------- datadict : dict Python dictionary with unique records. """ datadict = {} with open(filename, 'r') as fstream: for fline in fstream: pdict = json.loads(fline) datadict.update(pdict) return datadict
[docs]def load_gzipped_jsonl(filename: str, encoding: str = 'UTF-8') -> dict: """ Function loads data stored in gzipped JSON Lines. Parameters ---------- filename : str Path to the file. encoding : str, default = 'utf-8' Returns ------- datadict : dict Python dictionary with unique records. """ datadict = {} with gzip.open(filename, 'rt', encoding=encoding) as fstream: for fline in fstream: datadict.update(json.loads(fline)) return datadict
[docs]def load_gzipped_pickle(filename: str) -> dict: """ The function loads gzipped and pickled items / sessions object. Parameters ---------- filename : str Returns ------- pickled_object : dict """ with gzip.open(filename, 'rb') as fstream: pickled_object = pickle.load(fstream) return pickled_object