# Source code for pycircular.training


import pandas as pd
import numpy as np

from .circular import bwEstimation, kernel
from .utils import _date2rad
from .stats import kuiper_two


def train_time_periodic(trx_train, n=256, idname='account'):
    """Evaluate the time periodic risk of different accounts

    Parameters
    ----------
    trx_train : pd.DataFrame of the transactions. It must contain the
        column named by ``idname`` and a ``'date'`` column.

    n : number of points of the kernel, forwarded to the per-account
        training

    idname : name of the column that identifies the account of each
        transaction

    Returns
    -------
    risks_all : dictionary with one entry per distinct value of the
        ``idname`` column, where each value is the pd.DataFrame returned
        by ``_train_time_periodic_account`` for the dates of that account
        (one row per time segment; the columns are the n points of the
        kernel followed by 'Risk_confidence' and 'bw')

    """

    risks_all = dict()

    # One pass over the data: group the dates by account instead of
    # re-filtering the whole frame for every account. Grouping compares the
    # identifier values directly, so string identifiers work as well as
    # numeric ones. sort=False keeps the accounts in order of first
    # appearance; rows whose identifier is missing are left out by groupby.
    for account, dates in trx_train.groupby(idname, sort=False)['date']:
        # Forward the requested kernel resolution rather than a fixed 256
        risks_all[account] = _train_time_periodic_account(dates, n=n)

    return risks_all


def _train_time_periodic_account(dates, n=256,
                                 time_segments=('hour', 'dayweek', 'daymonth')):
    """Evaluate the time periodic risk of a set of dates

    Parameters
    ----------
    dates : pandas DatetimeIndex array-like of shape = [n_samples] of dates.

    n : number of points of the kernel

    time_segments : names of the time segments to evaluate. Each name is
        passed to ``_date2rad`` as ``time_segment`` and becomes one row of
        the result.

    Returns
    -------
    risks : pd.DataFrame of shape = [len(time_segments), n + 2]
        where the rows are the different time segments and the columns are
        'Risk_p0' ... 'Risk_p{n-1}' (the n points of the kernel rescaled to
        a 0-100 risk, 0 at the maximum of the kernel), 'Risk_confidence'
        (the value returned by ``kuiper_two``) and 'bw' (the estimated
        bandwidth)

    """

    risk_columns = ['Risk_p' + str(i) for i in range(n)]

    # Create the DataFrame to store the results
    risks = pd.DataFrame(np.nan, index=time_segments,
                         columns=risk_columns + ['Risk_confidence', 'bw'])

    for time_segment in time_segments:

        radians = _date2rad(dates, time_segment=time_segment).to_list()

        # Find bw
        bw = bwEstimation(radians, upper=500)

        # Estimate kernel
        y = kernel(radians, bw=bw, n=n)

        # Test the kernel
        p = kuiper_two(radians, y)

        # Write with a single .loc call per assignment. The chained form
        # risks.loc[row].iloc[...] = value only updates `risks` when the
        # intermediate row happens to be a view, and is silently lost when
        # pandas returns a copy (e.g. with Copy-on-Write enabled).
        # np.asarray keeps the assignment positional.
        risks.loc[time_segment, risk_columns] = np.asarray(
            np.round((1 - (y / y.max())) * 100))
        risks.loc[time_segment, 'Risk_confidence'] = p
        risks.loc[time_segment, 'bw'] = bw

    return risks