generate_backtest_statistics

backtestStatistics

In [6]:

import numpy as np

def backtestStatistics(arithmeticProfits, timeframe):
    """
    Print summary statistics for a series of per-period arithmetic returns.

    Args:
        arithmeticProfits (pandas.Series): Per-period returns as fractions
            (0.01 means +1%). NaN entries are dropped before any calculation.
        timeframe (str): Sampling interval of the returns: 'D' (252 periods
            per year), 'W' (52) or 'M' (12).

    Returns:
        None: The statistics are printed to stdout, nothing is returned.

    Raises:
        ValueError: If timeframe is not 'D', 'W' or 'M', or if no returns
            remain after dropping NaN values.

    Note:
        The printed labels always say "day"/"days", whatever the timeframe.
    """
    # Number of intervals within a trading year for each supported timeframe
    periods_per_year = {'D': 252, 'W': 52, 'M': 12}
    if timeframe not in periods_per_year:
        raise ValueError(
            "timeframe must be one of 'D', 'W' or 'M', got " + repr(timeframe)
        )
    quantity_in_year = periods_per_year[timeframe]

    # Drop any NaN values from arithmeticProfits so that the actual length is captured
    arithmeticProfits = arithmeticProfits.dropna()
    if arithmeticProfits.shape[0] == 0:
        raise ValueError('arithmeticProfits contains no non-NaN values')

    # Compounded equity curve, starting from a notional value of 1
    geometricProfits = (arithmeticProfits + 1).cumprod()

    # Walk the equity curve once, tracking the highest value seen so far
    # instead of re-scanning the whole history on every step.
    drawdowns = []
    drawdownLength = []
    running_peak = float('-inf')
    currentDrawdown_length = 0
    for cum_percent in geometricProfits:
        running_peak = max(running_peak, cum_percent)
        # Fractional distance below the peak; 0 when sitting at a new high
        draw = min(0, (cum_percent - running_peak) / running_peak)

        # Count consecutive periods spent below the peak; reset at a new high
        if draw != 0:
            currentDrawdown_length += 1
        else:
            currentDrawdown_length = 0

        drawdownLength.append(currentDrawdown_length)
        drawdowns.append(draw)

    # Calculate backtest statistics (all percentages are scaled by 100)
    positive_profits = arithmeticProfits[arithmeticProfits > 0]
    negative_profits = arithmeticProfits[arithmeticProfits < 0]
    number_of_periods = arithmeticProfits.shape[0]

    max_drawdown = abs(min(drawdowns)) * 100
    # Annualise the total compounded growth over the observed number of periods
    annual_return = (geometricProfits.iloc[-1] ** (quantity_in_year / number_of_periods) - 1) * 100
    win_rate = positive_profits.shape[0] / number_of_periods * 100
    average_wins = positive_profits.mean() * 100
    average_loss = negative_profits.mean() * 100
    biggestWin = arithmeticProfits.max() * 100
    biggestLoss = arithmeticProfits.min() * 100
    volatility = arithmeticProfits.std() * 100
    # NOTE(review): the constant 4 is presumably an annual risk-free rate in
    # percent -- confirm. Per-period volatility is scaled by sqrt(periods/year).
    sharpe = (annual_return - 4) / (volatility * np.sqrt(quantity_in_year))
    maxDrawdownLength = max(drawdownLength)

    # Print the backtest statistics
    print(f'       max drawdown: {round(max_drawdown, 2)}%')
    print(f'      annual return: {round(annual_return, 2)}%')
    print(f'           win rate: {round(win_rate, 2)}%')
    print(f'average winning day: {round(average_wins, 2)}%')
    print(f' average losing day: {round(average_loss, 2)}%')
    print(f'biggest winning day: {round(biggestWin, 2)}%')
    print(f' biggest losing day: {round(biggestLoss, 2)}%')
    print(f'       sharpe ratio: {round(sharpe, 2)}')
    print(f'   longest drawdown: {maxDrawdownLength} days')

Version History

calculate_arithmetic_returns

In [7]:

def calculate_arithmetic_returns(series):
    """
    Compute forward-looking arithmetic returns of a series.

    Each entry of the result is the relative change from that entry to the
    NEXT one, i.e. (series[t + 1] - series[t]) / series[t]. The last entry
    has no successor and is therefore NaN.

    Args:
        series (pandas.Series): Values (e.g. prices) to compute returns for.

    Returns:
        pandas.Series: Arithmetic returns, aligned on the index of `series`.

    """
    # Value that follows each entry (shifted one step towards the start)
    following = series.shift(-1)

    # Absolute change to the next entry, expressed relative to the current one
    change = following - series
    return change / series

read_csv_to_datetimes

In [12]:

def read_csv_to_datetimes(csv_file):
    """
    Read a CSV file and index its rows by the datetimes in its 'time' column.

    The 'time' column must hold Unix timestamps (seconds). Each timestamp is
    converted with datetime.fromtimestamp, so the resulting datetimes are
    naive and expressed in the local timezone of the machine running the
    code. Sub-second precision is discarded.

    Args:
        csv_file (str): Path of the CSV file, passed to pandas.read_csv.

    Returns:
        pandas.DataFrame: The CSV contents without the 'time' column, with
            the index set to the converted datetime objects.

    Raises:
        KeyError: If the file has no 'time' column.

    """

    import pandas as pd
    from datetime import datetime

    # Read the CSV file into a DataFrame
    data = pd.read_csv(csv_file)

    # Convert each Unix timestamp to a whole-second datetime. Zeroing the
    # microsecond (and fold) yields the same value as formatting the datetime
    # to '%Y-%m-%d %H:%M:%S' and parsing it back, without the string round trip.
    dateTimes = [
        datetime.fromtimestamp(unixTime).replace(microsecond=0, fold=0)
        for unixTime in data['time']
    ]

    # Set the datetimes as the index for the DataFrame
    data.index = dateTimes

    # Drop the 'time' column from the DataFrame
    data = data.drop(['time'], axis=1)

    return data

pass_through_kalman_filter

In [21]:

def pass_through_kalman_filter(series):
    """
    Applies a Kalman filter to a pandas Series.

    NaN values are dropped first, so the result may be shorter than the
    input; it is indexed by the labels of the remaining observations.

    Args:
        series (pandas.Series): Input time series data to be filtered.

    Returns:
        pandas.Series: Filtered state means, one per non-NaN observation.

    Raises:
        ValueError: If the series contains no non-NaN values.
    """

    import pandas as pd
    from pykalman import KalmanFilter

    # Remove missing observations: the filter doesn't work with NaN values
    series = series.dropna()
    if series.empty:
        raise ValueError('series contains no non-NaN values to filter')

    # Define the Kalman filter model
    kf = KalmanFilter(
        # Seed the state with the first remaining observation. Positional
        # access is required: series[0] is a label lookup and fails when
        # label 0 was dropped or the index is not integer-based.
        initial_state_mean=series.iloc[0],
        initial_state_covariance=1,   # Set the initial state covariance
        observation_covariance=1,     # Set the observation covariance
        transition_covariance=0.01    # Set the transition covariance
    )

    # Apply the Kalman filter to the series (the covariances are not used)
    filtered_state_means, _ = kf.filter(series.values)

    # Create a new pandas Series with the filtered values
    filtered_series = pd.Series(filtered_state_means.flatten(), index=series.index)

    return filtered_series