generate_backtest_statistics
In [6]:
import numpy as np
def backtestStatistics(arithmeticProfits, timeframe, risk_free_rate=4.0):
    """
    Print summary statistics for a series of per-period arithmetic returns.

    Args:
        arithmeticProfits (pandas.Series): Per-period returns as fractions
            (0.01 means 1%). NaN entries are dropped before any calculation.
        timeframe (str): Sampling interval of the returns: 'D' (252 periods
            per year), 'W' (52 periods) or 'M' (12 periods).
        risk_free_rate (float): Annual rate, in percent, subtracted from the
            annual return when computing the Sharpe ratio. Defaults to 4.
    Returns:
        None. The statistics are printed to stdout.
    Raises:
        ValueError: If timeframe is not 'D', 'W' or 'M', or if no non-NaN
            returns remain after dropping NaN values.
    """
    # Number of intervals within a trading year and the unit used when
    # reporting the drawdown length, keyed by timeframe code.
    periods = {'D': (252, 'days'), 'W': (52, 'weeks'), 'M': (12, 'months')}
    if timeframe not in periods:
        raise ValueError(
            "timeframe must be one of 'D', 'W' or 'M', got " + repr(timeframe)
        )
    quantity_in_year, period_unit = periods[timeframe]

    # Drop any NaN values so that the actual length is captured
    arithmeticProfits = arithmeticProfits.dropna()
    if arithmeticProfits.shape[0] == 0:
        raise ValueError('arithmeticProfits contains no non-NaN returns')

    # Compounded growth of one unit of capital
    geometricProfits = (arithmeticProfits + 1).cumprod()

    # Single pass over the equity curve, tracking the running peak instead of
    # re-scanning the whole history at every step.
    # NOTE(review): the peak is seeded from the first cumulative value, so a
    # loss on the very first period is not counted as drawdown - confirm.
    running_peak = None
    worst_drawdown = 0
    current_length = 0
    maxDrawdownLength = 0
    for equity in geometricProfits:
        if running_peak is None or equity > running_peak:
            running_peak = equity
        draw = min(0, (equity - running_peak) / running_peak)
        # A drawdown streak continues while the curve is below its peak
        current_length = current_length + 1 if draw != 0 else 0
        worst_drawdown = min(worst_drawdown, draw)
        maxDrawdownLength = max(maxDrawdownLength, current_length)

    winners = arithmeticProfits[arithmeticProfits > 0]
    losers = arithmeticProfits[arithmeticProfits < 0]
    total_periods = arithmeticProfits.shape[0]

    # Calculate backtest statistics (all percentages are scaled by 100)
    max_drawdown = abs(worst_drawdown) * 100
    annual_return = (
        geometricProfits.iloc[-1] ** (quantity_in_year / total_periods) - 1
    ) * 100
    win_rate = winners.shape[0] / total_periods * 100
    average_wins = winners.mean() * 100
    average_loss = losers.mean() * 100
    biggestWin = arithmeticProfits.max() * 100
    biggestLoss = arithmeticProfits.min() * 100
    volatility = arithmeticProfits.std() * 100
    # Per-period volatility is annualised with the square root of the number
    # of periods per year before dividing the excess annual return by it.
    sharpe = (annual_return - risk_free_rate) / (
        volatility * np.sqrt(quantity_in_year)
    )

    def as_percent(value):
        return str(round(value, 2)) + '%'

    # Print the backtest statistics. The "day" wording in the win/loss labels
    # is fixed text; only the drawdown length unit follows the timeframe.
    print(' max drawdown: ' + as_percent(max_drawdown))
    print(' annual return: ' + as_percent(annual_return))
    print(' win rate: ' + as_percent(win_rate))
    print('average winning day: ' + as_percent(average_wins))
    print(' average losing day: ' + as_percent(average_loss))
    print('biggest winning day: ' + as_percent(biggestWin))
    print(' biggest losing day: ' + as_percent(biggestLoss))
    print(' sharpe ratio: ' + str(round(sharpe, 2)))
    print(' longest drawdown: ' + str(maxDrawdownLength) + ' ' + period_unit)
Version History
calculate_arithmetic_returns
In [7]:
def calculate_arithmetic_returns(series):
    """
    Compute forward-looking arithmetic (simple) returns of a series.

    Each entry is (next value - current value) / current value, so the return
    stored at position i covers the move from i to i + 1. The last entry has
    no following value and is therefore NaN.

    Args:
        series (pandas.Series): Values (e.g. prices) to compute returns for.
    Returns:
        pandas.Series: Series containing the arithmetic returns, indexed like
            the input.
    """
    # Align every value with the one that follows it
    following = series.shift(-1)
    # Change relative to the current value
    return (following - series) / series
read_csv_to_datetimes
In [12]:
def read_csv_to_datetimes(csv_file):
    """
    Read a CSV file and use its 'time' column, converted to datetimes, as the index.

    Args:
        csv_file (str): Path or name of the CSV file, passed to pandas.read_csv.
            The file must contain a 'time' column of Unix timestamps (seconds).
    Returns:
        pandas.DataFrame: The CSV data without the 'time' column, indexed by
            the converted datetimes truncated to whole seconds.
    """
    import pandas as pd
    from datetime import datetime

    # Read the CSV file into a DataFrame
    data = pd.read_csv(csv_file)

    # Convert each Unix timestamp directly; dropping the microseconds gives the
    # same whole-second precision as formatting to a string and parsing it back.
    # NOTE(review): fromtimestamp() without a tz argument returns naive local
    # time of the machine running this code - confirm that is intended.
    data.index = [
        datetime.fromtimestamp(unixTime).replace(microsecond=0)
        for unixTime in data['time']
    ]

    # The timestamps now live in the index, so the raw column is dropped
    return data.drop(columns=['time'])
pass_through_kalman_filter
In [21]:
def pass_through_kalman_filter(series):
    """
    Applies a Kalman filter (pykalman) to a pandas Series.

    NaN values are dropped before filtering, so the result only contains the
    labels of the non-NaN observations.

    Args:
        series (pandas.Series): Input time series data to be filtered.
    Returns:
        pandas.Series: Filtered state means, indexed like the NaN-free input.
    Raises:
        ValueError: If the series has no non-NaN values.
    """
    import pandas as pd

    series = series.dropna()  # the filter doesn't work with NaN values
    if series.empty:
        raise ValueError('series contains no non-NaN values to filter')

    # Imported after validation so bad input is rejected before pykalman is needed
    from pykalman import KalmanFilter

    # Define the Kalman filter model
    kf = KalmanFilter(
        # Positional access: series[0] is a label lookup on an integer index
        # and raises KeyError once dropna() has removed label 0.
        initial_state_mean=series.iloc[0],  # Start from the first observation
        initial_state_covariance=1,  # Set the initial state covariance
        observation_covariance=1,  # Set the observation covariance
        transition_covariance=0.01  # Set the transition covariance
    )

    # Apply the Kalman filter; the state covariances are not needed here
    filtered_state_means, _ = kf.filter(series.values)

    # Create a new pandas Series with the filtered values
    return pd.Series(filtered_state_means.flatten(), index=series.index)