Model Fitting – Using Autocorrelation to Detect The Nature of a Time Series

I believe a solid approach in developing trading strategies is to fit the correct model to the underlying. In a previous post we studied the Hurst exponent and its ability to detect whether a series was mean reverting, momentum or a random walk. In this post we will look at a 2 lag autocorrelation on the S&P500. The long and short of it is: if there is a high degree of correlation between one point in time and a previous point in time, then we have momentum. Conversely, if there is weak or negative correlation between one point in time and a previous point in time, we can say we have mean reversion.

In order to test this we use the acf R function. We run this over daily S&P500 log returns using rollapply on a rolling window of 756 bars (252 trading days * 3 = 3 trading years). Thus each data point is the correlation coefficient of the previous 3-year rolling window.

The code that achieves this:

# Rolling auto correlation — data preparation

# Required Packages
# Defect fixed: ymd() (lubridate) and rollapply() (zoo, attached via xts)
# were used below but never loaded, which errors in a fresh session.
require(ggplot2)
require(TTR)
require(lubridate)  # ymd()
require(xts)        # attaches zoo, which provides rollapply()

# SP500: Norgate daily OHLC dump; Date column parsed as yyyy-mm-dd
SP500 <- read.csv("C:/Stock Market Analysis/Norgate Market Data/MASTER_DATA_DUMP/$SPX.csv", header=TRUE,stringsAsFactors = FALSE)
SP500$Date <- ymd(SP500$Date)
SP500$log.close <- log(SP500$Close)

# rets: period-over-period change of the log close
# NOTE(review): ROC(type="discrete") on log prices is (log P_t / log P_{t-1}) - 1,
# not a true log return (that would be ROC(Close, type="continuous")) — confirm intended.
SP500$rets <- ROC(SP500$log.close, type = c("discrete"))
SP500$rets[1] <- 0

# Rolling acf window width
# Defect fixed: the original comment said "1008 bars = 4 years" while the
# value is 756 bars = 252 trading days * 3 = 3 trading years.
acf.width <- 756

# Dates: one date per rolling window, aligned to each window's right edge
SP500dates <- SP500[acf.width:nrow(SP500),]
dates.df <- data.frame(Date=SP500dates$Date)
str(dates.df)
head(dates.df)

# Rolling auto correlation
# Apply acf() to every 756-bar window of daily returns, keeping lags 0..30
# per window. Because acf() returns a list object, rollapply() yields a
# matrix of list components; result[, 1] (used below) holds the per-window
# autocorrelation coefficient vectors. NOTE(review): this relies on zoo's
# handling of non-scalar FUN output — verify the column layout if zoo is upgraded.
result <- rollapply(SP500$rets, width = acf.width, FUN=acf, 
                    lag.max = 30,type = "correlation",plot = FALSE)

# Extract different correlations
# Defect fixed: 29 near-identical lapply lines collapsed into one loop.
# For each k in 2..30, corK is a list with one element per rolling window,
# holding that window's k-th autocorrelation entry (the same x[k, drop=FALSE]
# extraction the original performed). assign() keeps the original variable
# names (cor2 .. cor30) so the downstream code is unchanged.
for (k in 2:30) {
  assign(paste0("cor", k),
         lapply(result[, 1], function(x) x[k, drop = FALSE]))
}

# cbind outputs
# Defect fixed: 58 copy-pasted lines collapsed into one loop.
# For each lag k, row-bind the per-window coefficients into a data frame and
# attach the window-end dates, producing cor2.df .. cor30.df with the same
# columns (Date, V1) as the original code.
for (k in 2:30) {
  coef.df <- as.data.frame(do.call(rbind, get(paste0("cor", k))))
  assign(paste0("cor", k, ".df"),
         data.frame(Date = dates.df$Date, coef.df))
}

# smooth 
#cor2.df$sma <- SMA(cor2.df$V1,100)

# Plot ACF
# Defect fixed: 29 copy-pasted geom_line() calls replaced by stacking the
# per-lag frames into long format and mapping colour to the lag label.
# Colour labels ("Lag 2" .. "Lag 30") match the originals, so the legend
# content is unchanged.
acf.long <- do.call(rbind, lapply(2:30, function(k) {
  d <- get(paste0("cor", k, ".df"))
  data.frame(Date = d$Date, V1 = d$V1, lag = paste("Lag", k))
}))
ggplot(acf.long, aes(Date, V1, color = lag)) +
  geom_line() +
  labs(color="Legend text")


# Plot with shade
# Lag-2 rolling autocorrelation drawn as a line with a filled area beneath,
# titles centred and the legend suppressed; then persist the series to CSV.
shade.plot <- ggplot(cor2.df, aes(Date, V1)) +
  geom_line(aes(colour = "V1")) +
  geom_area(aes(y = V1, fill = "V1")) +
  scale_y_continuous(breaks = seq(-.5, .50, by = 0.05)) +
  labs(x="Date",y="Correlation") +
  ggtitle("Rolling Auto Correlation, 2 lag, rolling window = 3 years", subtitle = "S&P500 Log Returns") +
  theme(plot.title = element_text(hjust=0.5),
        plot.subtitle = element_text(hjust=0.5),
        legend.position = "none")
shade.plot

# Write the lag-2 series to disk
write.csv(cor2.df,"C:/R Projects/lag2.autocorr.csv")

Plot the 2 lag autocorrelation:

Rplot172

A few things are obvious… but first… let's plot a simple strategy to test buying high and selling higher (momentum) versus buying low and selling higher (mean reversion). We want to simply test what happens if we executed the above.

The rules:
1. Take a rolling z-score of S&P500 close prices
2. Momentum buy and exit: Buy when rolling z-score is over 0 and sell when its below 0.
3. Mean reversion buy and exit: Buy when rolling z-score is below 0 and sell when its above 0.

The code that achieves this:

# Mean Reversion & Momentum S&P500 
# Andrew Bannerman 1.17.2018

######################################
# Required Packages 
#####################################
require(xts)                  # time-series container for equity curves
require(data.table)           # rleid(), setDT()
require(ggplot2)              # plotting
require(lubridate)            # ymd() / ymd_hms() date parsing
require(magrittr)             # %>% pipe
require(scales)               # axis/scale helpers
require(reshape2)             # melt()
require(PerformanceAnalytics) # charts.PerformanceSummary(), maxDrawdown()
require(dplyr)                # mutate(), lag() (masks stats::lag)
require(TTR)                  # SMA(), runSD(), ROC()
require(gridExtra)            # grid.arrange()

## Load Data / Format handling
# Norgate daily OHLC dump; Date column parsed as yyyy-mm-dd
SP500 <- read.csv("C:/Stock Market Analysis/Norgate Market Data/MASTER_DATA_DUMP/$SPX.csv", header=TRUE,stringsAsFactors = FALSE)
SP500$Date <- ymd(SP500$Date)

# Use TTR package to create rolling SMA n day moving averages for n = 2..60.
# Defects fixed: the original grew the matrix with cbind() inside a loop
# (quadratic copying) via a needless closure factory (getSMA); built in one
# pass instead. Resulting columns (close.sma.n2 .. close.sma.n60) are identical.
sma.matrix <- do.call(cbind, lapply(2:60, function(n) SMA(SP500[, "Close"], n)))
colnames(sma.matrix) <- paste0("close.sma.n", 2:60)

# Bind to existing dataframe
SP500 <- cbind(SP500, sma.matrix)

# Use TTR package to create rolling n day standard deviations for n = 2..60.
# Defects fixed: same as the SMA block — matrix grown with cbind() in a loop
# through a closure factory (getSD); built in one pass instead. Resulting
# columns (close.sd.n2 .. close.sd.n60) are identical.
sd.matrix <- do.call(cbind, lapply(2:60, function(n) runSD(SP500$Close, n, cumulative = FALSE)))
colnames(sd.matrix) <- paste0("close.sd.n", 2:60)

# Bind to existing dataframe
SP500 <- cbind(SP500, sd.matrix)


# Rolling z-score of the close: (Close - n-day SMA) / n-day SD, for n = 2..60.
# Defects fixed: 59 copy-pasted row-wise apply() calls; apply() on a
# data.frame also coerces rows to a matrix. Replaced with vectorized column
# arithmetic, which yields numerically identical close.zscore.n* columns.
for (n in 2:60) {
  SP500[[paste0("close.zscore.n", n)]] <-
    (SP500$Close - SP500[[paste0("close.sma.n", n)]]) /
    SP500[[paste0("close.sd.n", n)]]
}

# Convert all NA to 0 (the SMA/SD warm-up rows are NA)
SP500[is.na(SP500)] <- 0

# Calculate quartiles, where close is relation to range (Close - High) / (High - Low)
#SP500$quartile <- apply(SP500[,c('Close', 'Low', 'High')], 1, function(x) { (x[1]-x[2])/(x[3]-x[2])} )

# Calculate Returns from open to close (entry-day return when buying the open)
# Defect fixed: row-wise apply() replaced with equivalent vectorized arithmetic.
SP500$ocret <- (SP500$Close - SP500$Open) / SP500$Open

# Calculate Close-to-Close returns (holding-day return)
SP500$clret <- ROC(SP500$Close, type = c("discrete"))
SP500$clret[1] <- 0

#####################################################################
# Split Data To Train and Test Set
#####################################################################
#SP500 <- subset(SP500, Date >= as.Date("1993-01-01") )             #Input for subsetting by date versus splitting, works with train set only
# First 57% of rows (chronological) form the train set; the `:` operator
# truncates the fractional upper bound to an integer index.
train.index <- 1:(nrow(SP500)*.570) # Add *.666 for 2/3rd split
train.set <- SP500[train.index, ]
test.set <- SP500[-train.index, ]

#####################################################################
# Assign train and test set indicators 
#####################################################################
# Name indicators #
# The n = 11 day rolling z-score is the signal series used for both sets.
train.indicator <- train.set$close.zscore.n11
test.indicator <- test.set$close.zscore.n11

######################################################################
# Develop Training Set Paramaters
# ####################################################################
# Enter buy / sell rules
# Mean reversion is long below a zero z-score; momentum is long above it.
train.set$enter.mean.rev <- ifelse(train.indicator < 0, 1, 0)
train.set$exit.mean.rev <- ifelse(train.indicator > 0, 1, 0)
train.set$enter.momo <- ifelse(train.indicator > 0, 1, 0)
train.set$exit.momo <- ifelse(train.indicator < 0, 1, 0)

# Mean Rev
# NOTE(review): the inner ifelse is redundant (both branches yield 0), so
# sig.mean.rev simply mirrors enter.mean.rev; kept as-is for transparency.
train.set <- train.set %>%
  dplyr::mutate(sig.mean.rev = ifelse(enter.mean.rev == 1, 1,
                                      ifelse(exit.mean.rev == 1, 0, 0)))

# Momentum (same redundant inner ifelse; sig.momo mirrors enter.momo)
train.set <- train.set %>%
  dplyr::mutate(sig.momo = ifelse(enter.momo == 1, 1,
                                      ifelse(exit.momo == 1, 0, 0)))


# lag signal by one forward day to signal entry next day 
# NOTE(review): this relies on dplyr::lag() (dplyr attached above) shifting
# the vector by one position; stats::lag() would not do this on a plain
# vector — confirm dplyr stays loaded after any masking package.
train.set$sig.mean.rev <- lag(train.set$sig.mean.rev,1) # Note k=1 implies a move *forward*
train.set$sig.momo <- lag(train.set$sig.momo,1) # Note k=1 implies a move *forward*
train.set[is.na(train.set)] <- 0  # Set NA to 0

# Calculate equity curves
# Mean rev: rleid() groups contiguous runs of the signal; the first bar of a
# long run earns the open-to-close return (entry at the open), subsequent
# bars earn close-to-close. Flat bars earn 0.
train.set <- train.set %>%
  dplyr::mutate(RunID = rleid(sig.mean.rev)) %>%
  group_by(RunID) %>%
  dplyr::mutate(mean.rev.equity = ifelse(sig.mean.rev == 0, 0,
                                         ifelse(row_number() == 1, ocret, clret))) %>%
  ungroup() %>%
  select(-RunID)

# Momentum: identical run-based attribution for the momentum signal
train.set <- train.set %>%
  dplyr::mutate(RunID = rleid(sig.momo)) %>%
  group_by(RunID) %>%
  dplyr::mutate(momo.equity = ifelse(sig.momo == 0, 0,
                                         ifelse(row_number() == 1, ocret, clret))) %>%
  ungroup() %>%
  select(-RunID)

# Pull select columns from data frame to make XTS whilst retaining formats 
xts1 = xts(train.set$mean.rev.equity, order.by=as.POSIXct(train.set$Date, format="%Y-%m-%d")) 
xts2 = xts(train.set$momo.equity, order.by=as.POSIXct(train.set$Date, format="%Y-%m-%d")) 
xts3 = xts(train.set$clret, order.by=as.POSIXct(train.set$Date, format="%Y-%m-%d")) 

# Join XTS together 
train.compare <- cbind(xts1,xts2,xts3)

# Use the PerformanceAnalytics package for trade statistics
colnames(train.compare) <- c("Mean Reversion","Momentum","Buy And Hold")
charts.PerformanceSummary(train.compare,main="Cumulative Returns", wealth.index=TRUE, colorset=rainbow12equal)
#png(filename="20090606_rsi2_performance_updated.png", 720, 720)
#performance.table <- rbind(table.AnnualizedReturns(compare), maxDrawdown(compare), CalmarRatio(compare),table.DownsideRisk(compare))
#drawdown.table <- rbind(table.Drawdowns(compare))
#dev.off()

# Train Set Log Returns 
train.logRets <- log(cumprod(1+train.compare))
chart.TimeSeries(train.logRets, legend.loc='topleft', colorset=rainbow12equal)

# Defect fixed: performance.table / drawdown.table are only created in the
# commented-out section above, so unconditional print() calls errored.
if (exists("performance.table")) print(performance.table)
if (exists("drawdown.table")) print(drawdown.table)

######################################################
# Develop Test Set Paramaters (Unseen Data)
######################################################

# Enter buy / sell rules (same thresholds as the train set)
test.set$enter.mean.rev <- ifelse(test.indicator < 0, 1, 0)
test.set$exit.mean.rev <- ifelse(test.indicator > 0, 1, 0)
test.set$enter.momo <- ifelse(test.indicator > 0, 1, 0)
test.set$exit.momo <- ifelse(test.indicator < 0, 1, 0)

# Mean Rev
test.set <- test.set %>%
  dplyr::mutate(sig.mean.rev = ifelse(enter.mean.rev == 1, 1,
                                      ifelse(exit.mean.rev == 1, 0, 0)))

# Momentum
test.set <- test.set %>%
  dplyr::mutate(sig.momo = ifelse(enter.momo == 1, 1,
                                  ifelse(exit.momo == 1, 0, 0)))


# lag signal by one forward day to signal entry next day 
test.set$sig.mean.rev <- lag(test.set$sig.mean.rev,1) # Note k=1 implies a move *forward*
test.set$sig.momo <- lag(test.set$sig.momo,1) # Note k=1 implies a move *forward*
test.set[is.na(test.set)] <- 0  # Set NA to 0

# Calculate equity curves
# Mean rev: first bar of a run earns open-to-close, later bars close-to-close
test.set <- test.set %>%
  dplyr::mutate(RunID = rleid(sig.mean.rev)) %>%
  group_by(RunID) %>%
  dplyr::mutate(mean.rev.equity = ifelse(sig.mean.rev == 0, 0,
                                         ifelse(row_number() == 1, ocret, clret))) %>%
  ungroup() %>%
  select(-RunID)

# Momentum
test.set <- test.set %>%
  dplyr::mutate(RunID = rleid(sig.momo)) %>%
  group_by(RunID) %>%
  dplyr::mutate(momo.equity = ifelse(sig.momo == 0, 0,
                                     ifelse(row_number() == 1, ocret, clret))) %>%
  ungroup() %>%
  select(-RunID)

# Pull select columns from data frame to make XTS whilst retaining formats 
xts1 = xts(test.set$mean.rev.equity, order.by=as.POSIXct(test.set$Date, format="%Y-%m-%d")) 
xts2 = xts(test.set$momo.equity, order.by=as.POSIXct(test.set$Date, format="%Y-%m-%d")) 
xts3 = xts(test.set$clret, order.by=as.POSIXct(test.set$Date, format="%Y-%m-%d")) 

# Join XTS together 
test.compare <- cbind(xts1,xts2,xts3)

# Use the PerformanceAnalytics package for trade statistics
colnames(test.compare) <- c("Mean Reversion","Momentum","Buy And Hold")
charts.PerformanceSummary(test.compare,main="Cumulative Returns", wealth.index=TRUE, colorset=rainbow12equal)
#png(filename="20090606_rsi2_performance_updated.png", 720, 720)
#performance.table <- rbind(table.AnnualizedReturns(compare), maxDrawdown(compare), CalmarRatio(compare),table.DownsideRisk(compare))
#drawdown.table <- rbind(table.Drawdowns(compare))
#dev.off()

# Test Set Log Returns 
test.logRets <- log(cumprod(1+test.compare))
chart.TimeSeries(test.logRets, legend.loc='topleft', colorset=rainbow12equal)

# Defect fixed: these tables are only created in the commented-out section
# above, so unconditional print() calls errored.
if (exists("performance.table")) print(performance.table)
if (exists("drawdown.table")) print(drawdown.table)
head(train.logRets)

tail(train.logRets)

# Prepare data for plotting 
train.reps <- rep(1,nrow(train.logRets))  # id 1 = in-sample rows
test.reps <- rep(2,nrow(test.logRets))    # id 2 = out-of-sample rows
id <- c(train.reps,test.reps)
# rbind on xts merges on the time index, so `final` comes out chronological
# (train dates precede test dates) and lines up with `id` above.
final <- rbind(test.compare,train.compare)
final <-log(cumprod(1+final))
final.df <- data.frame(final,id=id)
final.df <- setDT(final.df, keep.rownames = TRUE)[] # Set row names
colnames(final.df)[1] <- "Date"
final.df$Date <- ymd_hms(final.df$Date)

# negative 2 lag auto correlation dates 
start_one <- as.POSIXct("1931-9-25")
end_one <- as.POSIXct("1932-7-21")
start_two <- as.POSIXct("1936-1-13")
end_two <- as.POSIXct("1940-5-13")
start_three <- as.POSIXct("1940-12-26")
end_three <- as.POSIXct("1941-1-11")
start_four <- as.POSIXct("1941-4-4")
end_four <- as.POSIXct("1941-7-29")
start_five <- as.POSIXct("1965-9-23")
end_five <- as.POSIXct("1965-10-22")
start_six <- as.POSIXct("1990-10-12")
end_six <- as.POSIXct("1990-10-19")
start_seven <- as.POSIXct("1994-12-23")
end_seven <- as.POSIXct("1995-1-16")
start_eight <- as.POSIXct("1998-9-8")
end_eight <- as.POSIXct("2017-10-20")

# Plot train and test set equity curves with ggplot2 
# Defects fixed: each series was drawn twice (duplicate geom_line calls,
# identical overplot removed), and linetype="dashed" was misplaced inside
# as.numeric(), so the vertical line was silently drawn solid.
p1 <- ggplot(data = final.df, aes(x =Date, y = Mean.Reversion,colour=id))+
  geom_line(data = final.df, aes(x =Date, y = Mean.Reversion,colour=id))+
  geom_line(data = final.df, aes(x =Date, y = Momentum,colour=id))+
  geom_line(data = final.df, aes(x =Date, y = Buy.And.Hold,colour=id))+
  theme_bw()+
  geom_vline(xintercept=as.numeric(as.POSIXct("1977-08-18 19:00:00")), linetype="dashed")+
  theme(legend.position = "none")+
  scale_y_continuous(breaks = seq(-7, 7, by = .5))+
  ggtitle("Mean Reversion, Momentum, Buy And Hold - S&P500 Index",subtitle="1928 to Present") +
  labs(x="Date",y="Cumulative Log Return")+
  theme(plot.title = element_text(hjust=0.5),plot.subtitle =element_text(hjust=0.5))+
  #geom_rect(aes(xmin=start_one,xmax=end_one,ymin=-Inf,ymax=Inf),alpha=0.01,fill="#CC6666")+
  #geom_rect(aes(xmin=start_two,xmax=end_two,ymin=-Inf,ymax=Inf),alpha=0.01,fill="#CC6666")+
  #geom_rect(aes(xmin=start_three,xmax=end_three,ymin=-Inf,ymax=Inf),alpha=0.01,fill="#CC6666")+
  #geom_rect(aes(xmin=start_four,xmax=end_four,ymin=-Inf,ymax=Inf),alpha=0.01,fill="#CC6666")+
  #geom_rect(aes(xmin=start_five,xmax=end_five,ymin=-Inf,ymax=Inf),alpha=0.01,fill="#CC6666")+
  #geom_rect(aes(xmin=start_six,xmax=end_six,ymin=-Inf,ymax=Inf),alpha=0.01,fill="#CC6666")+
  #geom_rect(aes(xmin=start_seven,xmax=end_seven,ymin=-Inf,ymax=Inf),alpha=0.01,fill="#CC6666")+
  #geom_rect(aes(xmin=start_eight,xmax=end_eight,ymin=-Inf,ymax=Inf),alpha=0.01,fill="#CC6666")+
  annotate("text", label = "Mean Reversion", x = as.POSIXct("2010-01-01"), y = -2.0, color = "#003A90")+
  annotate("text", label = "Momentum", x = as.POSIXct("2010-01-01"), y = 6.5, color = "#003A90")+
  annotate("text", label = "Buy and Hold", x = as.POSIXct("2010-01-01"), y = 3.5, color = "#003A90")+
  annotate("text", label = "Out of Sample", x = as.POSIXct("2000-01-01"), y = .5, color = "#56B1F7")+
  annotate("text", label = "In Sample", x = as.POSIXct("1940-01-01"), y = 4.0, color = "#132B43")

# Companion panel: the lag-2 rolling autocorrelation beneath the equity curves
p2 <- ggplot(cor2.df,aes(Date,V1))+geom_line(aes(colour="V1"))+
  geom_area(aes(y = V1, fill="V1"))+
  ggtitle("Rolling Auto Correlation, 2 lag, rolling window = 3 years", subtitle = "S&P500 Log Returns") +
  labs(x="Date",y="Correlation")+
  theme(plot.title = element_text(hjust=0.5),plot.subtitle =element_text(hjust=0.5))+
  theme(legend.position="none")+
  scale_y_continuous(breaks = seq(-.5, .50, by = 0.05))
gridExtra::grid.arrange(p1, p2, ncol = 1)

Now let's plot the strategies against the lag 2 autocorrelation:

Rplot173

The success of a momentum strategy relies (in this short term momentum rule) on a positive 2 lag autocorrelation. We see that the positive 2 lag autocorrelation peaked on 5/23/1973 and declined into negative territory around 1998, where it remains negative at present day. The success of a mean reversion strategy is largely dependent on a negative 2 lag autocorrelation, notably beginning post dot com bubble. This marked a regime shift from momentum to mean reversion (on an interday basis).

To conclude, the 2 lag autocorrelation may be a useful tool for detecting which type of model to fit to the underlying series.

Optimal f – Monte Carlo R Implementation

As we go through the stages of strategy robustness testing, I thought it would be prudent to visit the subject of position sizing. World Cup Championship trader Kevin Davey (2014) uses Monte Carlo simulation to find the optimal f position size fraction. After finding optimal f, the share/contract number can be found with:

N = f * equity / L

f = optimal f (found through simulation or pre-selected)

L = worst historical loss (absolute dollar value of the loss). Note: often our worst drawdown is ahead of us, so one may want to increase this value.

If a trader wanted to risk a fraction of equity on a trade, for example , 0.4 or 40% of total equity, where equity is 25,000 and the historical worst loss was $500:

Nshares = 0.4 * 25,000 / 500  = 20 shares

We implement an R optimal f simulation. We plot various performance metrics against fractional (f). Cumulative returns, maximum draw down, risk of ruin and sharpe ratio. In the code example we run 252 days worth of random data in $ dollar gain / loss amounts. We then adjust these outputs by varying fractional (f). It should be noted that a real dollar strategy PnL may be added to this code to run simulations, there is a section that will sample with or without replacement to obtain many iterations of a back test PnL.

An actual strategy back test has a set sequence of wins and losses, and by using Monte Carlo simulation we may mix up the original trade sequence. We may run thousands or millions of iterations to obtain a distribution of performance metrics. The distribution of performance metrics may provide a more realistic expectancy going forward. The R code to perform optimal f simulations:

# Optimal F Simulation In R 
# Make random returns 
# Adjust $ return output by varying fractional (f)
#######################################################################
# Calculations 
# Calculation 1 = Optimal F calculation Start calculation = 1 + fractional amount * trade gain/loss (-/+) / abs(worst.loss)
# Calculation 2 = Optimal F Calculation = prev equity * (1+fractional (f) amount * trade gain/loss (-/+) / abs(worst.loss)
#####################################################################

# Required Packages 
require(reshape2)
require(PerformanceAnalytics)

# Set (f) values: 0.00 to 1.00 in steps of 0.05 (replaces the tab-separated literal)
f <- seq(0, 1, by = 0.05)

# Create empty result containers, one per (f), filled by the loop below.
# Defect fixed: the original initialised f.0.05 twice (dead re-initialisation).
f.names <- c("f.0.00","f.0.05","f.0.10","f.0.15","f.0.20","f.0.25","f.0.30",
             "f.0.35","f.0.40","f.0.45","f.0.50","f.0.55","f.0.60","f.0.65",
             "f.0.70","f.0.75","f.0.80","f.0.85","f.0.90","f.0.95","f.1")
for (nm in f.names) assign(nm, matrix())

# Create random $ amount gains and losses (~40% losers, magnitudes up to ~$252)
random.rets <- ifelse(runif(252)<0.40,-1,1)*round(252*runif(252),2)

# Sample and replace original return stream (note this may be real life gains and losses from a strategy)
# One may wish to enter actual trade results here
# and sample with or without replacement
rets <- sample(random.rets,replace=TRUE) # Remove replace=TRUE for without replacement
# NOTE(review): the next line discards the resample above and uses the raw
# series — comment it out to actually run a shuffled Monte Carlo iteration.
rets <- random.rets
# Grow each fractional-(f) equity path. The original recurrence
#   equity[1] = 1 + f * rets[1] / |worst loss|
#   equity[i] = equity[i-1] * (1 + f * rets[i] / |worst loss|)
# is exactly cumprod(1 + f * rets / |worst loss|).
# Defects fixed: 42 copy-pasted statements collapsed into one pass, and the
# loop-invariant abs(min(rets)) hoisted out of the loop.
worst.loss <- abs(min(rets))
sim <- vapply(f,
              function(frac) cumprod(1 + frac * rets / worst.loss),
              numeric(length(rets)))
# Keep the original per-(f) variable names (f.0.00 .. f.1) so the cbind below
# continues to work unchanged.
f.names <- c("f.0.00","f.0.05","f.0.10","f.0.15","f.0.20","f.0.25","f.0.30",
             "f.0.35","f.0.40","f.0.45","f.0.50","f.0.55","f.0.60","f.0.65",
             "f.0.70","f.0.75","f.0.80","f.0.85","f.0.90","f.0.95","f.1")
for (j in seq_along(f.names)) assign(f.names[j], sim[, j])

# Output
# Bind the 21 simulated equity paths into a 252 x 21 matrix, one column per
# fractional (f); columns are named by the f value itself.
out <- cbind(f.0.00,f.0.05,f.0.10,f.0.15,f.0.20,f.0.25,f.0.30,f.0.35,f.0.40,f.0.45,f.0.50,f.0.55,f.0.60,f.0.65,f.0.70,f.0.75,f.0.80,f.0.85,f.0.90,f.0.95,f.1)
cnames <- f
colnames(out) <- cnames

######################################################
# Find cumulative return for any given fractional (f)
#######################################################
cr.list <- list()
i=1  # redundant — `for` below resets i; kept from the original
for (i in 1:21) { 
  # Per-bar % change of equity path i
  cr.rets <- ROC(out[,i],type = "discrete",na.pad=TRUE)
  # Wrap in an xts with an artificial daily index ending today so that
  # PerformanceAnalytics::Return.cumulative() accepts the series.
  cr.list[[i]] <- rbind(as.numeric(as.data.frame(Return.cumulative(xts(cr.rets, Sys.Date()-length(cr.rets):1)))))
}

# cbind output: one cumulative return per f, columns named by f
cr_out <- do.call(cbind,cr.list)
cnames <- f
colnames(cr_out) <- cnames
# Plot cumulative return 
# melt() produces long format; Var2 carries the f value for the x axis
cr_plot <- melt(cr_out)
cr_plot <- setDT(cr_plot, keep.rownames=TRUE)
cr_plot <- data.frame(cr_plot)
plot(cr_plot$Var2,cr_plot$value,type="o",col="red",ann=FALSE,xaxt="n")
title("Cumulative Return Vs Fractional (f)",xlab="Fractional (f)", ylab="Cumulative Return (%)")
axis(side = 1, at = f, las=2)

# Plot for final $ equity
#plot.df <- out[nrow(out),] # Extract last row of simulation
#plots <- melt(plot.df)
#plots <- setDT(plots, keep.rownames=TRUE)
#plots <- data.frame(plots)
# Plot
#plot(plots$rn,plots$value,type="o",col="red", ann=FALSE)
#title("Final Equity Vs Fractional (f)",xlab="Fractional (f)", ylab="Final Equity ($)")

################################################################################
# Find maximum drawdown for any given fractional (f)
################################################################################
dd <- data.frame(out)
colnames(dd) <- cnames <- f

# Loop to pull max drawdown per f column.
# FIX: was `1:length(dd)` — length() of a data.frame is its column count,
# which works but is obscure; seq_len(ncol(dd)) says what is meant and is
# safe for zero columns.
dd.list <- list()
for (i in seq_len(ncol(dd))) {
  dd.rets <- ROC(dd[,i],type = "discrete",na.pad=TRUE)
  dd.list[[i]] <- rbind(as.numeric(as.data.frame(maxDrawdown(xts(dd.rets, Sys.Date()-length(dd.rets):1)))))
}

# Validation
#rets <- ROC(dd[,2],type = "discrete",na.pad=TRUE)
#maxDrawdown(xts(rets, Sys.Date()-length(rets):1))
#chart.Drawdown(xts(rets$rets, Sys.Date()-nrow(rets):1), legend.loc = NULL, colorset = (1:12))

# cbind output, one column per f value
dd_out <- do.call(cbind,dd.list)
colnames(dd_out) <- f

# Plot DD vs fractional (f)
dd.plot <- melt(dd_out)
plot(dd.plot$Var2,dd.plot$value,type="o",col="red", ann=FALSE,xaxt="n")
title("Maximum Drawdown Vs Fractional (f)",xlab="Fractional (f)", ylab="Maximum Draw Down (%)")
axis(side = 1, at = f, las=2)

# Combine the two diagnostic plots on one canvas
par(mfrow=c(2,2))
# Cumulative return panel
plot(cr_plot$Var2,cr_plot$value,type="o",col="red", ann=FALSE, xaxt="n")
title("Cumulative Return Vs Fractional (f)",xlab="Fractional (f)", ylab="Cumulative Return (%)")
axis(side = 1, at = f, las=2)
# Maximum drawdown panel
plot(dd.plot$Var2,dd.plot$value,type="o",col="red", ann=FALSE,xaxt="n")
title("Maximum Drawdown Vs Fractional (f)",xlab="Fractional (f)", ylab="Maximum Draw Down (%)")
axis(side = 1, at = f, las=2)

# Win / loss percentage of the sampled return series
# (mean of a logical vector = proportion of TRUEs)
pw <- mean(rets > 0)   # fraction of winning periods
pl <- mean(rets < 0)   # fraction of losing periods
pw
pl

###############################################################
# Find Risk of ruin for any given fractional (f)
###############################################################
# Per f column: collect daily returns, mean winning return,
# mean losing return and standard deviation of returns.
rets.list <- list()
mean.win.list <- list()
mean.loss.list <- list()
sd.ret.list <- list()
for (i in seq_len(ncol(out))) {
  rets.rets <- ROC(out[,i],type = "discrete",na.pad=TRUE)
  rets.list[[i]] <- rets.rets
  mean.win.list[[i]] <- mean(rets.rets[rets.rets>0],na.rm=TRUE)
  mean.loss.list[[i]] <- mean(rets.rets[rets.rets<0],na.rm=TRUE)
  sd.ret.list[[i]] <- sd(rets.rets,na.rm=TRUE)
}

# cbind outputs, one column per f value
rets_out <- do.call(cbind,rets.list)
colnames(rets_out) <- f

mean_win_out <- do.call(cbind,mean.win.list)
colnames(mean_win_out) <- f

mean_loss_out <- do.call(cbind,mean.loss.list)
colnames(mean_loss_out) <- f

# BUG FIX: this previously read do.call(cbind, mean.ret.list) — an object
# that is never created anywhere (error: object not found). The standard
# deviations are accumulated in sd.ret.list.
sd_out <- do.call(cbind,sd.ret.list)
colnames(sd_out) <- f

# Fixed fractional risk of ruin (R3)
# Ralph Vince 1990 (Portfolio Management Formulas : Mathematical Trading Methods for the Futures, Options, and Stock Markets)
risk_ruin.list <- list()
q = 2
g = 0.3   # Set maximum % loss (drawdown level treated as ruin) willing to accept
for (i in seq_len(ncol(mean_win_out))) {
  aw  = mean_win_out[,i]   # average winning return at this f
  al  = mean_loss_out[,i]  # average losing return at this f
  # BUG FIX: p was previously computed BEFORE z and a, so the first
  # iteration errored ("object 'z' not found") and later iterations used
  # stale values from the previous loop pass. Compute z and a first.
  z = (abs((aw/q)*pw)) - (abs(al/q)*(1-pw))                  # expected edge per unit
  a = ((pw * (aw/q)^2) + ((1-pw) * (al/q)^2)) ^(1/2)         # RMS trade size
  p = .5 * (1+(z/a))                                         # probability of a win-step
  u = g/a                                                    # ruin level in units of a
  risk_ruin.list[[i]] =  ((1-p)/p)^u *100
}

# cbind output, one column per f value
risk_ruin_out <- do.call(cbind,risk_ruin.list)
colnames(risk_ruin_out) <- f

# Plot risk of ruin vs fractional (f)
rr.plot <- melt(risk_ruin_out)
rr.plot <- replace(rr.plot, is.na(rr.plot), 0)
#options(scipen = 0)
plot(rr.plot$Var2,rr.plot$value,type="o",col="red", ann=FALSE,xaxt="n")
title("Risk Of Ruin Vs Fractional (f)",xlab="Fractional (f)", ylab="Risk of Ruin (%)")
axis(side = 1, at = f, las=2)

########################################################################################
# Find sharpe ratio for any given fractional (f)
################################################################################
# Generalized: iterate over the actual number of f columns in `out`
# instead of a hard-coded 21 (also drops the dead `i=1` seed).
sr.list <- list()
for (i in seq_len(ncol(out))) {
  sr.rets <- ROC(out[,i],type = "discrete",na.pad=TRUE)
  # Annualized Sharpe via PerformanceAnalytics on a synthetic-date xts
  sr.list[[i]] <- rbind(as.numeric(as.data.frame(SharpeRatio.annualized(xts(sr.rets, Sys.Date()-length(sr.rets):1)))))
}

# cbind output, one column per f value
sr_out <- do.call(cbind,sr.list)
colnames(sr_out) <- f
# Plot sharpe ratio vs fractional (f)
sr_plot <- melt(sr_out)
sr_plot <- setDT(sr_plot, keep.rownames=TRUE)
sr_plot <- data.frame(sr_plot)
plot(sr_plot$Var2,sr_plot$value,type="o",col="red",ann=FALSE,xaxt="n")
title("Sharpe Ratio Vs Fractional (f)",xlab="Fractional (f)", ylab="Sharpe Ratio")
axis(side = 1, at = f, las=2)

############################################################
# Find optimal F 
###########################################################
# Optimal f = the fractional value whose cumulative return is highest.
# which(... == max(...)) keeps every tie, should multiple f share the peak.
peak.rows <- which(cr_plot$value == max(cr_plot$value))
optimal.f <- cr_plot$Var2[peak.rows]
cat("Optimal f for this iteration = ", optimal.f)

########################################################################################################
# Highlight code and ctrl+r to run many iterations

 And the output:

Rplot273

With an optimal f of:

> cat("Optimal f = ", optimal.f)
Optimal f =  0.2

And for reference, the random sample winning / losing %:

> # Find win / loss % of sample
> pw <- sum(rets >0) / length(rets) # win
> pl <- sum(rets <0)/ length(rets)  # loss
> pw
[1] 0.5674603
> pl
[1] 0.4325397

For this particular trade sequence, we see that optimal f is 0.20. If we connect the dots to the maximum drawdown plot, f = 0.20 corresponds to a maximum drawdown of 74%, and to a 17.4% risk of ruin for a 30% drawdown. One would select a lower fractional f than optimal, or abandon the strategy altogether.

A few notes about the risk of ruin calculation: it was taken from Ralph Vince (1990) and is the R3 risk-of-ruin calculation as described in his book.

Next steps for the monte carlo simulator involve adding in $ PnL of back tested results. And run 1000’s of simulations. I will be working on this over the coming month.

Full code on my github.

Thanks for reading, any questions drop me a line.

Monthly Investing vs Yearly Investing – Effects on Average Share Cost

I invest in a long term S&P500 momentum strategy. I have been making monthly contributions, and over the short term the average trade cost seemed to be much higher than the initial cost. I wanted to see if buying in bulk once a year was better than investing every month over a long run of history.

The calculation to work out the average trade cost is as follows:

Total $ amount invested / Total shares invested.

If we invest at each time increment:

# Invest - Avg Share Cost
# Rolling Calculation
prices <- c(12.56,13.45,11.10,10.09,12.3,16.7)
total.to.invest <- c(10000,7000,1600,6000,3000,12000)
table <- data.frame(prices,total.to.invest)
table$total.shares <- apply(table[,c('prices','total.to.invest')], 1, function(x) { round((x[2]/x[1])) } )
table$total.shares.sum <- cumsum(table$total.shares)
table$total.to.invest.sum <- cumsum(table$total.to.invest)
table$roll.cost.avg <- apply(table[,c('total.shares.sum','total.to.invest.sum')], 1, function(x) { (x[2]/x[1]) } )
head(table)
  prices total.to.invest total.shares total.shares.sum total.to.invest.sum roll.cost.avg
1  12.56           10000          796              796               10000      12.56281
2  13.45            7000          520             1316               17000      12.91793
3  11.10            1600          144             1460               18600      12.73973
4  10.09            6000          595             2055               24600      11.97080
5  12.30            3000          244             2299               27600      12.00522
6  16.70           12000          719             3018               39600      13.12127

This is on a rolling basis; we see that the average cost is higher than our initial entry, which will be the case with an up-trending series.

If we invest less at the beginning and more at the end:

# Invest less at beginning, Invest more later
prices <- c(12.56,13.45,11.10,10.09,12.3,16.7)
total.to.invest <- c(100,700,1600,6000,3000,120000)
table <- data.frame(prices,total.to.invest)
table$total.shares <- apply(table[,c('prices','total.to.invest')], 1, function(x) { round((x[2]/x[1])) } )
table$total.shares.sum <- cumsum(table$total.shares)
table$total.to.invest.sum <- cumsum(table$total.to.invest)
table$roll.cost.avg <- apply(table[,c('total.shares.sum','total.to.invest.sum')], 1, function(x) { (x[2]/x[1]) } )

  prices total.to.invest total.shares total.shares.sum total.to.invest.sum roll.cost.avg
1  12.56             100            8                8                 100      12.50000
2  13.45             700           52               60                 800      13.33333
3  11.10            1600          144              204                2400      11.76471
4  10.09            6000          595              799                8400      10.51314
5  12.30            3000          244             1043               11400      10.93001
6  16.70          120000         7186             8229              131400      15.96792

As we invested a higher amount of capital later rather than earlier, we brought our average cost up and closer to the current price. It's a weighted average, after all.

Next we simulate adding a fixed amount of capital every month and a fixed amount of capital every year.

Two investment capitals:

# Enter monthly and yearly capital investments
capital.invest.e.month <- 10000/12 # Invest 10,000 a year, split into months
bulk.invest.1.year <- 10000

We invest 10,000 per year divided by 12 months, 833.33 each month and 10,000 every year buying the S&P500. Orders are placed at the start of every month and yearly orders at the start of every year.

We then compute a rolling average cost, the code to do this:

# Bulk Buy 1x a year Vs investing every month
# Andrew Bannerman 1.3.2018

require(xts)
require(data.table)
require(ggplot2)
require(lubridate)
require(magrittr)
require(scales)
require(reshape2)
require(PerformanceAnalytics)
require(dplyr)
require(TTR)

# Download SPX (^GSPC) daily data from Yahoo via quantmod
require(quantmod)
startdate <- "1930-01-01"
spx.xts <- getSymbols("^GSPC", from = startdate, auto.assign = FALSE)
# Flatten the xts object to a data frame, keeping the date index as a Date column
SPX <- data.frame(Date = index(spx.xts), coredata(spx.xts))

# Add month / day / year helper columns for the buy/sell rules.
# FIX: the original built three separate piped data frames — each with a
# group_by() call that had no effect on the extracted column — and then
# re-bound them with data.frame(). The date parts can be added directly.
df <- SPX %>%
  dplyr::mutate(month = lubridate::month(Date),
                day   = lubridate::day(Date),
                year  = lubridate::year(Date))

# Subset df by date 
#df <- subset(df, Date >= as.Date("2009-01-01"))
head(df$Date)

# Enter monthly and yearly capital investments:
# $10,000 per year, deployed either in 12 monthly slices or one lump sum
capital.invest.e.month <- 10000/12 # Invest 10,000 a year, split into months
bulk.invest.1.year <- 10000

# Simulate buying every month.
# rleid(month) gives each consecutive run of month values its own ID, so
# ID.No == 1 marks the first trading day of each month — the buy day.
output <- df %>%
  dplyr::mutate(RunID = data.table::rleid(month)) %>%
  group_by(RunID) %>%
  mutate(ID.No = row_number()) %>%
  # Shares bought on the buy day = monthly capital / that day's adjusted close; 0 otherwise
  dplyr::mutate(total.shares.months = ifelse(ID.No == 1,first(capital.invest.e.month) / first(GSPC.Adjusted),0)) %>%  # Divide total purchased by cost price for total share
  # Dollars spent on the buy day = shares bought * price; 0 otherwise
  dplyr::mutate(total.cost.months = ifelse(ID.No == 1,first(total.shares.months) * first(GSPC.Adjusted),0)) %>%  # Divide total purchased by cost price for total share
  ungroup() %>%
  select(-RunID)

df <- data.frame(output)
head(df$Date)
# Simulate buying 1x share start of each month (alternative approach, disabled)
#output <- df %>%
#dplyr::mutate(RunID = data.table::rleid(month)) %>%
#group_by(RunID) %>%
#  mutate(ID.No = row_number()) %>%
#  dplyr::mutate(first.month.total.cost = ifelse(ID.No == 1,first(GSPC.Adjusted) * 1,0)) %>% # Own 1x share at close price change 1 to 2 for more..
#  dplyr::mutate(total.shares = ifelse(ID.No == 1,first(first.month.total.cost) / first(GSPC.Adjusted),0)) %>%  # Divide total purchased by cost price for total share
#    ungroup() %>%
#  select(-RunID)

# Simulate bulk investing 1x a year.
# rleid(year) IDs each calendar year; ID.No == 1 is the first trading day
# of the year, where the full yearly amount is invested.
output <- df %>%
  dplyr::mutate(RunID = data.table::rleid(year)) %>%
  group_by(RunID) %>%
  mutate(ID.No = row_number()) %>%
  # NOTE(review): yearly share counts are round()ed here while the monthly
  # simulation above keeps fractional shares — confirm this asymmetry is intended.
  dplyr::mutate(total.shares.years = ifelse(ID.No == 1,round(first(bulk.invest.1.year) / first(GSPC.Adjusted)),0)) %>%  # Divide total purchased by cost price for total share
  dplyr::mutate(total.cost.years = ifelse(ID.No == 1,first(total.shares.years) * first(GSPC.Adjusted),0)) %>%  # Divide total purchased by cost price for total share
  ungroup() %>%
  select(-RunID)
# output data frame
df <- data.frame(output)

# Calculate average cost per share for each scheme:
# total $ amount invested / total shares bought
month.invest.avg.cost <- sum(df$total.cost.months) / sum(df$total.shares.months)
year.invest.avg.cost <- sum(df$total.cost.years) / sum(df$total.shares.years)
find.first.price <- head(df$GSPC.Adjusted,1)
find.last.price <- tail(df$GSPC.Adjusted,1)

# Subset to monthly buy days and compute the rolling average cost
df$index <- seq_len(nrow(df))   # FIX: was seq(1:nrow(df)) — a seq() of a sequence
df.month <- subset(df,total.shares.months >0)
# Running totals of shares bought and dollars spent
df.month$total.shares.months.sum <- cumsum(df.month$total.shares.months)
df.month$total.cost.months.sum <- cumsum(df.month$total.cost.months)
# Rolling average cost = cumulative $ invested / cumulative shares.
# FIX: vectorized division replaces the row-wise apply() (apply on a data
# frame coerces to a matrix and is needlessly slow).
df.month$month.roll.avg.cost <- df.month$total.cost.months.sum / df.month$total.shares.months.sum
head(df.month$Date)
# Join back to the full history and carry the last known value forward
df.join.month <- full_join(df, df.month, by = c("Date" = "Date"))
df.join.month$month.roll.avg.cost <- na.locf(df.join.month$month.roll.avg.cost)
head(df.join.month$Date)

# Subset to yearly buy days and compute the rolling average cost
df.year <- subset(df,total.shares.years >0)
# Running totals of shares bought and dollars spent
df.year$year.total.shares.years.sum <- cumsum(df.year$total.shares.years)
df.year$year.total.cost.years.sum <- cumsum(df.year$total.cost.years)
# Rolling average cost (vectorized; replaces the row-wise apply)
df.year$year.roll.avg.cost <- df.year$year.total.cost.years.sum / df.year$year.total.shares.years.sum

# Join back to the full history and carry the last known value forward
df.join.year <- full_join(df, df.year, by = c("Date" = "Date"))
df.join.year$year.roll.avg.cost <- na.locf(df.join.year$year.roll.avg.cost)
# FIX: removed the stray `tail(plot.df,1000)` debugging line — plot.df is
# not created until the plotting step below, so it errored on a clean run.
# Plot: assemble the three series, melt to long format, draw dodged bars
plot.df  <- data.frame("Date" = df.join.month$Date, "Rolling Average Cost Monthly" = df.join.month$month.roll.avg.cost,"Rolling Average Cost Yeary" = df.join.year$year.roll.avg.cost, "SPX Adjusted Close" = df$GSPC.Adjusted)
plot.df <- melt(plot.df, id.vars="Date")
avg.cost.plot <- ggplot(plot.df, aes(x=Date, y=value, fill=variable)) +
  geom_bar(stat='identity', position='dodge') +
  ggtitle("Average Share Cost - Investing Monthly Vs 1x Bulk Investing Each Year",subtitle="SPX 1950 To Present") +
  labs(x="Date",y="SPX Close Price") +
  theme(plot.title = element_text(hjust=0.5),plot.subtitle =element_text(hjust=0.5))
avg.cost.plot

And the output vs SPX price:

Rplot238
Average Share Cost – Invest start of every month and start of every year – 1950 To Present

From 1950 to present we see we lock in a better average cost pre dot com up trend.

Rplot234
Average Share Cost – Invest start of every month and start of every year – 2000 To Present

If we started investing at the top of the dot com bubble. We see the effects of averaging down. Both bear markets are somewhat averaged out so to speak. Investing at the top of the market, buy and hold gains were somewhat stagnant for an entire decade.

Rplot235
Average Share Cost – Invest start of every month and start of every year – 2009 To Present

There is not much variation between electing to invest every month versus every year; the difference is negligible over the long haul. It is, however, notable that investing incrementally affects the total % return. If we bulk invest 10,000 at one price and it rises 10%, we have +$1,000. If we dollar-cost average, we invest 20,000 and dilute the initial cost, so we have a 5% gain instead of a 10% gain. However, a 5% gain on 20,000 is still $1,000.

Raising the cost per share as price trends does affect the amount of cushion one has. As in this example, this is conditionally investing in the momentum strategy:

Rplot252

This is bulk investing $50,000 at a new leg and adding a fixed amount every month / year. We see our average cost chases the current price. One way to avoid this is to bulk invest at the start of each new leg without adding any new funds. Only large contributions are made when a new momentum trade is entered, which in this case is every other year for my S&P500 strategy. More to come.

Thanks for reading!

Full code can be found on my github.

Testing A Strategy For Robustness With Time Series Cross Validation

In two previous posts we attempted to increase our sample size for back testing using time series bootstrapping. One method used the maximum entropy bootstrap for time series using the meboot R package, the post can be found here. The second method involved using block bootstrap with sample with replacement (one may edit code for without), by any means the post can be found here.

After questioning the validity of the above procedures. We now focus on applying cross validation techniques on our time series data. Instead of optimizing over the full data set which can lead to over fitting. We split the data into many train (in-sample) and test (out-sample) periods. A good explanation can be found here.

In this example. We split each train set by a total number of 252 rows, which equates to 1 year of trading data. Each test period is split into a total number of 126 rows, which equates to 6 months of trading data. For this example we will use the VXV / VXMT sma ratio strategy. As a reminder the rules are as follows:

  1. long VXX when VXV/VXMT ratio is over 1 & the VXV/VXMT ratio is over an ‘n’ period sma of the VXV/VXMT ratio.
  2. long XIV when VXV/VXMT ratio is less than 1 & the VXV/VXMT ratio is below an ‘n’ period sma of the VXV/VXMT ratio.

The free parameters which can be optimized are the VXV/VXMT ratio value and also the VXV/VXMT simple moving average look back period.

We optimize the vxv/vxmt ratio sma look back period over a sma length of 2:170. Each sma ratio is optimized over each train set and ranked per their sharpe ratio. Those that produced the highest sharpe ratio were used in the next test set period.

An example of the procedure: after optimizing the sma lookback on train set 1, from rows 1:253, [1+252 = 253] we use the sma look back which produced the highest sharpe ratio in the next test set 1, rows 253:378 (+125 rows, inclusive start row). After this test set period. We move the whole train set window up by +126 rows, making train set 2 range from rows, 127 to 379 (+252 row width) the next test set 1 then begins on row 379 to 504 (+126 row width, inclusive start row). After this test set, we then move the train set 3 window up another 126 rows, so on and so forth until we reach the end of the series.

First before splitting the train and test sets, we wish to continuously calculate the VXV/VXMT ratio and all VXV/VXMT ratio SMA’s. This is so that we have the long run history and we do not try and calculate anything on each smaller train/test block.

Next we run the back test over all train set periods and record the highest sharpe ratio in each train period, the code that achieves this:


# Train / Test Set VXV / VXMT Strategy 
# Andrew Bannerman 

################################################
# Procedure 
# 1. Split data into train / test sets 
# 2. Select 1 year look back for each train set (252 days)
# 3. Select 6 months test set (126)
# 4. Re-calibrate on a moving window through entire time series 
#################################################

# Required Packages 
library(readxl)
require(xts)
require(data.table)
require(ggplot2)
require(RColorBrewer)
require(lubridate)
require(magrittr)
require(scales)
require(reshape2)
require(PerformanceAnalytics)
require(dplyr)
require(TTR)

#################################################
# 1. Load Data 
#################################################
# Load synthetic VXX / XIV / ZIV / VXZ series from the Excel workbooks,
# standardize column names, and parse the Date columns.
synth <- read_excel("D:/R Projects/Final Scripts/VIX_term_structure/vix-funds-models-no-formulas.xls", col_names = TRUE)
synth1 <- read_excel("D:/R Projects/Final Scripts/VIX_term_structure/vix-mt-funds-models-no-formulas.xls", col_names = TRUE)
synth <- as.data.frame(synth)
synth1 <- as.data.frame(synth1)
# Extract each synthetic series as a two-column Date/close data frame
vxx.synth <- data.frame(synth$Date, synth$'VXX calc')
xiv.synth <- data.frame(synth$Date, synth$'XIV calc')
ziv.synth <- data.frame(synth1$Date, synth1$'ZIV calc')
vxz.synth <- data.frame(synth1$Date, synth1$'VXZ calc')
colnames(vxx.synth) <- c("Date", "vxx_close")
colnames(xiv.synth) <- c("Date", "xiv_close")
colnames(ziv.synth) <- c("Date", "ziv_close")
colnames(vxz.synth) <- c("Date", "vxz_close")
vxx.synth$Date <- ymd(vxx.synth$Date)
xiv.synth$Date <- ymd(xiv.synth$Date)
ziv.synth$Date <- ymd(ziv.synth$Date)
vxz.synth$Date <- ymd(vxz.synth$Date)

# Download VXX / XIV adjusted daily history from AlphaVantage
# (fread reads the CSV endpoint straight into a data.table).
VXX <- fread("https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=VXX&outputsize=full&apikey=your_api_key&datatype=csv")
VXX$timestamp <- ymd(VXX$timestamp)   # character -> Date
VXX <- arrange(VXX,timestamp)         # sort ascending by date
colnames(VXX)[1] <- "Date"
# FIX: removed the redundant second ymd() re-parse of an already-Date column
VXX <- as.data.frame(VXX)

XIV <- fread("https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=XIV&outputsize=full&apikey=your_api_key&datatype=csv")
XIV$timestamp <- ymd(XIV$timestamp)
XIV <- arrange(XIV,timestamp)
colnames(XIV)[1] <- "Date"
XIV <- as.data.frame(XIV)

# Join synthetic data to the AlphaVantage series: synthetic history runs
# up to each ETF's inception date, then the downloaded data takes over
vxx.synth <- subset(vxx.synth, Date <= as.POSIXct("2009-01-29"))
xiv.synth <- subset(xiv.synth, Date <= as.POSIXct("2010-11-29"))
# Keep only Date and adjusted close from the AlphaVantage frames
VXX <- VXX[ -c(2:5, 7:9) ]  # subset adjusted close
XIV <- XIV[ -c(2:5, 7:9) ]  # subset adjusted close
colnames(VXX)[2] <- "vxx_close"
colnames(XIV)[2] <- "xiv_close"

# Row-bind synthetic + live, then column-bind the two ETFs
VXX <- rbind(vxx.synth,VXX)
XIV <- rbind(xiv.synth,XIV)
df <- cbind(VXX,XIV)

# Download VXV (VIX3M) and VXMT index data from the CBOE website
VXV_cboe <- fread("http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/vix3mdailyprices.csv")
VXV_cboe <- as.data.frame(VXV_cboe)
VXV_cboe <- VXV_cboe[3:nrow(VXV_cboe), ]   # drop the leading disclaimer rows
colnames(VXV_cboe)[1:5] <- c("Date","vxv_cboe_open","vxv_cboe_high","vxv_cboe_low","vxv_cboe_close")
VXV_cboe$Date <- mdy(VXV_cboe$Date)
cols <-c(2:5)
VXV_cboe[,cols] %<>% lapply(function(x) as.numeric(as.character(x)))   # OHLC character -> numeric

# Same treatment for the VXMT series
VXMT_cboe <- fread("http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/vxmtdailyprices.csv")
VXMT_cboe <- as.data.frame(VXMT_cboe)
VXMT_cboe <- VXMT_cboe[3:nrow(VXMT_cboe), ]
colnames(VXMT_cboe)[1:5] <- c("Date","vxmt_cboe_open","vxmt_cboe_high","vxmt_cboe_low","vxmt_cboe_close")
VXMT_cboe$Date <- mdy(VXMT_cboe$Date)
cols <-c(2:5)
VXMT_cboe[,cols] %<>% lapply(function(x) as.numeric(as.character(x)))

# Join VIX, VIX3m (VXV) and VXMT CBOE data to ETF df.
# NOTE(review): VIX_cboe is not created anywhere in this script section —
# presumably it is downloaded the same way as VXV_cboe/VXMT_cboe elsewhere
# in the full script; confirm it exists before this line runs.
cboe.df <- merge(VIX_cboe,VXV_cboe, by="Date")
cboe.df <- merge(cboe.df,VXMT_cboe, by="Date")
start_date <- cboe.df$Date[1]
# Subset Strategy To Start At VXMT Date 
df <- subset(df, Date >= as.POSIXct(start_date) )
df <- df[,c(-3)] # Drop unused dates
df <- full_join(df, cboe.df, by = c("Date" = "Date"))

####################################################################
# Create VXV VXMT Ratio and ratio SMA 
###################################################################
# Ratio of 3-month (VXV) to mid-term (VXMT) implied volatility;
# > 1 indicates a relatively elevated front end of the curve
df$vxv.vxmt.ratio <- df$vxv_cboe_close / df$vxmt_cboe_close
# Calculate Close-to-Close discrete returns for each ETF leg
df$vxx.close.ret <- ROC(df$vxx_close, type = c("discrete"))
df$xiv.close.ret <- ROC(df$xiv_close, type = c("discrete"))

# Calculate SMA of the VXV/VXMT ratio for every lookback in 2:170.
# FIX: the original used a function factory whose inner function ignored
# BOTH of its arguments and instead read the loop index `i` and an
# undefined `x` from the calling environment — it only avoided an error
# because R's lazy evaluation never forced the unused `x` promise.
# Compute the SMA matrix directly instead.
sma.lookback <- 2:170
sma.matrix <- sapply(sma.lookback, function(n) SMA(df[,"vxv.vxmt.ratio"], n))
# Name columns ratio.sma.n2 ... ratio.sma.n170 (used for lookup later)
colnames(sma.matrix) <- sapply(sma.lookback, function(n)paste("ratio.sma.n", n, sep=""))
# Bind to existing dataframe
df <-  cbind(df, sma.matrix)

####################################################################
# Split Data Into Train and Test Sets
# 252 days train , 126 days test 
##################################################################
total.row <- nrow(df) # total rows
train.length <- 252   # rows per train window (1 trading year)
test.length <- 125    # rows per test window minus one (inclusive span = 126)
# Window start rows: a new train/test pair every test.length+1 rows;
# each test window begins immediately after its train window ends.
train_num_set <- seq(1, nrow(df), by=test.length+1)
test_num_set <- seq(train.length+1, nrow(df), by=test.length+1)
train.list <- lapply(train_num_set, function(start) df[start:(start + train.length), ])
test.list <- lapply(test_num_set, function(start) df[start:(start + test.length), ])


####################################################################
# Run Optimizations over all training windows
# Select optimal sharpe ratio 
####################################################################
train.set.results <- data.frame()
# FIX: removed a stray `train.loop.length[i]` line that referenced the
# vector BEFORE it was created (errored on a clean top-to-bottom run),
# along with the unused `i=1` seed.
# Index vectors pairing every SMA lookback with every train window:
train.loop.length <- rep(1:length(test.list),each=length(sma.lookback)) # run through the dfs the same length as the number of smas
sma.loop.length <- rep(2:length(sma.lookback),length(train.list)) # run each sma over every df
# NOTE(review): train.loop.length repeats each window length(sma.lookback)
# (=169) times while sma.loop.length contributes only 168 values per window
# (2:169) — the two vectors drift out of alignment; likely intended
# rep(sma.lookback, length(train.list)). Confirm before relying on results.
train.xts <- list()
# Loop for running all smas through all train sets, note need to wrap train.loop.length and sma.loop.lenght in [[i]]
# Each iteration backtests ONE sma lookback on ONE train window:
# df.num picks the train window, sma.loop.length[[i]] picks the SMA column.
# tryCatch keeps the sweep running if one window/SMA combination errors.
for (i in 1:length(train.loop.length)) {
  tryCatch({
    df.num <- train.loop.length[[i]]
  # Enter buy / sell rules:
  # long VXX when ratio > 1 AND ratio above its SMA;
  # long XIV when ratio < 1 AND ratio below its SMA
  train.temp.df <- data.frame(Date=train.list[[df.num]]$Date)
  train.temp.df$vxx.signal <- ifelse(train.list[[df.num]]$vxv.vxmt.ratio > 1 & train.list[[df.num]]$vxv.vxmt.ratio > train.list[[df.num]][,paste0("ratio.sma.n", sma.loop.length[[i]])], 1,0)
  train.temp.df$xiv.signal <- ifelse(train.list[[df.num]]$vxv.vxmt.ratio < 1 & train.list[[df.num]]$vxv.vxmt.ratio < train.list[[df.num]][,paste0("ratio.sma.n", sma.loop.length[[i]])], 1,0)
  
  # lag signal by two forward days
  # CBOE data is available next day
  train.temp.df$vxx.signal <- lag(train.temp.df$vxx.signal,2) # Note k=1 implies a move *forward*
  train.temp.df$xiv.signal <- lag(train.temp.df$xiv.signal,2) # Note k=1 implies a move *forward*
  
  # NOTE(review): as.Date(0) coerces to numeric 0 when assigned into these
  # numeric signal columns, so this zeroes the NAs — plain 0 would be clearer.
  train.temp.df[is.na(train.temp.df)] <- as.Date(0)  # Set NA to 0
  
  # Calculate equity curves: signal (0/1) times the leg's daily return
  train.temp.df$vxx.signal.ret <-   train.temp.df$vxx.signal *  train.list[[df.num]]$vxx.close.ret
  train.temp.df$xiv.signal.ret <-   train.temp.df$xiv.signal *  train.list[[df.num]]$xiv.close.ret
  
  # Combine signals (legs are mutually exclusive by construction)
   train.temp.df$total.signal.ret <-  train.temp.df$vxx.signal.ret +  train.temp.df$xiv.signal.ret
  
  # Pull select columns from data frame to make XTS whilst retaining formats
  xts1 = xts(train.temp.df$vxx.signal.ret, order.by=as.Date(train.temp.df$Date, format="%y-%m-%d"))
  xts2 = xts(train.temp.df$xiv.signal.ret, order.by=as.Date(train.temp.df$Date, format="%y/%m/%d")) 
  xts3 = xts(train.temp.df$total.signal.ret, order.by=as.Date(train.temp.df$Date, format="%y/%m/%d")) 
  
  # Join XTS together
  compare <- cbind(xts1,xts2,xts3)
  
  # Use the PerformanceAnalytics package for trade statistics
  require(PerformanceAnalytics)
  colnames(compare) <- c("vxx","xiv","combined")
  #charts.PerformanceSummary(compare,main="Long when current month is higher than previous 12 month", wealth.index=TRUE, colorset=rainbow12equal)
  #performance.table <- rbind(table.AnnualizedReturns(compare),maxDrawdown(compare), CalmarRatio(compare),table.DownsideRisk(compare))
  #drawdown.table <- rbind(table.Drawdowns(xts3))
  #dev.off()  
  # logRets <- log(cumprod(1+compare))
  # chart.TimeSeries(logRets, legend.loc='topleft', colorset=rainbow12equal,main="Log Returns")
  
  #print(performance.table)
  #print(drawdown.table)
  # Summary statistics for the combined equity curve of this window/SMA pair
  cum.ret <- Return.cumulative(xts3, geometric = TRUE)
  annualized <- Return.annualized(xts3, scale = NA, geometric = TRUE)
  dd <- maxDrawdown(xts3)
  sharpe <- SharpeRatio.annualized(xts3, Rf = 0, scale = NA, geometric = TRUE)
  
  
  # Output data: one labeled row per window/SMA combination; ID stores the SMA lookback
  temp <- data.frame("Annualized Return" = annualized,"Annualized Sharpe" = sharpe,"Cumulative Return" = cum.ret,"Maximum Draw Down" =dd, ID=as.numeric(sma.loop.length[[i]]))
  rownames(temp) <- paste0("train.set.",train.loop.length[[i]],"_sma_",sma.loop.length[[i]])
  train.set.results <- rbind.data.frame(train.set.results,temp)
 compare <- data.frame(compare)
 compare <-  data.frame(date=index(compare), coredata(compare))
 train.xts[[i]] <- cbind.data.frame(compare)
  
  # Per-iteration progress report (the 0.1s sleep dominates the timing)
  ptm0 <- proc.time()
  Sys.sleep(0.1)  
  ptm1=proc.time() - ptm0
  time=as.numeric(ptm1[3])
  cat('\n','Iteration Train Set',train.loop.length[i],'sma',sma.loop.length[i],'took', time, "seconds to complete")
}, error = function(e) { print(paste("i =", i, "failed:")) })
}


# Seperate train set results from data frame output
train.set.results <- setDT(train.set.results, keep.rownames = TRUE)[] # Set row names
train.set.results$ID <- as.numeric(train.set.results$ID)
# Subset test set results
l <- nrow(train.set.results)
train.result.list <- list()
output_num <- seq(1, l, by=length(sma.lookback))
train.result.list<- lapply(output_num, function(i) train.set.results[c(i:(i+length(sma.lookback)-1)),])

# Extract highest performance metric from each data frame within list
# Sharpe Ratio in this case
highest.sharpe <- list()
i=1
# Sort each train window's results by annualized Sharpe (ascending), then
# take the last row's column 6 — the ID column holding the SMA lookback.
# NOTE(review): temp is a data.table here, so temp[,6] returns the 6th
# column; verify this holds for the installed data.table version.
for (i in 1:length(train.result.list)) {  # for every data frame inside results list
  temp <- train.result.list[[i]][order(train.result.list[[i]]$Annualized.Sharpe),]
  highest.sharpe[[i]] <- as.data.frame(tail(temp[,6],1))
}

# Save performance metric:
# flatten the per-window winners into a plain numeric vector of SMA lookbacks
sharpe.df <- do.call(cbind,highest.sharpe)
sharpe.vec <- paste(sharpe.df)
sharpe.vec <- as.numeric(sharpe.vec)
cat("Optimal In-Sample Train Set Parameters:SMA of VXV/VXMT Ratio - Optimal SMAs =",sharpe.vec)

###### Sharpe Optimization Complete ##########

As we can see, there is a large degree of movement in the optimal sma look back period:

cat("Optimal In-Sample Train Set Parameters:SMA of VXV/VXMT Ratio - Optimal SMAs =",sharpe.vec)
Optimal In-Sample Train Set Parameters:SMA of VXV/VXMT Ratio - Optimal SMAs = 35 108 139 138 135 65 63 63 145 2 18 23 21 32 4 13 24 66 136 2

For train set 1, optimal sma of 35, train set 2, optimal sma of 108, train set 3, optimal sma of 139 etc..

These smas produced the highest sharpe ratios during in-sample training. We then use these sma parameters for each out of sample test set. Data that the model has never seen before. The code to run optimized parameters over each test test period:

#################################################################
# Run Optimal Parameters over all test sets
# (FIX: header previously said "train sets" — these are the
# out-of-sample test windows)
#################################################################
test.set.results <- data.frame()
test.xts <- list()
test.loop.length <- seq_along(test.list)  # visit each test window exactly once (was rep(1:length(test.list),1))
optimal.sma <- sharpe.vec # per-window SMA lookback chosen by in-sample Sharpe
################### Manual Intervention ############################ 
# Remove NAs on the last (partial) test data frame inside the list
# NOTE(review): the magic -5 trims trailing NA rows from the final window
# and depends on the current data length — verify and adjust as needed.
#######################################################################
length.test.set <- length(test.list)
tail(test.list[[length.test.set]])
nrow.test.list <- NROW(test.list[[length.test.set]])
test.list[[length.test.set]] <- test.list[[length.test.set]][1:((nrow.test.list)-5),]
# Loop for running all smas through all train sets, note need to wrap train.loop.length and sma.loop.lenght in [[i]]
for (i in 1:length(test.loop.length )) {
  tryCatch({
    df.num <- test.loop.length[i]
    # Enter buy / sell rules
    test.temp.df <- data.frame(Date=test.list[[df.num]]$Date)
    test.temp.df$vxx.signal <- ifelse(test.list[[df.num]]$vxv.vxmt.ratio > 1 & test.list[[df.num]]$vxv.vxmt.ratio > test.list[[df.num]][,paste0("ratio.sma.n", optimal.sma[[i]])], 1,0)
    test.temp.df$xiv.signal <- ifelse(test.list[[df.num]]$vxv.vxmt.ratio < 1 & test.list[[df.num]]$vxv.vxmt.ratio < test.list[[df.num]][,paste0("ratio.sma.n", optimal.sma[[i]])], 1,0)
    
    # lag signal by two forward days
    # CBOE data is available next day
    test.temp.df$vxx.signal <- lag(test.temp.df$vxx.signal,2) # Note k=1 implies a move *forward*
    test.temp.df$xiv.signal <- lag(test.temp.df$xiv.signal,2) # Note k=1 implies a move *forward*
    
    test.temp.df[is.na(test.temp.df)] <- as.Date(0)  # Set NA to 0
    
    # Calculate equity curves
    test.temp.df$vxx.signal.ret <-   test.temp.df$vxx.signal *  test.list[[df.num]]$vxx.close.ret
    test.temp.df$xiv.signal.ret <-   test.temp.df$xiv.signal *  test.list[[df.num]]$xiv.close.ret
    
    # Combine signals 
    test.temp.df$total.signal.ret <-  test.temp.df$vxx.signal.ret +  test.temp.df$xiv.signal.ret
    
    # Pull select columns from data frame to make XTS whilst retaining formats
    xts1 = xts(test.temp.df$vxx.signal.ret, order.by=as.Date(test.temp.df$Date, format="%y-%m-%d"))
    xts2 = xts(test.temp.df$xiv.signal.ret, order.by=as.Date(test.temp.df$Date, format="%y/%m/%d")) 
    xts3 = xts(test.temp.df$total.signal.ret, order.by=as.Date(test.temp.df$Date, format="%y/%m/%d")) 
    
    # Join XTS together
    compare <- cbind(xts1,xts2,xts3)
    
    # Use the PerformanceAnalytics package for trade statistics
    require(PerformanceAnalytics)
    colnames(compare) <- c("vxx","xiv","combined")
    charts.PerformanceSummary(compare,main="Long when current month is higher than previous 12 month", wealth.index=TRUE, colorset=rainbow12equal)
    #performance.table <- rbind(table.AnnualizedReturns(compare),maxDrawdown(compare), CalmarRatio(compare),table.DownsideRisk(compare))
    #drawdown.table <- rbind(table.Drawdowns(xts3))
    #dev.off()  
    # logRets <- log(cumprod(1+compare))
    # chart.TimeSeries(logRets, legend.loc='topleft', colorset=rainbow12equal,main="Log Returns")
    
    #print(performance.table)
    #print(drawdown.table)
    cum.ret <- Return.cumulative(xts3, geometric = TRUE)
    annualized <- Return.annualized(xts3, scale = NA, geometric = TRUE)
    dd <- maxDrawdown(xts3)
    sharpe <- SharpeRatio.annualized(xts3, Rf = 0, scale = NA, geometric = TRUE)
    
    
    # Save test set results
    temp <- data.frame("Annualized Return" = annualized,"Annualized Sharpe" = sharpe,"Cumulative Return" = cum.ret,"Maximum Draw Down" =dd, ID=as.numeric(optimal.sma[[i]]))
    test.set.results <- rbind.data.frame(test.set.results)
    test.xts[[i]] <- cbind.data.frame(compare)
    
    ptm0 <- proc.time()
    Sys.sleep(0.1)  
    ptm1=proc.time() - ptm0
    time=as.numeric(ptm1[3])
    cat('\n','Iteration Test Set',test.loop.length[i],'optimal.sma',optimal.sma[i],'took', time, "seconds to complete")
  }, error = function(e) { print(paste("i =", i, "failed:")) })
}

# Extract the combined cumulative returns from every test window and
# stitch them into a single walk-forward xts series
cum.rets <- do.call(rbind, test.xts)
cum.rets <- setDT(cum.rets, keep.rownames = TRUE)[]  # row names (dates) -> first column
colnames(cum.rets)[1] <- "Date"
cum.rets[is.na(cum.rets)] <- 0
cum.rets$Date <- ymd(cum.rets$Date)
cum.rets.xts <- xts(cum.rets$combined, order.by = as.Date(cum.rets$Date))

# Summary statistics on the final walk-forward curve.
# (Bug fix: pass the xts series, not the raw data.table, to
# charts.PerformanceSummary — PerformanceAnalytics expects xts/zoo returns.)
charts.PerformanceSummary(cum.rets.xts, main="Long when current month is higher than previous 12 month", wealth.index = TRUE, colorset = rainbow12equal)
table.AnnualizedReturns(cum.rets.xts)
maxDrawdown(cum.rets.xts)
CalmarRatio(cum.rets.xts)
table.DownsideRisk(cum.rets.xts)
drawdown.table <- rbind(table.Drawdowns(cum.rets.xts))
drawdown.table

Here are some performance metrics for all test set periods:

 > table.AnnualizedReturns(cum.rets.xts)

Annualized Return         0.6797
Annualized Std Dev        0.4312
Annualized Sharpe (Rf=0%) 1.5765
> maxDrawdown(cum.rets.xts)
[1] 0.4312712
> CalmarRatio(cum.rets.xts)
                 [,1]
Calmar Ratio 1.576093
> table.DownsideRisk(cum.rets.xts)

Semi Deviation                 0.0192
Gain Deviation                 0.0235
Loss Deviation                 0.0266
Downside Deviation (MAR=210%)  0.0220
Downside Deviation (Rf=0%)     0.0183
Downside Deviation (0%)        0.0183
Maximum Drawdown               0.4313
Historical VaR (95%)          -0.0409
Historical ES (95%)           -0.0677
Modified VaR (95%)            -0.0385
Modified ES (95%)             -0.0621

The strategy reports an annualized return of 68% over all out-of-sample test periods, with an annualized Sharpe ratio of 1.57. Note that this is data from VXMT inception, 2008, to the present day.

Next we can validate our code by creating plots of the test set equity curve and plot the train and test set windows:

# Extract train and test dates for plotting
# Train Dates: one data frame of (Date, ID) per train window
train.dateslist <- list()
train.dates.loop.length <- rep(1:length(train.list), each = 1)
for (i in seq_along(train.dates.loop.length)) {
  temp <- data.frame("ID" = paste("train.id", train.dates.loop.length[[i]]))
  date <- train.list[[i]]$Date
  train.dateslist[[i]] <- cbind("Date" = date, ID = temp)
}  # bug fix: this closing brace was missing, so the train-dates loop
   # swallowed the test-dates loop below

# Test Dates: same structure for each test window
test.dateslist <- list()
test.dates.loop.length <- rep(1:length(test.list), each = 1)
for (i in seq_along(test.dates.loop.length)) {
  temp <- data.frame("ID" = paste("test.id", test.dates.loop.length[[i]]))
  date <- test.list[[i]]$Date
  test.dateslist[[i]] <- cbind("Date" = date, ID = temp)
}

# Extract start and end dates of each train / test segment
train.dates.start <- lapply(train.dateslist, function(x) x$Date[1])
train.dates.end <- lapply(train.dateslist, function(x) tail(x$Date, 1))
test.dates.start <- lapply(test.dateslist, function(x) x$Date[1])
test.dates.end <- lapply(test.dateslist, function(x) tail(x$Date, 1))

# Plot train and test starting / ending periods.
# One blue (train) and one orange (test) band per walk-forward segment.
# The bands are generated in loops instead of 36 hand-written geom_rect()
# calls; the rendered plot is unchanged. aes() is used (not fixed
# parameters) so the rectangle dates still train the x scale.
train.rects <- lapply(seq_along(train.dates.start), function(k) {
  geom_rect(aes(xmin = as.Date(train.dates.start[[k]]),
                xmax = as.Date(train.dates.end[[k]]),
                ymin = k, ymax = k + 1),
            fill = "#0072B2", alpha = .8)
})
test.rects <- lapply(seq_along(test.dates.start), function(k) {
  geom_rect(aes(xmin = as.Date(test.dates.start[[k]]),
                xmax = as.Date(test.dates.end[[k]]),
                ymin = k, ymax = k + 1),
            fill = "#D55E00", alpha = .8)
})

ggplot() +
  theme_bw() +
  # y axis: one tick per train / test sample period
  scale_y_continuous(expand = c(0, 0), breaks = round(seq(min(1), max(length(test.dateslist)), by = 1))) +
  theme(legend.position = "none") +
  theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)) +
  ggtitle("Train And Test Windows", subtitle = paste("Train Width =", train.length, "Days, Test Width =", test.length, "Days")) +
  train.rects +
  test.rects +
  labs(x = "Date", y = "Train / Test Set No.")
    
    
# Extract final equity curve from test set:
# per test window, the combined daily returns plus their dates
final.plot <- list()
for (i in seq_along(test.xts)) {
  to.sum <- data.frame("to.sum" = test.xts[[i]]$combined)
  dates <- test.list[[i]]$Date
  final.plot[[i]] <- data.frame("Date" = dates, to.sum)
}

# Make df of output
final.df <- do.call(rbind, final.plot)
# Compound the daily returns into a cumulative equity curve
final.df$equity <- cumprod(final.df$to.sum + 1) - 1
final.df <- final.df[c(1, 3)]
# Melt df for plotting with ggplot2
final.plot.df <- melt(data = final.df, id.vars = 'Date')
# Create an ID used to colour each test period.
# NOTE(review): `1:test.length+1` parses as (1:test.length)+1, i.e. 2..127,
# which exceeds the colour-scale limits c(1, 18) below — presumably
# `rep(seq_along(test.xts), each = test.length + 1)` was intended.
# Kept as-is pending confirmation.
final.plot.df$final_num <- as.numeric(rep(1:test.length+1, each=test.length+1, length.out=nrow(final.plot.df)))
# Subset plots to plot each test period a different colour
test.width <- 125
subset.plots <- lapply(row_nums, function(i) final.plot.df[c(i:(i + test.length)), ])

# Create colour scheme
myPalette <- colorRampPalette(rev(brewer.pal(11, "Paired")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits = c(1, 18))

# Plot out-of-sample test results equity curve. Layers are generated in a
# loop over subset.plots; this also fixes two defects in the original:
# subset.plots[[1]] was supplied to ggplot() without any geom (so it was
# never drawn), and subset.plots[[2]]'s layer omitted the colour aesthetic.
period.lines <- lapply(subset.plots, function(d) {
  geom_line(data = d, aes(x = Date, y = value, colour = final_num))
})

ggplot() +
  period.lines +
  sc +
  theme_bw() +
  theme(legend.position = "none") +
  scale_y_continuous(breaks = seq(1, 110, by = 5)) +
  ggtitle("Test Set Results", subtitle = paste("Test Set Lengths =", test.length, " rows")) +
  labs(x = "Date", y = "Cumulative Return") +
  theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5))

  
Rplot204
Train and Test Sets – Train = 252 trading days. Test = 126 days.

The plot above was created from the same data by subsetting the rows into train and test set periods and stitching them back together. The blue bands indicate in-sample training periods and the orange bands represent out-of-sample test periods. The final equity curve below is the result of all test set periods combined.

cumretrs
Cumulative Returns All Test Set Periods

Each color on the equity curve represents each test set period, 126 trading days long.

To conclude, the benefit of this type of cross validation is that it demonstrates robustness. Each in sample parameter optimization is subsequently used in the out of sample period. This is data that the model has never seen before. If an optimized parameter fell apart in out of sample testing, we know we have over fit in some way or another, or the edge itself has disappeared. Failing this test would be cause to abandon the strategy.

Thanks for reading!

Full code can be found on my github.