Blogs

Supervised ML - Simple KNN TA Strategy

Here's the full code, with no explanation :) The goal is to show a simple end-to-end example, not to build a great model.

import pandas_datareader.data as pdr
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier


# Download AAPL quotes for the study window from the 'yahoo' data source and
# keep only the 'Close' column; every indicator below is derived from `prices`.
# NOTE(review): availability of the 'yahoo' source depends on the installed
# pandas-datareader version -- confirm it still works in your environment.
aapl = pdr.DataReader(
    name='AAPL',
    data_source='yahoo',
    start='2018-07-12',
    end='2022-06-30',
)
prices = aapl['Close']

# SMA
# 14-row simple moving average of the close, turned into a binary signal:
#   1.0 -> close is at or above its SMA
#   0.0 -> close is below its SMA
#   NaN -> no comparison possible (SMA warm-up rows or a missing close)
sma = prices.rolling(14).mean()
has_both = prices.notna() & sma.notna()
at_or_above = (prices >= sma).astype(float)
# np.where returns a plain array, so the resulting Series carries a default
# positional index rather than the index of `prices`.
sma_signals = pd.Series(np.where(has_both, at_or_above, np.nan))

#RSI
# Relative Strength Index over a 14-row window, built from simple rolling
# means of the up moves and of the absolute down moves.
dly_movement_amount = prices - prices.shift(1)
# Split every close-to-close change into its gain part and its loss part.
# clip() leaves NaN untouched (the first row has no previous close), so the
# missing value propagates into the rolling means.
dly_up_movement = dly_movement_amount.clip(lower=0)
dly_down_movement = dly_movement_amount.clip(upper=0)
rolling_up_movemnt = dly_up_movement.rolling(14).mean()
rolling_down_movement_absolute = dly_down_movement.abs().rolling(14).mean()
# A window with gains but no losses divides by zero -> inf, which gives
# RSI == 100; a window with neither gains nor losses gives NaN.
relative_strenght = rolling_up_movemnt / rolling_down_movement_absolute
rsi = 100 - (100 / (1 + relative_strenght))

# Binary signal: 1.0 while RSI is below 70, 0.0 once it is at or above 70.
# Rows where RSI is undefined (warm-up window, 0/0) become NaN instead of a
# made-up 0, so they can be dropped together with the NaN rows of the other
# signals rather than being fed to the model as an "RSI >= 70" reading.
# np.where returns a plain array, so the Series gets a default positional index.
rsi_signals = pd.Series(
    np.where(rsi.isna(), np.nan, (rsi < 70).astype(float)))

#RETURNS
# Log returns and their de-meaned version. The classification target below
# does not use them; it only looks at the sign of the close-to-close change.
logret = np.log(prices / prices.shift(1)).rename('return')
avg_return = logret.mean()
detrended_logret = logret - avg_return

# Target per row:
#   1.0 -> close is higher than the previous close
#   0.0 -> close is lower than the previous close
#   NaN -> unchanged close, or no previous close (first row)
# Computed element-wise on the whole Series instead of `diff[i]`, which
# relied on integer keys being treated as positions on a non-integer index.
# np.where returns a plain array, so the Series gets a default positional index.
diff = prices - prices.shift(1)
targets = pd.Series(
    np.where(diff > 0, 1.0, np.where(diff < 0, 0.0, np.nan)))

#ML
#features and targets
# Combine the three series by position (their values are taken as plain
# arrays) and label the rows with the index of `prices`. A length mismatch
# raises immediately instead of producing a silently misaligned frame.
df = pd.DataFrame(
    {
        'rsi': np.asarray(rsi_signals),
        'sma': np.asarray(sma_signals),
        'target': np.asarray(targets),
    },
    index=prices.index,
    dtype=float,
)
# Remove every row in which a signal or the target is undefined.
df = df.dropna()
# NOTE(review): the 'sma' feature of a row compares that row's close with its
# moving average, and the target is the direction of the move into that same
# close, so features and label are contemporaneous. If the intent is to
# predict the NEXT move, shift the target by -1 before dropping NaN rows --
# confirm the intended setup.
#train-test split
# Ordered split without shuffling: first 70% of the rows train, the rest test.
split_idx = int(len(df) * 0.7)
df_train = df.iloc[:split_idx]
df_test = df.iloc[split_idx:]
feature_cols = ['rsi', 'sma']
X_train = df_train[feature_cols]
X_test = df_test[feature_cols]
# Targets are passed as 1-D Series; a single-column DataFrame makes
# scikit-learn warn about a column-vector y and reshape it internally.
y_train = df_train['target']
y_test = df_test['target']
#model
model = KNeighborsClassifier() #default parameters
model.fit(X_train, y_train)
preds = model.predict(X_test)
#metrics
print('Model Accuracy: ', model.score(X_test, y_test))

Result: 0.62 accuracy score. This failed attempt to predict stock movement is not a surprise, since the model is overly simple and the problem of predicting stock movements is too complex for it.