Blogs

Unsupervised Learning - Simple KMeans Clustering on Coins

This example uses each coin's mean daily return and the standard deviation of its daily returns over a period as features. When the code is run, K-Means clusters the coins into 3 groups.

Here's the code:

import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from scipy.stats import pearsonr

# Data Prep
# Fetch the 24h ticker list only to discover which symbols exist.
response = requests.get('https://api.binance.com/api/v3/ticker/24hr', timeout=30)
response.raise_for_status()  # fail loudly rather than iterate over an error payload
data = response.json()
symbols = pd.Series([ticker['symbol'] for ticker in data])
# Keep pairs *quoted* in USDT. A "contains" match would also accept pairs
# where USDT is the base asset (e.g. 'USDTTRY'), which are not coin prices.
selected_symbols = symbols[symbols.str.endswith('USDT')][:100]

# price:   symbol -> list of `limit` daily prices (floats)
# exclude: symbols skipped because a full `limit`-day history was not returned
price, limit, exclude = {}, 120, []
with requests.Session() as session:  # reuse one connection for all kline calls
    for symbol in selected_symbols:
        data = session.get(
            'https://api.binance.com/api/v3/klines',
            params={'symbol': symbol, 'interval': '1d', 'limit': limit},
            timeout=30,
        ).json()
        # A usable history is a list of exactly `limit` candles. API error
        # payloads (a dict) and shorter histories are excluded, not fatal.
        if isinstance(data, list) and len(data) == limit:
            # Element 4 of each candle is the close price per the Binance
            # kline documentation.
            price[symbol] = [float(candle[4]) for candle in data]
        else:
            exclude.append(symbol)

if not price:
    raise RuntimeError('no symbol returned a full price history; nothing to cluster')

# One column per symbol -> daily returns down the rows; the first row has no
# previous day and is dropped. Transposed so each row is one coin.
coins = pd.DataFrame(price).pct_change().dropna().T
# Two features per coin: mean daily return and std of daily returns.
features = pd.DataFrame([coins.mean(axis=1), coins.std(axis=1)]).T
X = features.values


# KMeans
# n_clusters=3 is based on the inertia test below. n_init and random_state
# are pinned so the clustering is reproducible and does not depend on the
# installed scikit-learn version's default for n_init.
model = KMeans(n_clusters=3, n_init=10, random_state=0)
labels = model.fit_predict(X)  # fit and assign each coin to a cluster in one step
mean, std = X[:, 0], X[:, 1]   # column 0: mean daily return, column 1: std
plt.scatter(mean, std, c=labels, alpha=0.5)
centroids = model.cluster_centers_
# Cluster centres drawn as red diamonds on top of the coins.
plt.scatter(centroids[:, 0], centroids[:, 1], marker='D', c='r', s=50)
plt.xlabel('mean daily return')
plt.ylabel('std of daily returns')
plt.show()


# Cluster Number - Inertia
# Fit one KMeans per candidate k and record its inertia (within-cluster sum
# of squared distances); the "elbow" of the curve suggests the cluster count.
# Throwaway estimators are used so the fitted 3-cluster `model` is not
# overwritten, and the seed is fixed so the curve is comparable across runs.
ks = range(1, 6)
inertias = [
    KMeans(n_clusters=k, n_init=10, random_state=0).fit(X).inertia_
    for k in ks
]
plt.plot(ks, inertias, '-o')
plt.xlabel('number of clusters, k')
plt.ylabel('inertia')
plt.xticks(ks)
plt.show()


# PCA
# Pearson correlation between the two input features (columns of X).
print('features corr', pearsonr(X[:, 0], X[:, 1])[0])
pca = PCA()
pca.fit(X)
# Bar positions are the principal-component indices. A separate name is used
# so the `features` DataFrame built during data prep is not overwritten.
components = range(pca.n_components_)
plt.bar(components, pca.explained_variance_)
plt.xticks(components)
plt.xlabel('features')
plt.ylabel('variances')
plt.show()