Let's walk through using the FSP model on one of the most basic datasets used for machine learning: the Iris dataset.
Step 1: Read in the data
First, load the Iris data from the Sklearn module and prepare it for the FSP software.
from sklearn import datasets
from sklearn.model_selection import train_test_split
# Load the dataset
data_file = datasets.load_iris()
# Extract the feature data and outcome variable from the source dataset
X = data_file.data
Y = data_file.target
# Create train and test splits of the data
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=451)
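As an optional sanity check, you can confirm the shapes of the resulting splits before moving on. This only inspects the NumPy arrays returned by train_test_split.
# Optional sanity check: 150 Iris observations split 120/30 with test_size=0.20
print(f"Train: {X_train.shape}, Test: {X_test.shape}")
# Expected output: Train: (120, 4), Test: (30, 4)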
Step 2: Construct and Configure an Encoder
Create an instance of the encoder and configure it with the training data.
from encoder.encoder import Encoder
# Determine the number of features from the data shape
num_features = X_train.shape[1]
# Build the encoder
my_encoder = Encoder(
    set_bits=8,
    sparsity=0.25,
    field_types=["N"] * num_features,
)
# Configure the encoder according to the training split
my_encoder.config_encoder(input_data=X_train, label_col=None)
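If you plan to encode several datasets the same way, the two calls above can be wrapped in a small helper. The sketch below is illustrative: the helper name and defaults are not part of the library, and it only reuses the Encoder constructor and config_encoder() call shown above, assuming every column is numeric.
def build_numeric_encoder(train_data, set_bits=8, sparsity=0.25):
    # Hypothetical convenience wrapper around the steps shown above;
    # assumes every column is numeric ("N"), as with the Iris features.
    enc = Encoder(
        set_bits=set_bits,
        sparsity=sparsity,
        field_types=["N"] * train_data.shape[1],
    )
    enc.config_encoder(input_data=train_data, label_col=None)
    return enc

# Equivalent to the explicit construction above:
# my_encoder = build_numeric_encoder(X_train)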
Step 3: Create and Train The Model
Define the model's hyperparameters based on your understanding of the data, then train the model with model.fit().
from fsp.fsp import FSP
hyperparameters = {
    "encoder": my_encoder,
    "winner_func": "k_winners",
    "winner_pct": 0.20,
    "ensemble_size": 8,
    "max_neurons": 50,
    "pos_syn_updates": (-1, 3),
    "neg_syn_updates": (0, -1),
    "post_ensemble_epochs": 12,
    "seed_isdrs_per_class": 1,
    "random_state": 123,
    "loss_func": "one_neuron",
    "predict_func": "majority_vote",
    "high_dis_penalty": 0,
    "low_rep_penalty": 0,
}
model = FSP(**hyperparameters)
# Fit the model to the training split
model.fit(X_train, y_train)
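For reproducibility, it can help to record the hyperparameters alongside your results. The sketch below uses only the standard library to write everything except the encoder object (which is not JSON-serializable) to disk; the filename is arbitrary.
import json

# Save the scalar hyperparameters for later reference; the encoder object
# itself is skipped because it cannot be serialized to JSON directly.
serializable = {k: v for k, v in hyperparameters.items() if k != "encoder"}
with open("fsp_iris_hyperparameters.json", "w") as f:
    json.dump(serializable, f, indent=2)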
Step 4: Evaluate The Model
Provide the model with the train and test splits to gather predictions, then compare the predicted labels to the ground-truth labels.
Train
# Predict on the training split
train_predictions = model.predict(X_train)
# Assess the training accuracy
num_correct = 0
for idx, pred in enumerate(train_predictions):
    if str(pred) == str(y_train[idx]):
        num_correct += 1
    else:
        print(f"Missed on observation {idx:3d} GT: {repr(str(y_train[idx])):3s} Pred: {repr(str(pred))}")
print(f"Got {num_correct:3d}/{len(train_predictions):3d} correct = {100*(num_correct / len(train_predictions)):0.4f}%")
Missed on observation 34 GT: '2' Pred: '1'
Missed on observation 40 GT: '2' Pred: '1'
Missed on observation 53 GT: '1' Pred: '2'
Missed on observation 68 GT: '1' Pred: '2'
Missed on observation 69 GT: '2' Pred: '1'
Got 115/120 correct = 95.8333%
Test
# Predict on the test split
test_predictions = model.predict(X_test)
# Assess the test accuracy
num_correct = 0
for idx, pred in enumerate(test_predictions):
    if str(pred) == str(y_test[idx]):
        num_correct += 1
    else:
        print(f"Missed on observation {idx:3d} GT: {repr(str(y_test[idx])):3s} Pred: {repr(str(pred))}")
print(f"Got {num_correct:3d}/{len(test_predictions):3d} correct = {100*(num_correct / len(test_predictions)):0.4f}%")
Got 30/ 30 correct = 100.0000%
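The manual loops above make each miss visible; if you prefer standard metrics, scikit-learn's accuracy_score and confusion_matrix report the same numbers. This sketch assumes model.predict() returns labels that match y_test once both sides are cast to strings, as in the loops above.
from sklearn.metrics import accuracy_score, confusion_matrix

# Cast both sides to strings so the comparison matches the manual loop above
y_true = [str(label) for label in y_test]
y_pred = [str(pred) for pred in test_predictions]

print(f"Test accuracy: {100 * accuracy_score(y_true, y_pred):0.4f}%")
print(confusion_matrix(y_true, y_pred))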