Skip to content
English
  • There are no suggestions because the search field is empty.

Modeling the Iris Dataset

Let's walk through using the FSP model on one of the most basic datasets used for machine learning: the Iris dataset.

Step 1: Read in the data

First, load the Iris data from the scikit-learn (sklearn) package and prepare it for the FSP software.

from sklearn import datasets
from sklearn.model_selection import train_test_split

# Fetch the Iris dataset that ships with scikit-learn
data_file = datasets.load_iris()

# Separate the feature matrix (X) from the outcome variable (Y)
X, Y = data_file.data, data_file.target

# Hold out 20% of the observations for testing; the fixed random_state
# keeps the split reproducible from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=451,
)

Step 2: Construct and Configure an Encoder

Create an instance of the encoder and configure it with the training data.

from encoder.encoder import Encoder

# The feature count is the number of columns in the training matrix
num_features = X_train.shape[1]

# Instantiate the encoder, giving every feature the "N" field type
my_encoder = Encoder(
    set_bits=8,
    sparsity=0.25,
    field_types=["N" for _ in range(num_features)],
)

# Configure the encoder from the training split (no label column is passed)
my_encoder.config_encoder(input_data=X_train, label_col=None)

Step 3: Create and Train The Model

Define the model's hyperparameters based on your understanding of the data, then call the model.fit() method to train the model.

from fsp.fsp import FSP
# Settings handed to the FSP constructor as keyword arguments.
# NOTE(review): the meaning of each setting is defined by the FSP library,
# not by this script — consult the FSP documentation before tuning them.
hyperparameters = {
    "encoder": my_encoder,
    "winner_func": "k_winners",
    "winner_pct": 0.20,
    "ensemble_size": 8,
    "max_neurons": 50,
    "pos_syn_updates": (-1, 3),
    "neg_syn_updates": (0, -1),
    "post_ensemble_epochs": 12,
    "seed_isdrs_per_class": 1,
    "random_state": 123,
    "loss_func": "one_neuron",
    "predict_func": "majority_vote",
    "high_dis_penalty": 0,
    "low_rep_penalty": 0,
}

# Build the model from the settings above
model = FSP(**hyperparameters)

# Train the model on the training split
model.fit(X_train, y_train)

Step 4: Evaluate The Model

Provide the model with the train and test split data to gather predictions. After predicting, compare the predicted labels to the ground-truth labels.

Train

# Gather the model's predictions for the training split
train_predictions = model.predict(X_train)

# Tally the hits and report every miss; labels are compared as strings so a
# prediction and a ground-truth label match whenever their text forms agree
num_correct = 0
for idx, pred in enumerate(train_predictions):
    if str(pred) != str(y_train[idx]):
        print(f"Missed on observation {idx:3d}  GT: {repr(str(y_train[idx])):3s} Pred: {repr(str(pred))}")
        continue
    num_correct += 1

# Summarize the train accuracy as a count and a percentage
print(f"Got {num_correct:3d}/{len(train_predictions):3d} correct = {100*(num_correct / len(train_predictions)):0.4f}%")
Missed on observation  34  GT: '2' Pred: '1'
Missed on observation  40  GT: '2' Pred: '1'
Missed on observation  53  GT: '1' Pred: '2'
Missed on observation  68  GT: '1' Pred: '2'
Missed on observation  69  GT: '2' Pred: '1'
Got 115/120 correct = 95.8333%

Test

# Gather the model's predictions for the held-out test split
test_predictions = model.predict(X_test)

# Tally the hits and report every miss; labels are compared as strings so a
# prediction and a ground-truth label match whenever their text forms agree
num_correct = 0
for idx, pred in enumerate(test_predictions):
    if str(pred) != str(y_test[idx]):
        print(f"Missed on observation {idx:3d}  GT: {repr(str(y_test[idx])):3s} Pred: {repr(str(pred))}")
        continue
    num_correct += 1

# Summarize the test accuracy as a count and a percentage
print(f"Got {num_correct:3d}/{len(test_predictions):3d} correct = {100*(num_correct / len(test_predictions)):0.4f}%")
Got  30/ 30 correct = 100.0000%