add accelerate package. add generic dataset with random data.
parent 404e39206b
commit 355e83843f
.gitignore
@@ -1,2 +1,4 @@
 storage/
 __pycache__/
+*.swp
+*.tmp
src/data.py  (35 lines changed)
@@ -3,6 +3,28 @@ import numpy as np
 import einops
 import csv
 import torch
+import click


+SAMPLES = 500
+IN_DIM = 30
+OUT_DIM = 20
+
+
+class GenericDataset(Dataset):
+    def __init__(self):
+        rng = np.random.default_rng()
+        self.x = rng.normal(size=(SAMPLES, IN_DIM)).astype(np.float32)
+        self.y = 500 * rng.normal(size=(SAMPLES, OUT_DIM)).astype(np.float32)
+
+    def __getitem__(self, idx):
+        return (self.x[idx], self.y[idx])
+
+    def __len__(self):
+        return len(self.x)
+
+    def get_in_out_size(self):
+        return self.x.shape[1], self.y.shape[1]
+
+
 class FashionDataset(Dataset):

@@ -41,6 +63,12 @@ class FashionDataset(Dataset):
         return (images, classes)


+@click.group()
+def cli():
+    ...
+
+
+@cli.command()
 def main():
     path = "fashion-mnist_train.csv"
     dataset = FashionDataset(path=path)

@@ -50,5 +78,10 @@ def main():
     print(f"mean shape: {mean.shape}")


+@cli.command()
+def generic():
+    dataset = GenericDataset()
+
+
 if __name__ == "__main__":
-    main()
+    cli()
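For context, a short usage sketch of the new GenericDataset (illustrative only, not part of the commit; it assumes the module is importable as data, the same way the pipeline code imports it):

    from torch.utils.data import DataLoader
    from data import GenericDataset

    dataset = GenericDataset()
    in_dim, out_dim = dataset.get_in_out_size()   # (30, 20), from IN_DIM / OUT_DIM above
    loader = DataLoader(dataset, batch_size=10, shuffle=True)
    x, y = next(iter(loader))                     # x: (10, 30) float32, y: (10, 20) float32
    print(len(dataset), x.shape, y.shape)         # 500 torch.Size([10, 30]) torch.Size([10, 20])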
src/model/linear.py
@@ -2,9 +2,18 @@ from torch import nn


 class DNN(nn.Module):
-    def __init__(self, in_dim, out_dim):
-        super(DNN, self).__init__()
-        self.layer1 = nn.Linear(in_dim, out_dim)
+    def __init__(self, in_size, hidden_size, out_size):
+        super().__init__()
+
+        # Define the activation function and the linear functions
+        self.act = nn.ReLU()
+        self.in_linear = nn.Linear(in_size, hidden_size)
+        self.out_linear = nn.Linear(hidden_size, out_size)

     def forward(self, x):
-        return self.layer1(x)
+        # Send x through first linear layer and activation function
+        x = self.act(self.in_linear(x))
+
+        # Return x through the out linear function
+        return self.out_linear(x)
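A quick shape check for the refactored DNN (a sketch, not part of the commit; the sizes simply mirror IN_DIM / OUT_DIM from src/data.py and the hidden_size used later in the pipeline):

    import torch
    from model.linear import DNN

    model = DNN(in_size=30, hidden_size=128, out_size=20)
    out = model(torch.randn(10, 30))   # a batch of 10 samples
    print(out.shape)                   # torch.Size([10, 20])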
src/mpv.py  (158 lines changed, file deleted)
@@ -1,158 +0,0 @@
-# pytorch mlp for multiclass classification
-from numpy import vstack
-from numpy import argmax
-from pandas import read_csv
-from sklearn.preprocessing import LabelEncoder
-from sklearn.metrics import accuracy_score
-from torch import Tensor
-from torch.utils.data import Dataset
-from torch.utils.data import DataLoader
-from torch.utils.data import random_split
-from torch.nn import Linear
-from torch.nn import ReLU
-from torch.nn import Softmax
-from torch.nn import Module
-from torch.optim import SGD
-from torch.nn import CrossEntropyLoss
-from torch.nn.init import kaiming_uniform_
-from torch.nn.init import xavier_uniform_
-
-# dataset definition
-class CSVDataset(Dataset):
-    # load the dataset
-    def __init__(self, path):
-        # load the csv file as a dataframe
-        df = read_csv(path, header=None)
-        # store the inputs and outputs
-        self.X = df.values[:, :-1]
-        self.y = df.values[:, -1]
-        # ensure input data is floats
-        self.X = self.X.astype('float32')
-        # label encode target and ensure the values are floats
-        self.y = LabelEncoder().fit_transform(self.y)
-
-    # number of rows in the dataset
-    def __len__(self):
-        return len(self.X)
-
-    # get a row at an index
-    def __getitem__(self, idx):
-        return [self.X[idx], self.y[idx]]
-
-    # get indexes for train and test rows
-    def get_splits(self, n_test=0.33):
-        # determine sizes
-        test_size = round(n_test * len(self.X))
-        train_size = len(self.X) - test_size
-        # calculate the split
-        return random_split(self, [train_size, test_size])
-
-# model definition
-class MLP(Module):
-    # define model elements
-    def __init__(self, n_inputs):
-        super(MLP, self).__init__()
-        # input to first hidden layer
-        self.hidden1 = Linear(n_inputs, 10)
-        kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
-        self.act1 = ReLU()
-        # second hidden layer
-        self.hidden2 = Linear(10, 8)
-        kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
-        self.act2 = ReLU()
-        # third hidden layer and output
-        self.hidden3 = Linear(8, 3)
-        xavier_uniform_(self.hidden3.weight)
-        self.act3 = Softmax(dim=1)
-
-    # forward propagate input
-    def forward(self, X):
-        # input to first hidden layer
-        X = self.hidden1(X)
-        X = self.act1(X)
-        # second hidden layer
-        X = self.hidden2(X)
-        X = self.act2(X)
-        # output layer
-        X = self.hidden3(X)
-        X = self.act3(X)
-        return X
-
-# prepare the dataset
-def prepare_data(path):
-    # load the dataset
-    dataset = CSVDataset(path)
-    # calculate split
-    train, test = dataset.get_splits()
-    # prepare data loaders
-    train_dl = DataLoader(train, batch_size=32, shuffle=True)
-    test_dl = DataLoader(test, batch_size=1024, shuffle=False)
-    return train_dl, test_dl
-
-# train the model
-def train_model(train_dl, model):
-    # define the optimization
-    criterion = CrossEntropyLoss()
-    optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
-    # enumerate epochs
-    for epoch in range(500):
-        # enumerate mini batches
-        for i, (inputs, targets) in enumerate(train_dl):
-            # clear the gradients
-            optimizer.zero_grad()
-            # compute the model output
-            yhat = model(inputs)
-            # calculate loss
-            loss = criterion(yhat, targets)
-            # credit assignment
-            loss.backward()
-            # update model weights
-            optimizer.step()
-
-# evaluate the model
-def evaluate_model(test_dl, model):
-    predictions, actuals = list(), list()
-    for i, (inputs, targets) in enumerate(test_dl):
-        # evaluate the model on the test set
-        yhat = model(inputs)
-        # retrieve numpy array
-        yhat = yhat.detach().numpy()
-        actual = targets.numpy()
-        # convert to class labels
-        yhat = argmax(yhat, axis=1)
-        # reshape for stacking
-        actual = actual.reshape((len(actual), 1))
-        yhat = yhat.reshape((len(yhat), 1))
-        # store
-        predictions.append(yhat)
-        actuals.append(actual)
-    predictions, actuals = vstack(predictions), vstack(actuals)
-    # calculate accuracy
-    acc = accuracy_score(actuals, predictions)
-    return acc
-
-# make a class prediction for one row of data
-def predict(row, model):
-    # convert row to data
-    row = Tensor([row])
-    # make prediction
-    yhat = model(row)
-    # retrieve numpy array
-    yhat = yhat.detach().numpy()
-    return yhat
-
-# prepare the data
-path = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv'
-train_dl, test_dl = prepare_data(path)
-print(len(train_dl.dataset), len(test_dl.dataset))
-# define the network
-model = MLP(4)
-# train the model
-train_model(train_dl, model)
-# evaluate the model
-acc = evaluate_model(test_dl, model)
-print('Accuracy: %.3f' % acc)
-# make a single prediction
-row = [5.1,3.5,1.4,0.2]
-yhat = predict(row, model)
-print('Predicted: %s (class=%d)' % (yhat, argmax(yhat)))
pipeline entry point (file path not shown)
@@ -3,13 +3,13 @@ main class for building a DL pipeline.
 """

-import click
-from batch import Batch
+from accelerate import Accelerator
+from torch.utils.data import DataLoader
+from torch.optim import AdamW
+from data import GenericDataset
 from model.linear import DNN
-from model.cnn import VGG16, VGG11
-from data import FashionDataset
-from utils import Stage
-import torch
+from runner import Runner
+import click


 @click.group()

@@ -19,29 +19,42 @@ def cli():
 @cli.command()
 def train():
-    batch_size = 16
-    num_workers = 8
-
-    path = "fashion-mnist_train.csv"
-    trainset = FashionDataset(path=path)
-
-    trainloader = torch.utils.data.DataLoader(
-        trainset, batch_size=batch_size, shuffle=False, num_workers=num_workers
-    )
-    model = VGG11(in_channels=1, num_classes=10)
-    criterion = torch.nn.CrossEntropyLoss()
-    optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)
-    batch = Batch(
-        stage=Stage.TRAIN,
+    # Initialize hyperparameters
+    hidden_size = 128
+    epochs = 1000
+    batch_size = 10
+    lr = 0.001
+
+    # Accelerator is in charge of auto casting tensors to the appropriate GPU device
+    accelerator = Accelerator()
+
+    # Initialize the training set and a dataloader to iterate over the dataset
+    train_set = GenericDataset()
+    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
+
+    # Get the size of the input and output vectors from the training set
+    in_features, out_features = train_set.get_in_out_size()
+
+    # Create the model and optimizer and cast the model to the appropriate GPU
+    model = DNN(in_features, hidden_size, out_features).to(accelerator.device)
+    optimizer = AdamW(model.parameters(), lr=lr)
+
+    # Create a runner that will handle the training loop
+    runner = Runner(
+        train_set=train_set,
+        train_loader=train_loader,
+        accelerator=accelerator,
         model=model,
-        device=torch.device("cpu"),
-        loader=trainloader,
-        criterion=criterion,
         optimizer=optimizer,
     )
-    batch.run(
-        "Run run run run. Run run run away. Oh Oh oH OHHHHHHH yayayayayayayayaya! - David Byrne"
-    )
+
+    # Train the model
+    for _ in range(epochs):
+        # Run one loop of training and record the average loss
+        train_stats = runner.next()
+        print(f"{train_stats}")


 if __name__ == "__main__":
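For scale, a quick back-of-the-envelope on what this train command does per run (a sketch based on the constants above, assuming the dataset defaults are unchanged):

    SAMPLES, batch_size, epochs = 500, 10, 1000
    steps_per_epoch = SAMPLES // batch_size      # 50 batches per call to runner.next()
    total_steps = steps_per_epoch * epochs       # 50,000 optimizer updates in total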
src/runner.py  (new file)
@@ -0,0 +1,51 @@
+from torch import nn
+
+
+class Runner:
+    """Runner class that is in charge of implementing routine training functions such as running epochs or doing inference."""
+
+    def __init__(self, train_set, train_loader, accelerator, model, optimizer):
+        # Initialize class attributes
+        self.accelerator = accelerator
+        self.train_set = train_set
+
+        # Prepare opt, model, and train_loader (helps accelerator auto-cast to devices)
+        self.optimizer, self.model, self.train_loader = accelerator.prepare(
+            optimizer, model, train_loader
+        )
+
+        # Since the targets are continuous values, use Mean Squared Error Loss
+        self.criterion = nn.MSELoss()
+
+    def next(self):
+        """Runs an epoch of training.
+
+        Includes updating model weights and tracking training loss
+
+        Returns:
+            float: The loss averaged over the entire epoch
+        """
+        # Turn the model to training mode (affects batchnorm and dropout)
+        self.model.train()
+
+        running_loss = 0.0
+
+        # Make sure there are no leftover gradients before starting training an epoch
+        self.optimizer.zero_grad()
+
+        for sample, target in self.train_loader:
+            prediction = self.model(sample)            # Forward pass through the model
+            loss = self.criterion(prediction, target)  # Error calculation
+            running_loss += loss                       # Increment running loss
+            self.accelerator.backward(loss)            # Accumulate gradients by sending the loss backwards
+            self.optimizer.step()                      # Update model weights
+            self.optimizer.zero_grad()                 # Reset gradients to 0
+
+        # Take the average of the loss over each batch
+        avg_loss = running_loss / len(self.train_loader)
+        return avg_loss
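One detail worth flagging in next() (an observation about the code above, not a change made in this commit): running_loss accumulates the loss tensor itself, so the returned average is a zero-dim tensor that still references the autograd graph of every batch, while the docstring promises a float. A minimal variant of the loop that matches the docstring:

    for sample, target in self.train_loader:
        prediction = self.model(sample)
        loss = self.criterion(prediction, target)
        running_loss += loss.item()                 # accumulate a plain float; drops the graph reference
        self.accelerator.backward(loss)
        self.optimizer.step()
        self.optimizer.zero_grad()

    avg_loss = running_loss / len(self.train_loader)  # now a float, as the docstring states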