add accelerate package. add generic dataset with random data.

Matt committed 2023-01-18 11:26:48 -08:00
parent 404e39206b
commit 355e83843f
6 changed files with 137 additions and 187 deletions

.gitignore vendored

@@ -1,2 +1,4 @@
 storage/
 __pycache__/
+*.swp
+*.tmp

src/data.py

@@ -3,6 +3,28 @@ import numpy as np
 import einops
 import csv
 import torch
+import click
+
+SAMPLES = 500
+IN_DIM = 30
+OUT_DIM = 20
+
+
+class GenericDataset(Dataset):
+    def __init__(self):
+        rng = np.random.default_rng()
+        self.x = rng.normal(size=(SAMPLES, IN_DIM)).astype(np.float32)
+        self.y = 500 * rng.normal(size=(SAMPLES, OUT_DIM)).astype(np.float32)
+
+    def __getitem__(self, idx):
+        return (self.x[idx], self.y[idx])
+
+    def __len__(self):
+        return len(self.x)
+
+    def get_in_out_size(self):
+        return self.x.shape[1], self.y.shape[1]
+
+
 class FashionDataset(Dataset):
@@ -41,6 +63,12 @@ class FashionDataset(Dataset):
         return (images, classes)
 
+
+@click.group()
+def cli():
+    ...
+
+@cli.command()
 def main():
     path = "fashion-mnist_train.csv"
     dataset = FashionDataset(path=path)
@@ -50,5 +78,10 @@ def main():
     print(f"mean shape: {mean.shape}")
 
+
+@cli.command()
+def generic():
+    dataset = GenericDataset()
+
 if __name__ == "__main__":
-    main()
+    cli()
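As a quick sanity check of the new random dataset, something like the following should work (a sketch; it assumes this module is src/data.py, as the `from data import GenericDataset` lines elsewhere in the commit suggest):

    from torch.utils.data import DataLoader
    from data import GenericDataset

    dataset = GenericDataset()
    print(len(dataset))  # 500 random samples
    loader = DataLoader(dataset, batch_size=10, shuffle=True)
    x, y = next(iter(loader))
    print(x.shape, y.shape)  # torch.Size([10, 30]) torch.Size([10, 20])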

src/model/linear.py

@@ -2,9 +2,18 @@ from torch import nn
 class DNN(nn.Module):
-    def __init__(self, in_dim, out_dim):
-        super(DNN, self).__init__()
-        self.layer1 = nn.Linear(in_dim, out_dim)
+    def __init__(self, in_size, hidden_size, out_size):
+        super().__init__()
+        # Define the activation function and the linear layers
+        self.act = nn.ReLU()
+        self.in_linear = nn.Linear(in_size, hidden_size)
+        self.out_linear = nn.Linear(hidden_size, out_size)
 
     def forward(self, x):
-        return self.layer1(x)
+        # Send x through the first linear layer and the activation function
+        x = self.act(self.in_linear(x))
+        # Return the output of the final linear layer
+        return self.out_linear(x)
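A minimal forward-pass check for the reworked DNN (a sketch; the sizes here just mirror the GenericDataset defaults, nothing in this file requires them):

    import torch
    from model.linear import DNN

    model = DNN(in_size=30, hidden_size=128, out_size=20)
    out = model(torch.randn(4, 30))  # a batch of 4 random input vectors
    print(out.shape)  # torch.Size([4, 20])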


@@ -1,158 +0,0 @@
-# pytorch mlp for multiclass classification
-from numpy import vstack
-from numpy import argmax
-from pandas import read_csv
-from sklearn.preprocessing import LabelEncoder
-from sklearn.metrics import accuracy_score
-from torch import Tensor
-from torch.utils.data import Dataset
-from torch.utils.data import DataLoader
-from torch.utils.data import random_split
-from torch.nn import Linear
-from torch.nn import ReLU
-from torch.nn import Softmax
-from torch.nn import Module
-from torch.optim import SGD
-from torch.nn import CrossEntropyLoss
-from torch.nn.init import kaiming_uniform_
-from torch.nn.init import xavier_uniform_
-
-# dataset definition
-class CSVDataset(Dataset):
-    # load the dataset
-    def __init__(self, path):
-        # load the csv file as a dataframe
-        df = read_csv(path, header=None)
-        # store the inputs and outputs
-        self.X = df.values[:, :-1]
-        self.y = df.values[:, -1]
-        # ensure input data is floats
-        self.X = self.X.astype('float32')
-        # label encode target and ensure the values are floats
-        self.y = LabelEncoder().fit_transform(self.y)
-
-    # number of rows in the dataset
-    def __len__(self):
-        return len(self.X)
-
-    # get a row at an index
-    def __getitem__(self, idx):
-        return [self.X[idx], self.y[idx]]
-
-    # get indexes for train and test rows
-    def get_splits(self, n_test=0.33):
-        # determine sizes
-        test_size = round(n_test * len(self.X))
-        train_size = len(self.X) - test_size
-        # calculate the split
-        return random_split(self, [train_size, test_size])
-
-# model definition
-class MLP(Module):
-    # define model elements
-    def __init__(self, n_inputs):
-        super(MLP, self).__init__()
-        # input to first hidden layer
-        self.hidden1 = Linear(n_inputs, 10)
-        kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
-        self.act1 = ReLU()
-        # second hidden layer
-        self.hidden2 = Linear(10, 8)
-        kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
-        self.act2 = ReLU()
-        # third hidden layer and output
-        self.hidden3 = Linear(8, 3)
-        xavier_uniform_(self.hidden3.weight)
-        self.act3 = Softmax(dim=1)
-
-    # forward propagate input
-    def forward(self, X):
-        # input to first hidden layer
-        X = self.hidden1(X)
-        X = self.act1(X)
-        # second hidden layer
-        X = self.hidden2(X)
-        X = self.act2(X)
-        # output layer
-        X = self.hidden3(X)
-        X = self.act3(X)
-        return X
-
-# prepare the dataset
-def prepare_data(path):
-    # load the dataset
-    dataset = CSVDataset(path)
-    # calculate split
-    train, test = dataset.get_splits()
-    # prepare data loaders
-    train_dl = DataLoader(train, batch_size=32, shuffle=True)
-    test_dl = DataLoader(test, batch_size=1024, shuffle=False)
-    return train_dl, test_dl
-
-# train the model
-def train_model(train_dl, model):
-    # define the optimization
-    criterion = CrossEntropyLoss()
-    optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
-    # enumerate epochs
-    for epoch in range(500):
-        # enumerate mini batches
-        for i, (inputs, targets) in enumerate(train_dl):
-            # clear the gradients
-            optimizer.zero_grad()
-            # compute the model output
-            yhat = model(inputs)
-            # calculate loss
-            loss = criterion(yhat, targets)
-            # credit assignment
-            loss.backward()
-            # update model weights
-            optimizer.step()
-
-# evaluate the model
-def evaluate_model(test_dl, model):
-    predictions, actuals = list(), list()
-    for i, (inputs, targets) in enumerate(test_dl):
-        # evaluate the model on the test set
-        yhat = model(inputs)
-        # retrieve numpy array
-        yhat = yhat.detach().numpy()
-        actual = targets.numpy()
-        # convert to class labels
-        yhat = argmax(yhat, axis=1)
-        # reshape for stacking
-        actual = actual.reshape((len(actual), 1))
-        yhat = yhat.reshape((len(yhat), 1))
-        # store
-        predictions.append(yhat)
-        actuals.append(actual)
-    predictions, actuals = vstack(predictions), vstack(actuals)
-    # calculate accuracy
-    acc = accuracy_score(actuals, predictions)
-    return acc
-
-# make a class prediction for one row of data
-def predict(row, model):
-    # convert row to data
-    row = Tensor([row])
-    # make prediction
-    yhat = model(row)
-    # retrieve numpy array
-    yhat = yhat.detach().numpy()
-    return yhat
-
-# prepare the data
-path = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv'
-train_dl, test_dl = prepare_data(path)
-print(len(train_dl.dataset), len(test_dl.dataset))
-# define the network
-model = MLP(4)
-# train the model
-train_model(train_dl, model)
-# evaluate the model
-acc = evaluate_model(test_dl, model)
-print('Accuracy: %.3f' % acc)
-# make a single prediction
-row = [5.1,3.5,1.4,0.2]
-yhat = predict(row, model)
-print('Predicted: %s (class=%d)' % (yhat, argmax(yhat)))

src/main.py

@@ -3,13 +3,13 @@ main class for building a DL pipeline.
 """
 import click
-from batch import Batch
+from accelerate import Accelerator
+from torch.utils.data import DataLoader
+from torch.optim import AdamW
+from data import GenericDataset
+from model.linear import DNN
-from model.cnn import VGG16, VGG11
-from data import FashionDataset
-from utils import Stage
-import torch
+from runner import Runner
-import click
 @click.group()
@@ -19,29 +19,42 @@ def cli():
 @cli.command()
 def train():
-    batch_size = 16
-    num_workers = 8
-    path = "fashion-mnist_train.csv"
-    trainset = FashionDataset(path=path)
+    # Initialize hyperparameters
+    hidden_size = 128
+    epochs = 1000
+    batch_size = 10
+    lr = 0.001
 
-    trainloader = torch.utils.data.DataLoader(
-        trainset, batch_size=batch_size, shuffle=False, num_workers=num_workers
-    )
-    model = VGG11(in_channels=1, num_classes=10)
-    criterion = torch.nn.CrossEntropyLoss()
-    optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)
-    batch = Batch(
-        stage=Stage.TRAIN,
+    # The Accelerator is in charge of automatically casting tensors to the appropriate GPU device
+    accelerator = Accelerator()
+
+    # Initialize the training set and a dataloader to iterate over the dataset
+    train_set = GenericDataset()
+    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
+
+    # Get the sizes of the input and output vectors from the training set
+    in_features, out_features = train_set.get_in_out_size()
+
+    # Create the model and optimizer and cast the model to the appropriate GPU
+    model = DNN(in_features, hidden_size, out_features).to(accelerator.device)
+    optimizer = AdamW(model.parameters(), lr=lr)
+
+    # Create a runner that will handle the training epochs
+    runner = Runner(
+        train_set=train_set,
+        train_loader=train_loader,
+        accelerator=accelerator,
         model=model,
-        device=torch.device("cpu"),
-        loader=trainloader,
-        criterion=criterion,
         optimizer=optimizer,
     )
-    batch.run(
-        "Run run run run. Run run run away. Oh Oh oH OHHHHHHH yayayayayayayayaya! - David Byrne"
-    )
+
+    # Train the model
+    for _ in range(epochs):
+        # Run one epoch of training and record the average loss
+        train_stats = runner.next()
+        print(f"{train_stats}")
 
 if __name__ == "__main__":
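With the click group wired up, training would presumably be launched with something like `python src/main.py train` (the src/main.py path is an assumption based on the repo layout, not something this diff shows).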

src/runner.py Normal file

@@ -0,0 +1,51 @@
+from torch import nn
+
+
+class Runner:
+    """Runner class that is in charge of routine training functions such as running epochs or doing inference"""
+
+    def __init__(self, train_set, train_loader, accelerator, model, optimizer):
+        # Initialize class attributes
+        self.accelerator = accelerator
+        self.train_set = train_set
+
+        # Prepare opt, model, and train_loader (helps accelerator auto-cast to devices)
+        self.optimizer, self.model, self.train_loader = accelerator.prepare(
+            optimizer, model, train_loader
+        )
+
+        # Since the targets are continuous values, use Mean Squared Error Loss
+        self.criterion = nn.MSELoss()
+
+    def next(self):
+        """Runs an epoch of training.
+
+        Includes updating model weights and tracking training loss
+
+        Returns:
+            float: The loss averaged over the entire epoch
+        """
+        # Turn the model to training mode (affects batchnorm and dropout)
+        self.model.train()
+
+        running_loss = 0.0
+
+        # Make sure there are no leftover gradients before starting training an epoch
+        self.optimizer.zero_grad()
+
+        for sample, target in self.train_loader:
+            prediction = self.model(sample)  # Forward pass through model
+            loss = self.criterion(prediction, target)  # Error calculation
+            running_loss += loss.item()  # Increment running loss; item() avoids retaining the autograd graph
+            self.accelerator.backward(loss)  # Increment gradients within the model by sending loss backwards
+            self.optimizer.step()  # Update model weights
+            self.optimizer.zero_grad()  # Reset gradients to 0
+
+        # Take the average of the loss over each batch
+        avg_loss = running_loss / len(self.train_loader)
+
+        return avg_loss
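For a standalone smoke test of the Runner (a sketch; it assumes the data, model.linear, and runner modules from this commit are importable):

    from accelerate import Accelerator
    from torch.optim import AdamW
    from torch.utils.data import DataLoader
    from data import GenericDataset
    from model.linear import DNN
    from runner import Runner

    dataset = GenericDataset()
    in_features, out_features = dataset.get_in_out_size()
    model = DNN(in_features, 32, out_features)
    runner = Runner(
        train_set=dataset,
        train_loader=DataLoader(dataset, batch_size=10, shuffle=True),
        accelerator=Accelerator(),
        model=model,
        optimizer=AdamW(model.parameters(), lr=1e-3),
    )
    print(runner.next())  # average MSE loss over one epoch

accelerator.prepare() is what lets this same loop run on CPU or GPU without explicit .to(device) calls on each batch.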