From 355e83843f404dcc38b1ebe27ebd0e5e5d6a3fc5 Mon Sep 17 00:00:00 2001
From: Matt
Date: Wed, 18 Jan 2023 11:26:48 -0800
Subject: [PATCH] Add accelerate package and a generic dataset with random data

---
 .gitignore          |   2 +
 src/data.py         |  35 +++++++++-
 src/model/linear.py |  17 +++--
 src/mpv.py          | 158 --------------------------------------------
 src/pipeline.py     |  61 ++++++++++-------
 src/runner.py       |  51 ++++++++++++++
 6 files changed, 137 insertions(+), 187 deletions(-)
 delete mode 100644 src/mpv.py
 create mode 100644 src/runner.py

diff --git a/.gitignore b/.gitignore
index fb58064..cffc7d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 storage/
 __pycache__/
+*.swp
+*.tmp
diff --git a/src/data.py b/src/data.py
index 8605f75..6bdc402 100644
--- a/src/data.py
+++ b/src/data.py
@@ -3,6 +3,28 @@ import numpy as np
 import einops
 import csv
 import torch
+import click
+
+
+SAMPLES = 500
+IN_DIM = 30
+OUT_DIM = 20
+
+
+class GenericDataset(Dataset):
+    def __init__(self):
+        rng = np.random.default_rng()
+        self.x = rng.normal(size=(SAMPLES, IN_DIM)).astype(np.float32)
+        self.y = 500 * rng.normal(size=(SAMPLES, OUT_DIM)).astype(np.float32)
+
+    def __getitem__(self, idx):
+        return (self.x[idx], self.y[idx])
+
+    def __len__(self):
+        return len(self.x)
+
+    def get_in_out_size(self):
+        return self.x.shape[1], self.y.shape[1]
 
 
 class FashionDataset(Dataset):
@@ -41,6 +63,12 @@ class FashionDataset(Dataset):
         return (images, classes)
 
 
+@click.group()
+def cli():
+    ...
+
+
+@cli.command()
 def main():
     path = "fashion-mnist_train.csv"
     dataset = FashionDataset(path=path)
@@ -50,5 +78,10 @@ def main():
     print(f"mean shape: {mean.shape}")
 
 
+@cli.command()
+def generic():
+    dataset = GenericDataset()
+
+
 if __name__ == "__main__":
-    main()
+    cli()
diff --git a/src/model/linear.py b/src/model/linear.py
index a82fa13..1f11d2d 100644
--- a/src/model/linear.py
+++ b/src/model/linear.py
@@ -2,9 +2,18 @@ from torch import nn
 
 
 class DNN(nn.Module):
-    def __init__(self, in_dim, out_dim):
-        super(DNN, self).__init__()
-        self.layer1 = nn.Linear(in_dim, out_dim)
+    def __init__(self, in_size, hidden_size, out_size):
+        super().__init__()
+
+        # Define the activation function and the two linear layers
+        self.act = nn.ReLU()
+        self.in_linear = nn.Linear(in_size, hidden_size)
+        self.out_linear = nn.Linear(hidden_size, out_size)
 
     def forward(self, x):
-        return self.layer1(x)
+
+        # Send x through the first linear layer and the activation function
+        x = self.act(self.in_linear(x))
+
+        # Map the hidden representation to the output size
+        return self.out_linear(x)
diff --git a/src/mpv.py b/src/mpv.py
deleted file mode 100644
index 9bc08c8..0000000
--- a/src/mpv.py
+++ /dev/null
@@ -1,158 +0,0 @@
-# pytorch mlp for multiclass classification
-from numpy import vstack
-from numpy import argmax
-from pandas import read_csv
-from sklearn.preprocessing import LabelEncoder
-from sklearn.metrics import accuracy_score
-from torch import Tensor
-from torch.utils.data import Dataset
-from torch.utils.data import DataLoader
-from torch.utils.data import random_split
-from torch.nn import Linear
-from torch.nn import ReLU
-from torch.nn import Softmax
-from torch.nn import Module
-from torch.optim import SGD
-from torch.nn import CrossEntropyLoss
-from torch.nn.init import kaiming_uniform_
-from torch.nn.init import xavier_uniform_
-
-# dataset definition
-class CSVDataset(Dataset):
-    # load the dataset
-    def __init__(self, path):
-        # load the csv file as a dataframe
-        df = read_csv(path, header=None)
-        # store the inputs and outputs
-        self.X = df.values[:, :-1]
-        self.y = df.values[:, -1]
-        # ensure input data is floats
-        self.X = self.X.astype('float32')
-        # label encode target and ensure the values are floats
-        self.y = LabelEncoder().fit_transform(self.y)
-
-    # number of rows in the dataset
-    def __len__(self):
-        return len(self.X)
-
-    # get a row at an index
-    def __getitem__(self, idx):
-        return [self.X[idx], self.y[idx]]
-
-    # get indexes for train and test rows
-    def get_splits(self, n_test=0.33):
-        # determine sizes
-        test_size = round(n_test * len(self.X))
-        train_size = len(self.X) - test_size
-        # calculate the split
-        return random_split(self, [train_size, test_size])
-
-# model definition
-class MLP(Module):
-    # define model elements
-    def __init__(self, n_inputs):
-        super(MLP, self).__init__()
-        # input to first hidden layer
-        self.hidden1 = Linear(n_inputs, 10)
-        kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
-        self.act1 = ReLU()
-        # second hidden layer
-        self.hidden2 = Linear(10, 8)
-        kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
-        self.act2 = ReLU()
-        # third hidden layer and output
-        self.hidden3 = Linear(8, 3)
-        xavier_uniform_(self.hidden3.weight)
-        self.act3 = Softmax(dim=1)
-
-    # forward propagate input
-    def forward(self, X):
-        # input to first hidden layer
-        X = self.hidden1(X)
-        X = self.act1(X)
-        # second hidden layer
-        X = self.hidden2(X)
-        X = self.act2(X)
-        # output layer
-        X = self.hidden3(X)
-        X = self.act3(X)
-        return X
-
-# prepare the dataset
-def prepare_data(path):
-    # load the dataset
-    dataset = CSVDataset(path)
-    # calculate split
-    train, test = dataset.get_splits()
-    # prepare data loaders
-    train_dl = DataLoader(train, batch_size=32, shuffle=True)
-    test_dl = DataLoader(test, batch_size=1024, shuffle=False)
-    return train_dl, test_dl
-
-# train the model
-def train_model(train_dl, model):
-    # define the optimization
-    criterion = CrossEntropyLoss()
-    optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
-    # enumerate epochs
-    for epoch in range(500):
-        # enumerate mini batches
-        for i, (inputs, targets) in enumerate(train_dl):
-            # clear the gradients
-            optimizer.zero_grad()
-            # compute the model output
-            yhat = model(inputs)
-            # calculate loss
-            loss = criterion(yhat, targets)
-            # credit assignment
-            loss.backward()
-            # update model weights
-            optimizer.step()
-
-# evaluate the model
-def evaluate_model(test_dl, model):
-    predictions, actuals = list(), list()
-    for i, (inputs, targets) in enumerate(test_dl):
-        # evaluate the model on the test set
-        yhat = model(inputs)
-        # retrieve numpy array
-        yhat = yhat.detach().numpy()
-        actual = targets.numpy()
-        # convert to class labels
-        yhat = argmax(yhat, axis=1)
-        # reshape for stacking
-        actual = actual.reshape((len(actual), 1))
-        yhat = yhat.reshape((len(yhat), 1))
-        # store
-        predictions.append(yhat)
-        actuals.append(actual)
-    predictions, actuals = vstack(predictions), vstack(actuals)
-    # calculate accuracy
-    acc = accuracy_score(actuals, predictions)
-    return acc
-
-# make a class prediction for one row of data
-def predict(row, model):
-    # convert row to data
-    row = Tensor([row])
-    # make prediction
-    yhat = model(row)
-    # retrieve numpy array
-    yhat = yhat.detach().numpy()
-    return yhat
-
-# prepare the data
-path = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv'
-train_dl, test_dl = prepare_data(path)
-print(len(train_dl.dataset), len(test_dl.dataset))
-# define the network
-model = MLP(4)
-# train the model
-train_model(train_dl, model)
-# evaluate the model
-acc = evaluate_model(test_dl, model)
-print('Accuracy: %.3f' % acc)
-# make a single prediction
-row = [5.1,3.5,1.4,0.2]
-yhat = predict(row, model)
-print('Predicted: %s (class=%d)' % (yhat, argmax(yhat)))
diff --git a/src/pipeline.py b/src/pipeline.py
index 9264895..05ae413 100644
--- a/src/pipeline.py
+++ b/src/pipeline.py
@@ -3,13 +3,13 @@
 main class for building a DL pipeline.
 """
 
-import click
-from batch import Batch
+from accelerate import Accelerator
+from torch.utils.data import DataLoader
+from torch.optim import AdamW
+from data import GenericDataset
 from model.linear import DNN
-from model.cnn import VGG16, VGG11
-from data import FashionDataset
-from utils import Stage
-import torch
+from runner import Runner
+import click
 
 
 @click.group()
@@ -19,29 +19,42 @@ def cli():
 
 @cli.command()
 def train():
-    batch_size = 16
-    num_workers = 8
-    path = "fashion-mnist_train.csv"
-    trainset = FashionDataset(path=path)
+    # Initialize hyperparameters
+    hidden_size = 128
+    epochs = 1000
+    batch_size = 10
+    lr = 0.001
 
-    trainloader = torch.utils.data.DataLoader(
-        trainset, batch_size=batch_size, shuffle=False, num_workers=num_workers
-    )
-    model = VGG11(in_channels=1, num_classes=10)
-    criterion = torch.nn.CrossEntropyLoss()
-    optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)
-    batch = Batch(
-        stage=Stage.TRAIN,
+    # The Accelerator moves tensors and models to the appropriate device automatically
+    accelerator = Accelerator()
+
+    # Initialize the training set and a dataloader to iterate over it
+    train_set = GenericDataset()
+    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
+
+    # Get the size of the input and output vectors from the training set
+    in_features, out_features = train_set.get_in_out_size()
+
+    # Create the model and optimizer and move the model to the appropriate device
+    model = DNN(in_features, hidden_size, out_features).to(accelerator.device)
+    optimizer = AdamW(model.parameters(), lr=lr)
+
+    # Create a runner that will handle the training loop
+    runner = Runner(
+        train_set=train_set,
+        train_loader=train_loader,
+        accelerator=accelerator,
         model=model,
-        device=torch.device("cpu"),
-        loader=trainloader,
-        criterion=criterion,
         optimizer=optimizer,
     )
-    batch.run(
-        "Run run run run. Run run run away. Oh Oh oH OHHHHHHH yayayayayayayayaya! - David Byrne"
-    )
+
+    # Train the model
+    for _ in range(epochs):
+
+        # Run one epoch of training and print the average loss
+        train_stats = runner.next()
+        print(train_stats)
 
 
 if __name__ == "__main__":
diff --git a/src/runner.py b/src/runner.py
new file mode 100644
index 0000000..088e583
--- /dev/null
+++ b/src/runner.py
@@ -0,0 +1,51 @@
+from torch import nn
+
+
+class Runner:
+    """Runner class in charge of routine training functions such as running epochs and doing inference."""
+
+    def __init__(self, train_set, train_loader, accelerator, model, optimizer):
+
+        # Initialize class attributes
+        self.accelerator = accelerator
+        self.train_set = train_set
+
+        # Prepare the optimizer, model, and train_loader so the accelerator can place them on the right device
+        self.optimizer, self.model, self.train_loader = accelerator.prepare(
+            optimizer, model, train_loader
+        )
+
+        # Since the targets are continuous values, use mean squared error loss
+        self.criterion = nn.MSELoss()
+
+    def next(self):
+        """Runs an epoch of training.
+
+        Includes updating model weights and tracking the training loss.
+
+        Returns:
+            float: The loss averaged over the entire epoch
+        """
+
+        # Put the model in training mode (affects batch norm and dropout)
+        self.model.train()
+
+        running_loss = 0.0
+
+        # Make sure there are no leftover gradients before starting the epoch
+        self.optimizer.zero_grad()
+
+        for sample, target in self.train_loader:
+
+            prediction = self.model(sample)  # Forward pass through the model
+            loss = self.criterion(prediction, target)  # Error calculation
+            running_loss += loss.item()  # Accumulate the batch loss as a Python float
+            self.accelerator.backward(
+                loss
+            )  # Backpropagate the loss to accumulate gradients in the model
+            self.optimizer.step()  # Update model weights
+            self.optimizer.zero_grad()  # Reset gradients to 0
+
+        # Average the loss over the number of batches in the epoch
+        avg_loss = running_loss / len(self.train_loader)
+        return avg_loss
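
Note (not part of the patch): a minimal smoke test showing how the pieces added
here compose. The snippet below is illustrative only; it assumes it is run from
src/ with torch and accelerate installed, and the batch size, learning rate,
and epoch count are arbitrary choices, not values fixed by the patch.

    from accelerate import Accelerator
    from torch.optim import AdamW
    from torch.utils.data import DataLoader

    from data import GenericDataset
    from model.linear import DNN
    from runner import Runner

    # Build the synthetic dataset and a loader over it
    train_set = GenericDataset()
    train_loader = DataLoader(train_set, batch_size=10, shuffle=True)

    # Size the model from the dataset and place it on the right device
    accelerator = Accelerator()
    in_features, out_features = train_set.get_in_out_size()
    model = DNN(in_features, 128, out_features).to(accelerator.device)
    optimizer = AdamW(model.parameters(), lr=0.001)

    # The Runner wraps one epoch of training per call to next()
    runner = Runner(
        train_set=train_set,
        train_loader=train_loader,
        accelerator=accelerator,
        model=model,
        optimizer=optimizer,
    )

    # Each call runs one epoch and returns the average training loss
    for _ in range(3):
        print(runner.next())

Since GenericDataset's targets are pure noise scaled by 500, the average loss
should settle near the target variance rather than approach zero; the point of
the smoke test is only that an epoch runs end to end on any available device.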