add hydra config.

remove click.
add launch script.
add test dir.
switch from the Fashion-MNIST dataset to the generic (original) MNIST dataset.
This commit is contained in:
Matt
2023-01-26 07:25:07 -08:00
parent 404e39206b
commit 1f13224c4f
11 changed files with 123 additions and 43 deletions

View File

@@ -2,21 +2,24 @@ import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from data import FashionDataset
from data import MnistDataset
from tqdm import tqdm
from utils import Stage
from omegaconf import DictConfig
class Batch:
def __init__(
self,
stage: Stage,
model: nn.Module, device,
model: nn.Module,
device,
loader: DataLoader,
optimizer: optim.Optimizer,
criterion: nn.Module,
config: DictConfig = None,
):
"""todo"""
self.config = config
self.stage = stage
self.device = device
self.model = model.to(device)
@@ -26,7 +29,11 @@ class Batch:
self.loss = 0
def run(self, desc):
self.model.train()
# set the model to train mode
if self.stage == Stage.TRAIN:
self.model.train()
if self.config.debug:
breakpoint()
epoch = 0
for epoch, (x, y) in enumerate(tqdm(self.loader, desc=desc)):
self.optimizer.zero_grad()
@@ -34,6 +41,7 @@ class Batch:
loss.backward() # Send loss backwards to accumulate gradients
self.optimizer.step() # Perform a gradient update on the weights of the model
self.loss += loss.item()
return self.loss
def _run_batch(self, sample):
true_x, true_y = sample
@@ -47,8 +55,8 @@ def main():
model = nn.Conv2d(1, 64, 3)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)
path = "fashion-mnist_train.csv"
dataset = FashionDataset(path)
path = "mnist_train.csv"
dataset = MnistDataset(path)
batch_size = 16
num_workers = 1
loader = torch.utils.data.DataLoader(

6
src/collate.py Normal file
View File

@@ -0,0 +1,6 @@
from einops import rearrange
def channel_to_batch(batch):
    """Return ``batch`` unchanged (placeholder collate function).

    TODO: implement the real collation. Presumably this is meant to fold the
    channel axis into the batch axis, as the name suggests -- confirm the
    intended behavior before wiring it in as a ``collate_fn``.
    """
    # Identity for now, so callers get back exactly the object they passed in.
    return batch

6
src/config/main.yaml Normal file
View File

@@ -0,0 +1,6 @@
app_dir: ${hydra:runtime.cwd}
debug: true
lr: 2e-4
batch_size: 16
num_workers: 0
device: "cpu"

View File

@@ -3,51 +3,69 @@ import numpy as np
import einops
import csv
import torch
from pathlib import Path
from typing import Tuple
class FashionDataset(Dataset):
def __init__(self, path: str):
class MnistDataset(Dataset):
"""
The MNIST database of handwritten digits.
The training set has 60k labeled examples; the test set has 10k examples.
The black-and-white digits are size-normalized to fit a 20x20 box (preserving aspect ratio) and centered in a 28x28 image.
It is the de facto standard image dataset for learning about classification in deep learning.
"""
def __init__(self, path: Path):
"""
Give the path to one of the MNIST CSV files (e.g. mnist_train.csv) available from:
https://pjreddie.com/projects/mnist-in-csv/
"""
self.path = path
self.x, self.y = self.load()
self.features, self.labels = self.load()
def __getitem__(self, idx):
return (self.x[idx], self.y[idx])
return (self.features[idx], self.labels[idx])
def __len__(self):
return len(self.x)
return len(self.features)
def load(self):
def load(self) -> Tuple[torch.Tensor, torch.Tensor]:
# opening the CSV file
with open(self.path, mode="r") as file:
images = list()
classes = list()
labels = list()
# reading the CSV file
csvFile = csv.reader(file)
# displaying the contents of the CSV file
header = next(csvFile)
# header = next(csvFile)
limit = 1000
for line in csvFile:
if limit < 1:
break
classes.append(int(line[:1][0]))
images.append([int(x) for x in line[1:]])
label = int(line[0])
labels.append(label)
image = [int(x) for x in line[1:]]
images.append(image)
limit -= 1
classes = torch.tensor(classes, dtype=torch.long)
labels = torch.tensor(labels, dtype=torch.long)
images = torch.tensor(images, dtype=torch.float32)
images = einops.rearrange(images, "n (w h) -> n w h", w=28, h=28)
images = einops.repeat(
images, "n w h -> n c (w r_w) (h r_h)", c=1, r_w=8, r_h=8
)
return (images, classes)
return (images, labels)
def main():
path = "fashion-mnist_train.csv"
dataset = FashionDataset(path=path)
path = "storage/mnist_train.csv"
dataset = MnistDataset(path=path)
print(f"len: {len(dataset)}")
print(f"first shape: {dataset[0][0].shape}")
mean = einops.reduce(dataset[:10], "n w h -> w h", "mean")
mean = einops.reduce(dataset[:10][0], "n w h -> w h", "mean")
print(f"mean shape: {mean.shape}")
print(f"mean image: {mean}")
if __name__ == "__main__":

View File

@@ -3,46 +3,54 @@ main class for building a DL pipeline.
"""
import click
from batch import Batch
from model.linear import DNN
from model.cnn import VGG16, VGG11
from data import FashionDataset
from data import MnistDataset
from utils import Stage
import torch
from pathlib import Path
from collate import channel_to_batch
import hydra
from omegaconf import DictConfig
@click.group()
def cli():
pass
@hydra.main(config_path="config", config_name="main")
def train(config: DictConfig):
if config.debug:
breakpoint()
lr = config.lr
batch_size = config.batch_size
num_workers = config.num_workers
device = config.device
@cli.command()
def train():
batch_size = 16
num_workers = 8
path = "fashion-mnist_train.csv"
trainset = FashionDataset(path=path)
path = Path(config.app_dir) / "storage/mnist_train.csv"
trainset = MnistDataset(path=path)
trainloader = torch.utils.data.DataLoader(
trainset, batch_size=batch_size, shuffle=False, num_workers=num_workers
trainset,
batch_size=batch_size,
shuffle=False,
num_workers=num_workers,
# collate_fn=channel_to_batch,
)
model = VGG11(in_channels=1, num_classes=10)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
batch = Batch(
stage=Stage.TRAIN,
model=model,
device=torch.device("cpu"),
device=torch.device(device),
loader=trainloader,
criterion=criterion,
optimizer=optimizer,
config=config,
)
batch.run(
log = batch.run(
"Run run run run. Run run run away. Oh Oh oH OHHHHHHH yayayayayayayayaya! - David Byrne"
)
if __name__ == "__main__":
cli()
train()