fix project_slug error.

Replace hard-coded `ml_pipeline` references with the cookiecutter `project_name` or `module_name` template variables.
This commit is contained in:
publicmatt 2024-04-06 15:48:29 -07:00
parent 727f16df57
commit 6eed08d1ba
43 changed files with 81 additions and 95 deletions

View File

@ -4,5 +4,8 @@
"module_name": "{{ cookiecutter.repo_name }}", "module_name": "{{ cookiecutter.repo_name }}",
"author_name": "Your name (or your organization/company/team)", "author_name": "Your name (or your organization/company/team)",
"description": "A short description of the project.", "description": "A short description of the project.",
"open_source_license": ["MIT", "BSD-3-Clause", "No license file"] "open_source_license": ["MIT", "BSD-3-Clause", "No license file"],
"_copy_without_render": [
"docs/book"
]
} }

View File

@ -0,0 +1,7 @@
MODEL__IN_CHANNELS=1
MODEL__NUM_CLASSES=10
DATA__TRAIN_PATH=/path/to/{{cookiecutter.project_name}}/data/mnist_train.csv
DATA__TEST_PATH=/path/to/{{cookiecutter.project_name}}/data/mnist_test.csv
PATHS__APP=/path/to/{{cookiecutter.project_name}}/{{cookiecutter.module_name}}/
PATHS__ROOT=/path/to/{{cookiecutter.project_name}}/
PATHS__DATA=/path/to/{{cookiecutter.project_name}}/data/

View File

@ -1,30 +1,36 @@
APP_NAME=ml_pipeline APP_NAME={{cookiecutter.module_name}}
PYTHON=.venv/bin/python3 PYTHON=.venv/bin/python3
INTERPRETER=/usr/bin/python3 INTERPRETER=/usr/bin/python3
.PHONY: help test .PHONY: help test
all: run all: help
init: ## create a venv init: ## create a venv
$(INTERPRETER) -m venv .venv $(INTERPRETER) -m venv .venv
cp .env.example .env
run: ## run the pipeline (train)
$(PYTHON) -m $(APP_NAME) pipeline:train
data: ## download the mnist data
$(PYTHON) -m $(APP_NAME) data:download
# wget https://pjreddie.com/media/files/mnist_train.csv -O data/mnist_train.csv
# wget https://pjreddie.com/media/files/mnist_test.csv -O data/mnist_test.csv
test:
find . -iname "*.py" | entr -c pytest
serve:
$(PYTHON) -m $(APP_NAME) app:serve
install: install:
$(PYTHON) -m pip install -r requirements.txt $(PYTHON) -m pip install -r requirements.txt
data: ## download the mnist data
$(PYTHON) -m $(APP_NAME) data:download
run: ## run the pipeline (train)
$(PYTHON) -m $(APP_NAME) pipeline:train
serve: ## start fastapi uvicorn server
$(PYTHON) -m $(APP_NAME) app:serve
docs: ## serve the mdbook docs directory
mdbook serve docs
test: ## run pytest tests
$(PYTHON) -m pytest
test-watch: ## run pytest on .py changes
find . -iname "*.py" | entr -c $(PYTHON) -m pytest
help: ## display this help message help: ## display this help message
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' @echo "available commands:"
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36mmake %-30s\033[0m %s\n", $$1, $$2}'

View File

@ -3,4 +3,4 @@ authors = ["publicmatt"]
language = "en" language = "en"
multilingual = false multilingual = false
src = "src" src = "src"
title = "ml_pipeline" title = "{{cookiecutter.project_name}}"

View File

@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
[project] [project]
name = "ml_pipeline" name = "{{cookiecutter.project_name}}"
version = "0.1.0" version = "0.1.0"
authors = [ authors = [
{name = "publicmatt", email = "git@publicmatt.com"}, {name = "publicmatt", email = "git@publicmatt.com"},
@ -18,7 +18,6 @@ dependencies = [
"numpy==1.26.4", "numpy==1.26.4",
"pytest==8.1.1", "pytest==8.1.1",
"pytest-cov==5.0.0", "pytest-cov==5.0.0",
"python-dotenv==1.0.1",
"requests==2.31.0", "requests==2.31.0",
"torch==2.2.2", "torch==2.2.2",
"torchvision==0.17.2", "torchvision==0.17.2",
@ -37,11 +36,11 @@ repository = "https://example.com/my_project/repo"
documentation = "https://example.com/my_project/docs" documentation = "https://example.com/my_project/docs"
[tool.setuptools] [tool.setuptools]
packages = ["ml_pipeline"] packages = ["{{cookiecutter.module_name}}"]
[tool.pytest.ini_options] [tool.pytest.ini_options]
# Run tests in parallel using pytest-xdist # Run tests in parallel using pytest-xdist
addopts = "--cov=ml_pipeline --cov-report=term" addopts = "--cov={{cookiecutter.module_name}} --cov-report=term"
# Specify the paths to look for tests # Specify the paths to look for tests
testpaths = [ testpaths = [
"test", "test",

View File

@ -0,0 +1,6 @@
from {{cookiecutter.module_name}} import config
from {{cookiecutter.module_name}}.model.cnn import VGG11
def test_in_channels():
    """Assert that the loaded configuration names 'vgg11' as the model."""
    # NOTE(review): the test name refers to in_channels, yet only
    # config.model.name is checked here -- confirm which was intended.
    selected_model = config.model.name
    assert selected_model == 'vgg11'

View File

@ -1,5 +1,5 @@
from ml_pipeline.data.dataset import MnistDataset from {{cookiecutter.module_name}}.data.dataset import MnistDataset
from ml_pipeline import config from {{cookiecutter.module_name}} import config
from pathlib import Path from pathlib import Path
import pytest import pytest

View File

@ -5,7 +5,7 @@ pwd = Path(__file__).parent
config_path = pwd / 'config' config_path = pwd / 'config'
root_path = pwd.parent root_path = pwd.parent
config = ConfigurationSet( config = ConfigurationSet(
config_from_env(prefix="ML_PIPELINE", separator="__", lowercase_keys=True), config_from_env(prefix="{{cookiecutter.module_name.upper()}}", separator="__", lowercase_keys=True),
config_from_dotenv(root_path / ".env", read_from_file=True, lowercase_keys=True, interpolate=True, interpolate_type=1), config_from_dotenv(root_path / ".env", read_from_file=True, lowercase_keys=True, interpolate=True, interpolate_type=1),
config_from_toml(config_path / "training.toml", read_from_file=True), config_from_toml(config_path / "training.toml", read_from_file=True),
config_from_toml(config_path / "data.toml", read_from_file=True), config_from_toml(config_path / "data.toml", read_from_file=True),

View File

@ -0,0 +1,5 @@
from {{cookiecutter.module_name}}.cli import cli
# Dispatch to the command-line interface only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    cli()

View File

@ -0,0 +1,11 @@
from {{cookiecutter.module_name}} import config
from fastapi import FastAPI, Response
import logging
import uvicorn
# Module-level FastAPI application object; run() refers to it through the
# import string ending in ".app:app".
app = FastAPI()
# Logger named after this module.
logger = logging.getLogger(__name__)
def run():
    """Launch uvicorn against this package's ``app`` object.

    The bind host and port are read from ``config.app``, and
    ``proxy_headers=True`` is passed through to uvicorn.
    """
    bind_host = config.app.host
    bind_port = config.app.port
    uvicorn.run(
        "{{cookiecutter.module_name}}.app:app",
        host=bind_host,
        port=bind_port,
        proxy_headers=True,
    )

View File

@ -4,38 +4,38 @@ import click
@click.version_option() @click.version_option()
def cli(): def cli():
""" """
ml_pipeline: a template for building, training and running pytorch models. build, train and run machine learning models.
""" """
@cli.command("pipeline:train") @cli.command("pipeline:train")
def pipeline_train(): def pipeline_train():
"""run the training pipeline with train data""" """run the training pipeline with train data"""
from ml_pipeline.training import pipeline from {{cookiecutter.module_name}}.training import pipeline
pipeline.run(evaluate=False) pipeline.run(evaluate=False)
@cli.command("pipeline:evaluate") @cli.command("pipeline:evaluate")
def pipeline_evaluate(): def pipeline_evaluate():
"""run the training pipeline with test data""" """run the training pipeline with test data"""
from ml_pipeline.training import pipeline from {{cookiecutter.module_name}}.training import pipeline
pipeline.run(evaluate=True) pipeline.run(evaluate=True)
@cli.command("app:serve") @cli.command("app:serve")
def app_serve(): def app_serve():
"""run the api server pipeline with pretrained model""" """run the api server pipeline with pretrained model"""
from ml_pipeline import app from {{cookiecutter.module_name}} import app
app.run() app.run()
@cli.command("data:download") @cli.command("data:download")
def data_download(): def data_download():
"""download the train and test data""" """download the train and test data"""
from ml_pipeline import data from {{cookiecutter.module_name}} import data
from ml_pipeline import config from {{cookiecutter.module_name}} import config
from pathlib import Path from pathlib import Path
data.download(Path(config.paths.data)) data.download(Path(config.paths.data))
@cli.command("data:debug") @cli.command("data:debug")
def data_debug(): def data_debug():
"""debug the dataset class""" """debug the dataset class"""
from ml_pipeline.data import dataset from {{cookiecutter.module_name}}.data import dataset
dataset.debug() dataset.debug()

View File

@ -1,4 +1,4 @@
[paths] [paths]
repo = "/path/to/root" repo = "/path/to/root"
app = "/path/to/root/ml_pipeline" app = "/path/to/root/{{cookiecutter.module_name}}"
data = "/path/to/root/data" data = "/path/to/root/data"

View File

@ -1,7 +1,7 @@
from pathlib import Path from pathlib import Path
import requests import requests
import logging import logging
from ml_pipeline import config from {{cookiecutter.module_name}} import config
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@ -5,7 +5,7 @@ import csv
import torch import torch
from pathlib import Path from pathlib import Path
from typing import Tuple from typing import Tuple
from ml_pipeline import config, logger from {{cookiecutter.module_name}} import config, logger
class MnistDataset(Dataset): class MnistDataset(Dataset):

View File

@ -8,7 +8,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Now you can import your package\n", "# Now you can import your package\n",
"import ml_pipeline" "import {{cookiecutter.module_name}}"
] ]
}, },
{ {
@ -18,7 +18,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from ml_pipeline.data.dataset import MnistDataset" "from {{cookiecutter.module_name}}.data.dataset import MnistDataset"
] ]
}, },
{ {
@ -28,38 +28,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from ml_pipeline import config" "from {{cookiecutter.module_name}} import config"
] ]
}, },
{
"cell_type": "code",
"execution_count": 5,
"id": "c8ce7920-c056-44ac-93df-b25bae870592",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<ConfigurationSet: 0x7fcf70fc1a50>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"config"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "83293ef7-37b3-452f-8de5-13bee633d099",
"metadata": {},
"outputs": [],
"source": []
}
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {

View File

@ -1,8 +1,8 @@
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from torch.optim import AdamW from torch.optim import AdamW
from ml_pipeline.training.runner import Runner from {{cookiecutter.module_name}}.training.runner import Runner
from ml_pipeline import config, logger from {{cookiecutter.module_name}} import config, logger
def run(evaluate=False): def run(evaluate=False):
@ -30,8 +30,8 @@ def run(evaluate=False):
logger.info(f"{step}") logger.info(f"{step}")
def get_model(name='vgg11'): def get_model(name='vgg11'):
from ml_pipeline.model.linear import DNN from {{cookiecutter.module_name}}.model.linear import DNN
from ml_pipeline.model.cnn import VGG11 from {{cookiecutter.module_name}}.model.cnn import VGG11
if name == 'vgg11': if name == 'vgg11':
return VGG11(config.data.in_channels, config.data.num_classes) return VGG11(config.data.in_channels, config.data.num_classes)
else: else:
@ -43,7 +43,7 @@ def get_model(name='vgg11'):
def get_dataset(evaluate=False): def get_dataset(evaluate=False):
# Usage # Usage
from ml_pipeline.data.dataset import MnistDataset from {{cookiecutter.module_name}}.data.dataset import MnistDataset
from torchvision import transforms from torchvision import transforms
csv_file_path = config.data.train_path if not evaluate else config.data.test_path csv_file_path = config.data.train_path if not evaluate else config.data.test_path
transform = transforms.Compose([ transform = transforms.Compose([

View File

@ -1,6 +0,0 @@
from ml_pipeline import config
from ml_pipeline.model.cnn import VGG11
def test_in_channels():
    """Assert that the loaded configuration names 'vgg11' as the model."""
    # NOTE(review): the test name refers to in_channels, yet only
    # config.model.name is checked here -- confirm which was intended.
    selected_model = config.model.name
    assert selected_model == 'vgg11'

View File

@ -1,5 +0,0 @@
from ml_pipeline.cli import cli
# Dispatch to the command-line interface only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    cli()

View File

@ -1,11 +0,0 @@
from ml_pipeline import config
from fastapi import FastAPI, Response
import logging
import uvicorn
# Module-level FastAPI application object; run() refers to it through the
# import string "ml_pipeline.app:app".
app = FastAPI()
# Logger named after this module.
logger = logging.getLogger(__name__)
def run():
    """Launch uvicorn against this package's ``app`` object.

    The bind host and port are read from ``config.app``, and
    ``proxy_headers=True`` is passed through to uvicorn.
    """
    bind_host = config.app.host
    bind_port = config.app.port
    uvicorn.run(
        "ml_pipeline.app:app",
        host=bind_host,
        port=bind_port,
        proxy_headers=True,
    )