add cli. add pdf export. add solutions.

This commit is contained in:
matt 2023-07-28 08:05:36 -07:00
parent 621fc95755
commit 3b1b38cb13
15 changed files with 215 additions and 75 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
build/
*.swp
.venv/
__pycache__/

9
Makefile Normal file
View File

@ -0,0 +1,9 @@
# Interpreter taken from the project-local virtualenv in .venv/.
PYTHON=.venv/bin/python
# Problem set to export; forwarded to --problem_set and used in the file names below.
SET=neetcode150
.PHONY: build
# Generate the problems and solutions markdown for $(SET), then convert each
# markdown file to a PDF in dist/ with pandoc (xelatex engine).
# NOTE(review): pandoc reads from build/, so LEETCODE_BUILD_PATH presumably
# points at build/ -- confirm against the .env file.
build:
$(PYTHON) src build --problem_set $(SET) --out problems --out solutions
pandoc build/problems-$(SET).md --pdf-engine=xelatex -o dist/problems-$(SET).pdf
pandoc build/solutions-$(SET).md --pdf-engine=xelatex -o dist/solutions-$(SET).pdf

3
data/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*
*/
!.gitignore

3
dist/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*
*/
!.gitignore

View File

@ -1,2 +1,6 @@
python-leetcode==1.2.1 python-leetcode==1.2.1
tqdm tqdm
python-dotenv
click
markdownify
pandas
jinja2

1
src/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.env

12
src/__main__.py Normal file
View File

@ -0,0 +1,12 @@
import click
from dotenv import load_dotenv
import neet
@click.group()
def cli():
    """run the leetcode export commands"""


if __name__ == "__main__":
    # Sub-commands are attached only when this file is executed as a script.
    cli.add_command(neet.build)
    # Load variables from a .env file into the environment before the
    # selected command runs.
    load_dotenv()
    # Calling the group object delegates to its main(); invoke it directly.
    cli.main()

View File

@ -1,13 +1,10 @@
import argparse
import csv
import functools import functools
import itertools
import json
import logging import logging
import math import math
import os import os
import time import time
from typing import Callable, List, Tuple, Type from typing import Callable, List, Tuple, Type
from pathlib import Path
# https://github.com/prius/python-leetcode # https://github.com/prius/python-leetcode
import leetcode.api.default_api # type: ignore import leetcode.api.default_api # type: ignore
@ -21,6 +18,7 @@ import leetcode.models.graphql_question_detail # type: ignore
import urllib3 # type: ignore import urllib3 # type: ignore
from tqdm import tqdm # type: ignore from tqdm import tqdm # type: ignore
logging.getLogger().setLevel(logging.INFO) logging.getLogger().setLevel(logging.INFO)
@ -134,12 +132,12 @@ def _get_problems_data_page(
title title
titleSlug titleSlug
categoryTitle categoryTitle
frequency
isPaidOnly
topicTags { topicTags {
name name
slug slug
} }
difficulty
content
companyTagStats companyTagStats
} }
} }
@ -162,34 +160,17 @@ def _get_problems_data_page(
return data return data
def parse_args() -> argparse.Namespace:
"""
Parse command line arguments for the script
"""
parser = argparse.ArgumentParser(
description="Fetch leetcode problems and output them to a CSV file"
)
parser.add_argument(
"--batch-size",
type=int,
help="Fetch this many problems at once (set less if leetcode times out)",
default=300,
)
parser.add_argument(
"--output",
type=str,
help="Write output to file",
default="problems.csv",
)
args = parser.parse_args()
return args
def _get_problems_data( def get_problems(
page_size: int, page_size: int = 300,
) -> List[leetcode.models.graphql_question_detail.GraphqlQuestionDetail]: ) -> List[leetcode.models.graphql_question_detail.GraphqlQuestionDetail]:
import pickle
pickled = Path(os.getenv("LEETCODE_DATA_PATH")) / 'problems.pkl'
if pickled.exists():
with open(pickled, 'rb') as f:
problems = pickle.load(f)
return problems
problem_count = _get_problems_count() problem_count = _get_problems_count()
start = 0 start = 0
@ -207,48 +188,7 @@ def _get_problems_data(
data = _get_problems_data_page(start, page_size, page) data = _get_problems_data_page(start, page_size, page)
problems.extend(data) problems.extend(data)
with open(pickled, 'wb') as f:
out = [p.to_dict() for p in problems]
pickle.dump(out, f, pickle.HIGHEST_PROTOCOL)
return problems return problems
def main() -> None:
args = parse_args()
problems_data = _get_problems_data(args.batch_size)
csv_header = [
"Question id",
"title",
"slug",
"category",
"frequency",
"is_paid",
"topics",
"companies",
]
with open(args.output, "w") as csv_file:
csv_writer = csv.writer(csv_file, delimiter=";")
csv_writer.writerow(csv_header)
for problem_data in problems_data:
csv_writer.writerow(
[
problem_data.question_frontend_id,
problem_data.title,
problem_data.title_slug,
problem_data.category_title,
problem_data.frequency,
problem_data.is_paid_only,
",".join([d.slug for d in problem_data.topic_tags]),
",".join(
{
d["slug"]
for d in itertools.chain(
*json.loads(problem_data.company_tag_stats).values()
)
}
),
]
)
if __name__ == "__main__":
main()

115
src/neet.py Normal file
View File

@ -0,0 +1,115 @@
from download import get_problems
import pandas as pd
import json
from pathlib import Path
from markdownify import markdownify as md
import click
import re
import textwrap
import os
import template
def get_data(series):
    """Build the table of problems that belong to one problem set.

    Reads ``neetcode.json`` from the directory named by the
    ``LEETCODE_DATA_PATH`` environment variable, joins it with the records
    returned by ``get_problems()`` on the problem slug, and adds the derived
    columns the renderers use:

    * ``tags`` -- topic-tag slugs joined with ``" | "``
    * ``markdown`` -- the problem ``content`` converted from HTML to markdown
    * ``id`` -- ``question_frontend_id`` renamed
    * ``solution`` -- text of the first ``solutions/NNNN*.py`` file, or ``""``
    * ``index`` -- position of the row's ``pattern`` in the fixed pattern order

    NOTE(review): assumes ``neetcode.json`` supplies the ``link``,
    ``difficulty``, ``pattern`` and ``premium`` fields plus one boolean
    column per problem set -- confirm against the data file.

    Args:
        series: Name of the column that flags membership of the problem set.

    Returns:
        A ``pandas.DataFrame`` with the rows whose ``series`` column is true
        and whose ``premium`` column is not true.

    Raises:
        RuntimeError: If ``LEETCODE_DATA_PATH`` is not set.
    """
    data_path = os.getenv("LEETCODE_DATA_PATH")
    if not data_path:
        # Path(None) would fail with an unhelpful TypeError.
        raise RuntimeError("LEETCODE_DATA_PATH is not set")
    data_dir = Path(data_path)
    solutions_dir = data_dir / 'solutions'

    def as_markdown(content):
        """Convert problem HTML to markdown, wrapping "Explanation" lines at 80 columns."""
        # Missing content can surface as None or NaN; treat both as empty.
        html = content if isinstance(content, str) else ""
        text = md(html, strip=['strong'])
        for line in re.findall(r".*(Explanation.*)", text):
            text = text.replace(line, textwrap.fill(line, 80))
        return text

    def get_solution(problem_id):
        """Return the source of the solution file for ``problem_id``, or ``""``."""
        # Sort so the same file is picked on every run when several match.
        matches = sorted(solutions_dir.glob(f"{int(problem_id):04d}*.py"))
        if not matches:
            return ""
        return matches[0].read_text(encoding="utf-8")

    with open(data_dir / 'neetcode.json', 'r', encoding="utf-8") as f:
        neet = pd.DataFrame(json.load(f))

    # get_problems() returns plain dicts when it loads its pickle cache and
    # model objects after a fresh download; accept both.
    problems = pd.DataFrame(
        [p if isinstance(p, dict) else p.to_dict() for p in get_problems()]
    )

    neet['slug'] = neet.link.str.strip('/')
    # Drop the list's own difficulty so the merge keeps a single, unsuffixed
    # ``difficulty`` column from the downloaded problems.
    neet = neet.drop(columns=['difficulty'])
    problems = pd.merge(problems, neet, left_on="title_slug", right_on="slug")

    problems['tags'] = problems.topic_tags.apply(
        lambda tags: " | ".join(tag.get('slug') for tag in tags)
    )
    problems['markdown'] = problems.content.apply(as_markdown)
    problems.rename(columns={"question_frontend_id": "id"}, inplace=True)
    problems['solution'] = problems.id.apply(get_solution)

    # Order in which patterns appear in the rendered document.
    patterns = [
        "Arrays & Hashing",
        "Two Pointers",
        "Sliding Window",
        "Stack",
        "Binary Search",
        "Linked List",
        "Trees",
        "Tries",
        "Heap / Priority Queue",
        "Backtracking",
        "Graphs",
        "Advanced Graphs",
        "1-D Dynamic Programming",
        "2-D Dynamic Programming",
        "Greedy",
        "Intervals",
        "Math & Geometry",
        "Bit Manipulation",
    ]
    # reset_index() exposes each pattern's position as an ``index`` column,
    # which the renderers group by.
    pattern_order = pd.DataFrame(patterns, columns=['pattern']).reset_index()
    problems = pd.merge(problems, pattern_order, on='pattern')

    # Element-wise comparisons: ``== True`` / ``!= True`` are intentional.
    # NOTE(review): the CLI also offers "all"; this lookup raises KeyError
    # unless the merged data has a column with that name -- confirm.
    problems = problems[(problems[series] == True) & (problems.premium != True)]
    return problems
def save_solutions(data, save_to: Path, problem_set):
    """Render every problem's solution into one markdown file.

    The document starts with the ``main_title.md`` page, followed by one
    ``pattern_title.md`` page per (index, pattern, difficulty) group and one
    ``solution.md`` page per problem in that group.

    Args:
        data: DataFrame containing at least the columns ``title``, ``slug``,
            ``id``, ``solution``, ``pattern``, ``difficulty``, ``tags`` and
            ``index``.
        save_to: Path of the markdown file to write (overwritten if present).
        problem_set: Name shown on the title page.
    """
    keys = ['title', 'slug', 'id', 'solution', 'pattern', 'difficulty', 'tags', 'index']
    total = len(data)

    parts = [template.get('main_title.md').render({"problem_set": problem_set})]
    # Load each template once instead of once per group.
    section_title = template.get('pattern_title.md')
    page = template.get("solution.md")

    # Running position of the problem within the whole document.
    number = 0
    # groupby sorts by its keys, so sections follow the pattern ``index``.
    grouped = data[keys].groupby(['index', 'pattern', 'difficulty'])
    for (_, pattern, difficulty), group in grouped:
        parts.append(section_title.render({"pattern": pattern, "difficulty": difficulty}))
        for _, row in group.iterrows():
            number += 1
            context = row.to_dict()
            context["total"] = total
            context["number"] = number
            parts.append(page.render(context))

    # Explicit encoding: the text must not depend on the platform default.
    with open(save_to, "w", encoding="utf-8") as f:
        f.write("".join(parts))
def save_problems(problems, save_to: Path, problem_set):
    """Render every problem statement into one markdown file.

    The document starts with the ``main_title.md`` page, followed by one
    ``pattern_title.md`` page per (index, pattern, difficulty) group and one
    ``problem.md`` page per problem in that group.

    Args:
        problems: DataFrame containing at least the columns ``title``,
            ``slug``, ``id``, ``markdown``, ``pattern``, ``difficulty`` and
            ``index``.
        save_to: Path of the markdown file to write (overwritten if present).
        problem_set: Name shown on the title page.
    """
    keys = ['title', 'slug', 'id', 'markdown', 'pattern', 'difficulty', 'index']
    # Previously undefined in this function, which raised NameError on the
    # first rendered problem.
    total = len(problems)

    parts = [template.get('main_title.md').render({"problem_set": problem_set})]
    # Load each template once instead of once per group.
    section_title = template.get('pattern_title.md')
    # problem.md is the template with the {{markdown}} placeholder;
    # solution.md only renders {{solution}}, which is not selected here.
    page = template.get("problem.md")

    # Running position of the problem within the whole document.
    number = 0
    # groupby sorts by its keys, so sections follow the pattern ``index``.
    grouped = problems[keys].groupby(['index', 'pattern', 'difficulty'])
    for (_, pattern, difficulty), group in grouped:
        parts.append(section_title.render({"pattern": pattern, "difficulty": difficulty}))
        for _, row in group.iterrows():
            number += 1
            context = row.to_dict()
            context["total"] = total
            context["number"] = number
            parts.append(page.render(context))

    # Explicit encoding: the text must not depend on the platform default.
    with open(save_to, "w", encoding="utf-8") as f:
        f.write("".join(parts))
@click.command("build")
@click.option('-o', '--out', type=click.Choice(['problems', 'solutions']), multiple=True, default=['problems'])
@click.option('--problem_set', default="neetcode150", type=click.Choice(['neetcode150', 'blind75', 'all']))
def build(out, problem_set):
    """Write the selected markdown documents to LEETCODE_BUILD_PATH."""
    # Fail early with a readable message instead of Path(None)'s TypeError.
    build_path = os.getenv("LEETCODE_BUILD_PATH")
    if not build_path:
        raise click.ClickException("LEETCODE_BUILD_PATH is not set")
    build_dir = Path(build_path)
    # The output directory may not exist yet on a fresh checkout.
    build_dir.mkdir(parents=True, exist_ok=True)

    data = get_data(problem_set)

    # Fixed order: problems first, then solutions, each at most once no
    # matter how often --out was repeated.
    writers = (('problems', save_problems), ('solutions', save_solutions))
    for kind, save in writers:
        if kind not in out:
            continue
        save_to = build_dir / f'{kind}-{problem_set}.md'
        save(data, save_to, problem_set)
        # Report every file written, not only the last one.
        print(f"saved: {save_to.absolute()}")

11
src/template.py Normal file
View File

@ -0,0 +1,11 @@
import jinja2
from pathlib import Path
import os
def get(name):
    """Return the Jinja2 template called ``name``.

    Templates are looked up in the ``templates/`` directory under the path
    held by the ``LEETCODE_APP_PATH`` environment variable. A new Jinja2
    environment is created on every call.

    Args:
        name: File name of the template, e.g. ``"solution.md"``.

    Returns:
        The loaded ``jinja2.Template``.
    """
    search_dir = Path(os.getenv('LEETCODE_APP_PATH')) / 'templates/'
    environment = jinja2.Environment(
        loader=jinja2.FileSystemLoader(searchpath=search_dir)
    )
    return environment.get_template(name)

View File

@ -0,0 +1,9 @@
---
geometry: margin=2cm
output: pdf_document
---
# LeetCode - {{problem_set}}
\newpage

View File

@ -0,0 +1,5 @@
# {{pattern}} - {{difficulty}}
\newpage

11
src/templates/problem.md Normal file
View File

@ -0,0 +1,11 @@
## {{id}} - {{title}} ({{number}}/{{total}})
*[https://leetcode.com/problems/{{slug}}](https://leetcode.com/problems/{{slug}})*
---
{{markdown}}
\newpage

13
src/templates/solution.md Normal file
View File

@ -0,0 +1,13 @@
## {{id}} - {{title}} ({{number}}/{{total}})
*[https://leetcode.com/problems/{{slug}}](https://leetcode.com/problems/{{slug}})*
---
```
{{solution}}
```
\newpage

View File