add cli. add pdf export. add solutions.

2023-07-28 08:05:36 -07:00 · 2023-07-28 08:05:36 -07:00 · 3b1b38cb13
parent 621fc95755
commit 3b1b38cb13
15 changed files with 215 additions and 75 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,4 @@
 build/
 *.swp
 .venv/
 __pycache__/
--- a/9
+++ b/9
@ -0,0 +1,9 @@
 PYTHON=.venv/bin/python
 SET=neetcode150
 .PHONY: build
 build:
 	$(PYTHON) src build --problem_set $(SET) --out problems --out solutions
 	pandoc build/problems-$(SET).md --pdf-engine=xelatex -o dist/problems-$(SET).pdf
 	pandoc build/solutions-$(SET).md --pdf-engine=xelatex -o dist/solutions-$(SET).pdf
--- a/data/.gitignore
+++ b/data/.gitignore
@ -0,0 +1,3 @@
 *
 */
 !.gitignore
--- a/dist/.gitignore
+++ b/dist/.gitignore
@ -0,0 +1,3 @@
 *
 */
 !.gitignore
--- a/requirements.txt
+++ b/requirements.txt
@ -1,2 +1,6 @@
 python-leetcode==1.2.1
 tqdm
 python-dotenv
 click
 markdownify
 pandas
--- a/src/.gitignore
+++ b/src/.gitignore
@ -0,0 +1 @@
 .env
--- a/src/main.py
+++ b/src/main.py
@ -0,0 +1,12 @@
 import click
 from dotenv import load_dotenv
 import neet
# Root click command group; sub-commands are attached in the __main__ guard below.
# NOTE: click shows the function docstring as the group's --help text.
@click.group()
 def cli():
    """run the leetcode export commands"""
 if __name__ == "__main__":
    # Called before any command runs; presumably this loads variables from a
    # local .env file into the environment (confirm against python-dotenv).
    load_dotenv()
    # Register the `build` command defined in the neet module on the group.
    cli.add_command(neet.build)
    cli()
--- a/src/download.py
+++ b/src/download.py
@ -1,13 +1,10 @@
 import argparse
 import csv
 import functools
 import itertools
 import json
 import logging
 import math
 import os
 import time
 from typing import Callable, List, Tuple, Type
 from pathlib import Path
 # https://github.com/prius/python-leetcode
 import leetcode.api.default_api  # type: ignore
@ -21,6 +18,7 @@ import leetcode.models.graphql_question_detail  # type: ignore
 import urllib3  # type: ignore
 from tqdm import tqdm  # type: ignore
 logging.getLogger().setLevel(logging.INFO)
@ -134,12 +132,12 @@ def _get_problems_data_page(
                title
                titleSlug
                categoryTitle
                frequency
                isPaidOnly
                topicTags {
                  name
                  slug
                }
                difficulty
                content
                companyTagStats
            }
          }
@ -162,34 +160,17 @@ def _get_problems_data_page(
    return data
 def parse_args() -> argparse.Namespace:
     """
     Build the argument parser for the script and parse the command line.

     Recognised options:
         --batch-size  integer, defaults to 300
         --output      string, defaults to "problems.csv"

     Returns the namespace produced by ``ArgumentParser.parse_args()``.
     """
     # Each entry is (flag, keyword arguments forwarded to add_argument).
     option_table = (
         (
             "--batch-size",
             {
                 "type": int,
                 "help": "Fetch this many problems at once (set less if leetcode times out)",
                 "default": 300,
             },
         ),
         (
             "--output",
             {
                 "type": str,
                 "help": "Write output to file",
                 "default": "problems.csv",
             },
         ),
     )

     cli_parser = argparse.ArgumentParser(
         description="Fetch leetcode problems and output them to a CSV file"
     )
     for flag, settings in option_table:
         cli_parser.add_argument(flag, **settings)
     return cli_parser.parse_args()
-def _get_problems_data(
+def get_problems(
-    page_size: int,
+    page_size: int = 300,
 ) -> List[leetcode.models.graphql_question_detail.GraphqlQuestionDetail]:
    import pickle
    pickled = Path(os.getenv("LEETCODE_DATA_PATH")) / 'problems.pkl'
    if pickled.exists():
        with open(pickled, 'rb') as f:
            problems = pickle.load(f)
        return problems
    problem_count = _get_problems_count()
    start = 0
@ -207,48 +188,7 @@ def _get_problems_data(
        data = _get_problems_data_page(start, page_size, page)
        problems.extend(data)
    with open(pickled, 'wb') as f:
        out = [p.to_dict() for p in problems]
        pickle.dump(out, f, pickle.HIGHEST_PROTOCOL)
    return problems
 def main() -> None:
     """
     Fetch the problems and write them to a ';'-delimited CSV file.

     The batch size and output path come from ``parse_args()``. One header
     row is written, followed by one row per problem. The ``topics`` and
     ``companies`` columns are comma-joined slugs.
     """
     args = parse_args()
     problems_data = _get_problems_data(args.batch_size)
     csv_header = [
         "Question id",
         "title",
         "slug",
         "category",
         "frequency",
         "is_paid",
         "topics",
         "companies",
     ]
     # The csv module does its own newline handling; without newline="" some
     # platforms emit an extra blank line after every row.
     with open(args.output, "w", newline="", encoding="utf-8") as csv_file:
         csv_writer = csv.writer(csv_file, delimiter=";")
         csv_writer.writerow(csv_header)
         for problem_data in problems_data:
             # company_tag_stats is a JSON object whose values are lists of
             # company dicts; collect the distinct slugs across all of them.
             company_slugs = {
                 d["slug"]
                 for d in itertools.chain(
                     *json.loads(problem_data.company_tag_stats).values()
                 )
             }
             csv_writer.writerow(
                 [
                     problem_data.question_frontend_id,
                     problem_data.title,
                     problem_data.title_slug,
                     problem_data.category_title,
                     problem_data.frequency,
                     problem_data.is_paid_only,
                     ",".join([d.slug for d in problem_data.topic_tags]),
                     # Sorted so repeated runs produce identical files
                     # (set iteration order is arbitrary).
                     ",".join(sorted(company_slugs)),
                 ]
             )
 if __name__ == "__main__":
    main()
--- a/src/neet.py
+++ b/src/neet.py
@ -0,0 +1,115 @@
 from download import get_problems
 import pandas as pd
 import json
 from pathlib import Path
 from markdownify import markdownify as md
 import click
 import re
 import textwrap
 import os
 import template
 def get_data(series):
     """
     Assemble the problem table for one problem set.

     Loads ``neetcode.json`` from ``LEETCODE_DATA_PATH``, joins it with the
     problems returned by ``get_problems()`` on the problem slug, and adds:

     * ``tags``     - topic tag slugs joined with " | "
     * ``markdown`` - the problem statement converted from HTML to markdown
     * ``solution`` - text of the matching ``solutions/NNNN*.py`` file, or ""
     * ``index``    - position of the problem's pattern in the fixed pattern list

     Args:
         series: name of the column used to select the problem set; rows are
             kept where that column is True and ``premium`` is not True.
             NOTE(review): the column is expected to come from neetcode.json -
             confirm every CLI choice (e.g. 'all') exists there.

     Returns:
         The filtered ``pandas.DataFrame``.
     """
     data_dir = Path(os.getenv("LEETCODE_DATA_PATH"))

     def as_markdown(content):
         # Convert the HTML statement, then re-wrap the long "Explanation..."
         # lines at 80 columns.
         content = md(content or "", strip=['strong'])
         for long_line in re.findall(r".*(Explanation.*)", content):
             content = content.replace(long_line, textwrap.fill(long_line, 80))
         return content

     def get_solution(problem_id):
         # Solution files are named with the zero-padded frontend id as prefix.
         # Sorting makes the pick deterministic when several files match.
         matches = sorted((data_dir / 'solutions').glob(f"{int(problem_id):04d}*.py"))
         if not matches:
             return ""
         with open(matches[0], 'r') as f:
             return f.read()

     with open(data_dir / 'neetcode.json', 'r') as f:
         neet = pd.DataFrame(json.load(f))

     # get_problems() yields model objects on a fresh download but plain dicts
     # when it serves its pickle cache, so accept both shapes.
     records = [p if isinstance(p, dict) else p.to_dict() for p in get_problems()]
     problems = pd.DataFrame(records)

     neet['slug'] = neet.link.str.strip('/')
     # Drop this side's difficulty so the merge does not produce suffixed
     # duplicate columns.
     del neet['difficulty']
     problems = pd.merge(problems, neet, left_on="title_slug", right_on="slug")
     problems['tags'] = problems.topic_tags.apply(lambda t: " | ".join([d.get('slug') for d in t]))
     problems['markdown'] = problems.content.apply(as_markdown)
     problems = problems.rename(columns={"question_frontend_id": "id"})
     problems['solution'] = problems.id.apply(get_solution)

     # List order defines the `index` column added by the merge below.
     patterns = [
         "Arrays & Hashing",
         "Two Pointers",
         "Sliding Window",
         "Stack",
         "Binary Search",
         "Linked List",
         "Trees",
         "Tries",
         "Heap / Priority Queue",
         "Backtracking",
         "Graphs",
         "Advanced Graphs",
         "1-D Dynamic Programming",
         "2-D Dynamic Programming",
         "Greedy",
         "Intervals",
         "Math & Geometry",
         "Bit Manipulation",
     ]
     pattern_order = pd.DataFrame(patterns, columns=['pattern']).reset_index()
     problems = pd.merge(problems, pattern_order, on='pattern')

     # Element-wise comparisons on Series; `is True` would not work here.
     in_series = problems[series] == True  # noqa: E712
     not_premium = problems.premium != True  # noqa: E712
     return problems[in_series & not_premium]
 def save_solutions(data, save_to: Path, problem_set):
     """
     Render the solutions document and write it to *save_to*.

     The output starts with the main title page. For every
     (index, pattern, difficulty) group a pattern title page follows, then
     one ``solution.md`` page per problem in the group. Each page receives
     the problem's columns plus ``number`` (running count across all groups)
     and ``total`` (number of rows in *data*).
     """
     columns = ['title', 'slug', 'id', 'solution', 'pattern', 'difficulty', 'tags', 'index']
     chunks = [template.get('main_title.md').render({"problem_set": problem_set})]
     problem_count = len(data)
     position = 0

     by_pattern = data[columns].groupby(['index', 'pattern', 'difficulty'])
     for (_, pattern, difficulty), rows in by_pattern:
         heading = template.get('pattern_title.md')
         chunks.append(heading.render({"pattern": pattern, "difficulty":difficulty}))
         solution_page = template.get("solution.md")
         for _, row in rows.iterrows():
             position += 1
             context = row.to_dict()
             context["total"] = problem_count
             context["number"] = position
             chunks.append(solution_page.render(context))

     with open(save_to, "w") as f:
         f.write("".join(chunks))
 def save_problems(problems, save_to: Path, problem_set):
     """
     Render the problem statements document and write it to *save_to*.

     The output starts with the main title page. For every
     (index, pattern, difficulty) group a pattern title page follows, then
     one ``problem.md`` page per problem in the group.

     Args:
         problems: DataFrame with at least the columns title, slug, id,
             markdown, pattern, difficulty and index.
         save_to: destination markdown file.
         problem_set: name shown on the main title page.

     Each page is rendered with the problem's columns plus ``number``
     (running count across all groups) and ``total`` (integer number of
     rows in *problems*).
     """
     keys = ['title', 'slug', 'id', 'markdown', 'pattern', 'difficulty', 'index']
     chunks = [template.get('main_title.md').render({"problem_set": problem_set})]
     # Previously `total` was never assigned, so the first page raised NameError.
     total = len(problems)
     number = 0

     grouped = problems[keys].groupby(['index', 'pattern', 'difficulty'])
     for (_, pattern, difficulty), group in grouped:
         chunks.append(
             template.get('pattern_title.md').render({"pattern": pattern, "difficulty": difficulty})
         )
         # Statements use the problem template; solution.md has no
         # {{markdown}} placeholder and would render an empty code block.
         page = template.get("problem.md")
         for _, problem in group.iterrows():
             number += 1
             context = problem.to_dict()
             context["total"] = total
             context["number"] = number
             chunks.append(page.render(context))

     with open(save_to, "w") as f:
         f.write("".join(chunks))
@click.command("build")
@click.option('-o', '--out', type=click.Choice(['problems', 'solutions']), multiple=True, default=['problems'])
@click.option('--problem_set', default="neetcode150", type=click.Choice(['neetcode150', 'blind75', 'all']))
def build(out, problem_set):
    """Write the selected markdown documents for a problem set."""
    # Outputs land in $LEETCODE_BUILD_PATH as "<kind>-<problem_set>.md".
    data = get_data(problem_set)

    build_dir = Path(os.getenv("LEETCODE_BUILD_PATH"))
    # The build directory is git-ignored, so it does not exist on a fresh
    # checkout; create it rather than failing on open().
    build_dir.mkdir(parents=True, exist_ok=True)

    writers = {"problems": save_problems, "solutions": save_solutions}
    for kind, save in writers.items():
        if kind not in out:
            continue
        save_to = build_dir / f'{kind}-{problem_set}.md'
        save(data, save_to, problem_set)
        # Report every file written, not only the solutions file.
        print(f"saved: {save_to.absolute()}")
--- a/src/template.py
+++ b/src/template.py
@ -0,0 +1,11 @@
 import jinja2
 from pathlib import Path
 import os
 def get(name):
     """
     Return the Jinja2 template called *name*.

     Templates are looked up in the ``templates/`` directory under the path
     given by the ``LEETCODE_APP_PATH`` environment variable. A new
     environment is built on every call.
     """
     search_dir = Path(os.getenv('LEETCODE_APP_PATH')) / 'templates/'
     environment = jinja2.Environment(
         loader=jinja2.FileSystemLoader(searchpath=search_dir)
     )
     return environment.get_template(name)
--- a/src/templates/main_title.md
+++ b/src/templates/main_title.md
@ -0,0 +1,9 @@
 ---
 geometry: margin=2cm
 output: pdf_document
 ---
 # LeetCode - {{problem_set}}
 \newpage
--- a/src/templates/pattern_title.md
+++ b/src/templates/pattern_title.md
@ -0,0 +1,5 @@
 # {{pattern}} - {{difficulty}}
 \newpage
--- a/src/templates/problem.md
+++ b/src/templates/problem.md
@ -0,0 +1,11 @@
 ## {{id}} - {{title}} ({{number}}/{{total}})
 *[https://leetcode.com/problems/{{slug}}](https://leetcode.com/problems/{{slug}})*
 ---
 {{markdown}}
 \newpage
--- a/src/templates/solution.md
+++ b/src/templates/solution.md
@ -0,0 +1,13 @@
 ## {{id}} - {{title}} ({{number}}/{{total}})
 *[https://leetcode.com/problems/{{slug}}](https://leetcode.com/problems/{{slug}})*
 ---
 ```
 {{solution}}
 ```
 \newpage
--- a/src/templates/solution/title.md
+++ b/src/templates/solution/title.md