add cli. add pdf export. add solutions.

2023-07-28 08:05:36 -07:00 · 2023-07-28 08:05:36 -07:00 · 3b1b38cb13
parent 621fc95755
commit 3b1b38cb13
15 changed files with 215 additions and 75 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,4 @@
+build/
+*.swp
+.venv/
+__pycache__/
--- a/9
+++ b/9
@ -0,0 +1,9 @@
+PYTHON=.venv/bin/python
+SET=neetcode150
+
+.PHONY: build
+
+build:
+	$(PYTHON) src build --problem_set $(SET) --out problems --out solutions
+	pandoc build/problems-$(SET).md --pdf-engine=xelatex -o dist/problems-$(SET).pdf
+	pandoc build/solutions-$(SET).md --pdf-engine=xelatex -o dist/solutions-$(SET).pdf
--- a/data/.gitignore
+++ b/data/.gitignore
@ -0,0 +1,3 @@
+*
+*/
+!.gitignore
--- a/dist/.gitignore
+++ b/dist/.gitignore
@ -0,0 +1,3 @@
+*
+*/
+!.gitignore
--- a/requirements.txt
+++ b/requirements.txt
@ -1,2 +1,6 @@
 python-leetcode==1.2.1
 tqdm
+python-dotenv
+click
+markdownify
+pandas
--- a/src/.gitignore
+++ b/src/.gitignore
@ -0,0 +1 @@
+.env
--- a/src/main.py
+++ b/src/main.py
@ -0,0 +1,12 @@
+import click
+from dotenv import load_dotenv
+import neet
+
+@click.group()
+def cli():  # root command group; sub-commands are attached in the __main__ guard below
+    """run the leetcode export commands"""
+
+if __name__ == "__main__":
+    load_dotenv()  # load .env values into the environment before any command reads os.getenv
+    cli.add_command(neet.build)  # expose neet.build as a sub-command of the group
+    cli()
--- a/src/download.py
+++ b/src/download.py
@ -1,13 +1,10 @@
-import argparse
-import csv
 import functools
-import itertools
-import json
 import logging
 import math
 import os
 import time
 from typing import Callable, List, Tuple, Type
+from pathlib import Path

 # https://github.com/prius/python-leetcode
 import leetcode.api.default_api  # type: ignore
@ -21,6 +18,7 @@ import leetcode.models.graphql_question_detail  # type: ignore
 import urllib3  # type: ignore
 from tqdm import tqdm  # type: ignore

+
 logging.getLogger().setLevel(logging.INFO)


@ -134,12 +132,12 @@ def _get_problems_data_page(
                title
                titleSlug
                categoryTitle
-                frequency
-                isPaidOnly
                topicTags {
                  name
                  slug
                }
+                difficulty
+                content
                companyTagStats
            }
          }
@ -162,34 +160,17 @@ def _get_problems_data_page(
    return data


-def parse_args() -> argparse.Namespace:
-    """
-    Parse command line arguments for the script
-    """
-    parser = argparse.ArgumentParser(
-        description="Fetch leetcode problems and output them to a CSV file"
-    )
-    parser.add_argument(
-        "--batch-size",
-        type=int,
-        help="Fetch this many problems at once (set less if leetcode times out)",
-        default=300,
-    )
-    parser.add_argument(
-        "--output",
-        type=str,
-        help="Write output to file",
-        default="problems.csv",
-    )
-
-    args = parser.parse_args()
-
-    return args


-def _get_problems_data(
-    page_size: int,
+def get_problems(
+    page_size: int = 300,
 ) -> List[leetcode.models.graphql_question_detail.GraphqlQuestionDetail]:
+    import pickle
+    pickled = Path(os.getenv("LEETCODE_DATA_PATH")) / 'problems.pkl'
+    if pickled.exists():
+        with open(pickled, 'rb') as f:
+            problems = pickle.load(f)
+        return problems
    problem_count = _get_problems_count()

    start = 0
@ -207,48 +188,7 @@ def _get_problems_data(
        data = _get_problems_data_page(start, page_size, page)
        problems.extend(data)

+    with open(pickled, 'wb') as f:
+        out = [p.to_dict() for p in problems]
+        pickle.dump(out, f, pickle.HIGHEST_PROTOCOL)
    return problems
-
-
-def main() -> None:
-    args = parse_args()
-    problems_data = _get_problems_data(args.batch_size)
-
-    csv_header = [
-        "Question id",
-        "title",
-        "slug",
-        "category",
-        "frequency",
-        "is_paid",
-        "topics",
-        "companies",
-    ]
-    with open(args.output, "w") as csv_file:
-        csv_writer = csv.writer(csv_file, delimiter=";")
-        csv_writer.writerow(csv_header)
-
-        for problem_data in problems_data:
-            csv_writer.writerow(
-                [
-                    problem_data.question_frontend_id,
-                    problem_data.title,
-                    problem_data.title_slug,
-                    problem_data.category_title,
-                    problem_data.frequency,
-                    problem_data.is_paid_only,
-                    ",".join([d.slug for d in problem_data.topic_tags]),
-                    ",".join(
-                        {
-                            d["slug"]
-                            for d in itertools.chain(
-                                *json.loads(problem_data.company_tag_stats).values()
-                            )
-                        }
-                    ),
-                ]
-            )
-
-
-if __name__ == "__main__":
-    main()
--- a/src/neet.py
+++ b/src/neet.py
@ -0,0 +1,115 @@
+from download import get_problems
+import pandas as pd
+import json
+from pathlib import Path
+from markdownify import markdownify as md
+import click
+import re
+import textwrap
+import os
+import template
+
+def get_data(series):
+
+    def as_markdown(content):
+        content = md(content or "", strip=['strong'])
+        long_lines = re.findall(r".*(Explanation.*)", content)
+        for l in long_lines:
+            content = content.replace(l, textwrap.fill(l, 80))
+        return content
+
+    def get_solution(problem_id):
+        p = Path(os.getenv("LEETCODE_DATA_PATH")) / 'solutions'
+        matches = list(p.glob(f"{int(problem_id):04d}*.py"))
+        if not matches:
+            return ""
+        p = matches[0]
+        with open(p, 'r') as f:
+            solution = f.read()
+        return solution
+
+    open_path = Path(os.getenv('LEETCODE_DATA_PATH')) / 'neetcode.json'
+    with open(open_path, 'r') as f:
+        neet = json.load(f)
+    neet = pd.DataFrame(neet) 
+    problems = pd.DataFrame([d.to_dict() for d in get_problems()])
+    neet['slug'] = neet.link.str.strip('/')
+    del(neet['difficulty'])
+    problems = pd.merge(problems, neet, left_on="title_slug", right_on="slug")
+    problems['tags'] = problems.topic_tags.apply(lambda t: " | ".join([d.get('slug') for d in t]))
+    problems['markdown'] = problems.content.apply(lambda c: as_markdown(c))
+    problems.rename(columns={"question_frontend_id": "id"}, inplace=True)
+    problems['solution'] = problems.id.apply(lambda i: get_solution(i))
+    patterns = [
+        "Arrays & Hashing"
+        ,"Two Pointers"
+        ,"Sliding Window"
+        ,"Stack"
+        ,"Binary Search"
+        ,"Linked List"
+        ,"Trees"
+        ,"Tries"
+        ,"Heap / Priority Queue"
+        ,"Backtracking"
+        ,"Graphs"
+        ,"Advanced Graphs"
+        ,"1-D Dynamic Programming"
+        ,"2-D Dynamic Programming"
+        ,"Greedy"
+        ,"Intervals"
+        ,"Math & Geometry"
+        ,"Bit Manipulation"
+    ]
+    #patterns = pd.DataFrame(patterns, columns=['category'])
+    problems = pd.merge(problems, pd.DataFrame(patterns, columns=['pattern']).reset_index(), on='pattern')
+    problems = problems[(problems[series] == True) & (problems.premium != True)]
+    return problems
+
+
+def save_solutions(data, save_to: Path, problem_set):
+    out = template.get('main_title.md').render({"problem_set": problem_set})
+    keys = ['title', 'slug', 'id', 'solution', 'pattern', 'difficulty', 'tags', 'index']
+    number = 0
+    total = len(data)
+    grouped = data[keys].groupby(['index', 'pattern', 'difficulty'])
+    for i, ((pattern_id, pattern, difficulty), group) in enumerate(grouped):
+        out += template.get('pattern_title.md').render({"pattern": pattern, "difficulty":difficulty})
+        page = template.get("solution.md")
+        for j, problem in group.iterrows():
+            number += 1
+            problem = problem.to_dict()
+            problem["total"] = total
+            problem["number"] = number
+            out += page.render(problem)
+    with open(save_to, "w") as f:
+        f.write(out)
+
+def save_problems(problems, save_to: Path, problem_set):
+    keys = ['title', 'slug', 'id', 'markdown', 'pattern', 'difficulty', 'index']
+    out = template.get('main_title.md').render({"problem_set": problem_set})
+    number = 0
+    grouped = problems[keys].groupby(['index', 'pattern', 'difficulty'])
+    for i, ((pattern_id, pattern, difficulty), group) in enumerate(grouped):
+        out += template.get('pattern_title.md').render({"pattern": pattern, "difficulty":difficulty})
+        page = template.get("solution.md")
+        for j, problem in group.iterrows():
+            number += 1
+            problem = problem.to_dict()
+            problem["total"] = total
+            problem["number"] = number
+            out += page.render(problem)
+    with open(save_to, "w") as f:
+        f.write(out)
+
+@click.command("build")
+@click.option('-o', '--out', type=click.Choice(['problems', 'solutions']), multiple=True, default=['problems'])
+@click.option('--problem_set', default="neetcode150", type=click.Choice(['neetcode150', 'blind75', 'all']))
+def build(out, problem_set):
+    data = get_data(problem_set)
+    if 'problems' in out:
+        save_to = Path(os.getenv("LEETCODE_BUILD_PATH")) / f'problems-{problem_set}.md'
+        save_problems(data, save_to, problem_set)
+    if 'solutions' in out:
+        save_to = Path(os.getenv("LEETCODE_BUILD_PATH")) / f'solutions-{problem_set}.md'
+        save_solutions(data, save_to, problem_set)
+        print(f"saved: {save_to.absolute()}")
--- a/src/template.py
+++ b/src/template.py
@ -0,0 +1,11 @@
+import jinja2
+from pathlib import Path
+import os
+
+def get(name):
+    templates_path =Path(os.getenv('LEETCODE_APP_PATH')) / 'templates/'
+
+    templateLoader = jinja2.FileSystemLoader(searchpath=templates_path)
+    templateEnv = jinja2.Environment(loader=templateLoader)
+    template = templateEnv.get_template(name)
+    return template
--- a/src/templates/main_title.md
+++ b/src/templates/main_title.md
@ -0,0 +1,9 @@
+---
+geometry: margin=2cm
+output: pdf_document
+---
+
+# LeetCode - {{problem_set}}
+
+\newpage
+
--- a/src/templates/pattern_title.md
+++ b/src/templates/pattern_title.md
@ -0,0 +1,5 @@
+
+# {{pattern}} - {{difficulty}}
+
+\newpage
+
--- a/src/templates/problem.md
+++ b/src/templates/problem.md
@ -0,0 +1,11 @@
+
+## {{id}} - {{title}} ({{number}}/{{total}})
+
+*[https://leetcode.com/problems/{{slug}}](https://leetcode.com/problems/{{slug}})*
+
+---
+
+{{markdown}}
+
+\newpage
+
--- a/src/templates/solution.md
+++ b/src/templates/solution.md
@ -0,0 +1,13 @@
+
+## {{id}} - {{title}} ({{number}}/{{total}})
+
+*[https://leetcode.com/problems/{{slug}}](https://leetcode.com/problems/{{slug}})*
+
+---
+
+```
+{{solution}}
+```
+
+\newpage
+
--- a/src/templates/solution/title.md
+++ b/src/templates/solution/title.md