Initial commit
This commit is contained in:
commit
621fc95755
|
@ -0,0 +1,9 @@
|
|||
Simple script to download leetcode problems into a csv file
|
||||
|
||||
```
|
||||
$ export LEETCODE_SESSION_ID="xxx"
|
||||
$ pip install -r requirements.txt
|
||||
$ python export.py
|
||||
INFO:root:Fetching 2277 problems 300 per page
|
||||
62%|██████████████████████████████████████████████████████████████████████ | 1500/2400 [00:32<00:20, 44.66problem/s]
|
||||
```
|
|
@ -0,0 +1,254 @@
|
|||
import argparse
|
||||
import csv
|
||||
import functools
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import time
|
||||
from typing import Callable, List, Tuple, Type
|
||||
|
||||
# https://github.com/prius/python-leetcode
|
||||
import leetcode.api.default_api # type: ignore
|
||||
import leetcode.api_client # type: ignore
|
||||
import leetcode.auth # type: ignore
|
||||
import leetcode.configuration # type: ignore
|
||||
import leetcode.models.graphql_query # type: ignore
|
||||
import leetcode.models.graphql_query_problemset_question_list_variables # type: ignore
|
||||
import leetcode.models.graphql_query_problemset_question_list_variables_filter_input # type: ignore
|
||||
import leetcode.models.graphql_question_detail # type: ignore
|
||||
import urllib3 # type: ignore
|
||||
from tqdm import tqdm # type: ignore
|
||||
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
|
||||
def _get_leetcode_api_client() -> leetcode.api.default_api.DefaultApi:
|
||||
"""
|
||||
Leetcode API instance constructor.
|
||||
|
||||
This is a singleton, because we don't need to create a separate client
|
||||
each time
|
||||
"""
|
||||
|
||||
configuration = leetcode.configuration.Configuration()
|
||||
|
||||
session_id = os.environ["LEETCODE_SESSION_ID"]
|
||||
csrf_token = leetcode.auth.get_csrf_cookie(session_id)
|
||||
|
||||
configuration.api_key["x-csrftoken"] = csrf_token
|
||||
configuration.api_key["csrftoken"] = csrf_token
|
||||
configuration.api_key["LEETCODE_SESSION"] = session_id
|
||||
configuration.api_key["Referer"] = "https://leetcode.com"
|
||||
configuration.debug = False
|
||||
api_instance = leetcode.api.default_api.DefaultApi(
|
||||
leetcode.api_client.ApiClient(configuration)
|
||||
)
|
||||
|
||||
return api_instance
|
||||
|
||||
|
||||
def retry(times: int, exceptions: Tuple[Type[Exception]], delay: float) -> Callable:
|
||||
"""
|
||||
Retry Decorator
|
||||
Retries the wrapped function/method `times` times if the exceptions listed
|
||||
in `exceptions` are thrown
|
||||
"""
|
||||
|
||||
def decorator(func):
|
||||
@functools.wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
for attempt in range(times - 1):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except exceptions:
|
||||
logging.exception(
|
||||
"Exception occured, try %s/%s", attempt + 1, times
|
||||
)
|
||||
time.sleep(delay)
|
||||
|
||||
logging.error("Last try")
|
||||
return func(*args, **kwargs)
|
||||
|
||||
return wrapper
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
@retry(times=3, exceptions=(urllib3.exceptions.ProtocolError,), delay=5)
|
||||
def _get_problems_count() -> int:
|
||||
api_instance = _get_leetcode_api_client()
|
||||
|
||||
graphql_request = leetcode.models.graphql_query.GraphqlQuery(
|
||||
query="""
|
||||
query problemsetQuestionList($categorySlug: String, $limit: Int, $skip: Int, $filters: QuestionListFilterInput) {
|
||||
problemsetQuestionList: questionList(
|
||||
categorySlug: $categorySlug
|
||||
limit: $limit
|
||||
skip: $skip
|
||||
filters: $filters
|
||||
) {
|
||||
totalNum
|
||||
}
|
||||
}
|
||||
""",
|
||||
variables=leetcode.models.graphql_query_problemset_question_list_variables.GraphqlQueryProblemsetQuestionListVariables(
|
||||
category_slug="",
|
||||
limit=1,
|
||||
skip=0,
|
||||
filters=leetcode.models.graphql_query_problemset_question_list_variables_filter_input.GraphqlQueryProblemsetQuestionListVariablesFilterInput(
|
||||
tags=[],
|
||||
# difficulty="MEDIUM",
|
||||
# status="NOT_STARTED",
|
||||
# list_id="7p5x763", # Top Amazon Questions
|
||||
# premium_only=False,
|
||||
),
|
||||
),
|
||||
operation_name="problemsetQuestionList",
|
||||
)
|
||||
|
||||
time.sleep(2) # Leetcode has a rate limiter
|
||||
data = api_instance.graphql_post(body=graphql_request).data
|
||||
|
||||
return data.problemset_question_list.total_num or 0
|
||||
|
||||
|
||||
@retry(times=3, exceptions=(urllib3.exceptions.ProtocolError,), delay=5)
|
||||
def _get_problems_data_page(
|
||||
offset: int, page_size: int, page: int
|
||||
) -> List[leetcode.models.graphql_question_detail.GraphqlQuestionDetail]:
|
||||
api_instance = _get_leetcode_api_client()
|
||||
|
||||
graphql_request = leetcode.models.graphql_query.GraphqlQuery(
|
||||
query="""
|
||||
query problemsetQuestionList($categorySlug: String, $limit: Int, $skip: Int, $filters: QuestionListFilterInput) {
|
||||
problemsetQuestionList: questionList(
|
||||
categorySlug: $categorySlug
|
||||
limit: $limit
|
||||
skip: $skip
|
||||
filters: $filters
|
||||
) {
|
||||
questions: data {
|
||||
questionFrontendId
|
||||
title
|
||||
titleSlug
|
||||
categoryTitle
|
||||
frequency
|
||||
isPaidOnly
|
||||
topicTags {
|
||||
name
|
||||
slug
|
||||
}
|
||||
companyTagStats
|
||||
}
|
||||
}
|
||||
}
|
||||
""",
|
||||
variables=leetcode.models.graphql_query_problemset_question_list_variables.GraphqlQueryProblemsetQuestionListVariables(
|
||||
category_slug="",
|
||||
limit=page_size,
|
||||
skip=offset + page * page_size,
|
||||
filters=leetcode.models.graphql_query_problemset_question_list_variables_filter_input.GraphqlQueryProblemsetQuestionListVariablesFilterInput(),
|
||||
),
|
||||
operation_name="problemsetQuestionList",
|
||||
)
|
||||
|
||||
time.sleep(2) # Leetcode has a rate limiter
|
||||
data = api_instance.graphql_post(
|
||||
body=graphql_request
|
||||
).data.problemset_question_list.questions
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
"""
|
||||
Parse command line arguments for the script
|
||||
"""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Fetch leetcode problems and output them to a CSV file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--batch-size",
|
||||
type=int,
|
||||
help="Fetch this many problems at once (set less if leetcode times out)",
|
||||
default=300,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
type=str,
|
||||
help="Write output to file",
|
||||
default="problems.csv",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def _get_problems_data(
|
||||
page_size: int,
|
||||
) -> List[leetcode.models.graphql_question_detail.GraphqlQuestionDetail]:
|
||||
problem_count = _get_problems_count()
|
||||
|
||||
start = 0
|
||||
stop = problem_count
|
||||
|
||||
problems: List[leetcode.models.graphql_question_detail.GraphqlQuestionDetail] = []
|
||||
|
||||
logging.info(f"Fetching {stop - start + 1} problems {page_size} per page")
|
||||
|
||||
for page in tqdm(
|
||||
range(math.ceil((stop - start + 1) / page_size)),
|
||||
unit="problem",
|
||||
unit_scale=page_size,
|
||||
):
|
||||
data = _get_problems_data_page(start, page_size, page)
|
||||
problems.extend(data)
|
||||
|
||||
return problems
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = parse_args()
|
||||
problems_data = _get_problems_data(args.batch_size)
|
||||
|
||||
csv_header = [
|
||||
"Question id",
|
||||
"title",
|
||||
"slug",
|
||||
"category",
|
||||
"frequency",
|
||||
"is_paid",
|
||||
"topics",
|
||||
"companies",
|
||||
]
|
||||
with open(args.output, "w") as csv_file:
|
||||
csv_writer = csv.writer(csv_file, delimiter=";")
|
||||
csv_writer.writerow(csv_header)
|
||||
|
||||
for problem_data in problems_data:
|
||||
csv_writer.writerow(
|
||||
[
|
||||
problem_data.question_frontend_id,
|
||||
problem_data.title,
|
||||
problem_data.title_slug,
|
||||
problem_data.category_title,
|
||||
problem_data.frequency,
|
||||
problem_data.is_paid_only,
|
||||
",".join([d.slug for d in problem_data.topic_tags]),
|
||||
",".join(
|
||||
{
|
||||
d["slug"]
|
||||
for d in itertools.chain(
|
||||
*json.loads(problem_data.company_tag_stats).values()
|
||||
)
|
||||
}
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,2 @@
|
|||
python-leetcode==1.2.1
|
||||
tqdm
|
Loading…
Reference in New Issue