49 lines
951 B
Python
49 lines
951 B
Python
from contextlib import closing

import numpy as np
import pandas as pd

from data.main import connect
|
|
|
|
# Load every row of `link_edges` into a DataFrame. `closing` guarantees the
# connection is released even when the query raises.
with closing(connect()) as DB:
    edges = DB.query("""
select
*
from link_edges
""").df()

# Adjacency matrix: one row per parent_id, one column per child_id, cell value
# taken from `links`; parent/child pairs with no edge are filled with 0.
# NOTE(review): DataFrame.pivot raises ValueError if a (parent_id, child_id)
# pair occurs more than once -- confirm link_edges is unique on that pair.
adj = edges.pivot(index='parent_id', columns='child_id', values='links').fillna(0)

# One row per id that appears as a parent in the adjacency matrix. The SQL
# that builds `top.publishers` refers to this DataFrame by its variable name,
# so the name must stay `select_publishers` and remain at module level.
select_publishers = pd.DataFrame(adj.index.tolist(), columns=['publisher_id'])
|
|
|
|
# Materialise the `top` schema: the selected publishers, their stories, and
# the related-story links of those stories. `closing` makes sure this second
# connection is released when the block finishes or fails.
with closing(connect()) as DB:
    # IF NOT EXISTS keeps the script re-runnable, matching the
    # CREATE OR REPLACE used for every table below.
    DB.query("create schema if not exists top")

    # Publishers whose id is listed in the `select_publishers` DataFrame.
    # NOTE(review): `select_publishers` is not a database table; presumably
    # the engine resolves it from the Python variable of the same name
    # (DuckDB-style replacement scan) -- confirm against `connect()`.
    DB.query("""
CREATE OR REPLACE TABLE top.publishers AS
SELECT
p.*
FROM publishers p
JOIN select_publishers s
ON s.publisher_id = p.id
""")

    # Stories of those publishers published from 2006 through 2022
    # (year >= 2006 and year < 2023).
    DB.query("""
CREATE OR REPLACE TABLE top.stories AS
SELECT
s.*
FROM stories s
JOIN top.publishers p
ON s.publisher_id = p.id
WHERE year(s.published_at) >= 2006
AND year(s.published_at) < 2023
""")

    # Related-story rows whose parent_id is one of the stories kept above.
    DB.query("""
CREATE OR REPLACE TABLE top.related_stories AS
SELECT
r.*
FROM top.stories s
JOIN related_stories r
ON s.id = r.parent_id
""")
|