"""Materialise the ``top`` schema: publishers, stories and related stories.

Steps, in order:

1. Load every row of ``link_edges`` into a DataFrame.
2. Pivot the edges into a parent x child matrix of ``links`` values.
3. Take the distinct ``parent_id`` values of that matrix as the selected
   publishers.
4. Create ``top.publishers``, ``top.stories`` and ``top.related_stories``
   from the base tables, restricted to the selected publishers.
"""

import numpy as np
import pandas as pd

from data.main import connect

# --- 1. Load the edge list ---------------------------------------------------
DB = connect()
try:
    edges = DB.query("""
        select *
        from link_edges
    """).df()
finally:
    # Release the connection even if the query fails.
    DB.close()

# --- 2. Parent x child matrix ------------------------------------------------
# One row per parent_id, one column per child_id; pairs with no edge become 0.
# NOTE: DataFrame.pivot raises ValueError if a (parent_id, child_id) pair
# occurs more than once in `edges`.
adj = edges.pivot(
    index='parent_id',
    columns='child_id',
    values='links',
).fillna(0)

# --- 3. Selected publishers --------------------------------------------------
# Every parent_id that appears as a row of the matrix.
select_publishers = pd.DataFrame(adj.index.tolist(), columns=['publisher_id'])

# --- 4. Build the `top` schema -----------------------------------------------
DB = connect()

# IF NOT EXISTS keeps the script re-runnable, matching the CREATE OR REPLACE
# statements below; a plain CREATE SCHEMA fails once the schema exists.
DB.query("CREATE SCHEMA IF NOT EXISTS top")

# `select_publishers` is referenced by name inside the SQL; presumably the
# connection resolves it to the DataFrame above (DuckDB-style replacement
# scan), so the variable must stay in this scope -- TODO confirm.
DB.query("""
    CREATE OR REPLACE TABLE top.publishers AS
    SELECT p.*
    FROM publishers p
    JOIN select_publishers s ON s.publisher_id = p.id
""")

# Stories of the selected publishers published in 2006 through 2022 inclusive.
DB.query("""
    CREATE OR REPLACE TABLE top.stories AS
    SELECT s.*
    FROM stories s
    JOIN top.publishers p ON s.publisher_id = p.id
    WHERE year(s.published_at) >= 2006
      AND year(s.published_at) < 2023
""")

# Related-story rows whose parent is one of the stories kept above.
DB.query("""
    CREATE OR REPLACE TABLE top.related_stories AS
    SELECT r.*
    FROM top.stories s
    JOIN related_stories r ON s.id = r.parent_id
""")

# NOTE(review): the connection is left open here, as in the original, in case
# later code reuses `DB`; call DB.close() once no further queries follow.