add exp 4/5
This commit is contained in:
@@ -335,3 +335,92 @@ def another_norm():
|
||||
on sv2.id = s.id
|
||||
limit 5
|
||||
""")
|
||||
|
||||
@click.command('data:create-election-table')
def create_elections_table():
    """Build the ``election_dates`` and ``election_distance`` tables.

    Reads ``election_dates.csv`` (pipe-separated) from the data directory,
    parses its ``date`` column, and materialises two tables:

    * ``election_dates``: one row per CSV row, with a generated ``id`` plus
      the ``type`` and ``date`` columns.
    * ``election_distance``: for every distinct ``published_at`` value in
      ``top.stories``, the single election with the smallest absolute
      distance in days (``days_away = day(election date - publish date)``).

    Both tables are created with ``CREATE OR REPLACE``, so the command can be
    re-run. The connection is always closed, even when a query fails.
    """
    # NOTE(review): the first query selects ``FROM df``. This presumably
    # relies on the database resolving the local DataFrame by its Python
    # variable name (assuming ``connect()`` returns a DuckDB connection), so
    # the variable must keep the name ``df``.
    df = pd.read_csv(data_dir() / 'election_dates.csv', sep="|")
    df['date'] = pd.to_datetime(df.date)

    DB = connect()
    try:
        DB.query("""
            CREATE OR REPLACE TABLE election_dates AS
                SELECT
                    row_number() over() as id
                    ,type
                    ,date
                FROM df
        """)

        # Pair every distinct publish date with every election, rank the
        # elections per publish date by absolute distance, and keep only the
        # closest one (rn = 1).
        DB.query("""
            CREATE OR REPLACE TABLE election_distance AS
                WITH cte as (
                    SELECT
                        day(e.date - s.published_at) as days_away
                        ,e.id as election_id
                        ,e.date as election_date
                        ,s.published_at as publish_date
                    FROM (
                        SELECT
                            DISTINCT
                                published_at
                        FROM top.stories
                    ) s
                    CROSS JOIN election_dates e
                ) , windowed as (
                    SELECT
                        -- election_date breaks ties between an earlier and a
                        -- later election that are equally far away, so the
                        -- result is reproducible across runs.
                        row_number() over(partition by publish_date order by abs(days_away) asc, election_date asc) as rn
                        ,days_away
                        ,publish_date
                        ,election_date
                        ,election_id
                    FROM cte
                )
                SELECT
                    days_away
                    ,publish_date
                    ,election_date
                    ,election_id
                FROM windowed
                WHERE rn = 1
        """)
    finally:
        # Release the connection even if one of the statements raised.
        DB.close()
|
||||
|
||||
@click.command('scrape:create-denorm')
def create_denorm():
    """Build the denormalised ``denorm.stories`` table.

    Joins each story in ``top.stories`` to its publisher, sentiment class,
    distance to the nearest election, publisher bias rating, publisher PCA
    link components and emotion, and writes the result with
    ``CREATE OR REPLACE``.

    All joins are inner joins, so a story is dropped from the output when any
    of the related rows is missing.

    The ``denorm`` schema is created only if it does not exist yet, so the
    command can be re-run. The connection is always closed, even when a
    statement fails.
    """
    DB = connect()
    try:
        # IF NOT EXISTS keeps the command re-runnable, matching the
        # CREATE OR REPLACE used for the table itself.
        DB.sql("create schema if not exists denorm")
        DB.sql("""
            CREATE OR REPLACE TABLE denorm.stories AS
                SELECT
                    s.id as story_id
                    ,s.title
                    ,s.url
                    ,s.published_at
                    ,s.author
                    ,p.name as publisher
                    ,p.tld as tld
                    ,sent.class_id as sentiment
                    ,d.days_away as election_distance
                    ,b.ordinal as bias
                    ,pca.first as link_1
                    ,pca.second as link_2
                    ,e.emotion_id as emotion
                FROM top.stories s
                JOIN top.publishers p
                    ON p.id = s.publisher_id
                JOIN top.story_sentiments sent
                    ON s.id = sent.story_id
                -- election_distance holds one row per distinct publish date,
                -- so it is matched on publish_date. Matching on election_date
                -- would keep only stories published exactly on an election
                -- date.
                JOIN election_distance d
                    ON d.publish_date = s.published_at
                JOIN publisher_bias pb
                    ON pb.publisher_id = p.id
                JOIN bias_ratings b
                    ON b.id = pb.bias_id
                JOIN top.publisher_pca_onehot pca
                    ON pca.publisher_id = p.id
                JOIN story_emotions e
                    ON e.story_id = s.id
        """)
    finally:
        # Release the connection even if one of the statements raised.
        DB.close()
|
||||
|
||||
Reference in New Issue
Block a user