import click
import matplotlib.pyplot as plt
import requests
import seaborn as sns

from data import connect


def _probe(story_id, url):
    """Fetch *url* once and summarise the outcome as a flat dict.

    Returned keys:
        story_id        -- the id passed in.
        final_url       -- URL after following redirects (``url`` if unchanged
                           or if the request failed).
        timeout         -- 1 if the request timed out, else 0.
        error           -- 1 if the request failed for a non-timeout reason
                           (DNS failure, refused connection, ...), else 0.
        redirect        -- 1 if at least one redirect was followed, else 0.
        status_code     -- HTTP status of the final response; stays at the
                           placeholder 200 when no response was received.
        content_length  -- size in bytes of the final response body.
    """
    out = {
        'story_id': story_id,
        'final_url': url,
        'timeout': 0,
        'error': 0,
        'redirect': 0,
        'status_code': 200,
        'content_length': 0,
    }
    try:
        # NOTE(review): verify=False disables TLS certificate checks --
        # presumably so stories on hosts with expired/broken certs can still
        # be probed; confirm this is intentional.
        response = requests.get(url, verify=False, timeout=10)
    except requests.exceptions.Timeout:
        # Timeout covers both connect and read timeouts.
        print(f"timeout: {url}")
        out['timeout'] = 1
    except requests.exceptions.RequestException as e:
        # A dead host must not abort the whole crawl: record it and move on.
        print(f"error: {url}: {e}")
        out['error'] = 1
    else:
        # response.history holds one entry per redirect that was followed, so
        # any non-empty history means the URL redirected.
        if response.history:
            out['redirect'] = 1
        if url != response.url:
            out['final_url'] = response.url
        out['status_code'] = response.status_code
        out['content_length'] = len(response.content)
    return out


@click.command(name="broken:crawl")
def crawl():
    """crawl story urls checking for link rot or redirects."""
    # Loads the five oldest stories, probes each URL, then shows a histogram
    # of the HTTP status codes that were actually received.
    DB = connect()
    try:
        urls = DB.query("""
            select
                id
                ,url
            from stories
            order by published_at asc
            limit 5
        """).fetchall()
    finally:
        # Release the connection even if the query raises.
        DB.close()

    responses = [_probe(story_id, url) for story_id, url in urls]

    # The original plotted ``hist['cnt']``, but ``hist`` was never defined.
    # NOTE(review): plotting the status-code distribution is an assumption
    # about the intent -- confirm. Rows with no response are excluded so the
    # placeholder 200 does not skew the chart.
    codes = [
        r['status_code']
        for r in responses
        if not r['timeout'] and not r['error']
    ]
    if codes:
        sns.histplot(x=codes)
        plt.show()