Use new builder exceptions in TwitterBuilder

This commit is contained in:
Sonny Bakker 2020-10-14 22:18:58 +02:00
parent e89b4c04a1
commit 379f8516e3

View file

@@ -22,6 +22,10 @@ from newsreader.news.collection.base import (
) )
from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices
from newsreader.news.collection.exceptions import ( from newsreader.news.collection.exceptions import (
BuilderDuplicateException,
BuilderException,
BuilderMissingDataException,
BuilderParseException,
StreamDeniedException, StreamDeniedException,
StreamException, StreamException,
StreamNotFoundException, StreamNotFoundException,
@@ -50,36 +54,50 @@ class TwitterBuilder(PostBuilder):
results = {} results = {}
for post in self.payload: for post in self.payload:
remote_identifier = post.get("id_str")
if not remote_identifier or remote_identifier in self.existing_posts:
continue
try: try:
results[remote_identifier] = self.build_post(post) post = self.build_post(post)
except KeyError: except BuilderException:
logger.exception(f"Failed building post {remote_identifier}") logger.exception("Failed building post")
continue continue
identifier = post.remote_identifier
results[identifier] = post
self.instances = results.values() self.instances = results.values()
def build_post(self, data): def build_post(self, data):
remote_identifier = data["id_str"] remote_identifier = data.get("id_str", "")
rule = self.stream.rule
if remote_identifier in self.existing_posts:
raise BuilderDuplicateException(payload=data)
try:
body = urlize(data["full_text"], nofollow=True) body = urlize(data["full_text"], nofollow=True)
title = truncate_text(Post, "title", self.sanitize_fragment(data["full_text"])) title = truncate_text(
url = f"{TWITTER_URL}/{self.stream.rule.screen_name}/status/{remote_identifier}" Post, "title", self.sanitize_fragment(data["full_text"])
)
publication_date = pytz.utc.localize( publication_date = pytz.utc.localize(
datetime.strptime(data["created_at"], "%a %b %d %H:%M:%S +0000 %Y") datetime.strptime(data["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
) )
except KeyError as e:
raise BuilderMissingDataException(payload=data) from e
except (OverflowError, OSError) as e:
raise BuilderParseException(payload=data) from e
url = f"{TWITTER_URL}/{rule.screen_name}/status/{remote_identifier}"
if "extended_entities" in data: if "extended_entities" in data:
try: try:
media_entities = self.get_media_entities(data) media_entities = self.get_media_entities(data)
body += media_entities body += media_entities
except KeyError: except KeyError as e:
logger.exception(f"Failed parsing media_entities for {url}") raise BuilderMissingDataException(
message="Failed parsing data for media entities", payload=data
) from e
try:
if "retweeted_status" in data: if "retweeted_status" in data:
original_post = data["retweeted_status"] original_post = data["retweeted_status"]
original_tweet = urlize(original_post["full_text"], nofollow=True) original_tweet = urlize(original_post["full_text"], nofollow=True)
@@ -88,18 +106,22 @@ class TwitterBuilder(PostBuilder):
original_post = data["quoted_status"] original_post = data["quoted_status"]
original_tweet = urlize(original_post["full_text"], nofollow=True) original_tweet = urlize(original_post["full_text"], nofollow=True)
body = f"{body} <br><div>Quoted tweet: {original_tweet}</div>" body = f"{body} <br><div>Quoted tweet: {original_tweet}</div>"
except KeyError as e:
raise BuilderMissingDataException(
message="Failed parsing data for original tweet", payload=data
) from e
body = self.sanitize_fragment(body) body = self.sanitize_fragment(body)
return Post( return Post(
**{ **{
"remote_identifier": data["id_str"], "remote_identifier": remote_identifier,
"title": fix_text(title), "title": fix_text(title),
"body": fix_text(body), "body": fix_text(body),
"author": self.stream.rule.screen_name, "author": rule.screen_name,
"publication_date": publication_date, "publication_date": publication_date,
"url": url, "url": url,
"rule": self.stream.rule, "rule": rule,
} }
) )