Use new builder exceptions in RedditBuilder

This commit is contained in:
Sonny Bakker 2020-10-14 21:39:36 +02:00
parent cffcd954d7
commit e89b4c04a1

View file

@ -28,6 +28,10 @@ from newsreader.news.collection.constants import (
WHITELISTED_TAGS, WHITELISTED_TAGS,
) )
from newsreader.news.collection.exceptions import ( from newsreader.news.collection.exceptions import (
BuilderDuplicateException,
BuilderException,
BuilderMissingDataException,
BuilderParseException,
StreamDeniedException, StreamDeniedException,
StreamException, StreamException,
StreamParseException, StreamParseException,
@ -125,56 +129,67 @@ class RedditBuilder(PostBuilder):
entries = self.payload["data"]["children"] entries = self.payload["data"]["children"]
for entry in entries: for entry in entries:
if not "data" in entry:
continue
elif entry.get("kind") != REDDIT_POST:
continue
elif not "id" in entry["data"]:
continue
remote_identifier = entry["data"]["id"]
if remote_identifier in results or remote_identifier in self.existing_posts:
continue
try: try:
post = self.build_post(entry["data"]) post = self.build_post(entry)
except KeyError: except BuilderException:
logger.exception(f"Failed building post {remote_identifier}") logger.exception("Failed building post")
continue continue
results[remote_identifier] = post identifier = post.remote_identifier
results[identifier] = post
self.instances = results.values() self.instances = results.values()
def build_post(self, entry): def build_post(self, entry):
rule = self.stream.rule rule = self.stream.rule
entry_data = entry.get("data", {})
remote_identifier = entry_data.get("id", "")
kind = entry.get("kind")
remote_identifier = entry["id"] if remote_identifier in self.existing_posts:
title = truncate_text(Post, "title", entry["title"]) raise BuilderDuplicateException(payload=entry)
author = truncate_text(Post, "author", entry["author"]) elif kind != REDDIT_POST:
post_url_fragment = entry["permalink"] raise BuilderParseException(
direct_url = entry["url"] message=f"Payload is not an reddit post, its of kind {kind}",
payload=entry,
if entry["is_self"]: )
body = self.get_text_post(entry) elif not entry_data:
elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS): raise BuilderMissingDataException(
body = self.get_image_post(entry) message=f"Post {remote_identifier} did not contain any data",
elif entry["is_video"]: payload=entry,
body = self.get_native_video_post(entry) )
elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
body = self.get_video_post(entry)
else:
body = self.get_url_post(entry)
try: try:
parsed_date = datetime.fromtimestamp(entry["created_utc"]) title = entry_data["title"]
author = entry_data["author"]
post_url_fragment = entry_data["permalink"]
direct_url = entry_data["url"]
is_text = entry_data["is_self"]
is_video = entry_data["is_video"]
except KeyError as e:
raise BuilderMissingDataException(payload=entry) from e
title = truncate_text(Post, "title", title)
author = truncate_text(Post, "author", author)
if is_text:
body = self.get_text_post(entry_data)
elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
body = self.get_image_post(title, direct_url)
elif is_video:
body = self.get_native_video_post(entry_data)
elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
body = self.get_video_post(direct_url)
else:
body = self.get_url_post(title, direct_url)
try:
parsed_date = datetime.fromtimestamp(entry_data["created_utc"])
created_date = pytz.utc.localize(parsed_date) created_date = pytz.utc.localize(parsed_date)
except (OverflowError, OSError): except (OverflowError, OSError) as e:
logging.warning( raise BuilderParseException(payload=entry) from e
f"Failed parsing timestamp from {REDDIT_URL}{post_url_fragment}" except KeyError as e:
) raise BuilderMissingDataException(payload=entry) from e
created_date = timezone.now()
post_entry = { post_entry = {
"remote_identifier": remote_identifier, "remote_identifier": remote_identifier,
@ -189,27 +204,33 @@ class RedditBuilder(PostBuilder):
return Post(**post_entry) return Post(**post_entry)
def get_text_post(self, entry):
    """Return the sanitized HTML body of a self (text) post.

    ``entry`` is the post's data mapping. Its ``selftext_html`` value is
    HTML-unescaped and then passed through ``self.sanitize_fragment``.
    A falsy value (``None`` or ``""``) yields an empty string.

    Raises:
        BuilderMissingDataException: ``entry`` has no ``selftext_html`` key.
    """
    try:
        uncleaned_body = entry["selftext_html"]
    except KeyError as e:
        raise BuilderMissingDataException(payload=entry) from e

    # Skip unescaping and sanitizing entirely when there is nothing to clean.
    unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
    return self.sanitize_fragment(unescaped_body) if unescaped_body else ""
def get_image_post(self, title, url):
    """Return an HTML fragment that embeds the image at ``url``.

    ``title`` is used as the image's ``alt`` text. Both values are
    interpolated through ``format_html``.
    NOTE(review): presumably Django's ``format_html``, which escapes its
    arguments -- confirm against the file's imports.
    """
    return format_html(
        "<div><img alt='{title}' src='{url}' loading='lazy' /></div>",
        url=url,
        title=title,
    )
def get_native_video_post(self, entry):
    """Return an HTML ``<video>`` fragment for a reddit-hosted video post.

    The video URL is read from
    ``entry["secure_media"]["reddit_video"]["fallback_url"]``.

    Raises:
        BuilderMissingDataException: any part of that path is missing, or
            an intermediate value is not subscriptable (e.g. ``None``).
    """
    try:
        # Resolve the whole path inside the try so that a missing
        # "fallback_url" surfaces as a builder exception too, rather than
        # as a bare KeyError that callers catching BuilderException miss.
        video_url = entry["secure_media"]["reddit_video"]["fallback_url"]
    except (KeyError, TypeError) as e:
        raise BuilderMissingDataException(payload=entry) from e

    return format_html(
        "<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
        url=video_url,
    )
def get_video_post(self, entry): def get_video_post(self, url):
url = entry["url"]
extension = next( extension = next(
extension.replace(".", "") extension.replace(".", "")
for extension in REDDIT_VIDEO_EXTENSIONS for extension in REDDIT_VIDEO_EXTENSIONS
@ -228,11 +249,11 @@ class RedditBuilder(PostBuilder):
extension=extension, extension=extension,
) )
def get_url_post(self, title, url):
    """Return an HTML fragment linking to ``url`` with the text "Direct url".

    ``title`` is interpolated into the anchor's ``alt`` attribute. Both
    values are interpolated through ``format_html``.
    NOTE(review): presumably Django's ``format_html``, which escapes its
    arguments -- confirm against the file's imports.
    """
    return format_html(
        "<div><a target='_blank' rel='noopener noreferrer' alt='{title}' href='{url}' class='link'>Direct url</a></div>",
        url=url,
        title=title,
    )