Use new builder exceptions in RedditBuilder

This commit is contained in:
Sonny Bakker 2020-10-14 21:39:36 +02:00
parent cffcd954d7
commit e89b4c04a1

View file

@ -28,6 +28,10 @@ from newsreader.news.collection.constants import (
WHITELISTED_TAGS,
)
from newsreader.news.collection.exceptions import (
BuilderDuplicateException,
BuilderException,
BuilderMissingDataException,
BuilderParseException,
StreamDeniedException,
StreamException,
StreamParseException,
@ -125,56 +129,67 @@ class RedditBuilder(PostBuilder):
entries = self.payload["data"]["children"]
for entry in entries:
if not "data" in entry:
continue
elif entry.get("kind") != REDDIT_POST:
continue
elif not "id" in entry["data"]:
continue
remote_identifier = entry["data"]["id"]
if remote_identifier in results or remote_identifier in self.existing_posts:
continue
try:
post = self.build_post(entry["data"])
except KeyError:
logger.exception(f"Failed building post {remote_identifier}")
post = self.build_post(entry)
except BuilderException:
logger.exception("Failed building post")
continue
results[remote_identifier] = post
identifier = post.remote_identifier
results[identifier] = post
self.instances = results.values()
def build_post(self, entry):
rule = self.stream.rule
entry_data = entry.get("data", {})
remote_identifier = entry_data.get("id", "")
kind = entry.get("kind")
remote_identifier = entry["id"]
title = truncate_text(Post, "title", entry["title"])
author = truncate_text(Post, "author", entry["author"])
post_url_fragment = entry["permalink"]
direct_url = entry["url"]
if entry["is_self"]:
body = self.get_text_post(entry)
elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
body = self.get_image_post(entry)
elif entry["is_video"]:
body = self.get_native_video_post(entry)
elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
body = self.get_video_post(entry)
else:
body = self.get_url_post(entry)
if remote_identifier in self.existing_posts:
raise BuilderDuplicateException(payload=entry)
elif kind != REDDIT_POST:
raise BuilderParseException(
message=f"Payload is not an reddit post, its of kind {kind}",
payload=entry,
)
elif not entry_data:
raise BuilderMissingDataException(
message=f"Post {remote_identifier} did not contain any data",
payload=entry,
)
try:
parsed_date = datetime.fromtimestamp(entry["created_utc"])
title = entry_data["title"]
author = entry_data["author"]
post_url_fragment = entry_data["permalink"]
direct_url = entry_data["url"]
is_text = entry_data["is_self"]
is_video = entry_data["is_video"]
except KeyError as e:
raise BuilderMissingDataException(payload=entry) from e
title = truncate_text(Post, "title", title)
author = truncate_text(Post, "author", author)
if is_text:
body = self.get_text_post(entry_data)
elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
body = self.get_image_post(title, direct_url)
elif is_video:
body = self.get_native_video_post(entry_data)
elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
body = self.get_video_post(direct_url)
else:
body = self.get_url_post(title, direct_url)
try:
parsed_date = datetime.fromtimestamp(entry_data["created_utc"])
created_date = pytz.utc.localize(parsed_date)
except (OverflowError, OSError):
logging.warning(
f"Failed parsing timestamp from {REDDIT_URL}{post_url_fragment}"
)
created_date = timezone.now()
except (OverflowError, OSError) as e:
raise BuilderParseException(payload=entry) from e
except KeyError as e:
raise BuilderMissingDataException(payload=entry) from e
post_entry = {
"remote_identifier": remote_identifier,
@ -189,27 +204,33 @@ class RedditBuilder(PostBuilder):
return Post(**post_entry)
def get_text_post(self, entry):
    """Return the sanitized HTML body of a text post.

    The ``selftext_html`` value of ``entry`` is passed through ``unescape``
    and then through ``self.sanitize_fragment``. A falsy value at either
    stage (``None``, empty string) results in an empty string.

    Raises:
        BuilderMissingDataException: if ``entry`` has no ``selftext_html``
            key at all.
    """
    try:
        raw_html = entry["selftext_html"]
    except KeyError as e:
        raise BuilderMissingDataException(payload=entry) from e

    # Nothing to decode: skip both unescaping and sanitizing.
    if not raw_html:
        return ""

    decoded_html = unescape(raw_html)

    if not decoded_html:
        return ""

    return self.sanitize_fragment(decoded_html)
def get_image_post(self, entry):
def get_image_post(self, title, url):
return format_html(
"<div><img alt='{title}' src='{url}' loading='lazy' /></div>",
url=entry["url"],
title=entry["title"],
url=url,
title=title,
)
def get_native_video_post(self, entry):
    """Return ``<video>`` markup for a post whose video is hosted by Reddit.

    The source URL is read from
    ``entry["secure_media"]["reddit_video"]["fallback_url"]`` and emitted as
    a single ``video/mp4`` ``<source>``.

    Raises:
        BuilderMissingDataException: if any key on that path is absent, or
            an intermediate value is not subscriptable (e.g. ``None``).
    """
    try:
        video_url = entry["secure_media"]["reddit_video"]["fallback_url"]
    except (KeyError, TypeError) as e:
        # The build loop only handles BuilderException, so every lookup
        # failure on this path has to surface as one; a bare KeyError or
        # TypeError would abort the whole build instead of skipping the post.
        raise BuilderMissingDataException(payload=entry) from e

    return format_html(
        "<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
        url=video_url,
    )
def get_video_post(self, entry):
url = entry["url"]
def get_video_post(self, url):
extension = next(
extension.replace(".", "")
for extension in REDDIT_VIDEO_EXTENSIONS
@ -228,11 +249,11 @@ class RedditBuilder(PostBuilder):
extension=extension,
)
def get_url_post(self, entry):
def get_url_post(self, title, url):
return format_html(
"<div><a target='_blank' rel='noopener noreferrer' alt='{title}' href='{url}' class='link'>Direct url</a></div>",
url=entry["url"],
title=entry["title"],
url=url,
title=title,
)