Use new builder exceptions in RedditBuilder
This commit is contained in:
parent
cffcd954d7
commit
e89b4c04a1
1 changed file with 69 additions and 48 deletions
|
|
@ -28,6 +28,10 @@ from newsreader.news.collection.constants import (
|
|||
WHITELISTED_TAGS,
|
||||
)
|
||||
from newsreader.news.collection.exceptions import (
|
||||
BuilderDuplicateException,
|
||||
BuilderException,
|
||||
BuilderMissingDataException,
|
||||
BuilderParseException,
|
||||
StreamDeniedException,
|
||||
StreamException,
|
||||
StreamParseException,
|
||||
|
|
@ -125,56 +129,67 @@ class RedditBuilder(PostBuilder):
|
|||
entries = self.payload["data"]["children"]
|
||||
|
||||
for entry in entries:
|
||||
if not "data" in entry:
|
||||
continue
|
||||
elif entry.get("kind") != REDDIT_POST:
|
||||
continue
|
||||
elif not "id" in entry["data"]:
|
||||
continue
|
||||
|
||||
remote_identifier = entry["data"]["id"]
|
||||
|
||||
if remote_identifier in results or remote_identifier in self.existing_posts:
|
||||
continue
|
||||
|
||||
try:
|
||||
post = self.build_post(entry["data"])
|
||||
except KeyError:
|
||||
logger.exception(f"Failed building post {remote_identifier}")
|
||||
post = self.build_post(entry)
|
||||
except BuilderException:
|
||||
logger.exception("Failed building post")
|
||||
continue
|
||||
|
||||
results[remote_identifier] = post
|
||||
identifier = post.remote_identifier
|
||||
results[identifier] = post
|
||||
|
||||
self.instances = results.values()
|
||||
|
||||
def build_post(self, entry):
|
||||
rule = self.stream.rule
|
||||
entry_data = entry.get("data", {})
|
||||
remote_identifier = entry_data.get("id", "")
|
||||
kind = entry.get("kind")
|
||||
|
||||
remote_identifier = entry["id"]
|
||||
title = truncate_text(Post, "title", entry["title"])
|
||||
author = truncate_text(Post, "author", entry["author"])
|
||||
post_url_fragment = entry["permalink"]
|
||||
direct_url = entry["url"]
|
||||
|
||||
if entry["is_self"]:
|
||||
body = self.get_text_post(entry)
|
||||
elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
|
||||
body = self.get_image_post(entry)
|
||||
elif entry["is_video"]:
|
||||
body = self.get_native_video_post(entry)
|
||||
elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
|
||||
body = self.get_video_post(entry)
|
||||
else:
|
||||
body = self.get_url_post(entry)
|
||||
if remote_identifier in self.existing_posts:
|
||||
raise BuilderDuplicateException(payload=entry)
|
||||
elif kind != REDDIT_POST:
|
||||
raise BuilderParseException(
|
||||
message=f"Payload is not an reddit post, its of kind {kind}",
|
||||
payload=entry,
|
||||
)
|
||||
elif not entry_data:
|
||||
raise BuilderMissingDataException(
|
||||
message=f"Post {remote_identifier} did not contain any data",
|
||||
payload=entry,
|
||||
)
|
||||
|
||||
try:
|
||||
parsed_date = datetime.fromtimestamp(entry["created_utc"])
|
||||
title = entry_data["title"]
|
||||
author = entry_data["author"]
|
||||
post_url_fragment = entry_data["permalink"]
|
||||
direct_url = entry_data["url"]
|
||||
is_text = entry_data["is_self"]
|
||||
is_video = entry_data["is_video"]
|
||||
except KeyError as e:
|
||||
raise BuilderMissingDataException(payload=entry) from e
|
||||
|
||||
title = truncate_text(Post, "title", title)
|
||||
author = truncate_text(Post, "author", author)
|
||||
|
||||
if is_text:
|
||||
body = self.get_text_post(entry_data)
|
||||
elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
|
||||
body = self.get_image_post(title, direct_url)
|
||||
elif is_video:
|
||||
body = self.get_native_video_post(entry_data)
|
||||
elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
|
||||
body = self.get_video_post(direct_url)
|
||||
else:
|
||||
body = self.get_url_post(title, direct_url)
|
||||
|
||||
try:
|
||||
parsed_date = datetime.fromtimestamp(entry_data["created_utc"])
|
||||
created_date = pytz.utc.localize(parsed_date)
|
||||
except (OverflowError, OSError):
|
||||
logging.warning(
|
||||
f"Failed parsing timestamp from {REDDIT_URL}{post_url_fragment}"
|
||||
)
|
||||
created_date = timezone.now()
|
||||
except (OverflowError, OSError) as e:
|
||||
raise BuilderParseException(payload=entry) from e
|
||||
except KeyError as e:
|
||||
raise BuilderMissingDataException(payload=entry) from e
|
||||
|
||||
post_entry = {
|
||||
"remote_identifier": remote_identifier,
|
||||
|
|
@ -189,27 +204,33 @@ class RedditBuilder(PostBuilder):
|
|||
return Post(**post_entry)
|
||||
|
||||
def get_text_post(self, entry):
|
||||
try:
|
||||
uncleaned_body = entry["selftext_html"]
|
||||
except KeyError as e:
|
||||
raise BuilderMissingDataException(payload=entry) from e
|
||||
|
||||
unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
|
||||
return self.sanitize_fragment(unescaped_body) if unescaped_body else ""
|
||||
|
||||
def get_image_post(self, entry):
|
||||
def get_image_post(self, title, url):
|
||||
return format_html(
|
||||
"<div><img alt='{title}' src='{url}' loading='lazy' /></div>",
|
||||
url=entry["url"],
|
||||
title=entry["title"],
|
||||
url=url,
|
||||
title=title,
|
||||
)
|
||||
|
||||
def get_native_video_post(self, entry):
|
||||
try:
|
||||
video_info = entry["secure_media"]["reddit_video"]
|
||||
except KeyError as e:
|
||||
raise BuilderMissingDataException(payload=entry) from e
|
||||
|
||||
return format_html(
|
||||
"<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
|
||||
url=video_info["fallback_url"],
|
||||
)
|
||||
|
||||
def get_video_post(self, entry):
|
||||
url = entry["url"]
|
||||
def get_video_post(self, url):
|
||||
extension = next(
|
||||
extension.replace(".", "")
|
||||
for extension in REDDIT_VIDEO_EXTENSIONS
|
||||
|
|
@ -228,11 +249,11 @@ class RedditBuilder(PostBuilder):
|
|||
extension=extension,
|
||||
)
|
||||
|
||||
def get_url_post(self, entry):
|
||||
def get_url_post(self, title, url):
|
||||
return format_html(
|
||||
"<div><a target='_blank' rel='noopener noreferrer' alt='{title}' href='{url}' class='link'>Direct url</a></div>",
|
||||
url=entry["url"],
|
||||
title=entry["title"],
|
||||
url=url,
|
||||
title=title,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue