Use new builder exceptions in RedditBuilder
This commit is contained in:
parent
cffcd954d7
commit
e89b4c04a1
1 changed file with 69 additions and 48 deletions
|
|
@ -28,6 +28,10 @@ from newsreader.news.collection.constants import (
|
||||||
WHITELISTED_TAGS,
|
WHITELISTED_TAGS,
|
||||||
)
|
)
|
||||||
from newsreader.news.collection.exceptions import (
|
from newsreader.news.collection.exceptions import (
|
||||||
|
BuilderDuplicateException,
|
||||||
|
BuilderException,
|
||||||
|
BuilderMissingDataException,
|
||||||
|
BuilderParseException,
|
||||||
StreamDeniedException,
|
StreamDeniedException,
|
||||||
StreamException,
|
StreamException,
|
||||||
StreamParseException,
|
StreamParseException,
|
||||||
|
|
@ -125,56 +129,67 @@ class RedditBuilder(PostBuilder):
|
||||||
entries = self.payload["data"]["children"]
|
entries = self.payload["data"]["children"]
|
||||||
|
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
if not "data" in entry:
|
|
||||||
continue
|
|
||||||
elif entry.get("kind") != REDDIT_POST:
|
|
||||||
continue
|
|
||||||
elif not "id" in entry["data"]:
|
|
||||||
continue
|
|
||||||
|
|
||||||
remote_identifier = entry["data"]["id"]
|
|
||||||
|
|
||||||
if remote_identifier in results or remote_identifier in self.existing_posts:
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
post = self.build_post(entry["data"])
|
post = self.build_post(entry)
|
||||||
except KeyError:
|
except BuilderException:
|
||||||
logger.exception(f"Failed building post {remote_identifier}")
|
logger.exception("Failed building post")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
results[remote_identifier] = post
|
identifier = post.remote_identifier
|
||||||
|
results[identifier] = post
|
||||||
|
|
||||||
self.instances = results.values()
|
self.instances = results.values()
|
||||||
|
|
||||||
def build_post(self, entry):
|
def build_post(self, entry):
|
||||||
rule = self.stream.rule
|
rule = self.stream.rule
|
||||||
|
entry_data = entry.get("data", {})
|
||||||
|
remote_identifier = entry_data.get("id", "")
|
||||||
|
kind = entry.get("kind")
|
||||||
|
|
||||||
remote_identifier = entry["id"]
|
if remote_identifier in self.existing_posts:
|
||||||
title = truncate_text(Post, "title", entry["title"])
|
raise BuilderDuplicateException(payload=entry)
|
||||||
author = truncate_text(Post, "author", entry["author"])
|
elif kind != REDDIT_POST:
|
||||||
post_url_fragment = entry["permalink"]
|
raise BuilderParseException(
|
||||||
direct_url = entry["url"]
|
message=f"Payload is not an reddit post, its of kind {kind}",
|
||||||
|
payload=entry,
|
||||||
if entry["is_self"]:
|
)
|
||||||
body = self.get_text_post(entry)
|
elif not entry_data:
|
||||||
elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
|
raise BuilderMissingDataException(
|
||||||
body = self.get_image_post(entry)
|
message=f"Post {remote_identifier} did not contain any data",
|
||||||
elif entry["is_video"]:
|
payload=entry,
|
||||||
body = self.get_native_video_post(entry)
|
)
|
||||||
elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
|
|
||||||
body = self.get_video_post(entry)
|
|
||||||
else:
|
|
||||||
body = self.get_url_post(entry)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parsed_date = datetime.fromtimestamp(entry["created_utc"])
|
title = entry_data["title"]
|
||||||
|
author = entry_data["author"]
|
||||||
|
post_url_fragment = entry_data["permalink"]
|
||||||
|
direct_url = entry_data["url"]
|
||||||
|
is_text = entry_data["is_self"]
|
||||||
|
is_video = entry_data["is_video"]
|
||||||
|
except KeyError as e:
|
||||||
|
raise BuilderMissingDataException(payload=entry) from e
|
||||||
|
|
||||||
|
title = truncate_text(Post, "title", title)
|
||||||
|
author = truncate_text(Post, "author", author)
|
||||||
|
|
||||||
|
if is_text:
|
||||||
|
body = self.get_text_post(entry_data)
|
||||||
|
elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
|
||||||
|
body = self.get_image_post(title, direct_url)
|
||||||
|
elif is_video:
|
||||||
|
body = self.get_native_video_post(entry_data)
|
||||||
|
elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
|
||||||
|
body = self.get_video_post(direct_url)
|
||||||
|
else:
|
||||||
|
body = self.get_url_post(title, direct_url)
|
||||||
|
|
||||||
|
try:
|
||||||
|
parsed_date = datetime.fromtimestamp(entry_data["created_utc"])
|
||||||
created_date = pytz.utc.localize(parsed_date)
|
created_date = pytz.utc.localize(parsed_date)
|
||||||
except (OverflowError, OSError):
|
except (OverflowError, OSError) as e:
|
||||||
logging.warning(
|
raise BuilderParseException(payload=entry) from e
|
||||||
f"Failed parsing timestamp from {REDDIT_URL}{post_url_fragment}"
|
except KeyError as e:
|
||||||
)
|
raise BuilderMissingDataException(payload=entry) from e
|
||||||
created_date = timezone.now()
|
|
||||||
|
|
||||||
post_entry = {
|
post_entry = {
|
||||||
"remote_identifier": remote_identifier,
|
"remote_identifier": remote_identifier,
|
||||||
|
|
@ -189,27 +204,33 @@ class RedditBuilder(PostBuilder):
|
||||||
return Post(**post_entry)
|
return Post(**post_entry)
|
||||||
|
|
||||||
def get_text_post(self, entry):
|
def get_text_post(self, entry):
|
||||||
uncleaned_body = entry["selftext_html"]
|
try:
|
||||||
|
uncleaned_body = entry["selftext_html"]
|
||||||
|
except KeyError as e:
|
||||||
|
raise BuilderMissingDataException(payload=entry) from e
|
||||||
|
|
||||||
unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
|
unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
|
||||||
return self.sanitize_fragment(unescaped_body) if unescaped_body else ""
|
return self.sanitize_fragment(unescaped_body) if unescaped_body else ""
|
||||||
|
|
||||||
def get_image_post(self, entry):
|
def get_image_post(self, title, url):
|
||||||
return format_html(
|
return format_html(
|
||||||
"<div><img alt='{title}' src='{url}' loading='lazy' /></div>",
|
"<div><img alt='{title}' src='{url}' loading='lazy' /></div>",
|
||||||
url=entry["url"],
|
url=url,
|
||||||
title=entry["title"],
|
title=title,
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_native_video_post(self, entry):
|
def get_native_video_post(self, entry):
|
||||||
video_info = entry["secure_media"]["reddit_video"]
|
try:
|
||||||
|
video_info = entry["secure_media"]["reddit_video"]
|
||||||
|
except KeyError as e:
|
||||||
|
raise BuilderMissingDataException(payload=entry) from e
|
||||||
|
|
||||||
return format_html(
|
return format_html(
|
||||||
"<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
|
"<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
|
||||||
url=video_info["fallback_url"],
|
url=video_info["fallback_url"],
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_video_post(self, entry):
|
def get_video_post(self, url):
|
||||||
url = entry["url"]
|
|
||||||
extension = next(
|
extension = next(
|
||||||
extension.replace(".", "")
|
extension.replace(".", "")
|
||||||
for extension in REDDIT_VIDEO_EXTENSIONS
|
for extension in REDDIT_VIDEO_EXTENSIONS
|
||||||
|
|
@ -228,11 +249,11 @@ class RedditBuilder(PostBuilder):
|
||||||
extension=extension,
|
extension=extension,
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_url_post(self, entry):
|
def get_url_post(self, title, url):
|
||||||
return format_html(
|
return format_html(
|
||||||
"<div><a target='_blank' rel='noopener noreferrer' alt='{title}' href='{url}' class='link'>Direct url</a></div>",
|
"<div><a target='_blank' rel='noopener noreferrer' alt='{title}' href='{url}' class='link'>Direct url</a></div>",
|
||||||
url=entry["url"],
|
url=url,
|
||||||
title=entry["title"],
|
title=title,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue