Show direct URL when no body was found

This commit is contained in:
Sonny 2020-07-19 22:59:39 +02:00
parent 0579d6f61b
commit b3adab82fa
2 changed files with 38 additions and 27 deletions

View file

@ -45,6 +45,9 @@ RATE_LIMIT_DURATION = timedelta(seconds=60)
REDDIT_IMAGE_EXTENSIONS = (".jpg", ".png", ".gif")
REDDIT_VIDEO_EXTENSIONS = (".mp4", ".gifv", ".webm")
# see type prefixes on https://www.reddit.com/dev/api/
REDDIT_POST = "t3"
def get_reddit_authorization_url(user):
state = str(uuid4())
@ -117,40 +120,42 @@ class RedditBuilder(Builder):
results = {}
for post in posts:
if not "data" in post:
if not "data" in post or post["kind"] != REDDIT_POST:
continue
remote_identifier = post["data"]["id"]
title = truncate_text(Post, "title", post["data"]["title"])
author = truncate_text(Post, "author", post["data"]["author"])
post_url_fragment = post["data"]["permalink"]
direct_url = post["data"]["url"]
is_text_post = post["data"]["is_self"]
data = post["data"]
remote_identifier = data["id"]
title = truncate_text(Post, "title", data["title"])
author = truncate_text(Post, "author", data["author"])
post_url_fragment = data["permalink"]
direct_url = data["url"]
is_text_post = data["is_self"]
if remote_identifier in results:
continue
uncleaned_body = post["data"]["selftext_html"]
unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
body = (
bleach.clean(
unescaped_body,
tags=WHITELISTED_TAGS,
attributes=WHITELISTED_ATTRIBUTES,
strip=True,
strip_comments=True,
if is_text_post:
uncleaned_body = data["selftext_html"]
unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
body = (
bleach.clean(
unescaped_body,
tags=WHITELISTED_TAGS,
attributes=WHITELISTED_ATTRIBUTES,
strip=True,
strip_comments=True,
)
if unescaped_body
else ""
)
if unescaped_body
else ""
)
if not is_text_post and direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
body = f"<div><img alt='{title}' src='{direct_url}' loading='lazy' /></div>"
elif not is_text_post and post["data"]["is_video"]:
video_info = post["data"]["secure_media"]["reddit_video"]
elif data["is_video"]:
video_info = data["secure_media"]["reddit_video"]
body = f"<div><video controls muted><source src='{video_info['fallback_url']}' type='video/mp4' /></video></div>"
elif not is_text_post and direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
extension = next(
extension.replace(".", "")
for extension in REDDIT_VIDEO_EXTENSIONS
@ -161,6 +166,8 @@ class RedditBuilder(Builder):
body = f"<div><video controls muted><source src='{direct_url.replace(extension, 'mp4')}' type='video/mp4' /></video></div>"
else:
body = f"<div><video controls muted><source src='{direct_url}' type='video/{extension}' /></video></div>"
else:
body = f"<div><a target='_blank' rel='noopener noreferrer' alt='{title}' href='{direct_url}' class='link'>Direct url</a></div>"
try:
parsed_date = datetime.fromtimestamp(post["data"]["created_utc"])
@ -169,7 +176,7 @@ class RedditBuilder(Builder):
logging.warning(f"Failed parsing timestamp from {url_fragment}")
created_date = timezone.now()
data = {
post_data = {
"remote_identifier": remote_identifier,
"title": title,
"body": body,
@ -182,13 +189,13 @@ class RedditBuilder(Builder):
if remote_identifier in self.existing_posts:
existing_post = self.existing_posts[remote_identifier]
for key, value in data.items():
for key, value in post_data.items():
setattr(existing_post, key, value)
results[existing_post.remote_identifier] = existing_post
continue
results[remote_identifier] = Post(**data)
results[remote_identifier] = Post(**post_data)
return results.values()

View file

@ -240,3 +240,7 @@ class RedditBuilderTestCase(TestCase):
# Placeholder test: always skipped with the reason "Not implemented"; the body is a no-op.
# NOTE(review): presumably meant to cover posts that carry only a link and no body — confirm when implementing.
@skip("Not implemented")
def test_link_only_post(self):
pass
# Placeholder test: always skipped with the reason "Not implemented"; the body is a no-op.
# NOTE(review): presumably meant to check that entries of an unrecognised kind are ignored by the builder — confirm when implementing.
@skip("Not implemented")
def test_skip_not_known_post_type(self):
pass