Show direct URL when no body was found
This commit is contained in:
parent
0579d6f61b
commit
b3adab82fa
2 changed files with 38 additions and 27 deletions
|
|
@ -45,6 +45,9 @@ RATE_LIMIT_DURATION = timedelta(seconds=60)
|
|||
REDDIT_IMAGE_EXTENSIONS = (".jpg", ".png", ".gif")
|
||||
REDDIT_VIDEO_EXTENSIONS = (".mp4", ".gifv", ".webm")
|
||||
|
||||
# See the type prefixes at https://www.reddit.com/dev/api/
|
||||
REDDIT_POST = "t3"
|
||||
|
||||
|
||||
def get_reddit_authorization_url(user):
|
||||
state = str(uuid4())
|
||||
|
|
@ -117,40 +120,42 @@ class RedditBuilder(Builder):
|
|||
results = {}
|
||||
|
||||
for post in posts:
|
||||
if not "data" in post:
|
||||
if not "data" in post or post["kind"] != REDDIT_POST:
|
||||
continue
|
||||
|
||||
remote_identifier = post["data"]["id"]
|
||||
title = truncate_text(Post, "title", post["data"]["title"])
|
||||
author = truncate_text(Post, "author", post["data"]["author"])
|
||||
post_url_fragment = post["data"]["permalink"]
|
||||
direct_url = post["data"]["url"]
|
||||
is_text_post = post["data"]["is_self"]
|
||||
data = post["data"]
|
||||
|
||||
remote_identifier = data["id"]
|
||||
title = truncate_text(Post, "title", data["title"])
|
||||
author = truncate_text(Post, "author", data["author"])
|
||||
post_url_fragment = data["permalink"]
|
||||
direct_url = data["url"]
|
||||
is_text_post = data["is_self"]
|
||||
|
||||
if remote_identifier in results:
|
||||
continue
|
||||
|
||||
uncleaned_body = post["data"]["selftext_html"]
|
||||
unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
|
||||
body = (
|
||||
bleach.clean(
|
||||
unescaped_body,
|
||||
tags=WHITELISTED_TAGS,
|
||||
attributes=WHITELISTED_ATTRIBUTES,
|
||||
strip=True,
|
||||
strip_comments=True,
|
||||
if is_text_post:
|
||||
uncleaned_body = data["selftext_html"]
|
||||
unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
|
||||
body = (
|
||||
bleach.clean(
|
||||
unescaped_body,
|
||||
tags=WHITELISTED_TAGS,
|
||||
attributes=WHITELISTED_ATTRIBUTES,
|
||||
strip=True,
|
||||
strip_comments=True,
|
||||
)
|
||||
if unescaped_body
|
||||
else ""
|
||||
)
|
||||
if unescaped_body
|
||||
else ""
|
||||
)
|
||||
|
||||
if not is_text_post and direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
|
||||
elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
|
||||
body = f"<div><img alt='{title}' src='{direct_url}' loading='lazy' /></div>"
|
||||
elif not is_text_post and post["data"]["is_video"]:
|
||||
video_info = post["data"]["secure_media"]["reddit_video"]
|
||||
elif data["is_video"]:
|
||||
video_info = data["secure_media"]["reddit_video"]
|
||||
|
||||
body = f"<div><video controls muted><source src='{video_info['fallback_url']}' type='video/mp4' /></video></div>"
|
||||
elif not is_text_post and direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
|
||||
elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
|
||||
extension = next(
|
||||
extension.replace(".", "")
|
||||
for extension in REDDIT_VIDEO_EXTENSIONS
|
||||
|
|
@ -161,6 +166,8 @@ class RedditBuilder(Builder):
|
|||
body = f"<div><video controls muted><source src='{direct_url.replace(extension, 'mp4')}' type='video/mp4' /></video></div>"
|
||||
else:
|
||||
body = f"<div><video controls muted><source src='{direct_url}' type='video/{extension}' /></video></div>"
|
||||
else:
|
||||
body = f"<div><a target='_blank' rel='noopener noreferrer' alt='{title}' href='{direct_url}' class='link'>Direct url</a></div>"
|
||||
|
||||
try:
|
||||
parsed_date = datetime.fromtimestamp(post["data"]["created_utc"])
|
||||
|
|
@ -169,7 +176,7 @@ class RedditBuilder(Builder):
|
|||
logging.warning(f"Failed parsing timestamp from {url_fragment}")
|
||||
created_date = timezone.now()
|
||||
|
||||
data = {
|
||||
post_data = {
|
||||
"remote_identifier": remote_identifier,
|
||||
"title": title,
|
||||
"body": body,
|
||||
|
|
@ -182,13 +189,13 @@ class RedditBuilder(Builder):
|
|||
if remote_identifier in self.existing_posts:
|
||||
existing_post = self.existing_posts[remote_identifier]
|
||||
|
||||
for key, value in data.items():
|
||||
for key, value in post_data.items():
|
||||
setattr(existing_post, key, value)
|
||||
|
||||
results[existing_post.remote_identifier] = existing_post
|
||||
continue
|
||||
|
||||
results[remote_identifier] = Post(**data)
|
||||
results[remote_identifier] = Post(**post_data)
|
||||
|
||||
return results.values()
|
||||
|
||||
|
|
|
|||
|
|
@ -240,3 +240,7 @@ class RedditBuilderTestCase(TestCase):
|
|||
@skip("Not implemented")
|
||||
def test_link_only_post(self):
|
||||
pass
|
||||
|
||||
@skip("Not implemented")
|
||||
def test_skip_not_known_post_type(self):
|
||||
pass
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue