From b3adab82fa8311caf2cb20c8eefdd00fb4bf692d Mon Sep 17 00:00:00 2001 From: Sonny Date: Sun, 19 Jul 2020 22:59:39 +0200 Subject: [PATCH] Show direct url when no body was found --- src/newsreader/news/collection/reddit.py | 61 +++++++++++-------- .../collection/tests/reddit/builder/tests.py | 4 ++ 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/src/newsreader/news/collection/reddit.py b/src/newsreader/news/collection/reddit.py index 7af7eb1..c713e17 100644 --- a/src/newsreader/news/collection/reddit.py +++ b/src/newsreader/news/collection/reddit.py @@ -45,6 +45,9 @@ RATE_LIMIT_DURATION = timedelta(seconds=60) REDDIT_IMAGE_EXTENSIONS = (".jpg", ".png", ".gif") REDDIT_VIDEO_EXTENSIONS = (".mp4", ".gifv", ".webm") +# see type prefixes on https://www.reddit.com/dev/api/ +REDDIT_POST = "t3" + def get_reddit_authorization_url(user): state = str(uuid4()) @@ -117,40 +120,42 @@ class RedditBuilder(Builder): results = {} for post in posts: - if not "data" in post: + if not "data" in post or post["kind"] != REDDIT_POST: continue - remote_identifier = post["data"]["id"] - title = truncate_text(Post, "title", post["data"]["title"]) - author = truncate_text(Post, "author", post["data"]["author"]) - post_url_fragment = post["data"]["permalink"] - direct_url = post["data"]["url"] - is_text_post = post["data"]["is_self"] + data = post["data"] + + remote_identifier = data["id"] + title = truncate_text(Post, "title", data["title"]) + author = truncate_text(Post, "author", data["author"]) + post_url_fragment = data["permalink"] + direct_url = data["url"] + is_text_post = data["is_self"] if remote_identifier in results: continue - uncleaned_body = post["data"]["selftext_html"] - unescaped_body = unescape(uncleaned_body) if uncleaned_body else "" - body = ( - bleach.clean( - unescaped_body, - tags=WHITELISTED_TAGS, - attributes=WHITELISTED_ATTRIBUTES, - strip=True, - strip_comments=True, + if is_text_post: + uncleaned_body = data["selftext_html"] + unescaped_body = unescape(uncleaned_body) if uncleaned_body else "" + body = ( + bleach.clean( + unescaped_body, + tags=WHITELISTED_TAGS, + attributes=WHITELISTED_ATTRIBUTES, + strip=True, + strip_comments=True, + ) + if unescaped_body + else "" ) - if unescaped_body - else "" - ) - - if not is_text_post and direct_url.endswith(REDDIT_IMAGE_EXTENSIONS): + elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS): body = f"
{title}
" - elif not is_text_post and post["data"]["is_video"]: - video_info = post["data"]["secure_media"]["reddit_video"] + elif data["is_video"]: + video_info = data["secure_media"]["reddit_video"] body = f"
" - elif not is_text_post and direct_url.endswith(REDDIT_VIDEO_EXTENSIONS): + elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS): extension = next( extension.replace(".", "") for extension in REDDIT_VIDEO_EXTENSIONS @@ -161,6 +166,8 @@ class RedditBuilder(Builder): body = f"
" else: body = f"
" + else: + body = f"
Direct url
" try: parsed_date = datetime.fromtimestamp(post["data"]["created_utc"]) @@ -169,7 +176,7 @@ class RedditBuilder(Builder): logging.warning(f"Failed parsing timestamp from {url_fragment}") created_date = timezone.now() - data = { + post_data = { "remote_identifier": remote_identifier, "title": title, "body": body, @@ -182,13 +189,13 @@ class RedditBuilder(Builder): if remote_identifier in self.existing_posts: existing_post = self.existing_posts[remote_identifier] - for key, value in data.items(): + for key, value in post_data.items(): setattr(existing_post, key, value) results[existing_post.remote_identifier] = existing_post continue - results[remote_identifier] = Post(**data) + results[remote_identifier] = Post(**post_data) return results.values() diff --git a/src/newsreader/news/collection/tests/reddit/builder/tests.py b/src/newsreader/news/collection/tests/reddit/builder/tests.py index da1525a..528edb3 100644 --- a/src/newsreader/news/collection/tests/reddit/builder/tests.py +++ b/src/newsreader/news/collection/tests/reddit/builder/tests.py @@ -240,3 +240,7 @@ class RedditBuilderTestCase(TestCase): @skip("Not implemented") def test_link_only_post(self): pass + + @skip("Not implemented") + def test_skip_not_known_post_type(self): + pass