From a90e55865505fb18d65cddda7059be14a1889383 Mon Sep 17 00:00:00 2001 From: Sonny Bakker Date: Sat, 12 Sep 2020 16:49:01 +0200 Subject: [PATCH] Move media_entities parsing to separate function --- .../collection/tests/twitter/builder/tests.py | 6 +- src/newsreader/news/collection/twitter.py | 114 ++++++++++-------- 2 files changed, 71 insertions(+), 49 deletions(-) diff --git a/src/newsreader/news/collection/tests/twitter/builder/tests.py b/src/newsreader/news/collection/tests/twitter/builder/tests.py index 2d7150e..b3561d8 100644 --- a/src/newsreader/news/collection/tests/twitter/builder/tests.py +++ b/src/newsreader/news/collection/tests/twitter/builder/tests.py @@ -107,12 +107,14 @@ class TwitterBuilderTestCase(TestCase): self.assertIn(full_text, post.body) self.assertInHTML( - f"
1269039233072689152
", + """
1269039233072689152
""", post.body, + count=1, ) self.assertInHTML( - f"
1269039233068527618
", + """
1269039233068527618
""", post.body, + count=1, ) def test_videos_in_post(self): diff --git a/src/newsreader/news/collection/twitter.py b/src/newsreader/news/collection/twitter.py index ae29047..d8d273b 100644 --- a/src/newsreader/news/collection/twitter.py +++ b/src/newsreader/news/collection/twitter.py @@ -1,3 +1,5 @@ +import logging + from datetime import datetime from django.template.defaultfilters import truncatechars @@ -12,6 +14,8 @@ from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeC from newsreader.news.core.models import Post +logger = logging.getLogger(__name__) + TWITTER_URL = "https://twitter.com" TWITTER_API_URL = "https://api.twitter.com/1.1" @@ -36,62 +40,31 @@ class TwitterBuilder(Builder): for post in posts: remote_identifier = post["id_str"] + url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}" publication_date = pytz.utc.localize( datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y") ) - body = "" + body = post["full_text"] if "extended_entities" in post: - media_entities = post["extended_entities"]["media"] - - for media_entity in media_entities: - media_type = media_entity["type"] - media_url = media_entity["media_url_https"] - title = media_entity["id_str"] - - if media_type == TwitterPostTypeChoices.photo: - html_fragment = format_html( - "
{title}
", - title=title, - media_url=media_url, - ) - - body += html_fragment - - elif media_type in ( - TwitterPostTypeChoices.video, - TwitterPostTypeChoices.animated_gif, - ): - meta_data = media_entity["video_info"] - - videos = sorted( - [video for video in meta_data["variants"]], - reverse=True, - key=lambda video: video.get("bitrate", 0), - ) - - if not videos: - continue - - video = videos[0] - content_type = video["content_type"] - url = video["url"] - - html_fragment = format_html( - """
""", - url=url, - content_type=content_type, - ) - body += html_fragment + try: + media_entities = self.get_media_entities(post) + body += media_entities + except KeyError: + logger.exception(f"Failed parsing media_entities for {url}") if "retweeted_status" in post: original_post = post["retweeted_status"] - body += format_html(f"Original tweet: {original_post['full_text']}") + body += format_html( + "Original tweet: {original_post}", + original_post=original_post["full_text"], + ) if "quoted_status" in post: original_post = post["quoted_status"] - body += format_html(f"Quoted tweet: {original_post['full_text']}") - - body += format_html(post["full_text"]) + body += format_html( + "Quoted tweet: {original_post}", + original_post=original_post["full_text"], + ) data = { "remote_identifier": remote_identifier, @@ -99,7 +72,7 @@ class TwitterBuilder(Builder): "body": fix_text(body), "author": rule.screen_name, "publication_date": publication_date, - "url": f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}", + "url": url, "rule": rule, } @@ -107,6 +80,53 @@ class TwitterBuilder(Builder): return results.values() + def get_media_entities(self, post): + media_entities = post["extended_entities"]["media"] + formatted_entities = "" + + for media_entity in media_entities: + media_type = media_entity["type"] + media_url = media_entity["media_url_https"] + title = media_entity["id_str"] + + if media_type == TwitterPostTypeChoices.photo: + html_fragment = format_html( + """
{title}
""", + title=title, + media_url=media_url, + ) + + formatted_entities += html_fragment + + elif media_type in ( + TwitterPostTypeChoices.video, + TwitterPostTypeChoices.animated_gif, + ): + meta_data = media_entity["video_info"] + + videos = sorted( + [video for video in meta_data["variants"]], + reverse=True, + key=lambda video: video.get("bitrate", 0), + ) + + if not videos: + continue + + video = videos[0] + content_type = video["content_type"] + url = video["url"] + + html_fragment = format_html( + """
""", + url=url, + content_type=content_type, + ) + + formatted_entities += html_fragment + + return formatted_entities + class TwitterStream(Stream): pass