diff --git a/src/newsreader/news/collection/tests/twitter/builder/tests.py b/src/newsreader/news/collection/tests/twitter/builder/tests.py
index 2d7150e..b3561d8 100644
--- a/src/newsreader/news/collection/tests/twitter/builder/tests.py
+++ b/src/newsreader/news/collection/tests/twitter/builder/tests.py
@@ -107,12 +107,14 @@ class TwitterBuilderTestCase(TestCase):
self.assertIn(full_text, post.body)
self.assertInHTML(
- f"
",
+ """""",
post.body,
+ count=1,
)
self.assertInHTML(
- f"",
+ """""",
post.body,
+ count=1,
)
def test_videos_in_post(self):
diff --git a/src/newsreader/news/collection/twitter.py b/src/newsreader/news/collection/twitter.py
index ae29047..d8d273b 100644
--- a/src/newsreader/news/collection/twitter.py
+++ b/src/newsreader/news/collection/twitter.py
@@ -1,3 +1,5 @@
+import logging
+
from datetime import datetime
from django.template.defaultfilters import truncatechars
@@ -12,6 +14,8 @@ from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeC
from newsreader.news.core.models import Post
+logger = logging.getLogger(__name__)
+
TWITTER_URL = "https://twitter.com"
TWITTER_API_URL = "https://api.twitter.com/1.1"
@@ -36,62 +40,31 @@ class TwitterBuilder(Builder):
for post in posts:
remote_identifier = post["id_str"]
+ url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"
publication_date = pytz.utc.localize(
datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
)
- body = ""
+ body = post["full_text"]
if "extended_entities" in post:
- media_entities = post["extended_entities"]["media"]
-
- for media_entity in media_entities:
- media_type = media_entity["type"]
- media_url = media_entity["media_url_https"]
- title = media_entity["id_str"]
-
- if media_type == TwitterPostTypeChoices.photo:
- html_fragment = format_html(
- "",
- title=title,
- media_url=media_url,
- )
-
- body += html_fragment
-
- elif media_type in (
- TwitterPostTypeChoices.video,
- TwitterPostTypeChoices.animated_gif,
- ):
- meta_data = media_entity["video_info"]
-
- videos = sorted(
- [video for video in meta_data["variants"]],
- reverse=True,
- key=lambda video: video.get("bitrate", 0),
- )
-
- if not videos:
- continue
-
- video = videos[0]
- content_type = video["content_type"]
- url = video["url"]
-
- html_fragment = format_html(
- """ """,
- url=url,
- content_type=content_type,
- )
- body += html_fragment
+ try:
+ media_entities = self.get_media_entities(post)
+ body += media_entities
+ except KeyError:
+ logger.exception(f"Failed parsing media_entities for {url}")
if "retweeted_status" in post:
original_post = post["retweeted_status"]
- body += format_html(f"Original tweet: {original_post['full_text']}")
+ body += format_html(
+ "Original tweet: {original_post}",
+ original_post=original_post["full_text"],
+ )
if "quoted_status" in post:
original_post = post["quoted_status"]
- body += format_html(f"Quoted tweet: {original_post['full_text']}")
-
- body += format_html(post["full_text"])
+ body += format_html(
+ "Quoted tweet: {original_post}",
+ original_post=original_post["full_text"],
+ )
data = {
"remote_identifier": remote_identifier,
@@ -99,7 +72,7 @@ class TwitterBuilder(Builder):
"body": fix_text(body),
"author": rule.screen_name,
"publication_date": publication_date,
- "url": f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}",
+ "url": url,
"rule": rule,
}
@@ -107,6 +80,53 @@ class TwitterBuilder(Builder):
return results.values()
+ def get_media_entities(self, post):  # Render post["extended_entities"]["media"] into one HTML string; lookups are unguarded here (the caller wraps this call in `except KeyError`)
+ media_entities = post["extended_entities"]["media"]
+ formatted_entities = ""  # accumulates the rendered fragment of every supported entity
+
+ for media_entity in media_entities:
+ media_type = media_entity["type"]
+ media_url = media_entity["media_url_https"]  # read for every entity, before the type is checked
+ title = media_entity["id_str"]  # the entity id is passed to the template as `title`
+
+ if media_type == TwitterPostTypeChoices.photo:
+ html_fragment = format_html(
+ """
""",  # NOTE(review): template body looks empty in this view - confirm the markup was not lost
+ title=title,
+ media_url=media_url,
+ )
+
+ formatted_entities += html_fragment
+
+ elif media_type in (  # videos and animated gifs share the same rendering path
+ TwitterPostTypeChoices.video,
+ TwitterPostTypeChoices.animated_gif,
+ ):
+ meta_data = media_entity["video_info"]
+
+ videos = sorted(  # highest bitrate first
+ [video for video in meta_data["variants"]],
+ reverse=True,
+ key=lambda video: video.get("bitrate", 0),  # variants without a bitrate sort as 0
+ )
+
+ if not videos:  # no variants: nothing to render for this entity
+ continue
+
+ video = videos[0]  # the highest-bitrate variant
+ content_type = video["content_type"]
+ url = video["url"]
+
+ html_fragment = format_html(
+ """
""",  # NOTE(review): template body looks empty in this view - confirm the markup was not lost
+ url=url,
+ content_type=content_type,
+ )
+
+ formatted_entities += html_fragment
+
+ return formatted_entities  # entities of any other type contribute nothing
+
class TwitterStream(Stream):
pass