Move media_entities parsing to separate function

This commit is contained in:
Sonny Bakker 2020-09-12 16:49:01 +02:00
parent cfe9c29a14
commit a90e558655
2 changed files with 71 additions and 49 deletions

View file

@ -107,12 +107,14 @@ class TwitterBuilderTestCase(TestCase):
self.assertIn(full_text, post.body) self.assertIn(full_text, post.body)
self.assertInHTML( self.assertInHTML(
f"<div><img alt='1269039233072689152' src='https://pbs.twimg.com/media/EZyIdXVU8AACPCz.jpg' loading='lazy' /></div>", """<div><img alt="1269039233072689152" src="https://pbs.twimg.com/media/EZyIdXVU8AACPCz.jpg" loading="lazy" /></div>""",
post.body, post.body,
count=1,
) )
self.assertInHTML( self.assertInHTML(
f"<div><img alt='1269039233068527618' src='https://pbs.twimg.com/media/EZyIdXUVcAI3Cju.jpg' loading='lazy' /></div>", """<div><img alt="1269039233068527618" src="https://pbs.twimg.com/media/EZyIdXUVcAI3Cju.jpg" loading="lazy" /></div>""",
post.body, post.body,
count=1,
) )
def test_videos_in_post(self): def test_videos_in_post(self):

View file

@ -1,3 +1,5 @@
import logging
from datetime import datetime from datetime import datetime
from django.template.defaultfilters import truncatechars from django.template.defaultfilters import truncatechars
@ -12,6 +14,8 @@ from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeC
from newsreader.news.core.models import Post from newsreader.news.core.models import Post
logger = logging.getLogger(__name__)
TWITTER_URL = "https://twitter.com" TWITTER_URL = "https://twitter.com"
TWITTER_API_URL = "https://api.twitter.com/1.1" TWITTER_API_URL = "https://api.twitter.com/1.1"
@ -36,62 +40,31 @@ class TwitterBuilder(Builder):
for post in posts: for post in posts:
remote_identifier = post["id_str"] remote_identifier = post["id_str"]
url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"
publication_date = pytz.utc.localize( publication_date = pytz.utc.localize(
datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y") datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
) )
body = "" body = post["full_text"]
if "extended_entities" in post: if "extended_entities" in post:
media_entities = post["extended_entities"]["media"] try:
media_entities = self.get_media_entities(post)
for media_entity in media_entities: body += media_entities
media_type = media_entity["type"] except KeyError:
media_url = media_entity["media_url_https"] logger.exception(f"Failed parsing media_entities for {url}")
title = media_entity["id_str"]
if media_type == TwitterPostTypeChoices.photo:
html_fragment = format_html(
"<div><img alt='{title}' src='{media_url}' loading='lazy' /></div>",
title=title,
media_url=media_url,
)
body += html_fragment
elif media_type in (
TwitterPostTypeChoices.video,
TwitterPostTypeChoices.animated_gif,
):
meta_data = media_entity["video_info"]
videos = sorted(
[video for video in meta_data["variants"]],
reverse=True,
key=lambda video: video.get("bitrate", 0),
)
if not videos:
continue
video = videos[0]
content_type = video["content_type"]
url = video["url"]
html_fragment = format_html(
"""<div><video controls muted><source src="{url}" type="{content_type}" /></video></div> """,
url=url,
content_type=content_type,
)
body += html_fragment
if "retweeted_status" in post: if "retweeted_status" in post:
original_post = post["retweeted_status"] original_post = post["retweeted_status"]
body += format_html(f"Original tweet: {original_post['full_text']}") body += format_html(
"Original tweet: {original_post}",
original_post=original_post["full_text"],
)
if "quoted_status" in post: if "quoted_status" in post:
original_post = post["quoted_status"] original_post = post["quoted_status"]
body += format_html(f"Quoted tweet: {original_post['full_text']}") body += format_html(
"Quoted tweet: {original_post}",
body += format_html(post["full_text"]) original_post=original_post["full_text"],
)
data = { data = {
"remote_identifier": remote_identifier, "remote_identifier": remote_identifier,
@ -99,7 +72,7 @@ class TwitterBuilder(Builder):
"body": fix_text(body), "body": fix_text(body),
"author": rule.screen_name, "author": rule.screen_name,
"publication_date": publication_date, "publication_date": publication_date,
"url": f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}", "url": url,
"rule": rule, "rule": rule,
} }
@ -107,6 +80,53 @@ class TwitterBuilder(Builder):
return results.values() return results.values()
def get_media_entities(self, post):
    """
    Render the media attached to a tweet as one string of HTML fragments.

    Walks ``post["extended_entities"]["media"]`` and emits a fragment for
    every photo, video and animated gif it finds. Entities of any other
    type are ignored, and so are videos that list no variants. For videos
    the variant with the highest ``bitrate`` is picked; a variant without
    a ``bitrate`` key is ranked as 0.

    Lookups of missing keys are not handled here, so a ``KeyError``
    propagates to the caller.
    """
    fragments = []

    for entity in post["extended_entities"]["media"]:
        kind = entity["type"]
        source = entity["media_url_https"]
        identifier = entity["id_str"]

        if kind == TwitterPostTypeChoices.photo:
            fragments.append(
                format_html(
                    """<br /><div><img alt="{title}" src="{media_url}" loading="lazy" /></div>""",
                    title=identifier,
                    media_url=source,
                )
            )
            continue

        if kind not in (
            TwitterPostTypeChoices.video,
            TwitterPostTypeChoices.animated_gif,
        ):
            # Unknown media types contribute nothing to the output.
            continue

        # Highest bitrate first; the sort is stable, so ties keep API order.
        ranked = sorted(
            entity["video_info"]["variants"],
            key=lambda variant: variant.get("bitrate", 0),
            reverse=True,
        )

        if not ranked:
            continue

        best = ranked[0]
        mime_type = best["content_type"]
        location = best["url"]

        fragments.append(
            format_html(
                """<br /><div><video controls muted><source src="{url}" type="{content_type}" /></video></div> """,
                url=location,
                content_type=mime_type,
            )
        )

    return "".join(fragments)
class TwitterStream(Stream): class TwitterStream(Stream):
pass pass