Move media_entities parsing to separate function
This commit is contained in:
parent
cfe9c29a14
commit
a90e558655
2 changed files with 71 additions and 49 deletions
|
|
@ -107,12 +107,14 @@ class TwitterBuilderTestCase(TestCase):
|
|||
|
||||
self.assertIn(full_text, post.body)
|
||||
self.assertInHTML(
|
||||
f"<div><img alt='1269039233072689152' src='https://pbs.twimg.com/media/EZyIdXVU8AACPCz.jpg' loading='lazy' /></div>",
|
||||
"""<div><img alt="1269039233072689152" src="https://pbs.twimg.com/media/EZyIdXVU8AACPCz.jpg" loading="lazy" /></div>""",
|
||||
post.body,
|
||||
count=1,
|
||||
)
|
||||
self.assertInHTML(
|
||||
f"<div><img alt='1269039233068527618' src='https://pbs.twimg.com/media/EZyIdXUVcAI3Cju.jpg' loading='lazy' /></div>",
|
||||
"""<div><img alt="1269039233068527618" src="https://pbs.twimg.com/media/EZyIdXUVcAI3Cju.jpg" loading="lazy" /></div>""",
|
||||
post.body,
|
||||
count=1,
|
||||
)
|
||||
|
||||
def test_videos_in_post(self):
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import logging
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from django.template.defaultfilters import truncatechars
|
||||
|
|
@ -12,6 +14,8 @@ from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeC
|
|||
from newsreader.news.core.models import Post
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
TWITTER_URL = "https://twitter.com"
|
||||
TWITTER_API_URL = "https://api.twitter.com/1.1"
|
||||
|
||||
|
|
@ -36,62 +40,31 @@ class TwitterBuilder(Builder):
|
|||
|
||||
for post in posts:
|
||||
remote_identifier = post["id_str"]
|
||||
url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"
|
||||
publication_date = pytz.utc.localize(
|
||||
datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
|
||||
)
|
||||
body = ""
|
||||
body = post["full_text"]
|
||||
|
||||
if "extended_entities" in post:
|
||||
media_entities = post["extended_entities"]["media"]
|
||||
|
||||
for media_entity in media_entities:
|
||||
media_type = media_entity["type"]
|
||||
media_url = media_entity["media_url_https"]
|
||||
title = media_entity["id_str"]
|
||||
|
||||
if media_type == TwitterPostTypeChoices.photo:
|
||||
html_fragment = format_html(
|
||||
"<div><img alt='{title}' src='{media_url}' loading='lazy' /></div>",
|
||||
title=title,
|
||||
media_url=media_url,
|
||||
)
|
||||
|
||||
body += html_fragment
|
||||
|
||||
elif media_type in (
|
||||
TwitterPostTypeChoices.video,
|
||||
TwitterPostTypeChoices.animated_gif,
|
||||
):
|
||||
meta_data = media_entity["video_info"]
|
||||
|
||||
videos = sorted(
|
||||
[video for video in meta_data["variants"]],
|
||||
reverse=True,
|
||||
key=lambda video: video.get("bitrate", 0),
|
||||
)
|
||||
|
||||
if not videos:
|
||||
continue
|
||||
|
||||
video = videos[0]
|
||||
content_type = video["content_type"]
|
||||
url = video["url"]
|
||||
|
||||
html_fragment = format_html(
|
||||
"""<div><video controls muted><source src="{url}" type="{content_type}" /></video></div> """,
|
||||
url=url,
|
||||
content_type=content_type,
|
||||
)
|
||||
body += html_fragment
|
||||
try:
|
||||
media_entities = self.get_media_entities(post)
|
||||
body += media_entities
|
||||
except KeyError:
|
||||
logger.exception(f"Failed parsing media_entities for {url}")
|
||||
|
||||
if "retweeted_status" in post:
|
||||
original_post = post["retweeted_status"]
|
||||
body += format_html(f"Original tweet: {original_post['full_text']}")
|
||||
body += format_html(
|
||||
"Original tweet: {original_post}",
|
||||
original_post=original_post["full_text"],
|
||||
)
|
||||
if "quoted_status" in post:
|
||||
original_post = post["quoted_status"]
|
||||
body += format_html(f"Quoted tweet: {original_post['full_text']}")
|
||||
|
||||
body += format_html(post["full_text"])
|
||||
body += format_html(
|
||||
"Quoted tweet: {original_post}",
|
||||
original_post=original_post["full_text"],
|
||||
)
|
||||
|
||||
data = {
|
||||
"remote_identifier": remote_identifier,
|
||||
|
|
@ -99,7 +72,7 @@ class TwitterBuilder(Builder):
|
|||
"body": fix_text(body),
|
||||
"author": rule.screen_name,
|
||||
"publication_date": publication_date,
|
||||
"url": f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}",
|
||||
"url": url,
|
||||
"rule": rule,
|
||||
}
|
||||
|
||||
|
|
@ -107,6 +80,53 @@ class TwitterBuilder(Builder):
|
|||
|
||||
return results.values()
|
||||
|
||||
def get_media_entities(self, post):
    """Render the media attached to a tweet as one HTML string.

    Photos become a lazily loaded ``<img>`` and videos / animated GIFs
    become a ``<video>`` element pointing at the variant with the highest
    bitrate. Every fragment is built with ``format_html`` and prefixed
    with ``<br />``. Media entities of any other type are ignored.

    Args:
        post: Tweet payload (mapping). Must contain
            ``post["extended_entities"]["media"]``, an iterable of media
            entity mappings.

    Returns:
        The concatenated HTML fragments, or ``""`` when no entity produced
        a fragment.

    Raises:
        KeyError: If ``extended_entities``/``media`` or any key read from a
            media entity or video variant is missing. The caller is
            expected to handle this.
    """
    video_types = (
        TwitterPostTypeChoices.video,
        TwitterPostTypeChoices.animated_gif,
    )
    fragments = []

    for media_entity in post["extended_entities"]["media"]:
        media_type = media_entity["type"]
        # Read for every entity (not only photos) so a malformed entity
        # fails the same way regardless of its type.
        media_url = media_entity["media_url_https"]
        title = media_entity["id_str"]

        if media_type == TwitterPostTypeChoices.photo:
            fragments.append(
                format_html(
                    """<br /><div><img alt="{title}" src="{media_url}" loading="lazy" /></div>""",
                    title=title,
                    media_url=media_url,
                )
            )

        elif media_type in video_types:
            # Variants without a bitrate count as 0. max() returns the
            # first maximal item, matching the previous stable
            # sort-descending-and-take-first behaviour.
            best = max(
                media_entity["video_info"]["variants"],
                key=lambda video: video.get("bitrate", 0),
                default=None,
            )

            if best is None:
                # No variants at all: nothing to embed for this entity.
                continue

            content_type = best["content_type"]
            url = best["url"]

            fragments.append(
                format_html(
                    """<br /><div><video controls muted><source src="{url}" type="{content_type}" /></video></div> """,
                    url=url,
                    content_type=content_type,
                )
            )

    return "".join(fragments)
|
||||
|
||||
|
||||
# Twitter-specific stream type; it adds nothing on top of ``Stream`` here.
class TwitterStream(Stream):
    pass
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue