Move media_entities parsing to separate function
This commit is contained in:
parent
cfe9c29a14
commit
a90e558655
2 changed files with 71 additions and 49 deletions
|
|
@ -107,12 +107,14 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
|
|
||||||
self.assertIn(full_text, post.body)
|
self.assertIn(full_text, post.body)
|
||||||
self.assertInHTML(
|
self.assertInHTML(
|
||||||
f"<div><img alt='1269039233072689152' src='https://pbs.twimg.com/media/EZyIdXVU8AACPCz.jpg' loading='lazy' /></div>",
|
"""<div><img alt="1269039233072689152" src="https://pbs.twimg.com/media/EZyIdXVU8AACPCz.jpg" loading="lazy" /></div>""",
|
||||||
post.body,
|
post.body,
|
||||||
|
count=1,
|
||||||
)
|
)
|
||||||
self.assertInHTML(
|
self.assertInHTML(
|
||||||
f"<div><img alt='1269039233068527618' src='https://pbs.twimg.com/media/EZyIdXUVcAI3Cju.jpg' loading='lazy' /></div>",
|
"""<div><img alt="1269039233068527618" src="https://pbs.twimg.com/media/EZyIdXUVcAI3Cju.jpg" loading="lazy" /></div>""",
|
||||||
post.body,
|
post.body,
|
||||||
|
count=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_videos_in_post(self):
|
def test_videos_in_post(self):
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,5 @@
|
||||||
|
import logging
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from django.template.defaultfilters import truncatechars
|
from django.template.defaultfilters import truncatechars
|
||||||
|
|
@ -12,6 +14,8 @@ from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeC
|
||||||
from newsreader.news.core.models import Post
|
from newsreader.news.core.models import Post
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
TWITTER_URL = "https://twitter.com"
|
TWITTER_URL = "https://twitter.com"
|
||||||
TWITTER_API_URL = "https://api.twitter.com/1.1"
|
TWITTER_API_URL = "https://api.twitter.com/1.1"
|
||||||
|
|
||||||
|
|
@ -36,62 +40,31 @@ class TwitterBuilder(Builder):
|
||||||
|
|
||||||
for post in posts:
|
for post in posts:
|
||||||
remote_identifier = post["id_str"]
|
remote_identifier = post["id_str"]
|
||||||
|
url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"
|
||||||
publication_date = pytz.utc.localize(
|
publication_date = pytz.utc.localize(
|
||||||
datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
|
datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
|
||||||
)
|
)
|
||||||
body = ""
|
body = post["full_text"]
|
||||||
|
|
||||||
if "extended_entities" in post:
|
if "extended_entities" in post:
|
||||||
media_entities = post["extended_entities"]["media"]
|
try:
|
||||||
|
media_entities = self.get_media_entities(post)
|
||||||
for media_entity in media_entities:
|
body += media_entities
|
||||||
media_type = media_entity["type"]
|
except KeyError:
|
||||||
media_url = media_entity["media_url_https"]
|
logger.exception(f"Failed parsing media_entities for {url}")
|
||||||
title = media_entity["id_str"]
|
|
||||||
|
|
||||||
if media_type == TwitterPostTypeChoices.photo:
|
|
||||||
html_fragment = format_html(
|
|
||||||
"<div><img alt='{title}' src='{media_url}' loading='lazy' /></div>",
|
|
||||||
title=title,
|
|
||||||
media_url=media_url,
|
|
||||||
)
|
|
||||||
|
|
||||||
body += html_fragment
|
|
||||||
|
|
||||||
elif media_type in (
|
|
||||||
TwitterPostTypeChoices.video,
|
|
||||||
TwitterPostTypeChoices.animated_gif,
|
|
||||||
):
|
|
||||||
meta_data = media_entity["video_info"]
|
|
||||||
|
|
||||||
videos = sorted(
|
|
||||||
[video for video in meta_data["variants"]],
|
|
||||||
reverse=True,
|
|
||||||
key=lambda video: video.get("bitrate", 0),
|
|
||||||
)
|
|
||||||
|
|
||||||
if not videos:
|
|
||||||
continue
|
|
||||||
|
|
||||||
video = videos[0]
|
|
||||||
content_type = video["content_type"]
|
|
||||||
url = video["url"]
|
|
||||||
|
|
||||||
html_fragment = format_html(
|
|
||||||
"""<div><video controls muted><source src="{url}" type="{content_type}" /></video></div> """,
|
|
||||||
url=url,
|
|
||||||
content_type=content_type,
|
|
||||||
)
|
|
||||||
body += html_fragment
|
|
||||||
|
|
||||||
if "retweeted_status" in post:
|
if "retweeted_status" in post:
|
||||||
original_post = post["retweeted_status"]
|
original_post = post["retweeted_status"]
|
||||||
body += format_html(f"Original tweet: {original_post['full_text']}")
|
body += format_html(
|
||||||
|
"Original tweet: {original_post}",
|
||||||
|
original_post=original_post["full_text"],
|
||||||
|
)
|
||||||
if "quoted_status" in post:
|
if "quoted_status" in post:
|
||||||
original_post = post["quoted_status"]
|
original_post = post["quoted_status"]
|
||||||
body += format_html(f"Quoted tweet: {original_post['full_text']}")
|
body += format_html(
|
||||||
|
"Quoted tweet: {original_post}",
|
||||||
body += format_html(post["full_text"])
|
original_post=original_post["full_text"],
|
||||||
|
)
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"remote_identifier": remote_identifier,
|
"remote_identifier": remote_identifier,
|
||||||
|
|
@ -99,7 +72,7 @@ class TwitterBuilder(Builder):
|
||||||
"body": fix_text(body),
|
"body": fix_text(body),
|
||||||
"author": rule.screen_name,
|
"author": rule.screen_name,
|
||||||
"publication_date": publication_date,
|
"publication_date": publication_date,
|
||||||
"url": f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}",
|
"url": url,
|
||||||
"rule": rule,
|
"rule": rule,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -107,6 +80,53 @@ class TwitterBuilder(Builder):
|
||||||
|
|
||||||
return results.values()
|
return results.values()
|
||||||
|
|
||||||
|
def get_media_entities(self, post):
|
||||||
|
media_entities = post["extended_entities"]["media"]
|
||||||
|
formatted_entities = ""
|
||||||
|
|
||||||
|
for media_entity in media_entities:
|
||||||
|
media_type = media_entity["type"]
|
||||||
|
media_url = media_entity["media_url_https"]
|
||||||
|
title = media_entity["id_str"]
|
||||||
|
|
||||||
|
if media_type == TwitterPostTypeChoices.photo:
|
||||||
|
html_fragment = format_html(
|
||||||
|
"""<br /><div><img alt="{title}" src="{media_url}" loading="lazy" /></div>""",
|
||||||
|
title=title,
|
||||||
|
media_url=media_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
formatted_entities += html_fragment
|
||||||
|
|
||||||
|
elif media_type in (
|
||||||
|
TwitterPostTypeChoices.video,
|
||||||
|
TwitterPostTypeChoices.animated_gif,
|
||||||
|
):
|
||||||
|
meta_data = media_entity["video_info"]
|
||||||
|
|
||||||
|
videos = sorted(
|
||||||
|
[video for video in meta_data["variants"]],
|
||||||
|
reverse=True,
|
||||||
|
key=lambda video: video.get("bitrate", 0),
|
||||||
|
)
|
||||||
|
|
||||||
|
if not videos:
|
||||||
|
continue
|
||||||
|
|
||||||
|
video = videos[0]
|
||||||
|
content_type = video["content_type"]
|
||||||
|
url = video["url"]
|
||||||
|
|
||||||
|
html_fragment = format_html(
|
||||||
|
"""<br /><div><video controls muted><source src="{url}" type="{content_type}" /></video></div> """,
|
||||||
|
url=url,
|
||||||
|
content_type=content_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
formatted_entities += html_fragment
|
||||||
|
|
||||||
|
return formatted_entities
|
||||||
|
|
||||||
|
|
||||||
class TwitterStream(Stream):
|
class TwitterStream(Stream):
|
||||||
pass
|
pass
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue