Move media_entities parsing to separate function

This commit is contained in:
Sonny Bakker 2020-09-12 16:49:01 +02:00
parent cfe9c29a14
commit a90e558655
2 changed files with 71 additions and 49 deletions

View file

@ -107,12 +107,14 @@ class TwitterBuilderTestCase(TestCase):
self.assertIn(full_text, post.body) self.assertIn(full_text, post.body)
self.assertInHTML( self.assertInHTML(
f"<div><img alt='1269039233072689152' src='https://pbs.twimg.com/media/EZyIdXVU8AACPCz.jpg' loading='lazy' /></div>", """<div><img alt="1269039233072689152" src="https://pbs.twimg.com/media/EZyIdXVU8AACPCz.jpg" loading="lazy" /></div>""",
post.body, post.body,
count=1,
) )
self.assertInHTML( self.assertInHTML(
f"<div><img alt='1269039233068527618' src='https://pbs.twimg.com/media/EZyIdXUVcAI3Cju.jpg' loading='lazy' /></div>", """<div><img alt="1269039233068527618" src="https://pbs.twimg.com/media/EZyIdXUVcAI3Cju.jpg" loading="lazy" /></div>""",
post.body, post.body,
count=1,
) )
def test_videos_in_post(self): def test_videos_in_post(self):

View file

@ -1,3 +1,5 @@
import logging
from datetime import datetime from datetime import datetime
from django.template.defaultfilters import truncatechars from django.template.defaultfilters import truncatechars
@ -12,6 +14,8 @@ from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeC
from newsreader.news.core.models import Post from newsreader.news.core.models import Post
logger = logging.getLogger(__name__)
TWITTER_URL = "https://twitter.com" TWITTER_URL = "https://twitter.com"
TWITTER_API_URL = "https://api.twitter.com/1.1" TWITTER_API_URL = "https://api.twitter.com/1.1"
@ -36,13 +40,49 @@ class TwitterBuilder(Builder):
for post in posts: for post in posts:
remote_identifier = post["id_str"] remote_identifier = post["id_str"]
url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"
publication_date = pytz.utc.localize( publication_date = pytz.utc.localize(
datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y") datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
) )
body = "" body = post["full_text"]
if "extended_entities" in post: if "extended_entities" in post:
try:
media_entities = self.get_media_entities(post)
body += media_entities
except KeyError:
logger.exception(f"Failed parsing media_entities for {url}")
if "retweeted_status" in post:
original_post = post["retweeted_status"]
body += format_html(
"Original tweet: {original_post}",
original_post=original_post["full_text"],
)
if "quoted_status" in post:
original_post = post["quoted_status"]
body += format_html(
"Quoted tweet: {original_post}",
original_post=original_post["full_text"],
)
data = {
"remote_identifier": remote_identifier,
"title": fix_text(truncatechars(post["full_text"], 40)),
"body": fix_text(body),
"author": rule.screen_name,
"publication_date": publication_date,
"url": url,
"rule": rule,
}
results[remote_identifier] = Post(**data)
return results.values()
def get_media_entities(self, post):
media_entities = post["extended_entities"]["media"] media_entities = post["extended_entities"]["media"]
formatted_entities = ""
for media_entity in media_entities: for media_entity in media_entities:
media_type = media_entity["type"] media_type = media_entity["type"]
@ -51,12 +91,12 @@ class TwitterBuilder(Builder):
if media_type == TwitterPostTypeChoices.photo: if media_type == TwitterPostTypeChoices.photo:
html_fragment = format_html( html_fragment = format_html(
"<div><img alt='{title}' src='{media_url}' loading='lazy' /></div>", """<br /><div><img alt="{title}" src="{media_url}" loading="lazy" /></div>""",
title=title, title=title,
media_url=media_url, media_url=media_url,
) )
body += html_fragment formatted_entities += html_fragment
elif media_type in ( elif media_type in (
TwitterPostTypeChoices.video, TwitterPostTypeChoices.video,
@ -78,34 +118,14 @@ class TwitterBuilder(Builder):
url = video["url"] url = video["url"]
html_fragment = format_html( html_fragment = format_html(
"""<div><video controls muted><source src="{url}" type="{content_type}" /></video></div> """, """<br /><div><video controls muted><source src="{url}" type="{content_type}" /></video></div> """,
url=url, url=url,
content_type=content_type, content_type=content_type,
) )
body += html_fragment
if "retweeted_status" in post: formatted_entities += html_fragment
original_post = post["retweeted_status"]
body += format_html(f"Original tweet: {original_post['full_text']}")
if "quoted_status" in post:
original_post = post["quoted_status"]
body += format_html(f"Quoted tweet: {original_post['full_text']}")
body += format_html(post["full_text"]) return formatted_entities
data = {
"remote_identifier": remote_identifier,
"title": fix_text(truncatechars(post["full_text"], 40)),
"body": fix_text(body),
"author": rule.screen_name,
"publication_date": publication_date,
"url": f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}",
"rule": rule,
}
results[remote_identifier] = Post(**data)
return results.values()
class TwitterStream(Stream): class TwitterStream(Stream):