diff --git a/src/newsreader/news/collection/tests/twitter/builder/mocks.py b/src/newsreader/news/collection/tests/twitter/builder/mocks.py index 6c17fef..364d762 100644 --- a/src/newsreader/news/collection/tests/twitter/builder/mocks.py +++ b/src/newsreader/news/collection/tests/twitter/builder/mocks.py @@ -700,6 +700,159 @@ video_mock = [ }, ] +video_without_bitrate_mock = [ + { + "contributors": None, + "coordinates": None, + "created_at": "Wed Aug 05 18:36:00 +0000 2020", + "display_text_range": [0, 196], + "entities": { + "hashtags": [], + "media": [ + { + "display_url": "pic.twitter.com/mZ8CAuq3SH", + "expanded_url": "https://twitter.com/RobertsSpaceInd/status/1291080532361527296/video/1", + "id": 1291074294747770880, + "id_str": "1291074294747770880", + "indices": [197, 220], + "media_url": "http://pbs.twimg.com/media/EerWyexUEAQhRL1.jpg", + "media_url_https": "https://pbs.twimg.com/media/EerWyexUEAQhRL1.jpg", + "sizes": { + "large": {"h": 720, "resize": "fit", "w": 1280}, + "medium": {"h": 675, "resize": "fit", "w": 1200}, + "small": {"h": 383, "resize": "fit", "w": 680}, + "thumb": {"h": 150, "resize": "crop", "w": 150}, + }, + "type": "photo", + "url": "https://t.co/mZ8CAuq3SH", + } + ], + "symbols": [], + "urls": [ + { + "display_url": "robertsspaceindustries.com/greycatroc", + "expanded_url": "http://robertsspaceindustries.com/greycatroc", + "indices": [173, 196], + "url": "https://t.co/2aH7qdOfSk", + } + ], + "user_mentions": [], + }, + "extended_entities": { + "media": [ + { + "additional_media_info": { + "description": "", + "embeddable": True, + "monetizable": False, + "title": "", + }, + "display_url": "pic.twitter.com/mZ8CAuq3SH", + "expanded_url": "https://twitter.com/RobertsSpaceInd/status/1291080532361527296/video/1", + "id": 1291074294747770880, + "id_str": "1291074294747770880", + "indices": [197, 220], + "media_url": "http://pbs.twimg.com/media/EerWyexUEAQhRL1.jpg", + "media_url_https": "https://pbs.twimg.com/media/EerWyexUEAQhRL1.jpg", + "sizes": { + "large": {"h": 720, "resize": "fit", "w": 1280}, + "medium": {"h": 675, "resize": "fit", "w": 1200}, + "small": {"h": 383, "resize": "fit", "w": 680}, + "thumb": {"h": 150, "resize": "crop", "w": 150}, + }, + "type": "video", + "url": "https://t.co/mZ8CAuq3SH", + "video_info": { + "aspect_ratio": [16, 9], + "duration_millis": 82967, + "variants": [ + { + "content_type": "application/x-mpegURL", + "url": "https://video.twimg.com/amplify_video/1291074294747770880/pl/kMYgFEoRyoW99o-i.m3u8?tag=13", + } + ], + }, + } + ] + }, + "favorite_count": 289, + "favorited": False, + "full_text": "Small enough to access hard-to-reach ore deposits, but with enough power to get through the tough jobs, Greycat\u2019s ROC perfectly complements any mining operation. \n\nDetails: https://t.co/2aH7qdOfSk https://t.co/mZ8CAuq3SH", + "geo": None, + "id": 1291080532361527296, + "id_str": "1291080532361527296", + "in_reply_to_screen_name": None, + "in_reply_to_status_id": None, + "in_reply_to_status_id_str": None, + "in_reply_to_user_id": None, + "in_reply_to_user_id_str": None, + "is_quote_status": False, + "lang": "en", + "place": None, + "possibly_sensitive": False, + "retweet_count": 64, + "retweeted": False, + "source": 'Twitter Media Studio', + "truncated": False, + "user": { + "contributors_enabled": False, + "created_at": "Wed Sep 05 00:58:11 +0000 2012", + "default_profile": False, + "default_profile_image": False, + "description": "The official Twitter profile for #StarCitizen and Roberts Space Industries.", + "entities": { + "description": {"urls": []}, + "url": { + "urls": [ + { + "display_url": "robertsspaceindustries.com", + "expanded_url": "http://www.robertsspaceindustries.com", + "indices": [0, 23], + "url": "https://t.co/iqO6apof3y", + } + ] + }, + }, + "favourites_count": 4588, + "follow_request_sent": None, + "followers_count": 106169, + "following": None, + "friends_count": 201, + "geo_enabled": False, + "has_extended_profile": False, + "id": 803542770, + "id_str": "803542770", + "is_translation_enabled": False, + "is_translator": False, + "lang": None, + "listed_count": 890, + "location": "Roberts Space Industries", + "name": "Star Citizen", + "notifications": None, + "profile_background_color": "131516", + "profile_background_image_url": "http://abs.twimg.com/images/themes/theme14/bg.gif", + "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme14/bg.gif", + "profile_background_tile": False, + "profile_banner_url": "https://pbs.twimg.com/profile_banners/803542770/1596651186", + "profile_image_url": "http://pbs.twimg.com/profile_images/963109950103814144/ysnj_Asy_normal.jpg", + "profile_image_url_https": "https://pbs.twimg.com/profile_images/963109950103814144/ysnj_Asy_normal.jpg", + "profile_link_color": "0A5485", + "profile_sidebar_border_color": "FFFFFF", + "profile_sidebar_fill_color": "EFEFEF", + "profile_text_color": "333333", + "profile_use_background_image": True, + "protected": False, + "screen_name": "RobertsSpaceInd", + "statuses_count": 6210, + "time_zone": None, + "translator_type": "none", + "url": "https://t.co/iqO6apof3y", + "utc_offset": None, + "verified": True, + }, + } +] + retweet_mock = [ { "contributors": None, diff --git a/src/newsreader/news/collection/tests/twitter/builder/tests.py b/src/newsreader/news/collection/tests/twitter/builder/tests.py index f7ef547..bf66648 100644 --- a/src/newsreader/news/collection/tests/twitter/builder/tests.py +++ b/src/newsreader/news/collection/tests/twitter/builder/tests.py @@ -8,11 +8,14 @@ from django.utils.html import format_html import pytz +from ftfy import fix_text + from newsreader.news.collection.tests.factories import TwitterProfileFactory from newsreader.news.collection.tests.twitter.builder.mocks import ( image_mock, simple_mock, video_mock, + video_without_bitrate_mock, ) from newsreader.news.collection.twitter import TWITTER_URL, TwitterBuilder from newsreader.news.core.models import Post @@ -100,11 +103,11 @@ class TwitterBuilderTestCase(TestCase): ) self.assertIn(full_text, post.body) - self.assertIn( + self.assertInHTML( f"
1269039233072689152
", post.body, ) - self.assertIn( + self.assertInHTML( f"
1269039233068527618
", post.body, ) @@ -126,11 +129,11 @@ class TwitterBuilderTestCase(TestCase): post = posts["1291080532361527296"] - full_text = ( + full_text = fix_text( "Small enough to access hard-to-reach ore deposits, but with enough" - "power to get through the tough jobs, Greycat\u2019s ROC perfectly" - "complements any mining operation. \n\nDetails:" - "https://t.co/2aH7qdOfSk https://t.co/mZ8CAuq3SH" + " power to get through the tough jobs, Greycat\u2019s ROC perfectly" + " complements any mining operation. \n\nDetails:" + " https://t.co/2aH7qdOfSk https://t.co/mZ8CAuq3SH" ) self.assertEquals(post.rule, profile) @@ -145,9 +148,31 @@ class TwitterBuilderTestCase(TestCase): ) self.assertIn(full_text, post.body) - self.assertIn( + self.assertInHTML( """
""", post.body, + count=1, + ) + + def test_video_without_bitrate(self): + builder = TwitterBuilder + + profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") + mock_stream = MagicMock(rule=profile) + + with builder((video_without_bitrate_mock, mock_stream)) as builder: + builder.save() + + posts = {post.remote_identifier: post for post in Post.objects.all()} + + self.assertCountEqual(("1291080532361527296",), posts.keys()) + + post = posts["1291080532361527296"] + + self.assertInHTML( + """
""", + post.body, + count=1, ) @skip("Not implemented") @@ -189,3 +214,7 @@ class TwitterBuilderTestCase(TestCase): @skip("Not implemented") def test_duplicate_in_data(self): pass + + @skip("Not implemented") + def test_invalid_unicode_chars(self): + pass diff --git a/src/newsreader/news/collection/twitter.py b/src/newsreader/news/collection/twitter.py index a45493b..8fbf331 100644 --- a/src/newsreader/news/collection/twitter.py +++ b/src/newsreader/news/collection/twitter.py @@ -5,6 +5,8 @@ from django.utils.html import format_html import pytz +from ftfy import fix_text + from newsreader.news.collection.base import Builder, Client, Collector, Stream from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices from newsreader.news.core.models import Post @@ -59,26 +61,21 @@ class TwitterBuilder(Builder): elif media_type == TwitterPostTypeChoices.video: meta_data = media_entity["video_info"] - # TODO catch case where bitrates are not defined or no videos - video = next( - iter( - sorted( - ( - video - for video in meta_data["variants"] - if "bitrate" in video - ), - reverse=True, - key=lambda video: video["bitrate"], - ) - ) + videos = sorted( + [video for video in meta_data["variants"]], + reverse=True, + key=lambda video: video.get("bitrate", 0), ) + if not videos: + continue + + video = videos[0] content_type = video["content_type"] url = video["url"] html_fragment = format_html( - """
""", + """
""", url=url, content_type=content_type, ) @@ -88,11 +85,11 @@ class TwitterBuilder(Builder): data = { "remote_identifier": remote_identifier, - "title": truncatechars(post["full_text"], 40), - "body": body, + "title": fix_text(truncatechars(post["full_text"], 40)), + "body": fix_text(body), "author": rule.screen_name, "publication_date": publication_date, - "url": (f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"), + "url": f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}", "rule": rule, }