From 5e53faf417e39e1b8b6c0d3108e0eb542c3a2beb Mon Sep 17 00:00:00 2001 From: Sonny Bakker Date: Sun, 13 Sep 2020 15:00:31 +0200 Subject: [PATCH] Urlize tweet bodies --- .../collection/tests/twitter/builder/tests.py | 111 +++++++++++++++--- src/newsreader/news/collection/twitter.py | 26 ++-- 2 files changed, 110 insertions(+), 27 deletions(-) diff --git a/src/newsreader/news/collection/tests/twitter/builder/tests.py b/src/newsreader/news/collection/tests/twitter/builder/tests.py index eba3e65..cc43c3c 100644 --- a/src/newsreader/news/collection/tests/twitter/builder/tests.py +++ b/src/newsreader/news/collection/tests/twitter/builder/tests.py @@ -2,7 +2,7 @@ from datetime import datetime from unittest.mock import Mock from django.test import TestCase -from django.utils.html import format_html +from django.utils.safestring import mark_safe import pytz @@ -46,11 +46,21 @@ class TwitterBuilderTestCase(TestCase): post = posts["1291528756373286914"] - full_text = "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX" + full_text = ( + "@ArieNeoSC Here you go, goodnight!\n\n" + """https://t.co/trAcIxBMlX""" + ) self.assertEquals(post.rule, profile) - self.assertEquals(post.title, truncate_text(Post, "title", full_text)) - self.assertEquals(post.body, format_html(full_text)) + self.assertEquals( + post.title, + truncate_text( + Post, + "title", + "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX", + ), + ) + self.assertEquals(post.body, mark_safe(full_text)) self.assertEquals(post.author, "RobertsSpaceInd") self.assertEquals( @@ -66,7 +76,7 @@ class TwitterBuilderTestCase(TestCase): self.assertEquals(post.rule, profile) self.assertEquals(post.title, truncate_text(Post, "title", full_text)) - self.assertEquals(post.body, format_html(full_text)) + self.assertEquals(post.body, mark_safe(full_text)) self.assertEquals(post.author, "RobertsSpaceInd") self.assertEquals( @@ -94,10 +104,8 @@ class TwitterBuilderTestCase(TestCase): post = posts["1269039237166321664"] - full_text = "_ https://t.co/VjEeDrL1iA" - self.assertEquals(post.rule, profile) - self.assertEquals(post.title, full_text) + self.assertEquals(post.title, "_ https://t.co/VjEeDrL1iA") self.assertEquals(post.author, "RobertsSpaceInd") self.assertEquals( @@ -107,7 +115,11 @@ class TwitterBuilderTestCase(TestCase): post.publication_date, pytz.utc.localize(datetime(2020, 6, 5, 22, 51, 46)) ) - self.assertIn(full_text, post.body) + self.assertInHTML( + """https://t.co/VjEeDrL1iA""", + post.body, + count=1, + ) self.assertInHTML( """
1269039233072689152
""", post.body, @@ -141,11 +153,24 @@ class TwitterBuilderTestCase(TestCase): "Small enough to access hard-to-reach ore deposits, but with enough" " power to get through the tough jobs, Greycat\u2019s ROC perfectly" " complements any mining operation. \n\nDetails:" - " https://t.co/2aH7qdOfSk https://t.co/mZ8CAuq3SH" + """ https://t.co/2aH7qdOfSk""" + """ https://t.co/mZ8CAuq3SH""" ) self.assertEquals(post.rule, profile) - self.assertEquals(post.title, truncate_text(Post, "title", full_text)) + self.assertEquals( + post.title, + truncate_text( + Post, + "title", + fix_text( + "Small enough to access hard-to-reach ore deposits, but with enough" + " power to get through the tough jobs, Greycat\u2019s ROC perfectly" + " complements any mining operation. \n\nDetails:" + " https://t.co/2aH7qdOfSk https://t.co/mZ8CAuq3SH" + ), + ), + ) self.assertEquals(post.author, "RobertsSpaceInd") self.assertEquals( @@ -208,7 +233,10 @@ class TwitterBuilderTestCase(TestCase): count=1, ) - self.assertIn("@Xenosystems https://t.co/wxvioLCJ6h", post.body) + self.assertIn( + """@Xenosystems https://t.co/wxvioLCJ6h""", + post.body, + ) def test_retweet_post(self): builder = TwitterBuilder @@ -241,7 +269,8 @@ class TwitterBuilderTestCase(TestCase): "Original tweet: New video! #StarCitizen 3.9 vs. 3.10 comparison!\nSo, the patch" " 3.10 came out, which brought us quite a lot of changes!\ud83d\ude42\nPlease," " share it with your friends!\ud83d\ude4f\n\nEnjoy watching and stay safe!" - " \u2764\ufe0f\u263a\ufe0f\n@RobertsSpaceInd\n\n@CloudImperium\n\nhttps://t.co/j4QahHzbw4" + " \u2764\ufe0f\u263a\ufe0f\n@RobertsSpaceInd\n\n@CloudImperium\n\n" + """https://t.co/j4QahHzbw4""" ), post.body, ) @@ -265,14 +294,17 @@ class TwitterBuilderTestCase(TestCase): post = posts["1290801039075979264"] self.assertIn( - fix_text("Bonne nuit \ud83c\udf3a\ud83d\udeeb https://t.co/WyznJwCJLp"), + fix_text( + "Bonne nuit \ud83c\udf3a\ud83d\udeeb" + """ https://t.co/WyznJwCJLp""" + ), post.body, ) self.assertIn( fix_text( "Quoted tweet: #Starcitizen Le jeu est beau. Bonne nuit" - " @RobertsSpaceInd https://t.co/xCXun68V3r" + """ @RobertsSpaceInd https://t.co/xCXun68V3r""" ), post.body, ) @@ -306,16 +338,59 @@ class TwitterBuilderTestCase(TestCase): post = posts["1291528756373286914"] full_text = ( - "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX" + "@ArieNeoSC Here you go, goodnight!\n\n" + """https://t.co/trAcIxBMlX""" "
" ) self.assertEquals(post.rule, profile) - self.assertEquals(post.title, truncate_text(Post, "title", full_text)) - self.assertEquals(post.body, format_html(full_text)) + self.assertEquals( + post.title, + truncate_text( + Post, + "title", + "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX" + "
", + ), + ) + self.assertEquals(post.body, mark_safe(full_text)) self.assertInHTML("", post.body, count=0) self.assertInHTML("
", post.body, count=1) self.assertInHTML("", post.title, count=0) self.assertInHTML("
", post.title, count=1) + + def test_urlize_on_urls(self): + builder = TwitterBuilder + + profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") + mock_stream = Mock(rule=profile) + + with builder(simple_mock, mock_stream) as builder: + builder.build() + builder.save() + + posts = {post.remote_identifier: post for post in Post.objects.all()} + + self.assertCountEqual( + ("1291528756373286914", "1288550304095416320"), posts.keys() + ) + + post = posts["1291528756373286914"] + + full_text = ( + "@ArieNeoSC Here you go, goodnight!\n\n" + """https://t.co/trAcIxBMlX""" + ) + + self.assertEquals(post.rule, profile) + self.assertEquals( + post.title, + truncate_text( + Post, + "title", + "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX", + ), + ) + self.assertEquals(post.body, mark_safe(full_text)) diff --git a/src/newsreader/news/collection/twitter.py b/src/newsreader/news/collection/twitter.py index 3d1c54b..19fb812 100644 --- a/src/newsreader/news/collection/twitter.py +++ b/src/newsreader/news/collection/twitter.py @@ -2,7 +2,7 @@ import logging from datetime import datetime -from django.utils.html import format_html +from django.utils.html import format_html, urlize import pytz @@ -36,8 +36,10 @@ class TwitterBuilder(PostBuilder): remote_identifier = post["id_str"] url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}" - body = post["full_text"] - title = truncate_text(Post, "title", self.sanitize_fragment(body)) + body = urlize(post["full_text"], nofollow=True) + title = truncate_text( + Post, "title", self.sanitize_fragment(post["full_text"]) + ) publication_date = pytz.utc.localize( datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y") @@ -52,15 +54,21 @@ class TwitterBuilder(PostBuilder): if "retweeted_status" in post: original_post = post["retweeted_status"] - body += format_html( - "Original tweet: {original_post}", - original_post=original_post["full_text"], + body += urlize( + format_html( + "Original tweet: {original_post}", + original_post=urlize(original_post["full_text"], nofollow=True), + ), + nofollow=True, ) if "quoted_status" in post: original_post = post["quoted_status"] - body += format_html( - "Quoted tweet: {original_post}", - original_post=original_post["full_text"], + body += urlize( + format_html( + "Quoted tweet: {original_post}", + original_post=original_post["full_text"], + ), + nofollow=True, ) body = self.sanitize_fragment(body)