Urlize tweet bodies

This commit is contained in:
Sonny Bakker 2020-09-13 15:00:31 +02:00
parent 646c629327
commit 5e53faf417
2 changed files with 110 additions and 27 deletions

View file

@ -2,7 +2,7 @@ from datetime import datetime
from unittest.mock import Mock
from django.test import TestCase
from django.utils.html import format_html
from django.utils.safestring import mark_safe
import pytz
@ -46,11 +46,21 @@ class TwitterBuilderTestCase(TestCase):
post = posts["1291528756373286914"]
full_text = "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
full_text = (
"@ArieNeoSC Here you go, goodnight!\n\n"
"""<a href="https://t.co/trAcIxBMlX" rel="nofollow">https://t.co/trAcIxBMlX</a>"""
)
self.assertEquals(post.rule, profile)
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
self.assertEquals(post.body, format_html(full_text))
self.assertEquals(
post.title,
truncate_text(
Post,
"title",
"@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX",
),
)
self.assertEquals(post.body, mark_safe(full_text))
self.assertEquals(post.author, "RobertsSpaceInd")
self.assertEquals(
@ -66,7 +76,7 @@ class TwitterBuilderTestCase(TestCase):
self.assertEquals(post.rule, profile)
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
self.assertEquals(post.body, format_html(full_text))
self.assertEquals(post.body, mark_safe(full_text))
self.assertEquals(post.author, "RobertsSpaceInd")
self.assertEquals(
@ -94,10 +104,8 @@ class TwitterBuilderTestCase(TestCase):
post = posts["1269039237166321664"]
full_text = "_ https://t.co/VjEeDrL1iA"
self.assertEquals(post.rule, profile)
self.assertEquals(post.title, full_text)
self.assertEquals(post.title, "_ https://t.co/VjEeDrL1iA")
self.assertEquals(post.author, "RobertsSpaceInd")
self.assertEquals(
@ -107,7 +115,11 @@ class TwitterBuilderTestCase(TestCase):
post.publication_date, pytz.utc.localize(datetime(2020, 6, 5, 22, 51, 46))
)
self.assertIn(full_text, post.body)
self.assertInHTML(
"""<a href="https://t.co/VjEeDrL1iA" rel="nofollow">https://t.co/VjEeDrL1iA</a>""",
post.body,
count=1,
)
self.assertInHTML(
"""<div><img alt="1269039233072689152" src="https://pbs.twimg.com/media/EZyIdXVU8AACPCz.jpg" loading="lazy"></div>""",
post.body,
@ -141,11 +153,24 @@ class TwitterBuilderTestCase(TestCase):
"Small enough to access hard-to-reach ore deposits, but with enough"
" power to get through the tough jobs, Greycat\u2019s ROC perfectly"
" complements any mining operation. \n\nDetails:"
" https://t.co/2aH7qdOfSk https://t.co/mZ8CAuq3SH"
""" <a href="https://t.co/2aH7qdOfSk" rel="nofollow">https://t.co/2aH7qdOfSk</a>"""
""" <a href="https://t.co/mZ8CAuq3SH" rel="nofollow">https://t.co/mZ8CAuq3SH</a>"""
)
self.assertEquals(post.rule, profile)
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
self.assertEquals(
post.title,
truncate_text(
Post,
"title",
fix_text(
"Small enough to access hard-to-reach ore deposits, but with enough"
" power to get through the tough jobs, Greycat\u2019s ROC perfectly"
" complements any mining operation. \n\nDetails:"
" https://t.co/2aH7qdOfSk https://t.co/mZ8CAuq3SH"
),
),
)
self.assertEquals(post.author, "RobertsSpaceInd")
self.assertEquals(
@ -208,7 +233,10 @@ class TwitterBuilderTestCase(TestCase):
count=1,
)
self.assertIn("@Xenosystems https://t.co/wxvioLCJ6h", post.body)
self.assertIn(
"""@Xenosystems <a href="https://t.co/wxvioLCJ6h" rel="nofollow">https://t.co/wxvioLCJ6h</a>""",
post.body,
)
def test_retweet_post(self):
builder = TwitterBuilder
@ -241,7 +269,8 @@ class TwitterBuilderTestCase(TestCase):
"Original tweet: New video! #StarCitizen 3.9 vs. 3.10 comparison!\nSo, the patch"
" 3.10 came out, which brought us quite a lot of changes!\ud83d\ude42\nPlease,"
" share it with your friends!\ud83d\ude4f\n\nEnjoy watching and stay safe!"
" \u2764\ufe0f\u263a\ufe0f\n@RobertsSpaceInd\n\n@CloudImperium\n\nhttps://t.co/j4QahHzbw4"
" \u2764\ufe0f\u263a\ufe0f\n@RobertsSpaceInd\n\n@CloudImperium\n\n"
"""<a href="https://t.co/j4QahHzbw4" rel="nofollow">https://t.co/j4QahHzbw4</a>"""
),
post.body,
)
@ -265,14 +294,17 @@ class TwitterBuilderTestCase(TestCase):
post = posts["1290801039075979264"]
self.assertIn(
fix_text("Bonne nuit \ud83c\udf3a\ud83d\udeeb https://t.co/WyznJwCJLp"),
fix_text(
"Bonne nuit \ud83c\udf3a\ud83d\udeeb"
""" <a href="https://t.co/WyznJwCJLp" rel="nofollow">https://t.co/WyznJwCJLp</a>"""
),
post.body,
)
self.assertIn(
fix_text(
"Quoted tweet: #Starcitizen Le jeu est beau. Bonne nuit"
" @RobertsSpaceInd https://t.co/xCXun68V3r"
""" @RobertsSpaceInd <a href="https://t.co/xCXun68V3r" rel="nofollow">https://t.co/xCXun68V3r</a>"""
),
post.body,
)
@ -306,16 +338,59 @@ class TwitterBuilderTestCase(TestCase):
post = posts["1291528756373286914"]
full_text = (
"@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
"@ArieNeoSC Here you go, goodnight!\n\n"
"""<a href="https://t.co/trAcIxBMlX" rel="nofollow">https://t.co/trAcIxBMlX</a>"""
" <article></article>"
)
self.assertEquals(post.rule, profile)
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
self.assertEquals(post.body, format_html(full_text))
self.assertEquals(
post.title,
truncate_text(
Post,
"title",
"@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
" <article></article>",
),
)
self.assertEquals(post.body, mark_safe(full_text))
self.assertInHTML("<script></script>", post.body, count=0)
self.assertInHTML("<article></article>", post.body, count=1)
self.assertInHTML("<script></script>", post.title, count=0)
self.assertInHTML("<article></article>", post.title, count=1)
def test_urlize_on_urls(self):
    """Check that a bare URL in a tweet is linked in the body but not the title.

    The title is expected to equal the truncated raw tweet text, while the
    body is expected to contain the same text with the URL wrapped in an
    ``<a ... rel="nofollow">`` anchor.
    """
    profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
    mock_stream = Mock(rule=profile)

    # Call the class directly so the name ``builder`` only ever refers to the
    # context-managed instance and never shadows the class itself.
    with TwitterBuilder(simple_mock, mock_stream) as builder:
        builder.build()
        builder.save()

    posts = {post.remote_identifier: post for post in Post.objects.all()}

    self.assertCountEqual(
        ("1291528756373286914", "1288550304095416320"), posts.keys()
    )

    post = posts["1291528756373286914"]

    # Expected body: the raw text with the trailing URL turned into a link.
    full_text = (
        "@ArieNeoSC Here you go, goodnight!\n\n"
        """<a href="https://t.co/trAcIxBMlX" rel="nofollow">https://t.co/trAcIxBMlX</a>"""
    )

    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
    # was removed from unittest in Python 3.12.
    self.assertEqual(post.rule, profile)
    self.assertEqual(
        post.title,
        truncate_text(
            Post,
            "title",
            "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX",
        ),
    )
    self.assertEqual(post.body, mark_safe(full_text))

View file

@ -2,7 +2,7 @@ import logging
from datetime import datetime
from django.utils.html import format_html
from django.utils.html import format_html, urlize
import pytz
@ -36,8 +36,10 @@ class TwitterBuilder(PostBuilder):
remote_identifier = post["id_str"]
url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"
body = post["full_text"]
title = truncate_text(Post, "title", self.sanitize_fragment(body))
body = urlize(post["full_text"], nofollow=True)
title = truncate_text(
Post, "title", self.sanitize_fragment(post["full_text"])
)
publication_date = pytz.utc.localize(
datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
@ -52,15 +54,21 @@ class TwitterBuilder(PostBuilder):
if "retweeted_status" in post:
original_post = post["retweeted_status"]
body += format_html(
body += urlize(
format_html(
"Original tweet: {original_post}",
original_post=original_post["full_text"],
original_post=urlize(original_post["full_text"], nofollow=True),
),
nofollow=True,
)
if "quoted_status" in post:
original_post = post["quoted_status"]
body += format_html(
body += urlize(
format_html(
"Quoted tweet: {original_post}",
original_post=original_post["full_text"],
),
nofollow=True,
)
body = self.sanitize_fragment(body)