Urlize tweet bodies
This commit is contained in:
parent
646c629327
commit
5e53faf417
2 changed files with 110 additions and 27 deletions
|
|
@ -2,7 +2,7 @@ from datetime import datetime
|
|||
from unittest.mock import Mock
|
||||
|
||||
from django.test import TestCase
|
||||
from django.utils.html import format_html
|
||||
from django.utils.safestring import mark_safe
|
||||
|
||||
import pytz
|
||||
|
||||
|
|
@ -46,11 +46,21 @@ class TwitterBuilderTestCase(TestCase):
|
|||
|
||||
post = posts["1291528756373286914"]
|
||||
|
||||
full_text = "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
|
||||
full_text = (
|
||||
"@ArieNeoSC Here you go, goodnight!\n\n"
|
||||
"""<a href="https://t.co/trAcIxBMlX" rel="nofollow">https://t.co/trAcIxBMlX</a>"""
|
||||
)
|
||||
|
||||
self.assertEquals(post.rule, profile)
|
||||
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
|
||||
self.assertEquals(post.body, format_html(full_text))
|
||||
self.assertEquals(
|
||||
post.title,
|
||||
truncate_text(
|
||||
Post,
|
||||
"title",
|
||||
"@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX",
|
||||
),
|
||||
)
|
||||
self.assertEquals(post.body, mark_safe(full_text))
|
||||
|
||||
self.assertEquals(post.author, "RobertsSpaceInd")
|
||||
self.assertEquals(
|
||||
|
|
@ -66,7 +76,7 @@ class TwitterBuilderTestCase(TestCase):
|
|||
|
||||
self.assertEquals(post.rule, profile)
|
||||
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
|
||||
self.assertEquals(post.body, format_html(full_text))
|
||||
self.assertEquals(post.body, mark_safe(full_text))
|
||||
|
||||
self.assertEquals(post.author, "RobertsSpaceInd")
|
||||
self.assertEquals(
|
||||
|
|
@ -94,10 +104,8 @@ class TwitterBuilderTestCase(TestCase):
|
|||
|
||||
post = posts["1269039237166321664"]
|
||||
|
||||
full_text = "_ https://t.co/VjEeDrL1iA"
|
||||
|
||||
self.assertEquals(post.rule, profile)
|
||||
self.assertEquals(post.title, full_text)
|
||||
self.assertEquals(post.title, "_ https://t.co/VjEeDrL1iA")
|
||||
|
||||
self.assertEquals(post.author, "RobertsSpaceInd")
|
||||
self.assertEquals(
|
||||
|
|
@ -107,7 +115,11 @@ class TwitterBuilderTestCase(TestCase):
|
|||
post.publication_date, pytz.utc.localize(datetime(2020, 6, 5, 22, 51, 46))
|
||||
)
|
||||
|
||||
self.assertIn(full_text, post.body)
|
||||
self.assertInHTML(
|
||||
"""<a href="https://t.co/VjEeDrL1iA" rel="nofollow">https://t.co/VjEeDrL1iA</a>""",
|
||||
post.body,
|
||||
count=1,
|
||||
)
|
||||
self.assertInHTML(
|
||||
"""<div><img alt="1269039233072689152" src="https://pbs.twimg.com/media/EZyIdXVU8AACPCz.jpg" loading="lazy"></div>""",
|
||||
post.body,
|
||||
|
|
@ -141,11 +153,24 @@ class TwitterBuilderTestCase(TestCase):
|
|||
"Small enough to access hard-to-reach ore deposits, but with enough"
|
||||
" power to get through the tough jobs, Greycat\u2019s ROC perfectly"
|
||||
" complements any mining operation. \n\nDetails:"
|
||||
" https://t.co/2aH7qdOfSk https://t.co/mZ8CAuq3SH"
|
||||
""" <a href="https://t.co/2aH7qdOfSk" rel="nofollow">https://t.co/2aH7qdOfSk</a>"""
|
||||
""" <a href="https://t.co/mZ8CAuq3SH" rel="nofollow">https://t.co/mZ8CAuq3SH</a>"""
|
||||
)
|
||||
|
||||
self.assertEquals(post.rule, profile)
|
||||
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
|
||||
self.assertEquals(
|
||||
post.title,
|
||||
truncate_text(
|
||||
Post,
|
||||
"title",
|
||||
fix_text(
|
||||
"Small enough to access hard-to-reach ore deposits, but with enough"
|
||||
" power to get through the tough jobs, Greycat\u2019s ROC perfectly"
|
||||
" complements any mining operation. \n\nDetails:"
|
||||
" https://t.co/2aH7qdOfSk https://t.co/mZ8CAuq3SH"
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
self.assertEquals(post.author, "RobertsSpaceInd")
|
||||
self.assertEquals(
|
||||
|
|
@ -208,7 +233,10 @@ class TwitterBuilderTestCase(TestCase):
|
|||
count=1,
|
||||
)
|
||||
|
||||
self.assertIn("@Xenosystems https://t.co/wxvioLCJ6h", post.body)
|
||||
self.assertIn(
|
||||
"""@Xenosystems <a href="https://t.co/wxvioLCJ6h" rel="nofollow">https://t.co/wxvioLCJ6h</a>""",
|
||||
post.body,
|
||||
)
|
||||
|
||||
def test_retweet_post(self):
|
||||
builder = TwitterBuilder
|
||||
|
|
@ -241,7 +269,8 @@ class TwitterBuilderTestCase(TestCase):
|
|||
"Original tweet: New video! #StarCitizen 3.9 vs. 3.10 comparison!\nSo, the patch"
|
||||
" 3.10 came out, which brought us quite a lot of changes!\ud83d\ude42\nPlease,"
|
||||
" share it with your friends!\ud83d\ude4f\n\nEnjoy watching and stay safe!"
|
||||
" \u2764\ufe0f\u263a\ufe0f\n@RobertsSpaceInd\n\n@CloudImperium\n\nhttps://t.co/j4QahHzbw4"
|
||||
" \u2764\ufe0f\u263a\ufe0f\n@RobertsSpaceInd\n\n@CloudImperium\n\n"
|
||||
"""<a href="https://t.co/j4QahHzbw4" rel="nofollow">https://t.co/j4QahHzbw4</a>"""
|
||||
),
|
||||
post.body,
|
||||
)
|
||||
|
|
@ -265,14 +294,17 @@ class TwitterBuilderTestCase(TestCase):
|
|||
post = posts["1290801039075979264"]
|
||||
|
||||
self.assertIn(
|
||||
fix_text("Bonne nuit \ud83c\udf3a\ud83d\udeeb https://t.co/WyznJwCJLp"),
|
||||
fix_text(
|
||||
"Bonne nuit \ud83c\udf3a\ud83d\udeeb"
|
||||
""" <a href="https://t.co/WyznJwCJLp" rel="nofollow">https://t.co/WyznJwCJLp</a>"""
|
||||
),
|
||||
post.body,
|
||||
)
|
||||
|
||||
self.assertIn(
|
||||
fix_text(
|
||||
"Quoted tweet: #Starcitizen Le jeu est beau. Bonne nuit"
|
||||
" @RobertsSpaceInd https://t.co/xCXun68V3r"
|
||||
""" @RobertsSpaceInd <a href="https://t.co/xCXun68V3r" rel="nofollow">https://t.co/xCXun68V3r</a>"""
|
||||
),
|
||||
post.body,
|
||||
)
|
||||
|
|
@ -306,16 +338,59 @@ class TwitterBuilderTestCase(TestCase):
|
|||
post = posts["1291528756373286914"]
|
||||
|
||||
full_text = (
|
||||
"@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
|
||||
"@ArieNeoSC Here you go, goodnight!\n\n"
|
||||
"""<a href="https://t.co/trAcIxBMlX" rel="nofollow">https://t.co/trAcIxBMlX</a>"""
|
||||
" <article></article>"
|
||||
)
|
||||
|
||||
self.assertEquals(post.rule, profile)
|
||||
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
|
||||
self.assertEquals(post.body, format_html(full_text))
|
||||
self.assertEquals(
|
||||
post.title,
|
||||
truncate_text(
|
||||
Post,
|
||||
"title",
|
||||
"@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
|
||||
" <article></article>",
|
||||
),
|
||||
)
|
||||
self.assertEquals(post.body, mark_safe(full_text))
|
||||
|
||||
self.assertInHTML("<script></script>", post.body, count=0)
|
||||
self.assertInHTML("<article></article>", post.body, count=1)
|
||||
|
||||
self.assertInHTML("<script></script>", post.title, count=0)
|
||||
self.assertInHTML("<article></article>", post.title, count=1)
|
||||
|
||||
def test_urlize_on_urls(self):
    """Check that a URL in a tweet is rendered as a nofollow link in the body.

    The post body is expected to contain the URL wrapped in an
    ``<a ... rel="nofollow">`` anchor, while the title is expected to be
    built from the plain tweet text with the bare URL.
    """
    profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
    mock_stream = Mock(rule=profile)

    # Use the class directly so the name ``builder`` only ever refers to the
    # instance yielded by the context manager.
    with TwitterBuilder(simple_mock, mock_stream) as builder:
        builder.build()
        builder.save()

    posts = {post.remote_identifier: post for post in Post.objects.all()}

    self.assertCountEqual(
        ("1291528756373286914", "1288550304095416320"), posts.keys()
    )

    post = posts["1291528756373286914"]

    full_text = (
        "@ArieNeoSC Here you go, goodnight!\n\n"
        """<a href="https://t.co/trAcIxBMlX" rel="nofollow">https://t.co/trAcIxBMlX</a>"""
    )

    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
    # no longer exists on Python 3.12+.
    self.assertEqual(post.rule, profile)
    self.assertEqual(
        post.title,
        truncate_text(
            Post,
            "title",
            "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX",
        ),
    )
    self.assertEqual(post.body, mark_safe(full_text))
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import logging
|
|||
|
||||
from datetime import datetime
|
||||
|
||||
from django.utils.html import format_html
|
||||
from django.utils.html import format_html, urlize
|
||||
|
||||
import pytz
|
||||
|
||||
|
|
@ -36,8 +36,10 @@ class TwitterBuilder(PostBuilder):
|
|||
remote_identifier = post["id_str"]
|
||||
url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"
|
||||
|
||||
body = post["full_text"]
|
||||
title = truncate_text(Post, "title", self.sanitize_fragment(body))
|
||||
body = urlize(post["full_text"], nofollow=True)
|
||||
title = truncate_text(
|
||||
Post, "title", self.sanitize_fragment(post["full_text"])
|
||||
)
|
||||
|
||||
publication_date = pytz.utc.localize(
|
||||
datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
|
||||
|
|
@ -52,15 +54,21 @@ class TwitterBuilder(PostBuilder):
|
|||
|
||||
if "retweeted_status" in post:
|
||||
original_post = post["retweeted_status"]
|
||||
body += format_html(
|
||||
body += urlize(
|
||||
format_html(
|
||||
"Original tweet: {original_post}",
|
||||
original_post=original_post["full_text"],
|
||||
original_post=urlize(original_post["full_text"], nofollow=True),
|
||||
),
|
||||
nofollow=True,
|
||||
)
|
||||
if "quoted_status" in post:
|
||||
original_post = post["quoted_status"]
|
||||
body += format_html(
|
||||
body += urlize(
|
||||
format_html(
|
||||
"Quoted tweet: {original_post}",
|
||||
original_post=original_post["full_text"],
|
||||
),
|
||||
nofollow=True,
|
||||
)
|
||||
|
||||
body = self.sanitize_fragment(body)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue