Urlize tweet bodies
This commit is contained in:
parent
646c629327
commit
5e53faf417
2 changed files with 110 additions and 27 deletions
|
|
@@ -2,7 +2,7 @@ from datetime import datetime
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from django.utils.html import format_html
|
from django.utils.safestring import mark_safe
|
||||||
|
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
|
|
@@ -46,11 +46,21 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
|
|
||||||
post = posts["1291528756373286914"]
|
post = posts["1291528756373286914"]
|
||||||
|
|
||||||
full_text = "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
|
full_text = (
|
||||||
|
"@ArieNeoSC Here you go, goodnight!\n\n"
|
||||||
|
"""<a href="https://t.co/trAcIxBMlX" rel="nofollow">https://t.co/trAcIxBMlX</a>"""
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEquals(post.rule, profile)
|
self.assertEquals(post.rule, profile)
|
||||||
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
|
self.assertEquals(
|
||||||
self.assertEquals(post.body, format_html(full_text))
|
post.title,
|
||||||
|
truncate_text(
|
||||||
|
Post,
|
||||||
|
"title",
|
||||||
|
"@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
self.assertEquals(post.body, mark_safe(full_text))
|
||||||
|
|
||||||
self.assertEquals(post.author, "RobertsSpaceInd")
|
self.assertEquals(post.author, "RobertsSpaceInd")
|
||||||
self.assertEquals(
|
self.assertEquals(
|
||||||
|
|
@@ -66,7 +76,7 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
|
|
||||||
self.assertEquals(post.rule, profile)
|
self.assertEquals(post.rule, profile)
|
||||||
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
|
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
|
||||||
self.assertEquals(post.body, format_html(full_text))
|
self.assertEquals(post.body, mark_safe(full_text))
|
||||||
|
|
||||||
self.assertEquals(post.author, "RobertsSpaceInd")
|
self.assertEquals(post.author, "RobertsSpaceInd")
|
||||||
self.assertEquals(
|
self.assertEquals(
|
||||||
|
|
@@ -94,10 +104,8 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
|
|
||||||
post = posts["1269039237166321664"]
|
post = posts["1269039237166321664"]
|
||||||
|
|
||||||
full_text = "_ https://t.co/VjEeDrL1iA"
|
|
||||||
|
|
||||||
self.assertEquals(post.rule, profile)
|
self.assertEquals(post.rule, profile)
|
||||||
self.assertEquals(post.title, full_text)
|
self.assertEquals(post.title, "_ https://t.co/VjEeDrL1iA")
|
||||||
|
|
||||||
self.assertEquals(post.author, "RobertsSpaceInd")
|
self.assertEquals(post.author, "RobertsSpaceInd")
|
||||||
self.assertEquals(
|
self.assertEquals(
|
||||||
|
|
@@ -107,7 +115,11 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
post.publication_date, pytz.utc.localize(datetime(2020, 6, 5, 22, 51, 46))
|
post.publication_date, pytz.utc.localize(datetime(2020, 6, 5, 22, 51, 46))
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIn(full_text, post.body)
|
self.assertInHTML(
|
||||||
|
"""<a href="https://t.co/VjEeDrL1iA" rel="nofollow">https://t.co/VjEeDrL1iA</a>""",
|
||||||
|
post.body,
|
||||||
|
count=1,
|
||||||
|
)
|
||||||
self.assertInHTML(
|
self.assertInHTML(
|
||||||
"""<div><img alt="1269039233072689152" src="https://pbs.twimg.com/media/EZyIdXVU8AACPCz.jpg" loading="lazy"></div>""",
|
"""<div><img alt="1269039233072689152" src="https://pbs.twimg.com/media/EZyIdXVU8AACPCz.jpg" loading="lazy"></div>""",
|
||||||
post.body,
|
post.body,
|
||||||
|
|
@@ -141,11 +153,24 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
"Small enough to access hard-to-reach ore deposits, but with enough"
|
"Small enough to access hard-to-reach ore deposits, but with enough"
|
||||||
" power to get through the tough jobs, Greycat\u2019s ROC perfectly"
|
" power to get through the tough jobs, Greycat\u2019s ROC perfectly"
|
||||||
" complements any mining operation. \n\nDetails:"
|
" complements any mining operation. \n\nDetails:"
|
||||||
" https://t.co/2aH7qdOfSk https://t.co/mZ8CAuq3SH"
|
""" <a href="https://t.co/2aH7qdOfSk" rel="nofollow">https://t.co/2aH7qdOfSk</a>"""
|
||||||
|
""" <a href="https://t.co/mZ8CAuq3SH" rel="nofollow">https://t.co/mZ8CAuq3SH</a>"""
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEquals(post.rule, profile)
|
self.assertEquals(post.rule, profile)
|
||||||
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
|
self.assertEquals(
|
||||||
|
post.title,
|
||||||
|
truncate_text(
|
||||||
|
Post,
|
||||||
|
"title",
|
||||||
|
fix_text(
|
||||||
|
"Small enough to access hard-to-reach ore deposits, but with enough"
|
||||||
|
" power to get through the tough jobs, Greycat\u2019s ROC perfectly"
|
||||||
|
" complements any mining operation. \n\nDetails:"
|
||||||
|
" https://t.co/2aH7qdOfSk https://t.co/mZ8CAuq3SH"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEquals(post.author, "RobertsSpaceInd")
|
self.assertEquals(post.author, "RobertsSpaceInd")
|
||||||
self.assertEquals(
|
self.assertEquals(
|
||||||
|
|
@@ -208,7 +233,10 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
count=1,
|
count=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIn("@Xenosystems https://t.co/wxvioLCJ6h", post.body)
|
self.assertIn(
|
||||||
|
"""@Xenosystems <a href="https://t.co/wxvioLCJ6h" rel="nofollow">https://t.co/wxvioLCJ6h</a>""",
|
||||||
|
post.body,
|
||||||
|
)
|
||||||
|
|
||||||
def test_retweet_post(self):
|
def test_retweet_post(self):
|
||||||
builder = TwitterBuilder
|
builder = TwitterBuilder
|
||||||
|
|
@@ -241,7 +269,8 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
"Original tweet: New video! #StarCitizen 3.9 vs. 3.10 comparison!\nSo, the patch"
|
"Original tweet: New video! #StarCitizen 3.9 vs. 3.10 comparison!\nSo, the patch"
|
||||||
" 3.10 came out, which brought us quite a lot of changes!\ud83d\ude42\nPlease,"
|
" 3.10 came out, which brought us quite a lot of changes!\ud83d\ude42\nPlease,"
|
||||||
" share it with your friends!\ud83d\ude4f\n\nEnjoy watching and stay safe!"
|
" share it with your friends!\ud83d\ude4f\n\nEnjoy watching and stay safe!"
|
||||||
" \u2764\ufe0f\u263a\ufe0f\n@RobertsSpaceInd\n\n@CloudImperium\n\nhttps://t.co/j4QahHzbw4"
|
" \u2764\ufe0f\u263a\ufe0f\n@RobertsSpaceInd\n\n@CloudImperium\n\n"
|
||||||
|
"""<a href="https://t.co/j4QahHzbw4" rel="nofollow">https://t.co/j4QahHzbw4</a>"""
|
||||||
),
|
),
|
||||||
post.body,
|
post.body,
|
||||||
)
|
)
|
||||||
|
|
@@ -265,14 +294,17 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
post = posts["1290801039075979264"]
|
post = posts["1290801039075979264"]
|
||||||
|
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
fix_text("Bonne nuit \ud83c\udf3a\ud83d\udeeb https://t.co/WyznJwCJLp"),
|
fix_text(
|
||||||
|
"Bonne nuit \ud83c\udf3a\ud83d\udeeb"
|
||||||
|
""" <a href="https://t.co/WyznJwCJLp" rel="nofollow">https://t.co/WyznJwCJLp</a>"""
|
||||||
|
),
|
||||||
post.body,
|
post.body,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
fix_text(
|
fix_text(
|
||||||
"Quoted tweet: #Starcitizen Le jeu est beau. Bonne nuit"
|
"Quoted tweet: #Starcitizen Le jeu est beau. Bonne nuit"
|
||||||
" @RobertsSpaceInd https://t.co/xCXun68V3r"
|
""" @RobertsSpaceInd <a href="https://t.co/xCXun68V3r" rel="nofollow">https://t.co/xCXun68V3r</a>"""
|
||||||
),
|
),
|
||||||
post.body,
|
post.body,
|
||||||
)
|
)
|
||||||
|
|
@@ -306,16 +338,59 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
post = posts["1291528756373286914"]
|
post = posts["1291528756373286914"]
|
||||||
|
|
||||||
full_text = (
|
full_text = (
|
||||||
"@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
|
"@ArieNeoSC Here you go, goodnight!\n\n"
|
||||||
|
"""<a href="https://t.co/trAcIxBMlX" rel="nofollow">https://t.co/trAcIxBMlX</a>"""
|
||||||
" <article></article>"
|
" <article></article>"
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEquals(post.rule, profile)
|
self.assertEquals(post.rule, profile)
|
||||||
self.assertEquals(post.title, truncate_text(Post, "title", full_text))
|
self.assertEquals(
|
||||||
self.assertEquals(post.body, format_html(full_text))
|
post.title,
|
||||||
|
truncate_text(
|
||||||
|
Post,
|
||||||
|
"title",
|
||||||
|
"@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
|
||||||
|
" <article></article>",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
self.assertEquals(post.body, mark_safe(full_text))
|
||||||
|
|
||||||
self.assertInHTML("<script></script>", post.body, count=0)
|
self.assertInHTML("<script></script>", post.body, count=0)
|
||||||
self.assertInHTML("<article></article>", post.body, count=1)
|
self.assertInHTML("<article></article>", post.body, count=1)
|
||||||
|
|
||||||
self.assertInHTML("<script></script>", post.title, count=0)
|
self.assertInHTML("<script></script>", post.title, count=0)
|
||||||
self.assertInHTML("<article></article>", post.title, count=1)
|
self.assertInHTML("<article></article>", post.title, count=1)
|
||||||
|
|
||||||
|
def test_urlize_on_urls(self):
|
||||||
|
builder = TwitterBuilder
|
||||||
|
|
||||||
|
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
|
||||||
|
mock_stream = Mock(rule=profile)
|
||||||
|
|
||||||
|
with builder(simple_mock, mock_stream) as builder:
|
||||||
|
builder.build()
|
||||||
|
builder.save()
|
||||||
|
|
||||||
|
posts = {post.remote_identifier: post for post in Post.objects.all()}
|
||||||
|
|
||||||
|
self.assertCountEqual(
|
||||||
|
("1291528756373286914", "1288550304095416320"), posts.keys()
|
||||||
|
)
|
||||||
|
|
||||||
|
post = posts["1291528756373286914"]
|
||||||
|
|
||||||
|
full_text = (
|
||||||
|
"@ArieNeoSC Here you go, goodnight!\n\n"
|
||||||
|
"""<a href="https://t.co/trAcIxBMlX" rel="nofollow">https://t.co/trAcIxBMlX</a>"""
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEquals(post.rule, profile)
|
||||||
|
self.assertEquals(
|
||||||
|
post.title,
|
||||||
|
truncate_text(
|
||||||
|
Post,
|
||||||
|
"title",
|
||||||
|
"@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
self.assertEquals(post.body, mark_safe(full_text))
|
||||||
|
|
|
||||||
|
|
@@ -2,7 +2,7 @@ import logging
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from django.utils.html import format_html
|
from django.utils.html import format_html, urlize
|
||||||
|
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
|
|
@@ -36,8 +36,10 @@ class TwitterBuilder(PostBuilder):
|
||||||
remote_identifier = post["id_str"]
|
remote_identifier = post["id_str"]
|
||||||
url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"
|
url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"
|
||||||
|
|
||||||
body = post["full_text"]
|
body = urlize(post["full_text"], nofollow=True)
|
||||||
title = truncate_text(Post, "title", self.sanitize_fragment(body))
|
title = truncate_text(
|
||||||
|
Post, "title", self.sanitize_fragment(post["full_text"])
|
||||||
|
)
|
||||||
|
|
||||||
publication_date = pytz.utc.localize(
|
publication_date = pytz.utc.localize(
|
||||||
datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
|
datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
|
||||||
|
|
@@ -52,15 +54,21 @@ class TwitterBuilder(PostBuilder):
|
||||||
|
|
||||||
if "retweeted_status" in post:
|
if "retweeted_status" in post:
|
||||||
original_post = post["retweeted_status"]
|
original_post = post["retweeted_status"]
|
||||||
body += format_html(
|
body += urlize(
|
||||||
"Original tweet: {original_post}",
|
format_html(
|
||||||
original_post=original_post["full_text"],
|
"Original tweet: {original_post}",
|
||||||
|
original_post=urlize(original_post["full_text"], nofollow=True),
|
||||||
|
),
|
||||||
|
nofollow=True,
|
||||||
)
|
)
|
||||||
if "quoted_status" in post:
|
if "quoted_status" in post:
|
||||||
original_post = post["quoted_status"]
|
original_post = post["quoted_status"]
|
||||||
body += format_html(
|
body += urlize(
|
||||||
"Quoted tweet: {original_post}",
|
format_html(
|
||||||
original_post=original_post["full_text"],
|
"Quoted tweet: {original_post}",
|
||||||
|
original_post=original_post["full_text"],
|
||||||
|
),
|
||||||
|
nofollow=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
body = self.sanitize_fragment(body)
|
body = self.sanitize_fragment(body)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue