Compare commits
9 commits
main
...
reddit-med
| Author | SHA1 | Date | |
|---|---|---|---|
| f75fd89dc9 | |||
| c100ff03cf | |||
| f1ba1f99fe | |||
| c502caa8cf | |||
| a7d169e281 | |||
| b3adab82fa | |||
| 0579d6f61b | |||
| e0744d3a68 | |||
| 6b7ac32dfa |
4 changed files with 2440 additions and 37 deletions
|
|
@ -42,6 +42,12 @@ REDDIT_API_URL = "https://oauth.reddit.com"
|
||||||
RATE_LIMIT = 60
|
RATE_LIMIT = 60
|
||||||
RATE_LIMIT_DURATION = timedelta(seconds=60)
|
RATE_LIMIT_DURATION = timedelta(seconds=60)
|
||||||
|
|
||||||
|
REDDIT_IMAGE_EXTENSIONS = (".jpg", ".png", ".gif")
|
||||||
|
REDDIT_VIDEO_EXTENSIONS = (".mp4", ".gifv", ".webm")
|
||||||
|
|
||||||
|
# see type prefixes on https://www.reddit.com/dev/api/
|
||||||
|
REDDIT_POST = "t3"
|
||||||
|
|
||||||
|
|
||||||
def get_reddit_authorization_url(user):
|
def get_reddit_authorization_url(user):
|
||||||
state = str(uuid4())
|
state = str(uuid4())
|
||||||
|
|
@ -114,18 +120,23 @@ class RedditBuilder(Builder):
|
||||||
results = {}
|
results = {}
|
||||||
|
|
||||||
for post in posts:
|
for post in posts:
|
||||||
if not "data" in post:
|
if not "data" in post or post["kind"] != REDDIT_POST:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
remote_identifier = post["data"]["id"]
|
data = post["data"]
|
||||||
title = truncate_text(Post, "title", post["data"]["title"])
|
|
||||||
author = truncate_text(Post, "author", post["data"]["author"])
|
remote_identifier = data["id"]
|
||||||
url_fragment = f"{post['data']['permalink']}"
|
title = truncate_text(Post, "title", data["title"])
|
||||||
|
author = truncate_text(Post, "author", data["author"])
|
||||||
|
post_url_fragment = data["permalink"]
|
||||||
|
direct_url = data["url"]
|
||||||
|
is_text_post = data["is_self"]
|
||||||
|
|
||||||
if remote_identifier in results:
|
if remote_identifier in results:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
uncleaned_body = post["data"]["selftext_html"]
|
if is_text_post:
|
||||||
|
uncleaned_body = data["selftext_html"]
|
||||||
unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
|
unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
|
||||||
body = (
|
body = (
|
||||||
bleach.clean(
|
bleach.clean(
|
||||||
|
|
@ -138,6 +149,25 @@ class RedditBuilder(Builder):
|
||||||
if unescaped_body
|
if unescaped_body
|
||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
|
elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
|
||||||
|
body = f'<div><img alt="{title}" src="{direct_url}" loading="lazy" /></div>'
|
||||||
|
elif data["is_video"]:
|
||||||
|
video_info = data["secure_media"]["reddit_video"]
|
||||||
|
|
||||||
|
body = f'<div><video controls muted><source src="{video_info["fallback_url"]}" type="video/mp4" /></video></div>'
|
||||||
|
elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
|
||||||
|
extension = next(
|
||||||
|
extension.replace(".", "")
|
||||||
|
for extension in REDDIT_VIDEO_EXTENSIONS
|
||||||
|
if direct_url.endswith(extension)
|
||||||
|
)
|
||||||
|
|
||||||
|
if extension == "gifv":
|
||||||
|
body = f'<div><video controls muted><source src="{direct_url.replace(extension, "mp4")}" type="video/mp4" /></video></div>'
|
||||||
|
else:
|
||||||
|
body = f'<div><video controls muted><source src="{direct_url}" type="video/{extension}" /></video></div>'
|
||||||
|
else:
|
||||||
|
body = f'<div><a target="_blank" rel="noopener noreferrer" alt="{title}" href="{direct_url}" class="link">Direct url</a></div>'
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parsed_date = datetime.fromtimestamp(post["data"]["created_utc"])
|
parsed_date = datetime.fromtimestamp(post["data"]["created_utc"])
|
||||||
|
|
@ -146,12 +176,12 @@ class RedditBuilder(Builder):
|
||||||
logging.warning(f"Failed parsing timestamp from {url_fragment}")
|
logging.warning(f"Failed parsing timestamp from {url_fragment}")
|
||||||
created_date = timezone.now()
|
created_date = timezone.now()
|
||||||
|
|
||||||
data = {
|
post_data = {
|
||||||
"remote_identifier": remote_identifier,
|
"remote_identifier": remote_identifier,
|
||||||
"title": title,
|
"title": title,
|
||||||
"body": body,
|
"body": body,
|
||||||
"author": author,
|
"author": author,
|
||||||
"url": f"{REDDIT_URL}{url_fragment}",
|
"url": f"{REDDIT_URL}{post_url_fragment}",
|
||||||
"publication_date": created_date,
|
"publication_date": created_date,
|
||||||
"rule": rule,
|
"rule": rule,
|
||||||
}
|
}
|
||||||
|
|
@ -159,13 +189,13 @@ class RedditBuilder(Builder):
|
||||||
if remote_identifier in self.existing_posts:
|
if remote_identifier in self.existing_posts:
|
||||||
existing_post = self.existing_posts[remote_identifier]
|
existing_post = self.existing_posts[remote_identifier]
|
||||||
|
|
||||||
for key, value in data.items():
|
for key, value in post_data.items():
|
||||||
setattr(existing_post, key, value)
|
setattr(existing_post, key, value)
|
||||||
|
|
||||||
results[existing_post.remote_identifier] = existing_post
|
results[existing_post.remote_identifier] = existing_post
|
||||||
continue
|
continue
|
||||||
|
|
||||||
results[remote_identifier] = Post(**data)
|
results[remote_identifier] = Post(**post_data)
|
||||||
|
|
||||||
return results.values()
|
return results.values()
|
||||||
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -86,7 +86,7 @@ class RedditBuilderTestCase(TestCase):
|
||||||
def test_update_posts(self):
|
def test_update_posts(self):
|
||||||
subreddit = SubredditFactory()
|
subreddit = SubredditFactory()
|
||||||
existing_post = RedditPostFactory(
|
existing_post = RedditPostFactory(
|
||||||
remote_identifier="hngsj8",
|
remote_identifier="hm0qct",
|
||||||
author="Old author",
|
author="Old author",
|
||||||
title="Old title",
|
title="Old title",
|
||||||
body="Old body",
|
body="Old body",
|
||||||
|
|
@ -108,17 +108,24 @@ class RedditBuilderTestCase(TestCase):
|
||||||
|
|
||||||
existing_post.refresh_from_db()
|
existing_post.refresh_from_db()
|
||||||
|
|
||||||
self.assertEquals(existing_post.remote_identifier, "hngsj8")
|
self.assertEquals(existing_post.remote_identifier, "hm0qct")
|
||||||
self.assertEquals(existing_post.author, "nixcraft")
|
self.assertEquals(existing_post.author, "AutoModerator")
|
||||||
self.assertEquals(existing_post.title, "KeePassXC 2.6.0 released")
|
self.assertEquals(
|
||||||
self.assertEquals(existing_post.body, "")
|
existing_post.title,
|
||||||
|
"Linux Experiences/Rants or Education/Certifications thread - July 06, 2020",
|
||||||
|
)
|
||||||
|
self.assertIn(
|
||||||
|
"This megathread is also to hear opinions from anyone just starting out "
|
||||||
|
"with Linux or those that have used Linux (GNU or otherwise) for a long time.",
|
||||||
|
existing_post.body,
|
||||||
|
)
|
||||||
self.assertEquals(
|
self.assertEquals(
|
||||||
existing_post.publication_date,
|
existing_post.publication_date,
|
||||||
pytz.utc.localize(datetime(2020, 7, 8, 15, 11, 6)),
|
pytz.utc.localize(datetime(2020, 7, 6, 6, 11, 22)),
|
||||||
)
|
)
|
||||||
self.assertEquals(
|
self.assertEquals(
|
||||||
existing_post.url,
|
existing_post.url,
|
||||||
"https://www.reddit.com/r/linux/comments/hngsj8/" "keepassxc_260_released/",
|
"https://www.reddit.com/r/linux/comments/hm0qct/linux_experiencesrants_or_educationcertifications/",
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_html_sanitizing(self):
|
def test_html_sanitizing(self):
|
||||||
|
|
@ -219,3 +226,185 @@ class RedditBuilderTestCase(TestCase):
|
||||||
duplicate_post.title,
|
duplicate_post.title,
|
||||||
"Linux Experiences/Rants or Education/Certifications thread - July 06, 2020",
|
"Linux Experiences/Rants or Education/Certifications thread - July 06, 2020",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_image_post(self):
|
||||||
|
builder = RedditBuilder
|
||||||
|
|
||||||
|
subreddit = SubredditFactory()
|
||||||
|
mock_stream = MagicMock(rule=subreddit)
|
||||||
|
|
||||||
|
with builder((image_mock, mock_stream)) as builder:
|
||||||
|
builder.save()
|
||||||
|
|
||||||
|
posts = {post.remote_identifier: post for post in Post.objects.all()}
|
||||||
|
|
||||||
|
self.assertCountEqual(("hr64xh", "hr4bxo", "hr14y5", "hr2fv0"), posts.keys())
|
||||||
|
|
||||||
|
post = posts["hr64xh"]
|
||||||
|
|
||||||
|
title = (
|
||||||
|
"Ya’ll, I just can’t... this is my "
|
||||||
|
"son, Judah. My wife and I have no "
|
||||||
|
"idea how we created such a "
|
||||||
|
"beautiful child."
|
||||||
|
)
|
||||||
|
url = "https://i.redd.it/cm2qybia1va51.jpg"
|
||||||
|
|
||||||
|
self.assertEquals(
|
||||||
|
"https://www.reddit.com/r/aww/comments/hr64xh/yall_i_just_cant_this_is_my_son_judah_my_wife_and/",
|
||||||
|
post.url,
|
||||||
|
)
|
||||||
|
self.assertEquals(
|
||||||
|
f'<div><img alt="{title}" src="{url}" loading="lazy" /></div>', post.body
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_external_image_post(self):
|
||||||
|
builder = RedditBuilder
|
||||||
|
|
||||||
|
subreddit = SubredditFactory()
|
||||||
|
mock_stream = MagicMock(rule=subreddit)
|
||||||
|
|
||||||
|
with builder((external_image_mock, mock_stream)) as builder:
|
||||||
|
builder.save()
|
||||||
|
|
||||||
|
posts = {post.remote_identifier: post for post in Post.objects.all()}
|
||||||
|
|
||||||
|
self.assertCountEqual(("hr41am", "huoldn"), posts.keys())
|
||||||
|
|
||||||
|
post = posts["hr41am"]
|
||||||
|
|
||||||
|
url = "http://gfycat.com/thatalivedogwoodclubgall"
|
||||||
|
title = "Excited cows have a new brush!"
|
||||||
|
|
||||||
|
self.assertEquals(
|
||||||
|
f'<div><a target="_blank" rel="noopener noreferrer" alt="{title}" href="{url}" class="link">Direct url</a></div>',
|
||||||
|
post.body,
|
||||||
|
)
|
||||||
|
self.assertEquals(
|
||||||
|
"https://www.reddit.com/r/aww/comments/hr41am/excited_cows_have_a_new_brush/",
|
||||||
|
post.url,
|
||||||
|
)
|
||||||
|
|
||||||
|
post = posts["huoldn"]
|
||||||
|
|
||||||
|
url = "https://i.imgur.com/usfMVUJ.jpg"
|
||||||
|
title = "Novosibirsk Zoo welcomes 16 cobalt-eyed Pallas’s cat kittens"
|
||||||
|
|
||||||
|
self.assertEquals(
|
||||||
|
f'<div><img alt="{title}" src="{url}" loading="lazy" /></div>', post.body
|
||||||
|
)
|
||||||
|
self.assertEquals(
|
||||||
|
"https://www.reddit.com/r/aww/comments/huoldn/novosibirsk_zoo_welcomes_16_cobalteyed_pallass/",
|
||||||
|
post.url,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_video_post(self):
|
||||||
|
builder = RedditBuilder
|
||||||
|
|
||||||
|
subreddit = SubredditFactory()
|
||||||
|
mock_stream = MagicMock(rule=subreddit)
|
||||||
|
|
||||||
|
with builder((video_mock, mock_stream)) as builder:
|
||||||
|
builder.save()
|
||||||
|
|
||||||
|
posts = {post.remote_identifier: post for post in Post.objects.all()}
|
||||||
|
|
||||||
|
self.assertCountEqual(("hr32jf", "hr1r00", "hqy0ny", "hr0uzh"), posts.keys())
|
||||||
|
|
||||||
|
post = posts["hr1r00"]
|
||||||
|
|
||||||
|
url = "https://v.redd.it/eyvbxaeqtta51/DASH_480.mp4?source=fallback"
|
||||||
|
|
||||||
|
self.assertEquals(
|
||||||
|
post.url,
|
||||||
|
"https://www.reddit.com/r/aww/comments/hr1r00/cool_catt_and_his_clingy_girlfriend/",
|
||||||
|
)
|
||||||
|
self.assertEquals(
|
||||||
|
f'<div><video controls muted><source src="{url}" type="video/mp4" /></video></div>',
|
||||||
|
post.body,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_external_video_post(self):
|
||||||
|
builder = RedditBuilder
|
||||||
|
|
||||||
|
subreddit = SubredditFactory()
|
||||||
|
mock_stream = MagicMock(rule=subreddit)
|
||||||
|
|
||||||
|
with builder((external_video_mock, mock_stream)) as builder:
|
||||||
|
builder.save()
|
||||||
|
|
||||||
|
post = Post.objects.get()
|
||||||
|
|
||||||
|
self.assertEquals(post.remote_identifier, "hulh8k")
|
||||||
|
|
||||||
|
self.assertEquals(
|
||||||
|
post.url,
|
||||||
|
"https://www.reddit.com/r/aww/comments/hulh8k/dog_splashing_in_water/",
|
||||||
|
)
|
||||||
|
|
||||||
|
title = "Dog splashing in water"
|
||||||
|
url = "https://gfycat.com/excellentinfantileamericanwigeon"
|
||||||
|
|
||||||
|
self.assertEquals(
|
||||||
|
f'<div><a target="_blank" rel="noopener noreferrer" alt="{title}" href="{url}" class="link">Direct url</a></div>',
|
||||||
|
post.body,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_external_gifv_video_post(self):
|
||||||
|
builder = RedditBuilder
|
||||||
|
|
||||||
|
subreddit = SubredditFactory()
|
||||||
|
mock_stream = MagicMock(rule=subreddit)
|
||||||
|
|
||||||
|
with builder((external_gifv_mock, mock_stream)) as builder:
|
||||||
|
builder.save()
|
||||||
|
|
||||||
|
post = Post.objects.get()
|
||||||
|
|
||||||
|
self.assertEquals(post.remote_identifier, "humdlf")
|
||||||
|
|
||||||
|
self.assertEquals(
|
||||||
|
post.url, "https://www.reddit.com/r/aww/comments/humdlf/if_i_fits_i_sits/"
|
||||||
|
)
|
||||||
|
|
||||||
|
url = "https://i.imgur.com/grVh2AG.mp4"
|
||||||
|
|
||||||
|
self.assertEquals(
|
||||||
|
f'<div><video controls muted><source src="{url}" type="video/mp4" /></video></div>',
|
||||||
|
post.body,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_link_only_post(self):
|
||||||
|
builder = RedditBuilder
|
||||||
|
|
||||||
|
subreddit = SubredditFactory()
|
||||||
|
mock_stream = MagicMock(rule=subreddit)
|
||||||
|
|
||||||
|
with builder((simple_mock, mock_stream)) as builder:
|
||||||
|
builder.save()
|
||||||
|
|
||||||
|
post = Post.objects.get(remote_identifier="hngsj8")
|
||||||
|
|
||||||
|
title = "KeePassXC 2.6.0 released"
|
||||||
|
url = "https://keepassxc.org/blog/2020-07-07-2.6.0-released/"
|
||||||
|
|
||||||
|
self.assertIn(
|
||||||
|
f'<div><a target="_blank" rel="noopener noreferrer" alt="{title}" href="{url}" class="link">Direct url</a></div>',
|
||||||
|
post.body,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEquals(
|
||||||
|
post.url,
|
||||||
|
"https://www.reddit.com/r/linux/comments/hngsj8/keepassxc_260_released/",
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_skip_not_known_post_type(self):
|
||||||
|
builder = RedditBuilder
|
||||||
|
|
||||||
|
subreddit = SubredditFactory()
|
||||||
|
mock_stream = MagicMock(rule=subreddit)
|
||||||
|
|
||||||
|
with builder((unknown_mock, mock_stream)) as builder:
|
||||||
|
builder.save()
|
||||||
|
|
||||||
|
self.assertEquals(Post.objects.count(), 0)
|
||||||
|
|
|
||||||
|
|
@ -68,14 +68,9 @@
|
||||||
margin: 20px 0 5px 0;
|
margin: 20px 0 5px 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
& img {
|
& img, video {
|
||||||
padding: 10px 10px 30px 10px;
|
padding: 10px 0;
|
||||||
|
max-width: 100%;
|
||||||
max-width: 70%;
|
|
||||||
width: inherit;
|
|
||||||
height: 100%;
|
|
||||||
|
|
||||||
align-self: center;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue