From e8947d1182df83c68c3afa15da60ff72d66ec2d5 Mon Sep 17 00:00:00 2001 From: sonny Date: Mon, 13 Jul 2020 23:18:52 +0200 Subject: [PATCH] Squashed commit of the following: commit 99fd94580f95dcbfb77b73e2de846f76a5709ef9 Author: Sonny Date: Sat Feb 15 21:45:16 2020 +0100 Use postgres password As of https://gitlab.com/gitlab-com/support-forum/issues/5199 --- src/newsreader/conf/production.py | 2 +- src/newsreader/news/collection/forms.py | 3 +- src/newsreader/news/collection/reddit.py | 18 +- .../collection/tests/reddit/builder/mocks.py | 333 ++++++++++++++++++ .../collection/tests/reddit/builder/tests.py | 60 +++- src/newsreader/news/core/tests/factories.py | 12 +- 6 files changed, 407 insertions(+), 21 deletions(-) diff --git a/src/newsreader/conf/production.py b/src/newsreader/conf/production.py index 852498e..5bc11a9 100644 --- a/src/newsreader/conf/production.py +++ b/src/newsreader/conf/production.py @@ -48,7 +48,7 @@ TEMPLATES = [ # Reddit integration REDDIT_CLIENT_ID = os.environ["REDDIT_CLIENT_ID"] REDDIT_CLIENT_SECRET = os.environ["REDDIT_CLIENT_SECRET"] -REDDIT_REDIRECT_URL = "https://rss.fudiggity.nl/accounts/settings/reddit/callback/" +REDDIT_REDIRECT_URL = os.environ["REDDIT_CALLBACK_URL"] # Third party settings AXES_HANDLER = "axes.handlers.database.AxesDatabaseHandler" diff --git a/src/newsreader/news/collection/forms.py b/src/newsreader/news/collection/forms.py index 1d9b996..a8aac52 100644 --- a/src/newsreader/news/collection/forms.py +++ b/src/newsreader/news/collection/forms.py @@ -13,7 +13,8 @@ def get_reddit_help_text(): return mark_safe( "Only subreddits are supported. For example: " "https://www.reddit.com/r/aww" + " href='https://reddit.com/r/aww'>https://www.reddit.com/r/aww." + " Note that subreddit urls should NOT include 'www' because Reddit is picky." ) diff --git a/src/newsreader/news/collection/reddit.py b/src/newsreader/news/collection/reddit.py index 2bb7bd9..1e2837b 100644 --- a/src/newsreader/news/collection/reddit.py +++ b/src/newsreader/news/collection/reddit.py @@ -111,6 +111,8 @@ class RedditBuilder(Builder): self.instances = self.build(posts, stream.rule) def build(self, posts, rule): + results = {} + for post in posts: if not "data" in post: continue @@ -120,6 +122,9 @@ class RedditBuilder(Builder): author = truncate_text(Post, "author", post["data"]["author"]) url_fragment = f"{post['data']['permalink']}" + if remote_identifier in results: + continue + uncleaned_body = post["data"]["selftext_html"] unescaped_body = unescape(uncleaned_body) if uncleaned_body else "" body = ( @@ -154,14 +159,15 @@ class RedditBuilder(Builder): if remote_identifier in self.existing_posts: existing_post = self.existing_posts[remote_identifier] - if created_date > existing_post.publication_date: - for key, value in data.items(): - setattr(existing_post, key, value) + for key, value in data.items(): + setattr(existing_post, key, value) - yield existing_post - continue + results[existing_post.remote_identifier] = existing_post + continue - yield Post(**data) + results[remote_identifier] = Post(**data) + + return results.values() def save(self): for post in self.instances: diff --git a/src/newsreader/news/collection/tests/reddit/builder/mocks.py b/src/newsreader/news/collection/tests/reddit/builder/mocks.py index 53ce372..fabc802 100644 --- a/src/newsreader/news/collection/tests/reddit/builder/mocks.py +++ b/src/newsreader/news/collection/tests/reddit/builder/mocks.py @@ -1376,3 +1376,336 @@ title_mock = { "before": None, }, } + +duplicate_mock = { + "kind": "Listing", + "data": { + "modhash": "rjewztai5w0ab64547311ae1fb1f9cf81cd18949bfb629cb7f", + "dist": 27, + "children": [ + { + "kind": "t3", + "data": { + "approved_at_utc": None, + "subreddit": "linux", + "selftext": "Welcome to r/linux rants and experiences! This megathread is also to hear opinions from anyone just starting out with Linux or those that have used Linux (GNU or otherwise) for a long time.\n\nLet us know what's annoying you, whats making you happy, or something that you want to get out to r/linux but didn't make the cut into a full post of it's own.\n\nFor those looking for certifications please use this megathread to ask about how to get certified whether it's for the business world or for your own satisfaction. Be sure to check out r/linuxadmin for more discussion in the SysAdmin world!\n\n_Please keep questions in r/linuxquestions, r/linux4noobs, or the Wednesday automod thread._", + "author_fullname": "t2_6l4z3", + "saved": False, + "mod_reason_title": None, + "gilded": 0, + "clicked": False, + "title": "Linux Experiences/Rants or Education/Certifications thread - July 06, 2020", + "link_flair_richtext": [], + "subreddit_name_prefixed": "r/linux", + "hidden": False, + "pwls": 6, + "link_flair_css_class": None, + "downs": 0, + "top_awarded_type": None, + "hide_score": False, + "name": "t3_hm0qct", + "quarantine": False, + "link_flair_text_color": "dark", + "upvote_ratio": 0.7, + "author_flair_background_color": None, + "subreddit_type": "public", + "ups": 8, + "total_awards_received": 0, + "media_embed": {}, + "author_flair_template_id": None, + "is_original_content": False, + "user_reports": [], + "secure_media": None, + "is_reddit_media_domain": False, + "is_meta": False, + "category": None, + "secure_media_embed": {}, + "link_flair_text": None, + "can_mod_post": False, + "score": 8, + "approved_by": None, + "author_premium": True, + "thumbnail": "", + "edited": False, + "author_flair_css_class": None, + "author_flair_richtext": [], + "gildings": {}, + "content_categories": None, + "is_self": True, + "mod_note": None, + "created": 1594037482.0, + "link_flair_type": "text", + "wls": 6, + "removed_by_category": None, + "banned_by": None, + "author_flair_type": "text", + "domain": "self.linux", + "allow_live_comments": False, + "selftext_html": "<!-- SC_OFF --><div class='md'><p>Welcome to <a href='/r/linux'>r/linux</a> rants and experiences! This megathread is also to hear opinions from anyone just starting out with Linux or those that have used Linux (GNU or otherwise) for a long time.</p>\n\n<p>Let us know what&#39;s annoying you, whats making you happy, or something that you want to get out to <a href='/r/linux'>r/linux</a> but didn&#39;t make the cut into a full post of it&#39;s own.</p>\n\n<p>For those looking for certifications please use this megathread to ask about how to get certified whether it&#39;s for the business world or for your own satisfaction. Be sure to check out <a href='/r/linuxadmin'>r/linuxadmin</a> for more discussion in the SysAdmin world!</p>\n\n<p><em>Please keep questions in <a href='/r/linuxquestions'>r/linuxquestions</a>, <a href='/r/linux4noobs'>r/linux4noobs</a>, or the Wednesday automod thread.</em></p>\n</div><!-- SC_ON -->", + "likes": None, + "suggested_sort": None, + "banned_at_utc": None, + "view_count": None, + "archived": False, + "no_follow": True, + "is_crosspostable": True, + "pinned": False, + "over_18": False, + "all_awardings": [], + "awarders": [], + "media_only": False, + "can_gild": True, + "spoiler": False, + "locked": False, + "author_flair_text": None, + "treatment_tags": [], + "visited": False, + "removed_by": None, + "num_reports": None, + "distinguished": "moderator", + "subreddit_id": "t5_2qh1a", + "mod_reason_by": None, + "removal_reason": None, + "link_flair_background_color": "", + "id": "hm0qct", + "is_robot_indexable": True, + "report_reasons": None, + "author": "AutoModerator", + "discussion_type": None, + "num_comments": 9, + "send_replies": False, + "whitelist_status": "all_ads", + "contest_mode": False, + "mod_reports": [], + "author_patreon_flair": False, + "author_flair_text_color": None, + "permalink": "/r/linux/comments/hm0qct/linux_experiencesrants_or_educationcertifications/", + "parent_whitelist_status": "all_ads", + "stickied": True, + "url": "https://www.reddit.com/r/linux/comments/hm0qct/linux_experiencesrants_or_educationcertifications/", + "subreddit_subscribers": 544037, + "created_utc": 1594008682.0, + "num_crossposts": 0, + "media": None, + "is_video": False, + }, + }, + { + "kind": "t3", + "data": { + "approved_at_utc": None, + "subreddit": "linux", + "selftext": "Welcome to r/linux! If you're new to Linux or trying to get started this thread is for you. Get help here or as always, check out r/linuxquestions or r/linux4noobs\n\nThis megathread is for all your question needs. As we don't allow questions on r/linux outside of this megathread, please consider using r/linuxquestions or r/linux4noobs for the best solution to your problem.\n\nAsk your hardware requests here too or try r/linuxhardware!", + "author_fullname": "t2_6l4z3", + "saved": False, + "mod_reason_title": None, + "gilded": 0, + "clicked": False, + "title": "Weekly Questions and Hardware Thread - July 08, 2020", + "link_flair_richtext": [], + "subreddit_name_prefixed": "r/linux", + "hidden": False, + "pwls": 6, + "link_flair_css_class": None, + "downs": 0, + "top_awarded_type": None, + "hide_score": False, + "name": "t3_hna75r", + "quarantine": False, + "link_flair_text_color": "dark", + "upvote_ratio": 0.6, + "author_flair_background_color": None, + "subreddit_type": "public", + "ups": 2, + "total_awards_received": 0, + "media_embed": {}, + "author_flair_template_id": None, + "is_original_content": False, + "user_reports": [], + "secure_media": None, + "is_reddit_media_domain": False, + "is_meta": False, + "category": None, + "secure_media_embed": {}, + "link_flair_text": None, + "can_mod_post": False, + "score": 2, + "approved_by": None, + "author_premium": True, + "thumbnail": "", + "edited": False, + "author_flair_css_class": None, + "author_flair_richtext": [], + "gildings": {}, + "content_categories": None, + "is_self": True, + "mod_note": None, + "created": 1594210138.0, + "link_flair_type": "text", + "wls": 6, + "removed_by_category": None, + "banned_by": None, + "author_flair_type": "text", + "domain": "self.linux", + "allow_live_comments": False, + "selftext_html": '<!-- SC_OFF --><div class="md"><p>Welcome to <a href="/r/linux">r/linux</a>! If you&#39;re new to Linux or trying to get started this thread is for you. Get help here or as always, check out <a href="/r/linuxquestions">r/linuxquestions</a> or <a href="/r/linux4noobs">r/linux4noobs</a></p>\n\n<p>This megathread is for all your question needs. As we don&#39;t allow questions on <a href="/r/linux">r/linux</a> outside of this megathread, please consider using <a href="/r/linuxquestions">r/linuxquestions</a> or <a href="/r/linux4noobs">r/linux4noobs</a> for the best solution to your problem.</p>\n\n<p>Ask your hardware requests here too or try <a href="/r/linuxhardware">r/linuxhardware</a>!</p>\n</div><!-- SC_ON -->', + "likes": None, + "suggested_sort": "new", + "banned_at_utc": None, + "view_count": None, + "archived": False, + "no_follow": True, + "is_crosspostable": True, + "pinned": False, + "over_18": False, + "all_awardings": [], + "awarders": [], + "media_only": False, + "can_gild": True, + "spoiler": False, + "locked": False, + "author_flair_text": None, + "treatment_tags": [], + "visited": False, + "removed_by": None, + "num_reports": None, + "distinguished": "moderator", + "subreddit_id": "t5_2qh1a", + "mod_reason_by": None, + "removal_reason": None, + "link_flair_background_color": "", + "id": "hna75r", + "is_robot_indexable": True, + "report_reasons": None, + "author": "AutoModerator", + "discussion_type": None, + "num_comments": 2, + "send_replies": False, + "whitelist_status": "all_ads", + "contest_mode": False, + "mod_reports": [], + "author_patreon_flair": False, + "author_flair_text_color": None, + "permalink": "/r/linux/comments/hna75r/weekly_questions_and_hardware_thread_july_08_2020/", + "parent_whitelist_status": "all_ads", + "stickied": True, + "url": "https://www.reddit.com/r/linux/comments/hna75r/weekly_questions_and_hardware_thread_july_08_2020/", + "subreddit_subscribers": 544037, + "created_utc": 1594181338.0, + "num_crossposts": 0, + "media": None, + "is_video": False, + }, + }, + { + "kind": "t3", + "data": { + "approved_at_utc": None, + "subreddit": "linux", + "selftext": "Welcome to r/linux rants and experiences! This megathread is also to hear opinions from anyone just starting out with Linux or those that have used Linux (GNU or otherwise) for a long time.\n\nLet us know what's annoying you, whats making you happy, or something that you want to get out to r/linux but didn't make the cut into a full post of it's own.\n\nFor those looking for certifications please use this megathread to ask about how to get certified whether it's for the business world or for your own satisfaction. Be sure to check out r/linuxadmin for more discussion in the SysAdmin world!\n\n_Please keep questions in r/linuxquestions, r/linux4noobs, or the Wednesday automod thread._", + "author_fullname": "t2_6l4z3", + "saved": False, + "mod_reason_title": None, + "gilded": 0, + "clicked": False, + "title": "Linux Experiences/Rants or Education/Certifications thread - July 06, 2020", + "link_flair_richtext": [], + "subreddit_name_prefixed": "r/linux", + "hidden": False, + "pwls": 6, + "link_flair_css_class": None, + "downs": 0, + "top_awarded_type": None, + "hide_score": False, + "name": "t3_hm0qct", + "quarantine": False, + "link_flair_text_color": "dark", + "upvote_ratio": 0.7, + "author_flair_background_color": None, + "subreddit_type": "public", + "ups": 8, + "total_awards_received": 0, + "media_embed": {}, + "author_flair_template_id": None, + "is_original_content": False, + "user_reports": [], + "secure_media": None, + "is_reddit_media_domain": False, + "is_meta": False, + "category": None, + "secure_media_embed": {}, + "link_flair_text": None, + "can_mod_post": False, + "score": 8, + "approved_by": None, + "author_premium": True, + "thumbnail": "", + "edited": False, + "author_flair_css_class": None, + "author_flair_richtext": [], + "gildings": {}, + "content_categories": None, + "is_self": True, + "mod_note": None, + "created": 1594037482.0, + "link_flair_type": "text", + "wls": 6, + "removed_by_category": None, + "banned_by": None, + "author_flair_type": "text", + "domain": "self.linux", + "allow_live_comments": False, + "selftext_html": "<!-- SC_OFF --><div class='md'><p>Welcome to <a href='/r/linux'>r/linux</a> rants and experiences! This megathread is also to hear opinions from anyone just starting out with Linux or those that have used Linux (GNU or otherwise) for a long time.</p>\n\n<p>Let us know what&#39;s annoying you, whats making you happy, or something that you want to get out to <a href='/r/linux'>r/linux</a> but didn&#39;t make the cut into a full post of it&#39;s own.</p>\n\n<p>For those looking for certifications please use this megathread to ask about how to get certified whether it&#39;s for the business world or for your own satisfaction. Be sure to check out <a href='/r/linuxadmin'>r/linuxadmin</a> for more discussion in the SysAdmin world!</p>\n\n<p><em>Please keep questions in <a href='/r/linuxquestions'>r/linuxquestions</a>, <a href='/r/linux4noobs'>r/linux4noobs</a>, or the Wednesday automod thread.</em></p>\n</div><!-- SC_ON -->", + "likes": None, + "suggested_sort": None, + "banned_at_utc": None, + "view_count": None, + "archived": False, + "no_follow": True, + "is_crosspostable": True, + "pinned": False, + "over_18": False, + "all_awardings": [], + "awarders": [], + "media_only": False, + "can_gild": True, + "spoiler": False, + "locked": False, + "author_flair_text": None, + "treatment_tags": [], + "visited": False, + "removed_by": None, + "num_reports": None, + "distinguished": "moderator", + "subreddit_id": "t5_2qh1a", + "mod_reason_by": None, + "removal_reason": None, + "link_flair_background_color": "", + "id": "hm0qct", + "is_robot_indexable": True, + "report_reasons": None, + "author": "AutoModerator", + "discussion_type": None, + "num_comments": 9, + "send_replies": False, + "whitelist_status": "all_ads", + "contest_mode": False, + "mod_reports": [], + "author_patreon_flair": False, + "author_flair_text_color": None, + "permalink": "/r/linux/comments/hm0qct/linux_experiencesrants_or_educationcertifications/", + "parent_whitelist_status": "all_ads", + "stickied": True, + "url": "https://www.reddit.com/r/linux/comments/hm0qct/linux_experiencesrants_or_educationcertifications/", + "subreddit_subscribers": 544037, + "created_utc": 1594008682.0, + "num_crossposts": 0, + "media": None, + "is_video": False, + }, + }, + ], + "after": "t3_hmytic", + "before": None, + }, +} diff --git a/src/newsreader/news/collection/tests/reddit/builder/tests.py b/src/newsreader/news/collection/tests/reddit/builder/tests.py index 3085199..eb8182a 100644 --- a/src/newsreader/news/collection/tests/reddit/builder/tests.py +++ b/src/newsreader/news/collection/tests/reddit/builder/tests.py @@ -7,16 +7,9 @@ import pytz from newsreader.news.collection.reddit import RedditBuilder from newsreader.news.collection.tests.factories import SubredditFactory -from newsreader.news.collection.tests.reddit.builder.mocks import ( - author_mock, - empty_mock, - simple_mock, - title_mock, - unknown_mock, - unsanitized_mock, -) +from newsreader.news.collection.tests.reddit.builder.mocks import * from newsreader.news.core.models import Post -from newsreader.news.core.tests.factories import PostFactory +from newsreader.news.core.tests.factories import RedditPostFactory class RedditBuilderTestCase(TestCase): @@ -92,10 +85,8 @@ class RedditBuilderTestCase(TestCase): def test_update_posts(self): subreddit = SubredditFactory() - existing_publication_date = pytz.utc.localize(datetime(2020, 7, 8, 14, 0, 0)) - existing_post = PostFactory( + existing_post = RedditPostFactory( remote_identifier="hngsj8", - publication_date=existing_publication_date, author="Old author", title="Old title", body="Old body", @@ -183,3 +174,48 @@ class RedditBuilderTestCase(TestCase): post.title, 'Board statement on the LibreOffice 7.0 RC "Personal EditionBoard statement on the LibreOffice 7.0 RC "Personal Edition" label" labelBoard statement on the LibreOffice 7.0 RC "PersBoard statement on t…', ) + + def test_duplicate_in_response(self): + builder = RedditBuilder + + subreddit = SubredditFactory() + mock_stream = MagicMock(rule=subreddit) + + with builder((duplicate_mock, mock_stream)) as builder: + builder.save() + + posts = {post.remote_identifier: post for post in Post.objects.all()} + + self.assertEquals(Post.objects.count(), 2) + self.assertCountEqual(("hm0qct", "hna75r"), posts.keys()) + + def test_duplicate_in_database(self): + builder = RedditBuilder + + subreddit = SubredditFactory() + mock_stream = MagicMock(rule=subreddit) + + duplicate_post = RedditPostFactory( + remote_identifier="hm0qct", rule=subreddit, title="foo" + ) + + with builder((simple_mock, mock_stream)) as builder: + builder.save() + + posts = {post.remote_identifier: post for post in Post.objects.all()} + + self.assertEquals(Post.objects.count(), 5) + self.assertCountEqual( + ("hm0qct", "hna75r", "hngs71", "hngsj8", "hnd7cy"), posts.keys() + ) + + duplicate_post.refresh_from_db() + + self.assertEquals( + duplicate_post.publication_date, + pytz.utc.localize(datetime(2020, 7, 6, 6, 11, 22)), + ) + self.assertEquals( + duplicate_post.title, + "Linux Experiences/Rants or Education/Certifications thread - July 06, 2020", + ) diff --git a/src/newsreader/news/core/tests/factories.py b/src/newsreader/news/core/tests/factories.py index 46eeeae..966e70b 100644 --- a/src/newsreader/news/core/tests/factories.py +++ b/src/newsreader/news/core/tests/factories.py @@ -1,7 +1,9 @@ import factory +import factory.fuzzy import pytz from newsreader.accounts.tests.factories import UserFactory +from newsreader.news.collection.reddit import REDDIT_URL from newsreader.news.core.models import Category, Post @@ -19,7 +21,7 @@ class PostFactory(factory.django.DjangoModelFactory): author = factory.Faker("name") publication_date = factory.Faker("date_time_this_year", tzinfo=pytz.utc) url = factory.Faker("url") - remote_identifier = factory.Faker("url") + remote_identifier = factory.Faker("uuid4") rule = factory.SubFactory( "newsreader.news.collection.tests.factories.CollectionRuleFactory" @@ -29,3 +31,11 @@ class PostFactory(factory.django.DjangoModelFactory): class Meta: model = Post + + +class RedditPostFactory(PostFactory): + remote_identifier = factory.Faker("uuid4") + url = factory.fuzzy.FuzzyText(length=10, prefix=f"{REDDIT_URL}/") + rule = factory.SubFactory( + "newsreader.news.collection.tests.factories.SubredditFactory" + )