diff --git a/src/newsreader/news/collection/tests/reddit/builder/mocks.py b/src/newsreader/news/collection/tests/reddit/builder/mocks.py index 53ce372..fabc802 100644 --- a/src/newsreader/news/collection/tests/reddit/builder/mocks.py +++ b/src/newsreader/news/collection/tests/reddit/builder/mocks.py @@ -1376,3 +1376,336 @@ title_mock = { "before": None, }, } + +duplicate_mock = { + "kind": "Listing", + "data": { + "modhash": "rjewztai5w0ab64547311ae1fb1f9cf81cd18949bfb629cb7f", + "dist": 27, + "children": [ + { + "kind": "t3", + "data": { + "approved_at_utc": None, + "subreddit": "linux", + "selftext": "Welcome to r/linux rants and experiences! This megathread is also to hear opinions from anyone just starting out with Linux or those that have used Linux (GNU or otherwise) for a long time.\n\nLet us know what's annoying you, whats making you happy, or something that you want to get out to r/linux but didn't make the cut into a full post of it's own.\n\nFor those looking for certifications please use this megathread to ask about how to get certified whether it's for the business world or for your own satisfaction. 
Be sure to check out r/linuxadmin for more discussion in the SysAdmin world!\n\n_Please keep questions in r/linuxquestions, r/linux4noobs, or the Wednesday automod thread._", + "author_fullname": "t2_6l4z3", + "saved": False, + "mod_reason_title": None, + "gilded": 0, + "clicked": False, + "title": "Linux Experiences/Rants or Education/Certifications thread - July 06, 2020", + "link_flair_richtext": [], + "subreddit_name_prefixed": "r/linux", + "hidden": False, + "pwls": 6, + "link_flair_css_class": None, + "downs": 0, + "top_awarded_type": None, + "hide_score": False, + "name": "t3_hm0qct", + "quarantine": False, + "link_flair_text_color": "dark", + "upvote_ratio": 0.7, + "author_flair_background_color": None, + "subreddit_type": "public", + "ups": 8, + "total_awards_received": 0, + "media_embed": {}, + "author_flair_template_id": None, + "is_original_content": False, + "user_reports": [], + "secure_media": None, + "is_reddit_media_domain": False, + "is_meta": False, + "category": None, + "secure_media_embed": {}, + "link_flair_text": None, + "can_mod_post": False, + "score": 8, + "approved_by": None, + "author_premium": True, + "thumbnail": "", + "edited": False, + "author_flair_css_class": None, + "author_flair_richtext": [], + "gildings": {}, + "content_categories": None, + "is_self": True, + "mod_note": None, + "created": 1594037482.0, + "link_flair_type": "text", + "wls": 6, + "removed_by_category": None, + "banned_by": None, + "author_flair_type": "text", + "domain": "self.linux", + "allow_live_comments": False, + "selftext_html": "<!-- SC_OFF --><div class='md'><p>Welcome to <a href='/r/linux'>r/linux</a> rants and experiences! 
This megathread is also to hear opinions from anyone just starting out with Linux or those that have used Linux (GNU or otherwise) for a long time.</p>\n\n<p>Let us know what&#39;s annoying you, whats making you happy, or something that you want to get out to <a href='/r/linux'>r/linux</a> but didn&#39;t make the cut into a full post of it&#39;s own.</p>\n\n<p>For those looking for certifications please use this megathread to ask about how to get certified whether it&#39;s for the business world or for your own satisfaction. Be sure to check out <a href='/r/linuxadmin'>r/linuxadmin</a> for more discussion in the SysAdmin world!</p>\n\n<p><em>Please keep questions in <a href='/r/linuxquestions'>r/linuxquestions</a>, <a href='/r/linux4noobs'>r/linux4noobs</a>, or the Wednesday automod thread.</em></p>\n</div><!-- SC_ON -->", + "likes": None, + "suggested_sort": None, + "banned_at_utc": None, + "view_count": None, + "archived": False, + "no_follow": True, + "is_crosspostable": True, + "pinned": False, + "over_18": False, + "all_awardings": [], + "awarders": [], + "media_only": False, + "can_gild": True, + "spoiler": False, + "locked": False, + "author_flair_text": None, + "treatment_tags": [], + "visited": False, + "removed_by": None, + "num_reports": None, + "distinguished": "moderator", + "subreddit_id": "t5_2qh1a", + "mod_reason_by": None, + "removal_reason": None, + "link_flair_background_color": "", + "id": "hm0qct", + "is_robot_indexable": True, + "report_reasons": None, + "author": "AutoModerator", + "discussion_type": None, + "num_comments": 9, + "send_replies": False, + "whitelist_status": "all_ads", + "contest_mode": False, + "mod_reports": [], + "author_patreon_flair": False, + "author_flair_text_color": None, + "permalink": "/r/linux/comments/hm0qct/linux_experiencesrants_or_educationcertifications/", + "parent_whitelist_status": "all_ads", + "stickied": True, + "url": 
"https://www.reddit.com/r/linux/comments/hm0qct/linux_experiencesrants_or_educationcertifications/", + "subreddit_subscribers": 544037, + "created_utc": 1594008682.0, + "num_crossposts": 0, + "media": None, + "is_video": False, + }, + }, + { + "kind": "t3", + "data": { + "approved_at_utc": None, + "subreddit": "linux", + "selftext": "Welcome to r/linux! If you're new to Linux or trying to get started this thread is for you. Get help here or as always, check out r/linuxquestions or r/linux4noobs\n\nThis megathread is for all your question needs. As we don't allow questions on r/linux outside of this megathread, please consider using r/linuxquestions or r/linux4noobs for the best solution to your problem.\n\nAsk your hardware requests here too or try r/linuxhardware!", + "author_fullname": "t2_6l4z3", + "saved": False, + "mod_reason_title": None, + "gilded": 0, + "clicked": False, + "title": "Weekly Questions and Hardware Thread - July 08, 2020", + "link_flair_richtext": [], + "subreddit_name_prefixed": "r/linux", + "hidden": False, + "pwls": 6, + "link_flair_css_class": None, + "downs": 0, + "top_awarded_type": None, + "hide_score": False, + "name": "t3_hna75r", + "quarantine": False, + "link_flair_text_color": "dark", + "upvote_ratio": 0.6, + "author_flair_background_color": None, + "subreddit_type": "public", + "ups": 2, + "total_awards_received": 0, + "media_embed": {}, + "author_flair_template_id": None, + "is_original_content": False, + "user_reports": [], + "secure_media": None, + "is_reddit_media_domain": False, + "is_meta": False, + "category": None, + "secure_media_embed": {}, + "link_flair_text": None, + "can_mod_post": False, + "score": 2, + "approved_by": None, + "author_premium": True, + "thumbnail": "", + "edited": False, + "author_flair_css_class": None, + "author_flair_richtext": [], + "gildings": {}, + "content_categories": None, + "is_self": True, + "mod_note": None, + "created": 1594210138.0, + "link_flair_type": "text", + "wls": 6, + 
"removed_by_category": None, + "banned_by": None, + "author_flair_type": "text", + "domain": "self.linux", + "allow_live_comments": False, + "selftext_html": '<!-- SC_OFF --><div class="md"><p>Welcome to <a href="/r/linux">r/linux</a>! If you&#39;re new to Linux or trying to get started this thread is for you. Get help here or as always, check out <a href="/r/linuxquestions">r/linuxquestions</a> or <a href="/r/linux4noobs">r/linux4noobs</a></p>\n\n<p>This megathread is for all your question needs. As we don&#39;t allow questions on <a href="/r/linux">r/linux</a> outside of this megathread, please consider using <a href="/r/linuxquestions">r/linuxquestions</a> or <a href="/r/linux4noobs">r/linux4noobs</a> for the best solution to your problem.</p>\n\n<p>Ask your hardware requests here too or try <a href="/r/linuxhardware">r/linuxhardware</a>!</p>\n</div><!-- SC_ON -->', + "likes": None, + "suggested_sort": "new", + "banned_at_utc": None, + "view_count": None, + "archived": False, + "no_follow": True, + "is_crosspostable": True, + "pinned": False, + "over_18": False, + "all_awardings": [], + "awarders": [], + "media_only": False, + "can_gild": True, + "spoiler": False, + "locked": False, + "author_flair_text": None, + "treatment_tags": [], + "visited": False, + "removed_by": None, + "num_reports": None, + "distinguished": "moderator", + "subreddit_id": "t5_2qh1a", + "mod_reason_by": None, + "removal_reason": None, + "link_flair_background_color": "", + "id": "hna75r", + "is_robot_indexable": True, + "report_reasons": None, + "author": "AutoModerator", + "discussion_type": None, + "num_comments": 2, + "send_replies": False, + "whitelist_status": "all_ads", + "contest_mode": False, + "mod_reports": [], + "author_patreon_flair": False, + "author_flair_text_color": None, + "permalink": "/r/linux/comments/hna75r/weekly_questions_and_hardware_thread_july_08_2020/", + "parent_whitelist_status": "all_ads", + "stickied": True, + "url": 
"https://www.reddit.com/r/linux/comments/hna75r/weekly_questions_and_hardware_thread_july_08_2020/", + "subreddit_subscribers": 544037, + "created_utc": 1594181338.0, + "num_crossposts": 0, + "media": None, + "is_video": False, + }, + }, + { + "kind": "t3", + "data": { + "approved_at_utc": None, + "subreddit": "linux", + "selftext": "Welcome to r/linux rants and experiences! This megathread is also to hear opinions from anyone just starting out with Linux or those that have used Linux (GNU or otherwise) for a long time.\n\nLet us know what's annoying you, whats making you happy, or something that you want to get out to r/linux but didn't make the cut into a full post of it's own.\n\nFor those looking for certifications please use this megathread to ask about how to get certified whether it's for the business world or for your own satisfaction. Be sure to check out r/linuxadmin for more discussion in the SysAdmin world!\n\n_Please keep questions in r/linuxquestions, r/linux4noobs, or the Wednesday automod thread._", + "author_fullname": "t2_6l4z3", + "saved": False, + "mod_reason_title": None, + "gilded": 0, + "clicked": False, + "title": "Linux Experiences/Rants or Education/Certifications thread - July 06, 2020", + "link_flair_richtext": [], + "subreddit_name_prefixed": "r/linux", + "hidden": False, + "pwls": 6, + "link_flair_css_class": None, + "downs": 0, + "top_awarded_type": None, + "hide_score": False, + "name": "t3_hm0qct", + "quarantine": False, + "link_flair_text_color": "dark", + "upvote_ratio": 0.7, + "author_flair_background_color": None, + "subreddit_type": "public", + "ups": 8, + "total_awards_received": 0, + "media_embed": {}, + "author_flair_template_id": None, + "is_original_content": False, + "user_reports": [], + "secure_media": None, + "is_reddit_media_domain": False, + "is_meta": False, + "category": None, + "secure_media_embed": {}, + "link_flair_text": None, + "can_mod_post": False, + "score": 8, + "approved_by": None, + "author_premium": 
True, + "thumbnail": "", + "edited": False, + "author_flair_css_class": None, + "author_flair_richtext": [], + "gildings": {}, + "content_categories": None, + "is_self": True, + "mod_note": None, + "created": 1594037482.0, + "link_flair_type": "text", + "wls": 6, + "removed_by_category": None, + "banned_by": None, + "author_flair_type": "text", + "domain": "self.linux", + "allow_live_comments": False, + "selftext_html": "<!-- SC_OFF --><div class='md'><p>Welcome to <a href='/r/linux'>r/linux</a> rants and experiences! This megathread is also to hear opinions from anyone just starting out with Linux or those that have used Linux (GNU or otherwise) for a long time.</p>\n\n<p>Let us know what&#39;s annoying you, whats making you happy, or something that you want to get out to <a href='/r/linux'>r/linux</a> but didn&#39;t make the cut into a full post of it&#39;s own.</p>\n\n<p>For those looking for certifications please use this megathread to ask about how to get certified whether it&#39;s for the business world or for your own satisfaction. 
Be sure to check out <a href='/r/linuxadmin'>r/linuxadmin</a> for more discussion in the SysAdmin world!</p>\n\n<p><em>Please keep questions in <a href='/r/linuxquestions'>r/linuxquestions</a>, <a href='/r/linux4noobs'>r/linux4noobs</a>, or the Wednesday automod thread.</em></p>\n</div><!-- SC_ON -->", + "likes": None, + "suggested_sort": None, + "banned_at_utc": None, + "view_count": None, + "archived": False, + "no_follow": True, + "is_crosspostable": True, + "pinned": False, + "over_18": False, + "all_awardings": [], + "awarders": [], + "media_only": False, + "can_gild": True, + "spoiler": False, + "locked": False, + "author_flair_text": None, + "treatment_tags": [], + "visited": False, + "removed_by": None, + "num_reports": None, + "distinguished": "moderator", + "subreddit_id": "t5_2qh1a", + "mod_reason_by": None, + "removal_reason": None, + "link_flair_background_color": "", + "id": "hm0qct", + "is_robot_indexable": True, + "report_reasons": None, + "author": "AutoModerator", + "discussion_type": None, + "num_comments": 9, + "send_replies": False, + "whitelist_status": "all_ads", + "contest_mode": False, + "mod_reports": [], + "author_patreon_flair": False, + "author_flair_text_color": None, + "permalink": "/r/linux/comments/hm0qct/linux_experiencesrants_or_educationcertifications/", + "parent_whitelist_status": "all_ads", + "stickied": True, + "url": "https://www.reddit.com/r/linux/comments/hm0qct/linux_experiencesrants_or_educationcertifications/", + "subreddit_subscribers": 544037, + "created_utc": 1594008682.0, + "num_crossposts": 0, + "media": None, + "is_video": False, + }, + }, + ], + "after": "t3_hmytic", + "before": None, + }, +} diff --git a/src/newsreader/news/collection/tests/reddit/builder/tests.py b/src/newsreader/news/collection/tests/reddit/builder/tests.py index 3085199..e1a6770 100644 --- a/src/newsreader/news/collection/tests/reddit/builder/tests.py +++ b/src/newsreader/news/collection/tests/reddit/builder/tests.py @@ -7,16 +7,9 @@ import 
pytz from newsreader.news.collection.reddit import RedditBuilder from newsreader.news.collection.tests.factories import SubredditFactory -from newsreader.news.collection.tests.reddit.builder.mocks import ( - author_mock, - empty_mock, - simple_mock, - title_mock, - unknown_mock, - unsanitized_mock, -) +from newsreader.news.collection.tests.reddit.builder.mocks import * from newsreader.news.core.models import Post -from newsreader.news.core.tests.factories import PostFactory +from newsreader.news.core.tests.factories import RedditPostFactory class RedditBuilderTestCase(TestCase): @@ -93,7 +86,7 @@ class RedditBuilderTestCase(TestCase): def test_update_posts(self): subreddit = SubredditFactory() existing_publication_date = pytz.utc.localize(datetime(2020, 7, 8, 14, 0, 0)) - existing_post = PostFactory( + existing_post = RedditPostFactory( remote_identifier="hngsj8", publication_date=existing_publication_date, author="Old author", @@ -183,3 +176,50 @@ class RedditBuilderTestCase(TestCase): post.title, 'Board statement on the LibreOffice 7.0 RC "Personal EditionBoard statement on the LibreOffice 7.0 RC "Personal Edition" label" labelBoard statement on the LibreOffice 7.0 RC "PersBoard statement on t…', ) + + def test_duplicate_in_response(self): + builder = RedditBuilder + + subreddit = SubredditFactory() + mock_stream = MagicMock(rule=subreddit) + + with builder((duplicate_mock, mock_stream)) as builder: + builder.save() + + posts = {post.remote_identifier: post for post in Post.objects.all()} + + self.assertEqual(Post.objects.count(), 2) + self.assertCountEqual(("hm0qct", "hna75r"), posts.keys()) + + def test_duplicate_in_database(self): + builder = RedditBuilder + + subreddit = SubredditFactory() + mock_stream = MagicMock(rule=subreddit) + + duplicate_post = RedditPostFactory( + publication_date=pytz.utc.localize(datetime(2020, 7, 1, 9, 20, 22)), + remote_identifier="hm0qct", + title="foo", + ) + + with builder((simple_mock, mock_stream)) as builder: + 
builder.save() + + posts = {post.remote_identifier: post for post in Post.objects.all()} + + self.assertEqual(Post.objects.count(), 5) + self.assertCountEqual( + ("hm0qct", "hna75r", "hngs71", "hngsj8", "hnd7cy"), posts.keys() + ) + + duplicate_post.refresh_from_db() + + self.assertEqual( + duplicate_post.publication_date, + pytz.utc.localize(datetime(2020, 7, 6, 14, 11, 22)), + ) + self.assertEqual( + duplicate_post.title, + "Linux Experiences/Rants or Education/Certifications thread - July 06, 2020", + ) diff --git a/src/newsreader/news/core/tests/factories.py b/src/newsreader/news/core/tests/factories.py index 46eeeae..0c15744 100644 --- a/src/newsreader/news/core/tests/factories.py +++ b/src/newsreader/news/core/tests/factories.py @@ -1,7 +1,9 @@ import factory +import factory.fuzzy import pytz from newsreader.accounts.tests.factories import UserFactory +from newsreader.news.collection.reddit import REDDIT_URL from newsreader.news.core.models import Category, Post @@ -19,7 +21,7 @@ class PostFactory(factory.django.DjangoModelFactory): author = factory.Faker("name") publication_date = factory.Faker("date_time_this_year", tzinfo=pytz.utc) url = factory.Faker("url") - remote_identifier = factory.Faker("url") + remote_identifier = factory.Faker("uuid4") rule = factory.SubFactory( "newsreader.news.collection.tests.factories.CollectionRuleFactory" @@ -29,3 +31,11 @@ class PostFactory(factory.django.DjangoModelFactory): class Meta: model = Post + + +class RedditPostFactory(PostFactory): + remote_identifier = factory.Faker("uuid4") + url = factory.fuzzy.FuzzyText(length=10, prefix=REDDIT_URL) + rule = factory.SubFactory( + "newsreader.news.collection.tests.factories.SubredditFactory" + )