From c41f35917eb358fceb412124bfb8b839582ebbf6 Mon Sep 17 00:00:00 2001 From: Sonny Bakker Date: Sun, 27 Sep 2020 11:17:31 +0200 Subject: [PATCH] Skip existing posts --- .../collection/tests/twitter/builder/tests.py | 16 ++++++++++++++++ src/newsreader/news/collection/twitter.py | 5 ++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/newsreader/news/collection/tests/twitter/builder/tests.py b/src/newsreader/news/collection/tests/twitter/builder/tests.py index 2943f48..f9052f9 100644 --- a/src/newsreader/news/collection/tests/twitter/builder/tests.py +++ b/src/newsreader/news/collection/tests/twitter/builder/tests.py @@ -22,6 +22,7 @@ from newsreader.news.collection.tests.twitter.builder.mocks import ( from newsreader.news.collection.twitter import TWITTER_URL, TwitterBuilder from newsreader.news.collection.utils import truncate_text from newsreader.news.core.models import Post +from newsreader.news.core.tests.factories import PostFactory class TwitterBuilderTestCase(TestCase): @@ -394,3 +395,18 @@ class TwitterBuilderTestCase(TestCase): ), ) self.assertEquals(post.body, mark_safe(full_text)) + + def test_existing_posts(self): + builder = TwitterBuilder + + profile = TwitterTimelineFactory(screen_name="RobertsSpaceInd") + mock_stream = Mock(rule=profile) + + PostFactory(rule=profile, remote_identifier="1291528756373286914") + PostFactory(rule=profile, remote_identifier="1288550304095416320") + + with builder(simple_mock, mock_stream) as builder: + builder.build() + builder.save() + + self.assertEquals(Post.objects.count(), 2) diff --git a/src/newsreader/news/collection/twitter.py b/src/newsreader/news/collection/twitter.py index 0824532..730f12e 100644 --- a/src/newsreader/news/collection/twitter.py +++ b/src/newsreader/news/collection/twitter.py @@ -52,8 +52,11 @@ class TwitterBuilder(PostBuilder): for post in self.payload: remote_identifier = post["id_str"] - url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}" + if remote_identifier in self.existing_posts: + continue + + url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}" body = urlize(post["full_text"], nofollow=True) title = truncate_text( Post, "title", self.sanitize_fragment(post["full_text"])