diff --git a/src/newsreader/news/collection/base.py b/src/newsreader/news/collection/base.py
index f980191..5de3454 100644
--- a/src/newsreader/news/collection/base.py
+++ b/src/newsreader/news/collection/base.py
@@ -2,6 +2,7 @@ from bs4 import BeautifulSoup
 
 from newsreader.news.collection.exceptions import StreamParseException
 from newsreader.news.collection.utils import fetch
+from newsreader.news.core.models import Post
 
 
 class Stream:
@@ -54,22 +55,38 @@ class Builder:
 
     instances = []
     stream = None
+    # Value matched against rule__type in __enter__; each concrete builder
+    # overrides it with its own RuleTypeChoices member.
+    rule_type = None
 
     def __init__(self, stream):
         self.stream = stream
 
     def __enter__(self):
+        """Load the rule's stored posts, then build the new ones."""
+        _, stream = self.stream
+
+        self.instances = []
+        self.existing_posts = {
+            post.remote_identifier: post
+            for post in Post.objects.filter(rule=stream.rule, rule__type=self.rule_type)
+        }
+
         self.create_posts(self.stream)
+
         return self
 
     def __exit__(self, *args, **kwargs):
         pass
 
     def create_posts(self, stream):
-        pass
+        """Fill self.instances from the stream; subclasses must override."""
+        raise NotImplementedError
 
     def save(self):
-        pass
+        """Persist every post collected in self.instances."""
+        for post in self.instances:
+            post.save()
 
     class Meta:
         abstract = True
diff --git a/src/newsreader/news/collection/feed.py b/src/newsreader/news/collection/feed.py
index f67a109..cb5618f 100644
--- a/src/newsreader/news/collection/feed.py
+++ b/src/newsreader/news/collection/feed.py
@@ -37,20 +37,7 @@ logger = logging.getLogger(__name__)
 
 
 class FeedBuilder(Builder):
-    instances = []
-
-    def __enter__(self):
-        _, stream = self.stream
-
-        self.instances = []
-        self.existing_posts = {
-            post.remote_identifier: post
-            for post in Post.objects.filter(
-                rule=stream.rule, rule__type=RuleTypeChoices.feed
-            )
-        }
-
-        return super().__enter__()
+    rule_type = RuleTypeChoices.feed  # name must match Builder.rule_type
 
     def create_posts(self, stream):
         data, stream = stream
@@ -114,10 +101,6 @@ class FeedBuilder(Builder):
         content = "\n ".join([item.get("value") for item in items])
         return self.sanitize_fragment(content)
 
-    def save(self):
-        for post in self.instances:
-            post.save()
-
 
 class FeedStream(Stream):
     def read(self):
diff --git a/src/newsreader/news/collection/migrations/0009_auto_20200807_2030.py b/src/newsreader/news/collection/migrations/0009_auto_20200807_2030.py
new file mode 100644
index 0000000..2ce4cb3
--- /dev/null
+++ b/src/newsreader/news/collection/migrations/0009_auto_20200807_2030.py
@@ -0,0 +1,29 @@
+# Generated by Django 3.0.7 on 2020-08-07 18:30
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [("collection", "0008_collectionrule_type")]
+
+    operations = [
+        migrations.AddField(
+            model_name="collectionrule",
+            name="screen_name",
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+        migrations.AlterField(
+            model_name="collectionrule",
+            name="type",
+            field=models.CharField(
+                choices=[
+                    ("feed", "Feed"),
+                    ("subreddit", "Subreddit"),
+                    ("twitter", "Twitter"),
+                ],
+                default="feed",
+                max_length=20,
+            ),
+        ),
+    ]
diff --git a/src/newsreader/news/collection/models.py b/src/newsreader/news/collection/models.py
index 35841ba..5bada6a 100644
--- a/src/newsreader/news/collection/models.py
+++ b/src/newsreader/news/collection/models.py
@@ -57,11 +57,15 @@ class CollectionRule(TimeStampedModel):
         on_delete=models.CASCADE,
     )
 
+    # Twitter
+    screen_name = models.CharField(max_length=255, blank=True, null=True)
+
     objects = CollectionRuleQuerySet.as_manager()
 
     def __str__(self):
         return self.name
 
+    # TODO add twitter url
     @property
     def update_url(self):
         if self.type == RuleTypeChoices.subreddit:
diff --git a/src/newsreader/news/collection/reddit.py b/src/newsreader/news/collection/reddit.py
index 557271c..7ef4784 100644
--- a/src/newsreader/news/collection/reddit.py
+++ b/src/newsreader/news/collection/reddit.py
@@ -94,18 +94,7 @@ def get_reddit_access_token(code, user):
 
 
 class RedditBuilder(Builder):
-    def __enter__(self):
-        _, stream = self.stream
-
-        self.instances = []
-        self.existing_posts = {
-            post.remote_identifier: post
-            for post in Post.objects.filter(
-                rule=stream.rule, rule__type=RuleTypeChoices.subreddit
-            )
-        }
-
-        return super().__enter__()
+    rule_type = RuleTypeChoices.subreddit  # name must match Builder.rule_type
 
     def create_posts(self, stream):
         data, stream = stream
@@ -218,10 +207,6 @@ class RedditBuilder(Builder):
 
         return results.values()
 
-    def save(self):
-        for post in self.instances:
-            post.save()
-
 
 class RedditScheduler:
     max_amount = RATE_LIMIT
diff --git a/src/newsreader/news/collection/tests/factories.py b/src/newsreader/news/collection/tests/factories.py
index a84365d..761844b 100644
--- a/src/newsreader/news/collection/tests/factories.py
+++ b/src/newsreader/news/collection/tests/factories.py
@@ -32,3 +32,4 @@ class SubredditFactory(CollectionRuleFactory):
 
 class TwitterProfileFactory(CollectionRuleFactory):
     type = RuleTypeChoices.twitter
+    screen_name = factory.Faker("user_name")
diff --git a/src/newsreader/news/collection/tests/twitter/builder/tests.py b/src/newsreader/news/collection/tests/twitter/builder/tests.py
index 8e79913..c27edb0 100644
--- a/src/newsreader/news/collection/tests/twitter/builder/tests.py
+++ b/src/newsreader/news/collection/tests/twitter/builder/tests.py
@@ -18,11 +18,10 @@ class TwitterBuilderTestCase(TestCase):
     def setUp(self):
         self.maxDiff = None
 
-    @skip("Not implemented")
     def test_simple_post(self):
         builder = TwitterBuilder
 
-        profile = TwitterProfileFactory()
+        profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
         mock_stream = MagicMock(rule=profile)
 
         with builder((simple_mock, mock_stream)) as builder:
@@ -39,10 +38,10 @@ class TwitterBuilderTestCase(TestCase):
         full_text = "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
 
         self.assertEquals(post.rule, profile)
-        self.assertEquals(post.title, truncatechars(full_text, 20))
+        self.assertEquals(post.title, truncatechars(full_text, 40))
         self.assertEquals(post.body, format_html(full_text))
 
-        self.assertEquals(post.author, "Star Citizen")
+        self.assertEquals(post.author, "RobertsSpaceInd")
         self.assertEquals(
             post.url, f"{TWITTER_URL}/RobertsSpaceInd/1291528756373286914"
         )
@@ -50,6 +49,22 @@ class TwitterBuilderTestCase(TestCase):
             post.publication_date, pytz.utc.localize(datetime(2020, 8, 7, 0, 17, 5))
         )
 
+        post = posts["1288550304095416320"]
+
+        full_text = "@RelicCcb Hi Christoper, we have checked the status of your investigation and it is still ongoing."
+
+        self.assertEquals(post.rule, profile)
+        self.assertEquals(post.title, truncatechars(full_text, 40))
+        self.assertEquals(post.body, format_html(full_text))
+
+        self.assertEquals(post.author, "RobertsSpaceInd")
+        self.assertEquals(
+            post.url, f"{TWITTER_URL}/RobertsSpaceInd/1288550304095416320"
+        )
+        self.assertEquals(
+            post.publication_date, pytz.utc.localize(datetime(2020, 7, 29, 19, 1, 47))
+        )
+
     # Note that only one media type can be uploaded to an Tweet
     # see https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/extended-entities-object
     @skip("Not implemented")
diff --git a/src/newsreader/news/collection/twitter.py b/src/newsreader/news/collection/twitter.py
index 5061e38..2775841 100644
--- a/src/newsreader/news/collection/twitter.py
+++ b/src/newsreader/news/collection/twitter.py
@@ -1,4 +1,13 @@
+from datetime import datetime
+
+from django.template.defaultfilters import truncatechars
+from django.utils.safestring import mark_safe
+
+import pytz
+
 from newsreader.news.collection.base import Builder, Client, Collector, Stream
+from newsreader.news.collection.choices import RuleTypeChoices
+from newsreader.news.core.models import Post
 
 
 TWITTER_URL = "https://twitter.com"
@@ -9,18 +18,43 @@ class TwitterScheduler:
 
 
 class TwitterBuilder(Builder):
-    def __enter__(self):
-        _, stream = self.stream
+    rule_type = RuleTypeChoices.twitter  # name must match Builder.rule_type
 
-        self.instances = []
-        self.existing_posts = {
-            post.remote_identifier: post
-            for post in Post.objects.filter(
-                rule=stream.rule, rule__type=RuleTypeChoices.twitter
+    def create_posts(self, stream):
+        """Build posts from the (data, stream) pair; empty data is a no-op."""
+        data, stream = stream
+
+        if not data:
+            return
+
+        self.instances = self.build(data, stream.rule)
+
+    def build(self, posts, rule):
+        """Return unsaved Post instances, one per distinct tweet id."""
+        results = {}
+
+        for post in posts:
+            remote_identifier = post["id_str"]
+            publication_date = pytz.utc.localize(
+                datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
             )
-        }
 
-        return super().__enter__()
+            data = {
+                "remote_identifier": remote_identifier,
+                "title": truncatechars(post["full_text"], 40),
+                # Tweet text is data, not a format string: format_html(text)
+                # raises on a stray "{" or "}".
+                # NOTE(review): as before, the text is trusted to be HTML-safe.
+                "body": mark_safe(post["full_text"]),
+                "author": rule.screen_name,
+                "publication_date": publication_date,
+                "url": (f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"),
+                "rule": rule,
+            }
+
+            results[remote_identifier] = Post(**data)
+
+        return results.values()
 
 
 class TwitterStream(Stream):