Add simple builder scenario

This commit is contained in:
Sonny Bakker 2020-08-07 21:10:23 +02:00
parent 90bbcb4d27
commit f5d1e9ba5f
8 changed files with 107 additions and 49 deletions

View file

@ -2,6 +2,7 @@ from bs4 import BeautifulSoup
from newsreader.news.collection.exceptions import StreamParseException from newsreader.news.collection.exceptions import StreamParseException
from newsreader.news.collection.utils import fetch from newsreader.news.collection.utils import fetch
from newsreader.news.core.models import Post
class Stream: class Stream:
@ -54,22 +55,33 @@ class Builder:
instances = [] instances = []
stream = None stream = None
rule_type = None
def __init__(self, stream): def __init__(self, stream):
self.stream = stream self.stream = stream
def __enter__(self): def __enter__(self):
_, stream = self.stream
self.instances = []
self.existing_posts = {
post.remote_identifier: post
for post in Post.objects.filter(rule=stream.rule, rule__type=self.rule_type)
}
self.create_posts(self.stream) self.create_posts(self.stream)
return self return self
def __exit__(self, *args, **kwargs): def __exit__(self, *args, **kwargs):
pass pass
def create_posts(self, stream): def create_posts(self, stream):
pass raise NotImplementedError
def save(self): def save(self):
pass for post in self.instances:
post.save()
class Meta: class Meta:
abstract = True abstract = True

View file

@ -37,20 +37,7 @@ logger = logging.getLogger(__name__)
class FeedBuilder(Builder): class FeedBuilder(Builder):
instances = [] rule__type = RuleTypeChoices.feed
def __enter__(self):
_, stream = self.stream
self.instances = []
self.existing_posts = {
post.remote_identifier: post
for post in Post.objects.filter(
rule=stream.rule, rule__type=RuleTypeChoices.feed
)
}
return super().__enter__()
def create_posts(self, stream): def create_posts(self, stream):
data, stream = stream data, stream = stream
@ -114,10 +101,6 @@ class FeedBuilder(Builder):
content = "\n ".join([item.get("value") for item in items]) content = "\n ".join([item.get("value") for item in items])
return self.sanitize_fragment(content) return self.sanitize_fragment(content)
def save(self):
for post in self.instances:
post.save()
class FeedStream(Stream): class FeedStream(Stream):
def read(self): def read(self):

View file

@ -0,0 +1,29 @@
# Generated by Django 3.0.7 on 2020-08-07 18:30
from django.db import migrations, models
class Migration(migrations.Migration):
    """Schema migration touching the ``collectionrule`` model.

    Adds an optional ``screen_name`` column and redeclares the ``type``
    column with the choices feed / subreddit / twitter.
    """

    # Depends on migration ``0008_collectionrule_type`` of the
    # ``collection`` app.
    dependencies = [("collection", "0008_collectionrule_type")]

    operations = [
        # New free-text column; both blank and NULL are accepted, so existing
        # rows need no default value.
        migrations.AddField(
            model_name="collectionrule",
            name="screen_name",
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
        # Redeclare ``type`` with its full choice list and "feed" as default.
        # NOTE(review): presumably "twitter" is the choice being introduced
        # here -- confirm against the previous migration.
        migrations.AlterField(
            model_name="collectionrule",
            name="type",
            field=models.CharField(
                choices=[
                    ("feed", "Feed"),
                    ("subreddit", "Subreddit"),
                    ("twitter", "Twitter"),
                ],
                default="feed",
                max_length=20,
            ),
        ),
    ]

View file

@ -57,11 +57,15 @@ class CollectionRule(TimeStampedModel):
on_delete=models.CASCADE, on_delete=models.CASCADE,
) )
# Twitter
screen_name = models.CharField(max_length=255, blank=True, null=True)
objects = CollectionRuleQuerySet.as_manager() objects = CollectionRuleQuerySet.as_manager()
def __str__(self): def __str__(self):
return self.name return self.name
# TODO add twitter url
@property @property
def update_url(self): def update_url(self):
if self.type == RuleTypeChoices.subreddit: if self.type == RuleTypeChoices.subreddit:

View file

@ -94,18 +94,7 @@ def get_reddit_access_token(code, user):
class RedditBuilder(Builder): class RedditBuilder(Builder):
def __enter__(self): rule__type = RuleTypeChoices.subreddit
_, stream = self.stream
self.instances = []
self.existing_posts = {
post.remote_identifier: post
for post in Post.objects.filter(
rule=stream.rule, rule__type=RuleTypeChoices.subreddit
)
}
return super().__enter__()
def create_posts(self, stream): def create_posts(self, stream):
data, stream = stream data, stream = stream
@ -218,10 +207,6 @@ class RedditBuilder(Builder):
return results.values() return results.values()
def save(self):
for post in self.instances:
post.save()
class RedditScheduler: class RedditScheduler:
max_amount = RATE_LIMIT max_amount = RATE_LIMIT

View file

@ -32,3 +32,4 @@ class SubredditFactory(CollectionRuleFactory):
class TwitterProfileFactory(CollectionRuleFactory): class TwitterProfileFactory(CollectionRuleFactory):
type = RuleTypeChoices.twitter type = RuleTypeChoices.twitter
screen_name = factory.Faker("user_name")

View file

@ -18,11 +18,10 @@ class TwitterBuilderTestCase(TestCase):
def setUp(self): def setUp(self):
self.maxDiff = None self.maxDiff = None
@skip("Not implemented")
def test_simple_post(self): def test_simple_post(self):
builder = TwitterBuilder builder = TwitterBuilder
profile = TwitterProfileFactory() profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile) mock_stream = MagicMock(rule=profile)
with builder((simple_mock, mock_stream)) as builder: with builder((simple_mock, mock_stream)) as builder:
@ -39,10 +38,10 @@ class TwitterBuilderTestCase(TestCase):
full_text = "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX" full_text = "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
self.assertEquals(post.rule, profile) self.assertEquals(post.rule, profile)
self.assertEquals(post.title, truncatechars(full_text, 20)) self.assertEquals(post.title, truncatechars(full_text, 40))
self.assertEquals(post.body, format_html(full_text)) self.assertEquals(post.body, format_html(full_text))
self.assertEquals(post.author, "Star Citizen") self.assertEquals(post.author, "RobertsSpaceInd")
self.assertEquals( self.assertEquals(
post.url, f"{TWITTER_URL}/RobertsSpaceInd/1291528756373286914" post.url, f"{TWITTER_URL}/RobertsSpaceInd/1291528756373286914"
) )
@ -50,6 +49,22 @@ class TwitterBuilderTestCase(TestCase):
post.publication_date, pytz.utc.localize(datetime(2020, 8, 7, 0, 17, 5)) post.publication_date, pytz.utc.localize(datetime(2020, 8, 7, 0, 17, 5))
) )
post = posts["1288550304095416320"]
full_text = "@RelicCcb Hi Christoper, we have checked the status of your investigation and it is still ongoing."
self.assertEquals(post.rule, profile)
self.assertEquals(post.title, truncatechars(full_text, 40))
self.assertEquals(post.body, format_html(full_text))
self.assertEquals(post.author, "RobertsSpaceInd")
self.assertEquals(
post.url, f"{TWITTER_URL}/RobertsSpaceInd/1288550304095416320"
)
self.assertEquals(
post.publication_date, pytz.utc.localize(datetime(2020, 7, 29, 19, 1, 47))
)
# Note that only one media type can be uploaded to a Tweet # Note that only one media type can be uploaded to a Tweet
# see https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/extended-entities-object # see https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/extended-entities-object
@skip("Not implemented") @skip("Not implemented")

View file

@ -1,4 +1,13 @@
from datetime import datetime
from django.template.defaultfilters import truncatechars
from django.utils.html import format_html
import pytz
from newsreader.news.collection.base import Builder, Client, Collector, Stream from newsreader.news.collection.base import Builder, Client, Collector, Stream
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.core.models import Post
TWITTER_URL = "https://twitter.com" TWITTER_URL = "https://twitter.com"
@ -9,18 +18,38 @@ class TwitterScheduler:
class TwitterBuilder(Builder):
    """Turn a list of tweet payloads into unsaved ``Post`` instances.

    The inherited ``Builder.__enter__`` collects the already stored posts and
    then calls :meth:`create_posts`; the inherited ``save`` persists whatever
    ends up in ``self.instances``.
    """

    # ``Builder.__enter__`` filters existing posts on ``self.rule_type``. The
    # attribute has to carry exactly that name, otherwise the lookup silently
    # falls back to the base class default of ``None``.
    rule_type = RuleTypeChoices.twitter
    # Previous spelling of the attribute, kept as an alias so that anything
    # still reading it keeps working.
    rule__type = rule_type

    def create_posts(self, stream):
        """Populate ``self.instances`` from a ``(data, stream)`` pair.

        ``data`` is the iterable of tweet dicts and ``stream`` exposes the
        collection rule as ``stream.rule``. Nothing is built when ``data``
        is empty or ``None``.
        """
        data, stream = stream

        if not data:
            return

        self.instances = self.build(data, stream.rule)

    def build(self, posts, rule):
        """Return one unsaved ``Post`` per tweet, de-duplicated on ``id_str``.

        Each tweet dict must provide ``id_str``, ``created_at`` and
        ``full_text``; a missing key raises ``KeyError`` and an unparsable
        ``created_at`` raises ``ValueError``. The result is the values view
        of an insertion-ordered dict, so a repeated ``id_str`` keeps only the
        last occurrence.
        """
        # Imported here so the fix does not depend on the module's import
        # block being edited as well.
        from django.utils.safestring import mark_safe

        results = {}

        for post in posts:
            remote_identifier = post["id_str"]
            full_text = post["full_text"]

            # The pattern hard-codes the "+0000" offset, so the parsed naive
            # datetime is attached to UTC explicitly.
            publication_date = pytz.utc.localize(
                datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
            )

            results[remote_identifier] = Post(
                remote_identifier=remote_identifier,
                title=truncatechars(full_text, 40),
                # The text used to be passed to ``format_html`` as the format
                # string itself, which breaks on any tweet containing "{" or
                # "}". ``mark_safe`` yields the same value for brace-free text
                # without interpreting it as a template.
                # NOTE(review): like before, this trusts the upstream text to
                # be HTML-safe -- confirm that assumption.
                body=mark_safe(full_text),
                author=rule.screen_name,
                publication_date=publication_date,
                url=f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}",
                rule=rule,
            )

        return results.values()
class TwitterStream(Stream): class TwitterStream(Stream):