Add simple builder scenario
This commit is contained in:
parent
90bbcb4d27
commit
f5d1e9ba5f
8 changed files with 107 additions and 49 deletions
|
|
@ -2,6 +2,7 @@ from bs4 import BeautifulSoup
|
|||
|
||||
from newsreader.news.collection.exceptions import StreamParseException
|
||||
from newsreader.news.collection.utils import fetch
|
||||
from newsreader.news.core.models import Post
|
||||
|
||||
|
||||
class Stream:
|
||||
|
|
@ -54,22 +55,33 @@ class Builder:
|
|||
|
||||
instances = []
|
||||
stream = None
|
||||
rule_type = None
|
||||
|
||||
def __init__(self, stream):
|
||||
self.stream = stream
|
||||
|
||||
def __enter__(self):
|
||||
_, stream = self.stream
|
||||
|
||||
self.instances = []
|
||||
self.existing_posts = {
|
||||
post.remote_identifier: post
|
||||
for post in Post.objects.filter(rule=stream.rule, rule__type=self.rule_type)
|
||||
}
|
||||
|
||||
self.create_posts(self.stream)
|
||||
|
||||
return self
|
||||
|
||||
def __exit__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def create_posts(self, stream):
|
||||
pass
|
||||
raise NotImplementedError
|
||||
|
||||
def save(self):
|
||||
pass
|
||||
for post in self.instances:
|
||||
post.save()
|
||||
|
||||
class Meta:
|
||||
abstract = True
|
||||
|
|
|
|||
|
|
@ -37,20 +37,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
|
||||
class FeedBuilder(Builder):
|
||||
instances = []
|
||||
|
||||
def __enter__(self):
|
||||
_, stream = self.stream
|
||||
|
||||
self.instances = []
|
||||
self.existing_posts = {
|
||||
post.remote_identifier: post
|
||||
for post in Post.objects.filter(
|
||||
rule=stream.rule, rule__type=RuleTypeChoices.feed
|
||||
)
|
||||
}
|
||||
|
||||
return super().__enter__()
|
||||
rule__type = RuleTypeChoices.feed
|
||||
|
||||
def create_posts(self, stream):
|
||||
data, stream = stream
|
||||
|
|
@ -114,10 +101,6 @@ class FeedBuilder(Builder):
|
|||
content = "\n ".join([item.get("value") for item in items])
|
||||
return self.sanitize_fragment(content)
|
||||
|
||||
def save(self):
|
||||
for post in self.instances:
|
||||
post.save()
|
||||
|
||||
|
||||
class FeedStream(Stream):
|
||||
def read(self):
|
||||
|
|
|
|||
|
|
@ -0,0 +1,29 @@
|
|||
# Generated by Django 3.0.7 on 2020-08-07 18:30
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [("collection", "0008_collectionrule_type")]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name="collectionrule",
|
||||
name="screen_name",
|
||||
field=models.CharField(blank=True, max_length=255, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name="collectionrule",
|
||||
name="type",
|
||||
field=models.CharField(
|
||||
choices=[
|
||||
("feed", "Feed"),
|
||||
("subreddit", "Subreddit"),
|
||||
("twitter", "Twitter"),
|
||||
],
|
||||
default="feed",
|
||||
max_length=20,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
|
@ -57,11 +57,15 @@ class CollectionRule(TimeStampedModel):
|
|||
on_delete=models.CASCADE,
|
||||
)
|
||||
|
||||
# Twitter
|
||||
screen_name = models.CharField(max_length=255, blank=True, null=True)
|
||||
|
||||
objects = CollectionRuleQuerySet.as_manager()
|
||||
|
||||
def __str__(self):
|
||||
return self.name
|
||||
|
||||
# TODO add twitter url
|
||||
@property
|
||||
def update_url(self):
|
||||
if self.type == RuleTypeChoices.subreddit:
|
||||
|
|
|
|||
|
|
@ -94,18 +94,7 @@ def get_reddit_access_token(code, user):
|
|||
|
||||
|
||||
class RedditBuilder(Builder):
|
||||
def __enter__(self):
|
||||
_, stream = self.stream
|
||||
|
||||
self.instances = []
|
||||
self.existing_posts = {
|
||||
post.remote_identifier: post
|
||||
for post in Post.objects.filter(
|
||||
rule=stream.rule, rule__type=RuleTypeChoices.subreddit
|
||||
)
|
||||
}
|
||||
|
||||
return super().__enter__()
|
||||
rule__type = RuleTypeChoices.subreddit
|
||||
|
||||
def create_posts(self, stream):
|
||||
data, stream = stream
|
||||
|
|
@ -218,10 +207,6 @@ class RedditBuilder(Builder):
|
|||
|
||||
return results.values()
|
||||
|
||||
def save(self):
|
||||
for post in self.instances:
|
||||
post.save()
|
||||
|
||||
|
||||
class RedditScheduler:
|
||||
max_amount = RATE_LIMIT
|
||||
|
|
|
|||
|
|
@ -32,3 +32,4 @@ class SubredditFactory(CollectionRuleFactory):
|
|||
|
||||
class TwitterProfileFactory(CollectionRuleFactory):
|
||||
type = RuleTypeChoices.twitter
|
||||
screen_name = factory.Faker("user_name")
|
||||
|
|
|
|||
|
|
@ -18,11 +18,10 @@ class TwitterBuilderTestCase(TestCase):
|
|||
def setUp(self):
|
||||
self.maxDiff = None
|
||||
|
||||
@skip("Not implemented")
|
||||
def test_simple_post(self):
|
||||
builder = TwitterBuilder
|
||||
|
||||
profile = TwitterProfileFactory()
|
||||
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
|
||||
mock_stream = MagicMock(rule=profile)
|
||||
|
||||
with builder((simple_mock, mock_stream)) as builder:
|
||||
|
|
@ -39,10 +38,10 @@ class TwitterBuilderTestCase(TestCase):
|
|||
full_text = "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
|
||||
|
||||
self.assertEquals(post.rule, profile)
|
||||
self.assertEquals(post.title, truncatechars(full_text, 20))
|
||||
self.assertEquals(post.title, truncatechars(full_text, 40))
|
||||
self.assertEquals(post.body, format_html(full_text))
|
||||
|
||||
self.assertEquals(post.author, "Star Citizen")
|
||||
self.assertEquals(post.author, "RobertsSpaceInd")
|
||||
self.assertEquals(
|
||||
post.url, f"{TWITTER_URL}/RobertsSpaceInd/1291528756373286914"
|
||||
)
|
||||
|
|
@ -50,6 +49,22 @@ class TwitterBuilderTestCase(TestCase):
|
|||
post.publication_date, pytz.utc.localize(datetime(2020, 8, 7, 0, 17, 5))
|
||||
)
|
||||
|
||||
post = posts["1288550304095416320"]
|
||||
|
||||
full_text = "@RelicCcb Hi Christoper, we have checked the status of your investigation and it is still ongoing."
|
||||
|
||||
self.assertEquals(post.rule, profile)
|
||||
self.assertEquals(post.title, truncatechars(full_text, 40))
|
||||
self.assertEquals(post.body, format_html(full_text))
|
||||
|
||||
self.assertEquals(post.author, "RobertsSpaceInd")
|
||||
self.assertEquals(
|
||||
post.url, f"{TWITTER_URL}/RobertsSpaceInd/1288550304095416320"
|
||||
)
|
||||
self.assertEquals(
|
||||
post.publication_date, pytz.utc.localize(datetime(2020, 7, 29, 19, 1, 47))
|
||||
)
|
||||
|
||||
# Note that only one media type can be uploaded to a Tweet
|
||||
# see https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/extended-entities-object
|
||||
@skip("Not implemented")
|
||||
|
|
|
|||
|
|
@ -1,4 +1,13 @@
|
|||
from datetime import datetime
|
||||
|
||||
from django.template.defaultfilters import truncatechars
|
||||
from django.utils.html import format_html
|
||||
|
||||
import pytz
|
||||
|
||||
from newsreader.news.collection.base import Builder, Client, Collector, Stream
|
||||
from newsreader.news.collection.choices import RuleTypeChoices
|
||||
from newsreader.news.core.models import Post
|
||||
|
||||
|
||||
TWITTER_URL = "https://twitter.com"
|
||||
|
|
@ -9,18 +18,38 @@ class TwitterScheduler:
|
|||
|
||||
|
||||
class TwitterBuilder(Builder):
|
||||
def __enter__(self):
|
||||
_, stream = self.stream
|
||||
rule__type = RuleTypeChoices.twitter
|
||||
|
||||
self.instances = []
|
||||
self.existing_posts = {
|
||||
post.remote_identifier: post
|
||||
for post in Post.objects.filter(
|
||||
rule=stream.rule, rule__type=RuleTypeChoices.twitter
|
||||
def create_posts(self, stream):
|
||||
data, stream = stream
|
||||
|
||||
if not data:
|
||||
return
|
||||
|
||||
self.instances = self.build(data, stream.rule)
|
||||
|
||||
def build(self, posts, rule):
|
||||
results = {}
|
||||
|
||||
for post in posts:
|
||||
remote_identifier = post["id_str"]
|
||||
publication_date = pytz.utc.localize(
|
||||
datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
|
||||
)
|
||||
|
||||
data = {
|
||||
"remote_identifier": remote_identifier,
|
||||
"title": truncatechars(post["full_text"], 40),
|
||||
"body": format_html(post["full_text"]),
|
||||
"author": rule.screen_name,
|
||||
"publication_date": publication_date,
|
||||
"url": (f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"),
|
||||
"rule": rule,
|
||||
}
|
||||
|
||||
return super().__enter__()
|
||||
results[remote_identifier] = Post(**data)
|
||||
|
||||
return results.values()
|
||||
|
||||
|
||||
class TwitterStream(Stream):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue