Add simple builder scenario
This commit is contained in:
parent
90bbcb4d27
commit
f5d1e9ba5f
8 changed files with 107 additions and 49 deletions
|
|
@ -2,6 +2,7 @@ from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from newsreader.news.collection.exceptions import StreamParseException
|
from newsreader.news.collection.exceptions import StreamParseException
|
||||||
from newsreader.news.collection.utils import fetch
|
from newsreader.news.collection.utils import fetch
|
||||||
|
from newsreader.news.core.models import Post
|
||||||
|
|
||||||
|
|
||||||
class Stream:
|
class Stream:
|
||||||
|
|
@ -54,22 +55,33 @@ class Builder:
|
||||||
|
|
||||||
instances = []
|
instances = []
|
||||||
stream = None
|
stream = None
|
||||||
|
rule_type = None
|
||||||
|
|
||||||
def __init__(self, stream):
    """Remember the stream this builder will turn into posts.

    ``stream`` is later unpacked as a two-item pair by ``__enter__``
    (``_, stream = self.stream``).
    """
    self.stream = stream
|
||||||
|
|
||||||
def __enter__(self):
    """Load the rule's existing posts, build new ones and return the builder.

    ``self.stream`` is unpacked as a two-item pair; only the second item is
    used here and it must expose a ``rule`` attribute. Existing posts are
    indexed by ``remote_identifier`` in ``self.existing_posts`` before
    ``create_posts`` is invoked with the full ``self.stream`` pair.

    Returns:
        The builder itself, so it can be bound with ``with ... as builder``.
    """
    _, stream = self.stream

    # Filtering on ``rule__type=None`` is an ``IS NULL`` lookup and would
    # match nothing, so only narrow by type when one has been declared.
    lookup = {"rule": stream.rule}
    if self.rule_type is not None:
        lookup["rule__type"] = self.rule_type

    self.instances = []
    self.existing_posts = {
        post.remote_identifier: post for post in Post.objects.filter(**lookup)
    }

    self.create_posts(self.stream)

    return self
|
||||||
|
|
||||||
def __exit__(self, *args, **kwargs):
    """Leave the context; no cleanup is done and ``None`` is returned."""
    pass
|
||||||
|
|
||||||
def create_posts(self, stream):
    """Hook called by ``__enter__`` with ``self.stream``; must be overridden.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
def save(self):
    """Call ``save()`` on every post held in ``self.instances``."""
    for post in self.instances:
        post.save()
|
||||||
|
|
||||||
class Meta:
    # NOTE(review): presumably only a marker that the builder is meant to be
    # subclassed; nothing visible here reads this flag -- confirm.
    abstract = True
|
||||||
|
|
|
||||||
|
|
@ -37,20 +37,7 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class FeedBuilder(Builder):
|
class FeedBuilder(Builder):
|
||||||
instances = []
|
rule__type = RuleTypeChoices.feed
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
_, stream = self.stream
|
|
||||||
|
|
||||||
self.instances = []
|
|
||||||
self.existing_posts = {
|
|
||||||
post.remote_identifier: post
|
|
||||||
for post in Post.objects.filter(
|
|
||||||
rule=stream.rule, rule__type=RuleTypeChoices.feed
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
return super().__enter__()
|
|
||||||
|
|
||||||
def create_posts(self, stream):
|
def create_posts(self, stream):
|
||||||
data, stream = stream
|
data, stream = stream
|
||||||
|
|
@ -114,10 +101,6 @@ class FeedBuilder(Builder):
|
||||||
content = "\n ".join([item.get("value") for item in items])
|
content = "\n ".join([item.get("value") for item in items])
|
||||||
return self.sanitize_fragment(content)
|
return self.sanitize_fragment(content)
|
||||||
|
|
||||||
def save(self):
|
|
||||||
for post in self.instances:
|
|
||||||
post.save()
|
|
||||||
|
|
||||||
|
|
||||||
class FeedStream(Stream):
|
class FeedStream(Stream):
|
||||||
def read(self):
|
def read(self):
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,29 @@
|
||||||
|
# Generated by Django 3.0.7 on 2020-08-07 18:30
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Add ``screen_name`` to ``collectionrule`` and alter its ``type`` field."""

    # Must run after the migration that introduced the ``type`` column.
    dependencies = [("collection", "0008_collectionrule_type")]

    operations = [
        # Optional free-text column: may be blank in forms and NULL in the DB.
        migrations.AddField(
            model_name="collectionrule",
            name="screen_name",
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
        # ``type`` is declared with three choices and defaults to "feed".
        migrations.AlterField(
            model_name="collectionrule",
            name="type",
            field=models.CharField(
                choices=[
                    ("feed", "Feed"),
                    ("subreddit", "Subreddit"),
                    ("twitter", "Twitter"),
                ],
                default="feed",
                max_length=20,
            ),
        ),
    ]
|
||||||
|
|
@ -57,11 +57,15 @@ class CollectionRule(TimeStampedModel):
|
||||||
on_delete=models.CASCADE,
|
on_delete=models.CASCADE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Twitter
|
||||||
|
screen_name = models.CharField(max_length=255, blank=True, null=True)
|
||||||
|
|
||||||
objects = CollectionRuleQuerySet.as_manager()
|
objects = CollectionRuleQuerySet.as_manager()
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.name
|
return self.name
|
||||||
|
|
||||||
|
# TODO add twitter url
|
||||||
@property
|
@property
|
||||||
def update_url(self):
|
def update_url(self):
|
||||||
if self.type == RuleTypeChoices.subreddit:
|
if self.type == RuleTypeChoices.subreddit:
|
||||||
|
|
|
||||||
|
|
@ -94,18 +94,7 @@ def get_reddit_access_token(code, user):
|
||||||
|
|
||||||
|
|
||||||
class RedditBuilder(Builder):
|
class RedditBuilder(Builder):
|
||||||
def __enter__(self):
|
rule__type = RuleTypeChoices.subreddit
|
||||||
_, stream = self.stream
|
|
||||||
|
|
||||||
self.instances = []
|
|
||||||
self.existing_posts = {
|
|
||||||
post.remote_identifier: post
|
|
||||||
for post in Post.objects.filter(
|
|
||||||
rule=stream.rule, rule__type=RuleTypeChoices.subreddit
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
return super().__enter__()
|
|
||||||
|
|
||||||
def create_posts(self, stream):
|
def create_posts(self, stream):
|
||||||
data, stream = stream
|
data, stream = stream
|
||||||
|
|
@ -218,10 +207,6 @@ class RedditBuilder(Builder):
|
||||||
|
|
||||||
return results.values()
|
return results.values()
|
||||||
|
|
||||||
def save(self):
|
|
||||||
for post in self.instances:
|
|
||||||
post.save()
|
|
||||||
|
|
||||||
|
|
||||||
class RedditScheduler:
|
class RedditScheduler:
|
||||||
max_amount = RATE_LIMIT
|
max_amount = RATE_LIMIT
|
||||||
|
|
|
||||||
|
|
@ -32,3 +32,4 @@ class SubredditFactory(CollectionRuleFactory):
|
||||||
|
|
||||||
class TwitterProfileFactory(CollectionRuleFactory):
    """Collection rule factory preset to the Twitter rule type."""

    type = RuleTypeChoices.twitter
    # Generated user name unless the caller passes ``screen_name`` explicitly.
    screen_name = factory.Faker("user_name")
|
||||||
|
|
|
||||||
|
|
@ -18,11 +18,10 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.maxDiff = None
|
self.maxDiff = None
|
||||||
|
|
||||||
@skip("Not implemented")
|
|
||||||
def test_simple_post(self):
|
def test_simple_post(self):
|
||||||
builder = TwitterBuilder
|
builder = TwitterBuilder
|
||||||
|
|
||||||
profile = TwitterProfileFactory()
|
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
|
||||||
mock_stream = MagicMock(rule=profile)
|
mock_stream = MagicMock(rule=profile)
|
||||||
|
|
||||||
with builder((simple_mock, mock_stream)) as builder:
|
with builder((simple_mock, mock_stream)) as builder:
|
||||||
|
|
@ -39,10 +38,10 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
full_text = "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
|
full_text = "@ArieNeoSC Here you go, goodnight!\n\nhttps://t.co/trAcIxBMlX"
|
||||||
|
|
||||||
self.assertEquals(post.rule, profile)
|
self.assertEquals(post.rule, profile)
|
||||||
self.assertEquals(post.title, truncatechars(full_text, 20))
|
self.assertEquals(post.title, truncatechars(full_text, 40))
|
||||||
self.assertEquals(post.body, format_html(full_text))
|
self.assertEquals(post.body, format_html(full_text))
|
||||||
|
|
||||||
self.assertEquals(post.author, "Star Citizen")
|
self.assertEquals(post.author, "RobertsSpaceInd")
|
||||||
self.assertEquals(
|
self.assertEquals(
|
||||||
post.url, f"{TWITTER_URL}/RobertsSpaceInd/1291528756373286914"
|
post.url, f"{TWITTER_URL}/RobertsSpaceInd/1291528756373286914"
|
||||||
)
|
)
|
||||||
|
|
@ -50,6 +49,22 @@ class TwitterBuilderTestCase(TestCase):
|
||||||
post.publication_date, pytz.utc.localize(datetime(2020, 8, 7, 0, 17, 5))
|
post.publication_date, pytz.utc.localize(datetime(2020, 8, 7, 0, 17, 5))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
post = posts["1288550304095416320"]
|
||||||
|
|
||||||
|
full_text = "@RelicCcb Hi Christoper, we have checked the status of your investigation and it is still ongoing."
|
||||||
|
|
||||||
|
self.assertEquals(post.rule, profile)
|
||||||
|
self.assertEquals(post.title, truncatechars(full_text, 40))
|
||||||
|
self.assertEquals(post.body, format_html(full_text))
|
||||||
|
|
||||||
|
self.assertEquals(post.author, "RobertsSpaceInd")
|
||||||
|
self.assertEquals(
|
||||||
|
post.url, f"{TWITTER_URL}/RobertsSpaceInd/1288550304095416320"
|
||||||
|
)
|
||||||
|
self.assertEquals(
|
||||||
|
post.publication_date, pytz.utc.localize(datetime(2020, 7, 29, 19, 1, 47))
|
||||||
|
)
|
||||||
|
|
||||||
# Note that only one media type can be uploaded to a Tweet
|
# Note that only one media type can be uploaded to a Tweet
|
||||||
# see https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/extended-entities-object
|
# see https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/extended-entities-object
|
||||||
@skip("Not implemented")
|
@skip("Not implemented")
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,13 @@
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from django.template.defaultfilters import truncatechars
|
||||||
|
from django.utils.html import format_html
|
||||||
|
|
||||||
|
import pytz
|
||||||
|
|
||||||
from newsreader.news.collection.base import Builder, Client, Collector, Stream
|
from newsreader.news.collection.base import Builder, Client, Collector, Stream
|
||||||
|
from newsreader.news.collection.choices import RuleTypeChoices
|
||||||
|
from newsreader.news.core.models import Post
|
||||||
|
|
||||||
|
|
||||||
TWITTER_URL = "https://twitter.com"
|
TWITTER_URL = "https://twitter.com"
|
||||||
|
|
@ -9,18 +18,38 @@ class TwitterScheduler:
|
||||||
|
|
||||||
|
|
||||||
class TwitterBuilder(Builder):
    """Turn a list of tweet dictionaries into unsaved ``Post`` instances."""

    # Must be spelled ``rule_type``: that is the attribute ``Builder.__enter__``
    # reads when it looks up the rule's existing posts. The previous spelling
    # (``rule__type``) left the base attribute at ``None``.
    rule_type = RuleTypeChoices.twitter

    def create_posts(self, stream):
        """Build posts from a ``(data, stream)`` pair into ``self.instances``.

        Nothing is built, and ``self.instances`` is left untouched, when
        ``data`` is empty or ``None``.
        """
        data, stream = stream

        if not data:
            return

        self.instances = self.build(data, stream.rule)

    def build(self, posts, rule):
        """Create one ``Post`` per tweet, keyed by the tweet's ``id_str``.

        Args:
            posts: iterable of mappings providing ``id_str``, ``created_at``
                and ``full_text``.
            rule: the collection rule the posts belong to; its
                ``screen_name`` is used as author and in the post URL.

        Returns:
            A view over the created ``Post`` objects. Tweets repeating an
            ``id_str`` overwrite the earlier entry.
        """
        results = {}

        for post in posts:
            remote_identifier = post["id_str"]
            # ``created_at`` carries a literal "+0000" offset, so the parsed
            # naive datetime is localized as UTC.
            publication_date = pytz.utc.localize(
                datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
            )

            # NOTE(review): format_html() receives the tweet text as its
            # format string with no arguments, so the text is marked safe
            # without escaping and any "{"/"}" in a tweet is interpreted as
            # a str.format placeholder -- confirm this is intended.
            data = {
                "remote_identifier": remote_identifier,
                "title": truncatechars(post["full_text"], 40),
                "body": format_html(post["full_text"]),
                "author": rule.screen_name,
                "publication_date": publication_date,
                "url": (f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"),
                "rule": rule,
            }

            results[remote_identifier] = Post(**data)

        return results.values()
|
||||||
|
|
||||||
|
|
||||||
class TwitterStream(Stream):
|
class TwitterStream(Stream):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue