A lot of plumbing

This commit is contained in:
Sonny Bakker 2020-09-13 13:32:52 +02:00
parent 6be8862a7d
commit 150c492628
16 changed files with 462 additions and 387 deletions

View file

@ -1,13 +1,10 @@
import bleach
from bs4 import BeautifulSoup
from newsreader.news.collection.constants import (
WHITELISTED_ATTRIBUTES,
WHITELISTED_TAGS,
)
from newsreader.news.collection.exceptions import StreamParseException
from newsreader.news.collection.utils import fetch
from newsreader.news.collection.models import CollectionRule
from newsreader.news.core.models import Post
@ -33,7 +30,7 @@ class Stream:
class Client:
"""
Retrieves the data with streams
Retrieves the data through streams
"""
stream = Stream
@ -56,33 +53,24 @@ class Client:
class Builder:
"""
Creates the collected posts
Builds instances of various types
"""
instances = []
stream = None
rule_type = None
payload = None
def __init__(self, stream):
def __init__(self, payload, stream):
self.payload = payload
self.stream = stream
def __enter__(self):
_, stream = self.stream
self.instances = []
self.existing_posts = {
post.remote_identifier: post
for post in Post.objects.filter(rule=stream.rule, rule__type=self.rule_type)
}
self.create_posts(self.stream)
return self
def __exit__(self, *args, **kwargs):
pass
def create_posts(self, stream):
def build(self):
raise NotImplementedError
def sanitize_fragment(self, fragment):
@ -97,10 +85,6 @@ class Builder:
strip_comments=True,
)
def save(self):
for post in self.instances:
post.save()
class Meta:
abstract = True
@ -118,46 +102,59 @@ class Collector:
self.builder = builder if builder else self.builder
def collect(self, rules=None):
with self.client(rules=rules) as client:
for data, stream in client:
with self.builder((data, stream)) as builder:
builder.save()
raise NotImplementedError
class Meta:
abstract = True
class WebsiteStream(Stream):
def __init__(self, url):
self.url = url
class PostBuilder(Builder):
rule_type = None
def read(self):
response = fetch(self.url)
return (self.parse(response.content), self)
def parse(self, payload):
try:
return BeautifulSoup(payload, "lxml")
except TypeError:
raise StreamParseException("Could not parse given HTML")
class URLBuilder(Builder):
def __enter__(self):
return self
self.existing_posts = {
post.remote_identifier: post
for post in Post.objects.filter(
rule=self.stream.rule, rule__type=self.rule_type
)
}
def build(self):
data, stream = self.stream
rule = stream.rule
return super().__enter__()
try:
url = data["feed"]["link"]
except (KeyError, TypeError):
url = None
def save(self):
for post in self.instances:
post.save()
if url:
rule.website_url = url
rule.save()
class Meta:
abstract = True
return rule, url
class PostStream(Stream):
    """Base stream for sources that produce posts."""

    # Rule type this stream handles; PostClient filters the enabled
    # collection rules on this value when no explicit rules are given.
    rule_type = None
class PostClient(Client):
    """
    Client base for post sources.

    Resolves which collection rules to read and offers a helper to record
    a failure on a rule.
    """

    stream = PostStream

    def __init__(self, rules=None):
        """
        Store the rules to collect.

        When ``rules`` is empty or omitted, fall back to every enabled
        ``CollectionRule`` whose type matches ``self.stream.rule_type``.
        """
        # ``None`` replaces the former mutable ``[]`` default; only the
        # truthiness of ``rules`` is inspected, so callers are unaffected.
        if rules:
            self.rules = rules
        else:
            self.rules = CollectionRule.objects.enabled().filter(
                type=self.stream.rule_type
            )

    def set_rule_error(self, rule, exception):
        """
        Mark ``rule`` as failed and store the exception message on it.

        The message is cut to the ``max_length`` of the rule's ``error``
        field, keeping the trailing part. The rule is not saved here.
        """
        length = rule._meta.get_field("error").max_length

        rule.error = exception.message[-length:]
        rule.succeeded = False
class PostCollector(Collector):
    """
    Collector for post sources: builds and saves for every payload read.
    """

    def collect(self, rules=None):
        """
        Run the client for ``rules`` and hand each yielded
        ``(payload, stream)`` pair to a builder, which is built and saved.
        """
        with self.client(rules=rules) as results:
            for payload, stream in results:
                with self.builder(payload, stream) as post_builder:
                    post_builder.build()
                    post_builder.save()

View file

@ -9,6 +9,6 @@ class RuleTypeChoices(TextChoices):
class TwitterPostTypeChoices(TextChoices):
photo = "photo", _("Poto")
photo = "photo", _("Photo")
video = "video", _("Video")
animated_gif = "animated_gif", _("GIF")

View file

@ -1,16 +1,12 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urljoin, urlparse
from newsreader.news.collection.base import (
Builder,
Client,
Collector,
Stream,
URLBuilder,
WebsiteStream,
)
from newsreader.news.collection.exceptions import StreamException
from bs4 import BeautifulSoup
from newsreader.news.collection.base import Builder, Client, Collector, Stream
from newsreader.news.collection.exceptions import StreamException, StreamParseException
from newsreader.news.collection.feed import FeedClient
from newsreader.news.collection.utils import fetch
LINK_RELS = [
@ -21,17 +17,45 @@ LINK_RELS = [
]
class WebsiteStream(Stream):
    """Stream that fetches the website of a rule and parses it as HTML."""

    def read(self):
        """
        Fetch ``self.rule.website_url`` and return ``(parsed_html, self)``.

        Raises:
            StreamParseException: when the response content cannot be parsed.
        """
        response = fetch(self.rule.website_url)

        return self.parse(response.content), self

    def parse(self, payload):
        """
        Parse ``payload`` with BeautifulSoup using the ``lxml`` parser.

        Raises:
            StreamParseException: when BeautifulSoup raises ``TypeError``.
        """
        try:
            return BeautifulSoup(payload, features="lxml")
        except TypeError as e:
            # Chain the cause explicitly, as the feed and reddit streams do.
            raise StreamParseException("Could not parse given HTML") from e
class WebsiteURLBuilder(Builder):
    """Extracts the website link from a feed payload and stores it on the rule."""

    def build(self):
        """
        Read ``payload["feed"]["link"]`` and queue ``(stream, url)`` when a
        non-empty link is present; otherwise leave ``instances`` empty.
        """
        website_url = None

        try:
            website_url = self.payload["feed"]["link"]
        except (KeyError, TypeError):
            # Payload has no usable feed link; nothing will be queued.
            pass

        if website_url:
            self.instances = [(self.stream, website_url)]
        else:
            self.instances = []

    def save(self):
        """Write each queued url to its stream's rule and save the rule."""
        for source, website_url in self.instances:
            source.rule.website_url = website_url
            source.rule.save()
class FaviconBuilder(Builder):
def build(self):
rule, soup = self.stream
rule = self.stream.rule
url = self.parse(soup, rule.website_url)
url = self.parse()
if url:
rule.favicon = url
rule.save()
self.instances = [(rule, url)] if url else []
def parse(self):
soup = self.payload
def parse(self, soup, website_url):
if not soup.head:
return
@ -44,9 +68,9 @@ class FaviconBuilder(Builder):
parsed_url = urlparse(url)
if not parsed_url.scheme and not parsed_url.netloc:
if not website_url:
if not self.stream.rule.website_url:
return
return urljoin(website_url, url)
return urljoin(self.stream.rule.website_url, url)
elif not parsed_url.scheme:
return urljoin(f"https://{parsed_url.netloc}", parsed_url.path)
@ -73,6 +97,11 @@ class FaviconBuilder(Builder):
elif icons:
return icons.pop()
def save(self):
for rule, favicon_url in self.instances:
rule.favicon = favicon_url
rule.save()
class FaviconClient(Client):
stream = WebsiteStream
@ -82,39 +111,35 @@ class FaviconClient(Client):
def __enter__(self):
with ThreadPoolExecutor(max_workers=10) as executor:
futures = {
executor.submit(stream.read): rule for rule, stream in self.streams
}
futures = [executor.submit(stream.read) for stream in self.streams]
for future in as_completed(futures):
rule = futures[future]
try:
response_data, stream = future.result()
payload, stream = future.result()
except StreamException:
continue
yield (rule, response_data)
yield payload, stream
class FaviconCollector(Collector):
feed_client, favicon_client = (FeedClient, FaviconClient)
url_builder, favicon_builder = (URLBuilder, FaviconBuilder)
url_builder, favicon_builder = (WebsiteURLBuilder, FaviconBuilder)
def collect(self, rules=None):
streams = []
with self.feed_client(rules=rules) as client:
for data, stream in client:
with self.url_builder((data, stream)) as builder:
rule, url = builder.build()
for payload, stream in client:
with self.url_builder(payload, stream) as builder:
builder.build()
builder.save()
if not url:
continue
streams.append((rule, WebsiteStream(url)))
if builder.instances:
streams.append(WebsiteStream(stream.rule))
with self.favicon_client(streams) as client:
for rule, data in client:
with self.favicon_builder((rule, data)) as builder:
for payload, stream in client:
with self.favicon_builder(payload, stream) as builder:
builder.build()
builder.save()

View file

@ -10,7 +10,12 @@ import pytz
from feedparser import parse
from newsreader.news.collection.base import Builder, Client, Collector, Stream
from newsreader.news.collection.base import (
PostBuilder,
PostClient,
PostCollector,
PostStream,
)
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.exceptions import (
StreamDeniedException,
@ -19,7 +24,6 @@ from newsreader.news.collection.exceptions import (
StreamParseException,
StreamTimeOutException,
)
from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.utils import (
build_publication_date,
fetch,
@ -31,19 +35,10 @@ from newsreader.news.core.models import Post
logger = logging.getLogger(__name__)
class FeedBuilder(Builder):
class FeedBuilder(PostBuilder):
rule__type = RuleTypeChoices.feed
def create_posts(self, stream):
data, stream = stream
with FeedDuplicateHandler(stream.rule) as duplicate_handler:
entries = data.get("entries", [])
instances = self.build(entries, stream.rule)
self.instances = duplicate_handler.check(instances)
def build(self, entries, rule):
def build(self):
field_mapping = {
"id": "remote_identifier",
"title": "title",
@ -52,40 +47,47 @@ class FeedBuilder(Builder):
"published_parsed": "publication_date",
"author": "author",
}
tz = pytz.timezone(self.stream.rule.timezone)
instances = []
tz = pytz.timezone(rule.timezone)
with FeedDuplicateHandler(self.stream.rule) as duplicate_handler:
entries = self.payload.get("entries", [])
for entry in entries:
data = {"rule_id": rule.pk}
for entry in entries:
data = {"rule_id": self.stream.rule.pk}
for field, model_field in field_mapping.items():
if not field in entry:
continue
for field, model_field in field_mapping.items():
if not field in entry:
continue
value = truncate_text(Post, model_field, entry[field])
value = truncate_text(Post, model_field, entry[field])
if field == "published_parsed":
data[model_field] = build_publication_date(value, tz)
elif field == "summary":
data[model_field] = self.sanitize_fragment(value)
else:
data[model_field] = value
if field == "published_parsed":
data[model_field] = build_publication_date(value, tz)
elif field == "summary":
data[model_field] = self.sanitize_fragment(value)
else:
data[model_field] = value
if "content" in entry:
content = self.get_content(entry["content"])
body = data.get("body", "")
if "content" in entry:
content = self.get_content(entry["content"])
body = data.get("body", "")
if not body or len(body) < len(content):
data["body"] = content
if not body or len(body) < len(content):
data["body"] = content
yield Post(**data)
instances.append(Post(**data))
self.instances = duplicate_handler.check(instances)
def get_content(self, items):
content = "\n ".join([item.get("value") for item in items])
return self.sanitize_fragment(content)
class FeedStream(Stream):
class FeedStream(PostStream):
rule_type = RuleTypeChoices.feed
def read(self):
response = fetch(self.rule.url)
@ -99,17 +101,9 @@ class FeedStream(Stream):
raise StreamParseException(response=response, message=message) from e
class FeedClient(Client):
class FeedClient(PostClient):
stream = FeedStream
def __init__(self, rules=[]):
if rules:
self.rules = rules
else:
self.rules = CollectionRule.objects.filter(
enabled=True, type=RuleTypeChoices.feed
)
def __enter__(self):
streams = [self.stream(rule) for rule in self.rules]
@ -120,13 +114,13 @@ class FeedClient(Client):
stream = futures[future]
try:
response_data = future.result()
payload = future.result()
stream.rule.error = None
stream.rule.succeeded = True
stream.rule.last_suceeded = timezone.now()
yield response_data
yield payload
except (StreamNotFoundException, StreamTimeOutException) as e:
logger.warning(f"Request failed for {stream.rule.url}")
@ -142,14 +136,8 @@ class FeedClient(Client):
finally:
stream.rule.save()
def set_rule_error(self, rule, exception):
length = rule._meta.get_field("error").max_length
rule.error = exception.message[-length:]
rule.succeeded = False
class FeedCollector(Collector):
class FeedCollector(PostCollector):
builder = FeedBuilder
client = FeedClient

View file

@ -15,7 +15,12 @@ from django.utils.html import format_html
import pytz
import requests
from newsreader.news.collection.base import Builder, Client, Collector, Stream
from newsreader.news.collection.base import (
PostBuilder,
PostClient,
PostCollector,
PostStream,
)
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.constants import (
WHITELISTED_ATTRIBUTES,
@ -92,21 +97,17 @@ def get_reddit_access_token(code, user):
return response_data["access_token"], response_data["refresh_token"]
class RedditBuilder(Builder):
rule__type = RuleTypeChoices.subreddit
class RedditBuilder(PostBuilder):
rule_type = RuleTypeChoices.subreddit
def create_posts(self, stream):
data, stream = stream
posts = []
def build(self):
results = {}
if not "data" in data or not "children" in data["data"]:
if not "data" in self.payload or not "children" in self.payload["data"]:
return
posts = data["data"]["children"]
self.instances = self.build(posts, stream.rule)
def build(self, posts, rule):
results = {}
posts = self.payload["data"]["children"]
rule = self.stream.rule
for post in posts:
if not "data" in post or post["kind"] != REDDIT_POST:
@ -170,7 +171,9 @@ class RedditBuilder(Builder):
parsed_date = datetime.fromtimestamp(post["data"]["created_utc"])
created_date = pytz.utc.localize(parsed_date)
except (OverflowError, OSError):
logging.warning(f"Failed parsing timestamp from {url_fragment}")
logging.warning(
f"Failed parsing timestamp from {REDDIT_URL}{post_url_fragment}"
)
created_date = timezone.now()
post_data = {
@ -194,52 +197,11 @@ class RedditBuilder(Builder):
results[remote_identifier] = Post(**post_data)
return results.values()
self.instances = results.values()
class RedditScheduler:
max_amount = RATE_LIMIT
max_user_amount = RATE_LIMIT / 4
def __init__(self, subreddits=[]):
if not subreddits:
self.subreddits = CollectionRule.objects.filter(
type=RuleTypeChoices.subreddit,
user__reddit_access_token__isnull=False,
user__reddit_refresh_token__isnull=False,
enabled=True,
).order_by("last_suceeded")[:200]
else:
self.subreddits = subreddits
def get_scheduled_rules(self):
rule_mapping = {}
current_amount = 0
for subreddit in self.subreddits:
user_pk = subreddit.user.pk
if current_amount == self.max_amount:
break
if user_pk in rule_mapping:
max_amount_reached = len(rule_mapping[user_pk]) == self.max_user_amount
if max_amount_reached:
continue
rule_mapping[user_pk].append(subreddit)
current_amount += 1
continue
rule_mapping[user_pk] = [subreddit]
current_amount += 1
return list(rule_mapping.values())
class RedditStream(Stream):
class RedditStream(PostStream):
rule_type = RuleTypeChoices.subreddit
headers = {}
user = None
@ -261,16 +223,13 @@ class RedditStream(Stream):
return response.json()
except JSONDecodeError as e:
raise StreamParseException(
response=response, message=f"Failed parsing json"
response=response, message="Failed parsing json"
) from e
class RedditClient(Client):
class RedditClient(PostClient):
stream = RedditStream
def __init__(self, rules=[]):
self.rules = rules
def __enter__(self):
streams = [[self.stream(rule) for rule in batch] for batch in self.rules]
rate_limitted = False
@ -324,13 +283,49 @@ class RedditClient(Client):
finally:
stream.rule.save()
def set_rule_error(self, rule, exception):
length = rule._meta.get_field("error").max_length
rule.error = exception.message[-length:]
rule.succeeded = False
class RedditCollector(Collector):
class RedditCollector(PostCollector):
builder = RedditBuilder
client = RedditClient
class RedditScheduler:
    """
    Selects which subreddit rules to collect, grouped per user.

    At most ``max_amount`` rules are scheduled in total. Once a user has
    ``max_user_amount`` rules scheduled, further rules of that user are
    skipped.
    """

    max_amount = RATE_LIMIT
    max_user_amount = RATE_LIMIT / 4

    def __init__(self, subreddits=None):
        """
        Store the candidate rules.

        When ``subreddits`` is empty or omitted, use up to 200 enabled
        subreddit rules whose user has both a reddit access token and a
        refresh token, ordered by ``last_suceeded``.
        """
        # ``None`` replaces the former mutable ``[]`` default; only the
        # truthiness of ``subreddits`` is inspected.
        if not subreddits:
            self.subreddits = CollectionRule.objects.filter(
                type=RuleTypeChoices.subreddit,
                user__reddit_access_token__isnull=False,
                user__reddit_refresh_token__isnull=False,
                enabled=True,
            ).order_by("last_suceeded")[:200]
        else:
            self.subreddits = subreddits

    def get_scheduled_rules(self):
        """
        Return a list of rule batches, one list per user, in the order the
        users are first encountered.
        """
        rule_mapping = {}
        current_amount = 0

        for subreddit in self.subreddits:
            user_pk = subreddit.user.pk

            # ``>=`` rather than ``==``: the caps derive from a division and
            # may not be whole numbers, in which case an equality test would
            # never trigger.
            if current_amount >= self.max_amount:
                break

            user_rules = rule_mapping.get(user_pk)

            if user_rules is None:
                # The first rule of a user is always scheduled.
                rule_mapping[user_pk] = [subreddit]
                current_amount += 1
                continue

            if len(user_rules) >= self.max_user_amount:
                continue

            user_rules.append(subreddit)
            current_amount += 1

        return list(rule_mapping.values())

View file

@ -1,3 +1,5 @@
from unittest.mock import Mock
from django.test import TestCase
from newsreader.news.collection.favicon import FaviconBuilder
@ -12,8 +14,11 @@ class FaviconBuilderTestCase(TestCase):
def test_simple(self):
rule = CollectionRuleFactory(favicon=None)
with FaviconBuilder((rule, simple_mock)) as builder:
with FaviconBuilder(simple_mock, Mock(rule=rule)) as builder:
builder.build()
builder.save()
rule.refresh_from_db()
self.assertEquals(rule.favicon, "https://www.bbc.com/favicon.ico")
@ -22,24 +27,33 @@ class FaviconBuilderTestCase(TestCase):
website_url="https://www.theguardian.com/", favicon=None
)
with FaviconBuilder((rule, mock_without_url)) as builder:
with FaviconBuilder(mock_without_url, Mock(rule=rule)) as builder:
builder.build()
builder.save()
rule.refresh_from_db()
self.assertEquals(rule.favicon, "https://www.theguardian.com/favicon.ico")
def test_without_header(self):
rule = CollectionRuleFactory(favicon=None)
with FaviconBuilder((rule, mock_without_header)) as builder:
with FaviconBuilder(mock_without_header, Mock(rule=rule)) as builder:
builder.build()
builder.save()
rule.refresh_from_db()
self.assertEquals(rule.favicon, None)
def test_weird_path(self):
rule = CollectionRuleFactory(favicon=None)
with FaviconBuilder((rule, mock_with_weird_path)) as builder:
with FaviconBuilder(mock_with_weird_path, Mock(rule=rule)) as builder:
builder.build()
builder.save()
rule.refresh_from_db()
self.assertEquals(
rule.favicon, "https://www.theguardian.com/jabadaba/doe/favicon.ico"
@ -48,15 +62,21 @@ class FaviconBuilderTestCase(TestCase):
def test_other_url(self):
rule = CollectionRuleFactory(favicon=None)
with FaviconBuilder((rule, mock_with_other_url)) as builder:
with FaviconBuilder(mock_with_other_url, Mock(rule=rule)) as builder:
builder.build()
builder.save()
rule.refresh_from_db()
self.assertEquals(rule.favicon, "https://www.theguardian.com/icon.png")
def test_url_with_favicon_takes_precedence(self):
rule = CollectionRuleFactory(favicon=None)
with FaviconBuilder((rule, mock_with_multiple_icons)) as builder:
with FaviconBuilder(mock_with_multiple_icons, Mock(rule=rule)) as builder:
builder.build()
builder.save()
rule.refresh_from_db()
self.assertEquals(rule.favicon, "https://www.bbc.com/favicon.ico")

View file

@ -1,4 +1,4 @@
from unittest.mock import MagicMock
from unittest.mock import Mock
from django.test import TestCase
@ -19,22 +19,22 @@ class FaviconClientTestCase(TestCase):
def test_simple(self):
rule = CollectionRuleFactory()
stream = MagicMock(url="https://www.bbc.com")
stream = Mock(url="https://www.bbc.com", rule=rule)
stream.read.return_value = (simple_mock, stream)
with FaviconClient([(rule, stream)]) as client:
for rule, data in client:
self.assertEquals(rule.pk, rule.pk)
self.assertEquals(data, simple_mock)
with FaviconClient([stream]) as client:
for payload, stream in client:
self.assertEquals(stream.rule.pk, rule.pk)
self.assertEquals(payload, simple_mock)
stream.read.assert_called_once_with()
def test_client_catches_stream_exception(self):
rule = CollectionRuleFactory(error=None, succeeded=True)
stream = MagicMock(url="https://www.bbc.com")
stream = Mock(url="https://www.bbc.com", rule=rule)
stream.read.side_effect = StreamException
with FaviconClient([(rule, stream)]) as client:
with FaviconClient([stream]) as client:
for rule, data in client:
pass
@ -46,10 +46,10 @@ class FaviconClientTestCase(TestCase):
def test_client_catches_stream_not_found_exception(self):
rule = CollectionRuleFactory(error=None, succeeded=True)
stream = MagicMock(url="https://www.bbc.com")
stream = Mock(url="https://www.bbc.com", rule=rule)
stream.read.side_effect = StreamNotFoundException
with FaviconClient([(rule, stream)]) as client:
with FaviconClient([stream]) as client:
for rule, data in client:
pass
@ -61,10 +61,10 @@ class FaviconClientTestCase(TestCase):
def test_client_catches_stream_denied_exception(self):
rule = CollectionRuleFactory(error=None, succeeded=True)
stream = MagicMock(url="https://www.bbc.com")
stream = Mock(url="https://www.bbc.com", rule=rule)
stream.read.side_effect = StreamDeniedException
with FaviconClient([(rule, stream)]) as client:
with FaviconClient([stream]) as client:
for rule, data in client:
pass
@ -76,10 +76,10 @@ class FaviconClientTestCase(TestCase):
def test_client_catches_stream_timed_out(self):
rule = CollectionRuleFactory(error=None, succeeded=True)
stream = MagicMock(url="https://www.bbc.com")
stream = Mock(url="https://www.bbc.com", rule=rule)
stream.read.side_effect = StreamTimeOutException
with FaviconClient([(rule, stream)]) as client:
with FaviconClient([stream]) as client:
for rule, data in client:
pass

View file

@ -1,4 +1,4 @@
from unittest.mock import MagicMock, patch
from unittest.mock import Mock, patch
from django.test import TestCase
@ -38,8 +38,8 @@ class FaviconCollectorTestCase(TestCase):
def test_simple(self):
rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
self.mocked_website_read.return_value = (website_mock, MagicMock())
self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.return_value = (website_mock, Mock(rule=rule))
collector = FaviconCollector()
collector.collect()
@ -54,8 +54,11 @@ class FaviconCollectorTestCase(TestCase):
def test_empty_stream(self):
rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
self.mocked_website_read.return_value = (BeautifulSoup("", "lxml"), MagicMock())
self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.return_value = (
BeautifulSoup("", "lxml"),
Mock(rule=rule),
)
collector = FaviconCollector()
collector.collect()
@ -70,7 +73,7 @@ class FaviconCollectorTestCase(TestCase):
def test_not_found(self):
rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.side_effect = StreamNotFoundException
collector = FaviconCollector()
@ -86,7 +89,7 @@ class FaviconCollectorTestCase(TestCase):
def test_denied(self):
rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.side_effect = StreamDeniedException
collector = FaviconCollector()
@ -102,7 +105,7 @@ class FaviconCollectorTestCase(TestCase):
def test_forbidden(self):
rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.side_effect = StreamForbiddenException
collector = FaviconCollector()
@ -118,7 +121,7 @@ class FaviconCollectorTestCase(TestCase):
def test_timed_out(self):
rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.side_effect = StreamTimeOutException
collector = FaviconCollector()
@ -134,7 +137,7 @@ class FaviconCollectorTestCase(TestCase):
def test_wrong_stream_content_type(self):
rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.side_effect = StreamParseException
collector = FaviconCollector()

View file

@ -1,5 +1,5 @@
from datetime import date, datetime, time
from unittest.mock import MagicMock
from unittest.mock import Mock
from django.test import TestCase
from django.utils import timezone
@ -24,9 +24,10 @@ class FeedBuilderTestCase(TestCase):
def test_basic_entry(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((simple_mock, mock_stream)) as builder:
with builder(simple_mock, mock_stream) as builder:
builder.build()
builder.save()
post = Post.objects.get()
@ -55,9 +56,10 @@ class FeedBuilderTestCase(TestCase):
def test_multiple_entries(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((multiple_mock, mock_stream)) as builder:
with builder(multiple_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = Post.objects.order_by("-publication_date")
@ -116,9 +118,10 @@ class FeedBuilderTestCase(TestCase):
def test_entries_without_remote_identifier(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_without_identifier, mock_stream)) as builder:
with builder(mock_without_identifier, mock_stream) as builder:
builder.build()
builder.save()
posts = Post.objects.order_by("-publication_date")
@ -155,9 +158,10 @@ class FeedBuilderTestCase(TestCase):
def test_entry_without_publication_date(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_without_publish_date, mock_stream)) as builder:
with builder(mock_without_publish_date, mock_stream) as builder:
builder.build()
builder.save()
posts = Post.objects.order_by("-publication_date")
@ -187,9 +191,10 @@ class FeedBuilderTestCase(TestCase):
def test_entry_without_url(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_without_url, mock_stream)) as builder:
with builder(mock_without_url, mock_stream) as builder:
builder.build()
builder.save()
posts = Post.objects.order_by("-publication_date")
@ -213,9 +218,10 @@ class FeedBuilderTestCase(TestCase):
def test_entry_without_body(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_without_body, mock_stream)) as builder:
with builder(mock_without_body, mock_stream) as builder:
builder.build()
builder.save()
posts = Post.objects.order_by("-publication_date")
@ -247,9 +253,10 @@ class FeedBuilderTestCase(TestCase):
def test_entry_without_author(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_without_author, mock_stream)) as builder:
with builder(mock_without_author, mock_stream) as builder:
builder.build()
builder.save()
posts = Post.objects.order_by("-publication_date")
@ -275,9 +282,10 @@ class FeedBuilderTestCase(TestCase):
def test_empty_entries(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_without_entries, mock_stream)) as builder:
with builder(mock_without_entries, mock_stream) as builder:
builder.build()
builder.save()
self.assertEquals(Post.objects.count(), 0)
@ -285,7 +293,7 @@ class FeedBuilderTestCase(TestCase):
def test_update_entries(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
existing_first_post = FeedPostFactory.create(
remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7", rule=rule
@ -295,7 +303,8 @@ class FeedBuilderTestCase(TestCase):
remote_identifier="a5479c66-8fae-11e9-8422-00163ef6bee7", rule=rule
)
with builder((mock_with_update_entries, mock_stream)) as builder:
with builder(mock_with_update_entries, mock_stream) as builder:
builder.build()
builder.save()
self.assertEquals(Post.objects.count(), 3)
@ -315,9 +324,10 @@ class FeedBuilderTestCase(TestCase):
def test_html_sanitizing(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_with_html, mock_stream)) as builder:
with builder(mock_with_html, mock_stream) as builder:
builder.build()
builder.save()
post = Post.objects.get()
@ -337,9 +347,10 @@ class FeedBuilderTestCase(TestCase):
def test_long_author_text_is_truncated(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_with_long_author, mock_stream)) as builder:
with builder(mock_with_long_author, mock_stream) as builder:
builder.build()
builder.save()
post = Post.objects.get()
@ -351,9 +362,10 @@ class FeedBuilderTestCase(TestCase):
def test_long_title_text_is_truncated(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_with_long_title, mock_stream)) as builder:
with builder(mock_with_long_title, mock_stream) as builder:
builder.build()
builder.save()
post = Post.objects.get()
@ -366,9 +378,10 @@ class FeedBuilderTestCase(TestCase):
def test_long_title_exotic_title(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_with_long_exotic_title, mock_stream)) as builder:
with builder(mock_with_long_exotic_title, mock_stream) as builder:
builder.build()
builder.save()
post = Post.objects.get()
@ -381,9 +394,10 @@ class FeedBuilderTestCase(TestCase):
def test_content_detail_is_prioritized_if_longer(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_with_longer_content_detail, mock_stream)) as builder:
with builder(mock_with_longer_content_detail, mock_stream) as builder:
builder.build()
builder.save()
post = Post.objects.get()
@ -398,9 +412,10 @@ class FeedBuilderTestCase(TestCase):
def test_content_detail_is_not_prioritized_if_shorter(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_with_shorter_content_detail, mock_stream)) as builder:
with builder(mock_with_shorter_content_detail, mock_stream) as builder:
builder.build()
builder.save()
post = Post.objects.get()
@ -414,9 +429,10 @@ class FeedBuilderTestCase(TestCase):
def test_content_detail_is_concatinated(self):
builder = FeedBuilder
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
with builder((mock_with_multiple_content_detail, mock_stream)) as builder:
with builder(mock_with_multiple_content_detail, mock_stream) as builder:
builder.build()
builder.save()
post = Post.objects.get()

View file

@ -1,4 +1,4 @@
from unittest.mock import MagicMock, patch
from unittest.mock import Mock, patch
from django.test import TestCase
from django.utils.lorem_ipsum import words
@ -28,7 +28,7 @@ class FeedClientTestCase(TestCase):
def test_client_retrieves_single_rules(self):
rule = FeedFactory.create()
mock_stream = MagicMock(rule=rule)
mock_stream = Mock(rule=rule)
self.mocked_read.return_value = (simple_mock, mock_stream)

View file

@ -1,6 +1,6 @@
from datetime import date, datetime, time
from time import struct_time
from unittest.mock import MagicMock, patch
from unittest.mock import Mock, patch
from django.test import TestCase
from django.utils import timezone
@ -56,7 +56,7 @@ class FeedCollectorTestCase(TestCase):
@freeze_time("2019-10-30 12:30:00")
def test_emtpy_batch(self):
self.mocked_fetch.return_value = MagicMock()
self.mocked_fetch.return_value = Mock()
self.mocked_parse.return_value = empty_mock
rule = FeedFactory()

View file

@ -1,4 +1,4 @@
from unittest.mock import MagicMock, patch
from unittest.mock import Mock, patch
from django.test import TestCase
@ -27,7 +27,7 @@ class FeedStreamTestCase(TestCase):
patch.stopall()
def test_simple_stream(self):
self.mocked_fetch.return_value = MagicMock(content=simple_mock)
self.mocked_fetch.return_value = Mock(content=simple_mock)
rule = FeedFactory()
stream = FeedStream(rule)
@ -95,7 +95,7 @@ class FeedStreamTestCase(TestCase):
@patch("newsreader.news.collection.feed.parse")
def test_stream_raises_parse_exception(self, mocked_parse):
self.mocked_fetch.return_value = MagicMock()
self.mocked_fetch.return_value = Mock()
mocked_parse.side_effect = TypeError
rule = FeedFactory()

View file

@ -1,5 +1,5 @@
from datetime import datetime
from unittest.mock import MagicMock
from unittest.mock import Mock
from django.test import TestCase
@ -20,9 +20,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((simple_mock, mock_stream)) as builder:
with builder(simple_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -65,9 +66,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((empty_mock, mock_stream)) as builder:
with builder(empty_mock, mock_stream) as builder:
builder.build()
builder.save()
self.assertEquals(Post.objects.count(), 0)
@ -76,9 +78,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((unknown_mock, mock_stream)) as builder:
with builder(unknown_mock, mock_stream) as builder:
builder.build()
builder.save()
self.assertEquals(Post.objects.count(), 0)
@ -95,9 +98,10 @@ class RedditBuilderTestCase(TestCase):
)
builder = RedditBuilder
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((simple_mock, mock_stream)) as builder:
with builder(simple_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -132,9 +136,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((unsanitized_mock, mock_stream)) as builder:
with builder(unsanitized_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -149,9 +154,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((author_mock, mock_stream)) as builder:
with builder(author_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -166,9 +172,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((title_mock, mock_stream)) as builder:
with builder(title_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -186,9 +193,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((duplicate_mock, mock_stream)) as builder:
with builder(duplicate_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -200,13 +208,14 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
duplicate_post = RedditPostFactory(
remote_identifier="hm0qct", rule=subreddit, title="foo"
)
with builder((simple_mock, mock_stream)) as builder:
with builder(simple_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -231,9 +240,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((image_mock, mock_stream)) as builder:
with builder(image_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -262,9 +272,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((external_image_mock, mock_stream)) as builder:
with builder(external_image_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -302,9 +313,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((video_mock, mock_stream)) as builder:
with builder(video_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -328,9 +340,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((external_video_mock, mock_stream)) as builder:
with builder(external_video_mock, mock_stream) as builder:
builder.build()
builder.save()
post = Post.objects.get()
@ -354,9 +367,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((external_gifv_mock, mock_stream)) as builder:
with builder(external_gifv_mock, mock_stream) as builder:
builder.build()
builder.save()
post = Post.objects.get()
@ -376,9 +390,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((simple_mock, mock_stream)) as builder:
with builder(simple_mock, mock_stream) as builder:
builder.build()
builder.save()
post = Post.objects.get(remote_identifier="hngsj8")
@ -400,9 +415,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
mock_stream = Mock(rule=subreddit)
with builder((unknown_mock, mock_stream)) as builder:
with builder(unknown_mock, mock_stream) as builder:
builder.build()
builder.save()
self.assertEquals(Post.objects.count(), 0)

View file

@ -1,10 +1,9 @@
from unittest.mock import MagicMock, patch
from unittest.mock import Mock, patch
from django.test import TestCase
from bs4 import BeautifulSoup
from newsreader.news.collection.base import URLBuilder, WebsiteStream
from newsreader.news.collection.exceptions import (
StreamDeniedException,
StreamException,
@ -13,6 +12,7 @@ from newsreader.news.collection.exceptions import (
StreamParseException,
StreamTimeOutException,
)
from newsreader.news.collection.favicon import WebsiteStream, WebsiteURLBuilder
from newsreader.news.collection.tests.factories import CollectionRuleFactory
from .mocks import feed_mock_without_link, simple_feed_mock, simple_mock
@ -20,117 +20,125 @@ from .mocks import feed_mock_without_link, simple_feed_mock, simple_mock
class WebsiteStreamTestCase(TestCase):
def setUp(self):
self.patched_fetch = patch("newsreader.news.collection.base.fetch")
self.patched_fetch = patch("newsreader.news.collection.favicon.fetch")
self.mocked_fetch = self.patched_fetch.start()
def tearDown(self):
patch.stopall()
def test_simple(self):
self.mocked_fetch.return_value = MagicMock(content=simple_mock)
self.mocked_fetch.return_value = Mock(content=simple_mock)
rule = CollectionRuleFactory()
stream = WebsiteStream(rule.url)
rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
stream = WebsiteStream(rule)
return_value = stream.read()
self.mocked_fetch.assert_called_once_with(rule.url)
self.assertEquals(return_value, (BeautifulSoup(simple_mock, "lxml"), stream))
self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
self.assertEquals(
return_value, (BeautifulSoup(simple_mock, features="lxml"), stream)
)
def test_raises_exception(self):
self.mocked_fetch.side_effect = StreamException
rule = CollectionRuleFactory()
stream = WebsiteStream(rule.url)
rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
stream = WebsiteStream(rule)
with self.assertRaises(StreamException):
stream.read()
self.mocked_fetch.assert_called_once_with(rule.url)
self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
def test_raises_denied_exception(self):
self.mocked_fetch.side_effect = StreamDeniedException
rule = CollectionRuleFactory()
stream = WebsiteStream(rule.url)
rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
stream = WebsiteStream(rule)
with self.assertRaises(StreamDeniedException):
stream.read()
self.mocked_fetch.assert_called_once_with(rule.url)
self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
def test_raises_stream_not_found_exception(self):
self.mocked_fetch.side_effect = StreamNotFoundException
rule = CollectionRuleFactory()
stream = WebsiteStream(rule.url)
rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
stream = WebsiteStream(rule)
with self.assertRaises(StreamNotFoundException):
stream.read()
self.mocked_fetch.assert_called_once_with(rule.url)
self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
def test_stream_raises_time_out_exception(self):
self.mocked_fetch.side_effect = StreamTimeOutException
rule = CollectionRuleFactory()
stream = WebsiteStream(rule.url)
rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
stream = WebsiteStream(rule)
with self.assertRaises(StreamTimeOutException):
stream.read()
self.mocked_fetch.assert_called_once_with(rule.url)
self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
def test_stream_raises_forbidden_exception(self):
self.mocked_fetch.side_effect = StreamForbiddenException
rule = CollectionRuleFactory()
stream = WebsiteStream(rule.url)
rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
stream = WebsiteStream(rule)
with self.assertRaises(StreamForbiddenException):
stream.read()
self.mocked_fetch.assert_called_once_with(rule.url)
self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
@patch("newsreader.news.collection.base.WebsiteStream.parse")
@patch("newsreader.news.collection.favicon.WebsiteStream.parse")
def test_stream_raises_parse_exception(self, mocked_parse):
self.mocked_fetch.return_value = MagicMock()
self.mocked_fetch.return_value = Mock()
mocked_parse.side_effect = StreamParseException
rule = CollectionRuleFactory()
stream = WebsiteStream(rule.url)
rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
stream = WebsiteStream(rule)
with self.assertRaises(StreamParseException):
stream.read()
self.mocked_fetch.assert_called_once_with(rule.url)
self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
class URLBuilderTestCase(TestCase):
class WebsiteURLBuilderTestCase(TestCase):
def test_simple(self):
initial_rule = CollectionRuleFactory()
with URLBuilder((simple_feed_mock, MagicMock(rule=initial_rule))) as builder:
rule, url = builder.build()
with WebsiteURLBuilder(simple_feed_mock, Mock(rule=initial_rule)) as builder:
builder.build()
builder.save()
self.assertEquals(rule.pk, initial_rule.pk)
self.assertEquals(url, "https://www.bbc.co.uk/news/")
initial_rule.refresh_from_db()
self.assertEquals(initial_rule.website_url, "https://www.bbc.co.uk/news/")
def test_no_link(self):
initial_rule = CollectionRuleFactory()
initial_rule = CollectionRuleFactory(website_url=None)
with URLBuilder(
(feed_mock_without_link, MagicMock(rule=initial_rule))
with WebsiteURLBuilder(
feed_mock_without_link, Mock(rule=initial_rule)
) as builder:
rule, url = builder.build()
builder.build()
builder.save()
self.assertEquals(rule.pk, initial_rule.pk)
self.assertEquals(url, None)
initial_rule.refresh_from_db()
self.assertEquals(initial_rule.website_url, None)
def test_no_data(self):
initial_rule = CollectionRuleFactory()
initial_rule = CollectionRuleFactory(website_url=None)
with URLBuilder((None, MagicMock(rule=initial_rule))) as builder:
rule, url = builder.build()
with WebsiteURLBuilder(None, Mock(rule=initial_rule)) as builder:
builder.build()
builder.save()
self.assertEquals(rule.pk, initial_rule.pk)
self.assertEquals(url, None)
initial_rule.refresh_from_db()
self.assertEquals(initial_rule.website_url, None)

View file

@ -34,7 +34,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile)
with builder((simple_mock, mock_stream)) as builder:
with builder(simple_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -83,7 +84,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile)
with builder((image_mock, mock_stream)) as builder:
with builder(image_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -123,7 +125,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile)
with builder((video_mock, mock_stream)) as builder:
with builder(video_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -165,7 +168,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile)
with builder((video_without_bitrate_mock, mock_stream)) as builder:
with builder(video_without_bitrate_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -186,7 +190,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile)
with builder((gif_mock, mock_stream)) as builder:
with builder(gif_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -211,7 +216,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile)
with builder((retweet_mock, mock_stream)) as builder:
with builder(retweet_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -246,7 +252,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile)
with builder((quoted_mock, mock_stream)) as builder:
with builder(quoted_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -276,7 +283,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile)
with builder(([], mock_stream)) as builder:
with builder([], mock_stream) as builder:
builder.build()
builder.save()
self.assertEquals(Post.objects.count(), 0)
@ -287,7 +295,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile)
with builder((unsanitized_mock, mock_stream)) as builder:
with builder(unsanitized_mock, mock_stream) as builder:
builder.build()
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}

View file

@ -8,7 +8,12 @@ import pytz
from ftfy import fix_text
from newsreader.news.collection.base import Builder, Client, Collector, Stream
from newsreader.news.collection.base import (
PostBuilder,
PostClient,
PostCollector,
PostStream,
)
from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices
from newsreader.news.collection.utils import truncate_text
from newsreader.news.core.models import Post
@ -20,25 +25,14 @@ TWITTER_URL = "https://twitter.com"
TWITTER_API_URL = "https://api.twitter.com/1.1"
class TwitterScheduler:
pass
class TwitterBuilder(PostBuilder):
rule_type = RuleTypeChoices.twitter
class TwitterBuilder(Builder):
rule__type = RuleTypeChoices.twitter
def create_posts(self, stream):
data, stream = stream
if not data:
return
self.instances = self.build(data, stream.rule)
def build(self, posts, rule):
def build(self):
results = {}
rule = self.stream.rule
for post in posts:
for post in self.payload:
remote_identifier = post["id_str"]
url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"
@ -83,7 +77,7 @@ class TwitterBuilder(Builder):
results[remote_identifier] = Post(**data)
return results.values()
self.instances = results.values()
def get_media_entities(self, post):
media_entities = post["extended_entities"]["media"]
@ -133,13 +127,17 @@ class TwitterBuilder(Builder):
return formatted_entities
class TwitterStream(Stream):
class TwitterStream(PostStream):
pass
class TwitterClient(Client):
class TwitterClient(PostClient):
pass
class TwitterCollector(Collector):
class TwitterCollector(PostCollector):
pass
class TwitterScheduler:
pass