A lot of plumbing

This commit is contained in:
Sonny Bakker 2020-09-13 13:32:52 +02:00
parent 6be8862a7d
commit 150c492628
16 changed files with 462 additions and 387 deletions

View file

@ -1,13 +1,10 @@
import bleach import bleach
from bs4 import BeautifulSoup
from newsreader.news.collection.constants import ( from newsreader.news.collection.constants import (
WHITELISTED_ATTRIBUTES, WHITELISTED_ATTRIBUTES,
WHITELISTED_TAGS, WHITELISTED_TAGS,
) )
from newsreader.news.collection.exceptions import StreamParseException from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.utils import fetch
from newsreader.news.core.models import Post from newsreader.news.core.models import Post
@ -33,7 +30,7 @@ class Stream:
class Client: class Client:
""" """
Retrieves the data with streams Retrieves the data through streams
""" """
stream = Stream stream = Stream
@ -56,33 +53,24 @@ class Client:
class Builder: class Builder:
""" """
Creates the collected posts Builds instances of various types
""" """
instances = [] instances = []
stream = None stream = None
rule_type = None payload = None
def __init__(self, stream): def __init__(self, payload, stream):
self.payload = payload
self.stream = stream self.stream = stream
def __enter__(self): def __enter__(self):
_, stream = self.stream
self.instances = []
self.existing_posts = {
post.remote_identifier: post
for post in Post.objects.filter(rule=stream.rule, rule__type=self.rule_type)
}
self.create_posts(self.stream)
return self return self
def __exit__(self, *args, **kwargs): def __exit__(self, *args, **kwargs):
pass pass
def create_posts(self, stream): def build(self):
raise NotImplementedError raise NotImplementedError
def sanitize_fragment(self, fragment): def sanitize_fragment(self, fragment):
@ -97,10 +85,6 @@ class Builder:
strip_comments=True, strip_comments=True,
) )
def save(self):
for post in self.instances:
post.save()
class Meta: class Meta:
abstract = True abstract = True
@ -118,46 +102,59 @@ class Collector:
self.builder = builder if builder else self.builder self.builder = builder if builder else self.builder
def collect(self, rules=None): def collect(self, rules=None):
with self.client(rules=rules) as client: raise NotImplementedError
for data, stream in client:
with self.builder((data, stream)) as builder:
builder.save()
class Meta: class Meta:
abstract = True abstract = True
class WebsiteStream(Stream): class PostBuilder(Builder):
def __init__(self, url): rule_type = None
self.url = url
def read(self):
response = fetch(self.url)
return (self.parse(response.content), self)
def parse(self, payload):
try:
return BeautifulSoup(payload, "lxml")
except TypeError:
raise StreamParseException("Could not parse given HTML")
class URLBuilder(Builder):
def __enter__(self): def __enter__(self):
return self self.existing_posts = {
post.remote_identifier: post
for post in Post.objects.filter(
rule=self.stream.rule, rule__type=self.rule_type
)
}
def build(self): return super().__enter__()
data, stream = self.stream
rule = stream.rule
try: def save(self):
url = data["feed"]["link"] for post in self.instances:
except (KeyError, TypeError): post.save()
url = None
if url: class Meta:
rule.website_url = url abstract = True
rule.save()
return rule, url
class PostStream(Stream):
rule_type = None
class PostClient(Client):
stream = PostStream
def __init__(self, rules=[]):
if rules:
self.rules = rules
else:
self.rules = CollectionRule.objects.enabled().filter(
type=self.stream.rule_type
)
def set_rule_error(self, rule, exception):
length = rule._meta.get_field("error").max_length
rule.error = exception.message[-length:]
rule.succeeded = False
class PostCollector(Collector):
def collect(self, rules=None):
with self.client(rules=rules) as client:
for payload, stream in client:
with self.builder(payload, stream) as builder:
builder.build()
builder.save()

View file

@ -9,6 +9,6 @@ class RuleTypeChoices(TextChoices):
class TwitterPostTypeChoices(TextChoices): class TwitterPostTypeChoices(TextChoices):
photo = "photo", _("Poto") photo = "photo", _("Photo")
video = "video", _("Video") video = "video", _("Video")
animated_gif = "animated_gif", _("GIF") animated_gif = "animated_gif", _("GIF")

View file

@ -1,16 +1,12 @@
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urljoin, urlparse from urllib.parse import urljoin, urlparse
from newsreader.news.collection.base import ( from bs4 import BeautifulSoup
Builder,
Client, from newsreader.news.collection.base import Builder, Client, Collector, Stream
Collector, from newsreader.news.collection.exceptions import StreamException, StreamParseException
Stream,
URLBuilder,
WebsiteStream,
)
from newsreader.news.collection.exceptions import StreamException
from newsreader.news.collection.feed import FeedClient from newsreader.news.collection.feed import FeedClient
from newsreader.news.collection.utils import fetch
LINK_RELS = [ LINK_RELS = [
@ -21,17 +17,45 @@ LINK_RELS = [
] ]
class WebsiteStream(Stream):
def read(self):
response = fetch(self.rule.website_url)
return self.parse(response.content), self
def parse(self, payload):
try:
return BeautifulSoup(payload, features="lxml")
except TypeError:
raise StreamParseException("Could not parse given HTML")
class WebsiteURLBuilder(Builder):
def build(self):
try:
url = self.payload["feed"]["link"]
except (KeyError, TypeError):
url = None
self.instances = [(self.stream, url)] if url else []
def save(self):
for stream, url in self.instances:
stream.rule.website_url = url
stream.rule.save()
class FaviconBuilder(Builder): class FaviconBuilder(Builder):
def build(self): def build(self):
rule, soup = self.stream rule = self.stream.rule
url = self.parse(soup, rule.website_url) url = self.parse()
if url: self.instances = [(rule, url)] if url else []
rule.favicon = url
rule.save() def parse(self):
soup = self.payload
def parse(self, soup, website_url):
if not soup.head: if not soup.head:
return return
@ -44,9 +68,9 @@ class FaviconBuilder(Builder):
parsed_url = urlparse(url) parsed_url = urlparse(url)
if not parsed_url.scheme and not parsed_url.netloc: if not parsed_url.scheme and not parsed_url.netloc:
if not website_url: if not self.stream.rule.website_url:
return return
return urljoin(website_url, url) return urljoin(self.stream.rule.website_url, url)
elif not parsed_url.scheme: elif not parsed_url.scheme:
return urljoin(f"https://{parsed_url.netloc}", parsed_url.path) return urljoin(f"https://{parsed_url.netloc}", parsed_url.path)
@ -73,6 +97,11 @@ class FaviconBuilder(Builder):
elif icons: elif icons:
return icons.pop() return icons.pop()
def save(self):
for rule, favicon_url in self.instances:
rule.favicon = favicon_url
rule.save()
class FaviconClient(Client): class FaviconClient(Client):
stream = WebsiteStream stream = WebsiteStream
@ -82,39 +111,35 @@ class FaviconClient(Client):
def __enter__(self): def __enter__(self):
with ThreadPoolExecutor(max_workers=10) as executor: with ThreadPoolExecutor(max_workers=10) as executor:
futures = { futures = [executor.submit(stream.read) for stream in self.streams]
executor.submit(stream.read): rule for rule, stream in self.streams
}
for future in as_completed(futures): for future in as_completed(futures):
rule = futures[future]
try: try:
response_data, stream = future.result() payload, stream = future.result()
except StreamException: except StreamException:
continue continue
yield (rule, response_data) yield payload, stream
class FaviconCollector(Collector): class FaviconCollector(Collector):
feed_client, favicon_client = (FeedClient, FaviconClient) feed_client, favicon_client = (FeedClient, FaviconClient)
url_builder, favicon_builder = (URLBuilder, FaviconBuilder) url_builder, favicon_builder = (WebsiteURLBuilder, FaviconBuilder)
def collect(self, rules=None): def collect(self, rules=None):
streams = [] streams = []
with self.feed_client(rules=rules) as client: with self.feed_client(rules=rules) as client:
for data, stream in client: for payload, stream in client:
with self.url_builder((data, stream)) as builder: with self.url_builder(payload, stream) as builder:
rule, url = builder.build() builder.build()
builder.save()
if not url: if builder.instances:
continue streams.append(WebsiteStream(stream.rule))
streams.append((rule, WebsiteStream(url)))
with self.favicon_client(streams) as client: with self.favicon_client(streams) as client:
for rule, data in client: for payload, stream in client:
with self.favicon_builder((rule, data)) as builder: with self.favicon_builder(payload, stream) as builder:
builder.build() builder.build()
builder.save()

View file

@ -10,7 +10,12 @@ import pytz
from feedparser import parse from feedparser import parse
from newsreader.news.collection.base import Builder, Client, Collector, Stream from newsreader.news.collection.base import (
PostBuilder,
PostClient,
PostCollector,
PostStream,
)
from newsreader.news.collection.choices import RuleTypeChoices from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.exceptions import ( from newsreader.news.collection.exceptions import (
StreamDeniedException, StreamDeniedException,
@ -19,7 +24,6 @@ from newsreader.news.collection.exceptions import (
StreamParseException, StreamParseException,
StreamTimeOutException, StreamTimeOutException,
) )
from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.utils import ( from newsreader.news.collection.utils import (
build_publication_date, build_publication_date,
fetch, fetch,
@ -31,19 +35,10 @@ from newsreader.news.core.models import Post
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class FeedBuilder(Builder): class FeedBuilder(PostBuilder):
rule__type = RuleTypeChoices.feed rule__type = RuleTypeChoices.feed
def create_posts(self, stream): def build(self):
data, stream = stream
with FeedDuplicateHandler(stream.rule) as duplicate_handler:
entries = data.get("entries", [])
instances = self.build(entries, stream.rule)
self.instances = duplicate_handler.check(instances)
def build(self, entries, rule):
field_mapping = { field_mapping = {
"id": "remote_identifier", "id": "remote_identifier",
"title": "title", "title": "title",
@ -52,11 +47,14 @@ class FeedBuilder(Builder):
"published_parsed": "publication_date", "published_parsed": "publication_date",
"author": "author", "author": "author",
} }
tz = pytz.timezone(self.stream.rule.timezone)
instances = []
tz = pytz.timezone(rule.timezone) with FeedDuplicateHandler(self.stream.rule) as duplicate_handler:
entries = self.payload.get("entries", [])
for entry in entries: for entry in entries:
data = {"rule_id": rule.pk} data = {"rule_id": self.stream.rule.pk}
for field, model_field in field_mapping.items(): for field, model_field in field_mapping.items():
if not field in entry: if not field in entry:
@ -78,14 +76,18 @@ class FeedBuilder(Builder):
if not body or len(body) < len(content): if not body or len(body) < len(content):
data["body"] = content data["body"] = content
yield Post(**data) instances.append(Post(**data))
self.instances = duplicate_handler.check(instances)
def get_content(self, items): def get_content(self, items):
content = "\n ".join([item.get("value") for item in items]) content = "\n ".join([item.get("value") for item in items])
return self.sanitize_fragment(content) return self.sanitize_fragment(content)
class FeedStream(Stream): class FeedStream(PostStream):
rule_type = RuleTypeChoices.feed
def read(self): def read(self):
response = fetch(self.rule.url) response = fetch(self.rule.url)
@ -99,17 +101,9 @@ class FeedStream(Stream):
raise StreamParseException(response=response, message=message) from e raise StreamParseException(response=response, message=message) from e
class FeedClient(Client): class FeedClient(PostClient):
stream = FeedStream stream = FeedStream
def __init__(self, rules=[]):
if rules:
self.rules = rules
else:
self.rules = CollectionRule.objects.filter(
enabled=True, type=RuleTypeChoices.feed
)
def __enter__(self): def __enter__(self):
streams = [self.stream(rule) for rule in self.rules] streams = [self.stream(rule) for rule in self.rules]
@ -120,13 +114,13 @@ class FeedClient(Client):
stream = futures[future] stream = futures[future]
try: try:
response_data = future.result() payload = future.result()
stream.rule.error = None stream.rule.error = None
stream.rule.succeeded = True stream.rule.succeeded = True
stream.rule.last_suceeded = timezone.now() stream.rule.last_suceeded = timezone.now()
yield response_data yield payload
except (StreamNotFoundException, StreamTimeOutException) as e: except (StreamNotFoundException, StreamTimeOutException) as e:
logger.warning(f"Request failed for {stream.rule.url}") logger.warning(f"Request failed for {stream.rule.url}")
@ -142,14 +136,8 @@ class FeedClient(Client):
finally: finally:
stream.rule.save() stream.rule.save()
def set_rule_error(self, rule, exception):
length = rule._meta.get_field("error").max_length
rule.error = exception.message[-length:] class FeedCollector(PostCollector):
rule.succeeded = False
class FeedCollector(Collector):
builder = FeedBuilder builder = FeedBuilder
client = FeedClient client = FeedClient

View file

@ -15,7 +15,12 @@ from django.utils.html import format_html
import pytz import pytz
import requests import requests
from newsreader.news.collection.base import Builder, Client, Collector, Stream from newsreader.news.collection.base import (
PostBuilder,
PostClient,
PostCollector,
PostStream,
)
from newsreader.news.collection.choices import RuleTypeChoices from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.constants import ( from newsreader.news.collection.constants import (
WHITELISTED_ATTRIBUTES, WHITELISTED_ATTRIBUTES,
@ -92,21 +97,17 @@ def get_reddit_access_token(code, user):
return response_data["access_token"], response_data["refresh_token"] return response_data["access_token"], response_data["refresh_token"]
class RedditBuilder(Builder): class RedditBuilder(PostBuilder):
rule__type = RuleTypeChoices.subreddit rule_type = RuleTypeChoices.subreddit
def create_posts(self, stream): def build(self):
data, stream = stream results = {}
posts = []
if not "data" in data or not "children" in data["data"]: if not "data" in self.payload or not "children" in self.payload["data"]:
return return
posts = data["data"]["children"] posts = self.payload["data"]["children"]
self.instances = self.build(posts, stream.rule) rule = self.stream.rule
def build(self, posts, rule):
results = {}
for post in posts: for post in posts:
if not "data" in post or post["kind"] != REDDIT_POST: if not "data" in post or post["kind"] != REDDIT_POST:
@ -170,7 +171,9 @@ class RedditBuilder(Builder):
parsed_date = datetime.fromtimestamp(post["data"]["created_utc"]) parsed_date = datetime.fromtimestamp(post["data"]["created_utc"])
created_date = pytz.utc.localize(parsed_date) created_date = pytz.utc.localize(parsed_date)
except (OverflowError, OSError): except (OverflowError, OSError):
logging.warning(f"Failed parsing timestamp from {url_fragment}") logging.warning(
f"Failed parsing timestamp from {REDDIT_URL}{post_url_fragment}"
)
created_date = timezone.now() created_date = timezone.now()
post_data = { post_data = {
@ -194,52 +197,11 @@ class RedditBuilder(Builder):
results[remote_identifier] = Post(**post_data) results[remote_identifier] = Post(**post_data)
return results.values() self.instances = results.values()
class RedditScheduler: class RedditStream(PostStream):
max_amount = RATE_LIMIT rule_type = RuleTypeChoices.subreddit
max_user_amount = RATE_LIMIT / 4
def __init__(self, subreddits=[]):
if not subreddits:
self.subreddits = CollectionRule.objects.filter(
type=RuleTypeChoices.subreddit,
user__reddit_access_token__isnull=False,
user__reddit_refresh_token__isnull=False,
enabled=True,
).order_by("last_suceeded")[:200]
else:
self.subreddits = subreddits
def get_scheduled_rules(self):
rule_mapping = {}
current_amount = 0
for subreddit in self.subreddits:
user_pk = subreddit.user.pk
if current_amount == self.max_amount:
break
if user_pk in rule_mapping:
max_amount_reached = len(rule_mapping[user_pk]) == self.max_user_amount
if max_amount_reached:
continue
rule_mapping[user_pk].append(subreddit)
current_amount += 1
continue
rule_mapping[user_pk] = [subreddit]
current_amount += 1
return list(rule_mapping.values())
class RedditStream(Stream):
headers = {} headers = {}
user = None user = None
@ -261,16 +223,13 @@ class RedditStream(Stream):
return response.json() return response.json()
except JSONDecodeError as e: except JSONDecodeError as e:
raise StreamParseException( raise StreamParseException(
response=response, message=f"Failed parsing json" response=response, message="Failed parsing json"
) from e ) from e
class RedditClient(Client): class RedditClient(PostClient):
stream = RedditStream stream = RedditStream
def __init__(self, rules=[]):
self.rules = rules
def __enter__(self): def __enter__(self):
streams = [[self.stream(rule) for rule in batch] for batch in self.rules] streams = [[self.stream(rule) for rule in batch] for batch in self.rules]
rate_limitted = False rate_limitted = False
@ -324,13 +283,49 @@ class RedditClient(Client):
finally: finally:
stream.rule.save() stream.rule.save()
def set_rule_error(self, rule, exception):
length = rule._meta.get_field("error").max_length
rule.error = exception.message[-length:] class RedditCollector(PostCollector):
rule.succeeded = False
class RedditCollector(Collector):
builder = RedditBuilder builder = RedditBuilder
client = RedditClient client = RedditClient
class RedditScheduler:
max_amount = RATE_LIMIT
max_user_amount = RATE_LIMIT / 4
def __init__(self, subreddits=[]):
if not subreddits:
self.subreddits = CollectionRule.objects.filter(
type=RuleTypeChoices.subreddit,
user__reddit_access_token__isnull=False,
user__reddit_refresh_token__isnull=False,
enabled=True,
).order_by("last_suceeded")[:200]
else:
self.subreddits = subreddits
def get_scheduled_rules(self):
rule_mapping = {}
current_amount = 0
for subreddit in self.subreddits:
user_pk = subreddit.user.pk
if current_amount == self.max_amount:
break
if user_pk in rule_mapping:
max_amount_reached = len(rule_mapping[user_pk]) == self.max_user_amount
if max_amount_reached:
continue
rule_mapping[user_pk].append(subreddit)
current_amount += 1
continue
rule_mapping[user_pk] = [subreddit]
current_amount += 1
return list(rule_mapping.values())

View file

@ -1,3 +1,5 @@
from unittest.mock import Mock
from django.test import TestCase from django.test import TestCase
from newsreader.news.collection.favicon import FaviconBuilder from newsreader.news.collection.favicon import FaviconBuilder
@ -12,8 +14,11 @@ class FaviconBuilderTestCase(TestCase):
def test_simple(self): def test_simple(self):
rule = CollectionRuleFactory(favicon=None) rule = CollectionRuleFactory(favicon=None)
with FaviconBuilder((rule, simple_mock)) as builder: with FaviconBuilder(simple_mock, Mock(rule=rule)) as builder:
builder.build() builder.build()
builder.save()
rule.refresh_from_db()
self.assertEquals(rule.favicon, "https://www.bbc.com/favicon.ico") self.assertEquals(rule.favicon, "https://www.bbc.com/favicon.ico")
@ -22,24 +27,33 @@ class FaviconBuilderTestCase(TestCase):
website_url="https://www.theguardian.com/", favicon=None website_url="https://www.theguardian.com/", favicon=None
) )
with FaviconBuilder((rule, mock_without_url)) as builder: with FaviconBuilder(mock_without_url, Mock(rule=rule)) as builder:
builder.build() builder.build()
builder.save()
rule.refresh_from_db()
self.assertEquals(rule.favicon, "https://www.theguardian.com/favicon.ico") self.assertEquals(rule.favicon, "https://www.theguardian.com/favicon.ico")
def test_without_header(self): def test_without_header(self):
rule = CollectionRuleFactory(favicon=None) rule = CollectionRuleFactory(favicon=None)
with FaviconBuilder((rule, mock_without_header)) as builder: with FaviconBuilder(mock_without_header, Mock(rule=rule)) as builder:
builder.build() builder.build()
builder.save()
rule.refresh_from_db()
self.assertEquals(rule.favicon, None) self.assertEquals(rule.favicon, None)
def test_weird_path(self): def test_weird_path(self):
rule = CollectionRuleFactory(favicon=None) rule = CollectionRuleFactory(favicon=None)
with FaviconBuilder((rule, mock_with_weird_path)) as builder: with FaviconBuilder(mock_with_weird_path, Mock(rule=rule)) as builder:
builder.build() builder.build()
builder.save()
rule.refresh_from_db()
self.assertEquals( self.assertEquals(
rule.favicon, "https://www.theguardian.com/jabadaba/doe/favicon.ico" rule.favicon, "https://www.theguardian.com/jabadaba/doe/favicon.ico"
@ -48,15 +62,21 @@ class FaviconBuilderTestCase(TestCase):
def test_other_url(self): def test_other_url(self):
rule = CollectionRuleFactory(favicon=None) rule = CollectionRuleFactory(favicon=None)
with FaviconBuilder((rule, mock_with_other_url)) as builder: with FaviconBuilder(mock_with_other_url, Mock(rule=rule)) as builder:
builder.build() builder.build()
builder.save()
rule.refresh_from_db()
self.assertEquals(rule.favicon, "https://www.theguardian.com/icon.png") self.assertEquals(rule.favicon, "https://www.theguardian.com/icon.png")
def test_url_with_favicon_takes_precedence(self): def test_url_with_favicon_takes_precedence(self):
rule = CollectionRuleFactory(favicon=None) rule = CollectionRuleFactory(favicon=None)
with FaviconBuilder((rule, mock_with_multiple_icons)) as builder: with FaviconBuilder(mock_with_multiple_icons, Mock(rule=rule)) as builder:
builder.build() builder.build()
builder.save()
rule.refresh_from_db()
self.assertEquals(rule.favicon, "https://www.bbc.com/favicon.ico") self.assertEquals(rule.favicon, "https://www.bbc.com/favicon.ico")

View file

@ -1,4 +1,4 @@
from unittest.mock import MagicMock from unittest.mock import Mock
from django.test import TestCase from django.test import TestCase
@ -19,22 +19,22 @@ class FaviconClientTestCase(TestCase):
def test_simple(self): def test_simple(self):
rule = CollectionRuleFactory() rule = CollectionRuleFactory()
stream = MagicMock(url="https://www.bbc.com") stream = Mock(url="https://www.bbc.com", rule=rule)
stream.read.return_value = (simple_mock, stream) stream.read.return_value = (simple_mock, stream)
with FaviconClient([(rule, stream)]) as client: with FaviconClient([stream]) as client:
for rule, data in client: for payload, stream in client:
self.assertEquals(rule.pk, rule.pk) self.assertEquals(stream.rule.pk, rule.pk)
self.assertEquals(data, simple_mock) self.assertEquals(payload, simple_mock)
stream.read.assert_called_once_with() stream.read.assert_called_once_with()
def test_client_catches_stream_exception(self): def test_client_catches_stream_exception(self):
rule = CollectionRuleFactory(error=None, succeeded=True) rule = CollectionRuleFactory(error=None, succeeded=True)
stream = MagicMock(url="https://www.bbc.com") stream = Mock(url="https://www.bbc.com", rule=rule)
stream.read.side_effect = StreamException stream.read.side_effect = StreamException
with FaviconClient([(rule, stream)]) as client: with FaviconClient([stream]) as client:
for rule, data in client: for rule, data in client:
pass pass
@ -46,10 +46,10 @@ class FaviconClientTestCase(TestCase):
def test_client_catches_stream_not_found_exception(self): def test_client_catches_stream_not_found_exception(self):
rule = CollectionRuleFactory(error=None, succeeded=True) rule = CollectionRuleFactory(error=None, succeeded=True)
stream = MagicMock(url="https://www.bbc.com") stream = Mock(url="https://www.bbc.com", rule=rule)
stream.read.side_effect = StreamNotFoundException stream.read.side_effect = StreamNotFoundException
with FaviconClient([(rule, stream)]) as client: with FaviconClient([stream]) as client:
for rule, data in client: for rule, data in client:
pass pass
@ -61,10 +61,10 @@ class FaviconClientTestCase(TestCase):
def test_client_catches_stream_denied_exception(self): def test_client_catches_stream_denied_exception(self):
rule = CollectionRuleFactory(error=None, succeeded=True) rule = CollectionRuleFactory(error=None, succeeded=True)
stream = MagicMock(url="https://www.bbc.com") stream = Mock(url="https://www.bbc.com", rule=rule)
stream.read.side_effect = StreamDeniedException stream.read.side_effect = StreamDeniedException
with FaviconClient([(rule, stream)]) as client: with FaviconClient([stream]) as client:
for rule, data in client: for rule, data in client:
pass pass
@ -76,10 +76,10 @@ class FaviconClientTestCase(TestCase):
def test_client_catches_stream_timed_out(self): def test_client_catches_stream_timed_out(self):
rule = CollectionRuleFactory(error=None, succeeded=True) rule = CollectionRuleFactory(error=None, succeeded=True)
stream = MagicMock(url="https://www.bbc.com") stream = Mock(url="https://www.bbc.com", rule=rule)
stream.read.side_effect = StreamTimeOutException stream.read.side_effect = StreamTimeOutException
with FaviconClient([(rule, stream)]) as client: with FaviconClient([stream]) as client:
for rule, data in client: for rule, data in client:
pass pass

View file

@ -1,4 +1,4 @@
from unittest.mock import MagicMock, patch from unittest.mock import Mock, patch
from django.test import TestCase from django.test import TestCase
@ -38,8 +38,8 @@ class FaviconCollectorTestCase(TestCase):
def test_simple(self): def test_simple(self):
rule = CollectionRuleFactory(succeeded=True, error=None) rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.return_value = (website_mock, MagicMock()) self.mocked_website_read.return_value = (website_mock, Mock(rule=rule))
collector = FaviconCollector() collector = FaviconCollector()
collector.collect() collector.collect()
@ -54,8 +54,11 @@ class FaviconCollectorTestCase(TestCase):
def test_empty_stream(self): def test_empty_stream(self):
rule = CollectionRuleFactory(succeeded=True, error=None) rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.return_value = (BeautifulSoup("", "lxml"), MagicMock()) self.mocked_website_read.return_value = (
BeautifulSoup("", "lxml"),
Mock(rule=rule),
)
collector = FaviconCollector() collector = FaviconCollector()
collector.collect() collector.collect()
@ -70,7 +73,7 @@ class FaviconCollectorTestCase(TestCase):
def test_not_found(self): def test_not_found(self):
rule = CollectionRuleFactory(succeeded=True, error=None) rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.side_effect = StreamNotFoundException self.mocked_website_read.side_effect = StreamNotFoundException
collector = FaviconCollector() collector = FaviconCollector()
@ -86,7 +89,7 @@ class FaviconCollectorTestCase(TestCase):
def test_denied(self): def test_denied(self):
rule = CollectionRuleFactory(succeeded=True, error=None) rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.side_effect = StreamDeniedException self.mocked_website_read.side_effect = StreamDeniedException
collector = FaviconCollector() collector = FaviconCollector()
@ -102,7 +105,7 @@ class FaviconCollectorTestCase(TestCase):
def test_forbidden(self): def test_forbidden(self):
rule = CollectionRuleFactory(succeeded=True, error=None) rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.side_effect = StreamForbiddenException self.mocked_website_read.side_effect = StreamForbiddenException
collector = FaviconCollector() collector = FaviconCollector()
@ -118,7 +121,7 @@ class FaviconCollectorTestCase(TestCase):
def test_timed_out(self): def test_timed_out(self):
rule = CollectionRuleFactory(succeeded=True, error=None) rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.side_effect = StreamTimeOutException self.mocked_website_read.side_effect = StreamTimeOutException
collector = FaviconCollector() collector = FaviconCollector()
@ -134,7 +137,7 @@ class FaviconCollectorTestCase(TestCase):
def test_wrong_stream_content_type(self): def test_wrong_stream_content_type(self):
rule = CollectionRuleFactory(succeeded=True, error=None) rule = CollectionRuleFactory(succeeded=True, error=None)
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
self.mocked_website_read.side_effect = StreamParseException self.mocked_website_read.side_effect = StreamParseException
collector = FaviconCollector() collector = FaviconCollector()

View file

@ -1,5 +1,5 @@
from datetime import date, datetime, time from datetime import date, datetime, time
from unittest.mock import MagicMock from unittest.mock import Mock
from django.test import TestCase from django.test import TestCase
from django.utils import timezone from django.utils import timezone
@ -24,9 +24,10 @@ class FeedBuilderTestCase(TestCase):
def test_basic_entry(self): def test_basic_entry(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((simple_mock, mock_stream)) as builder: with builder(simple_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
post = Post.objects.get() post = Post.objects.get()
@ -55,9 +56,10 @@ class FeedBuilderTestCase(TestCase):
def test_multiple_entries(self): def test_multiple_entries(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((multiple_mock, mock_stream)) as builder: with builder(multiple_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = Post.objects.order_by("-publication_date") posts = Post.objects.order_by("-publication_date")
@ -116,9 +118,10 @@ class FeedBuilderTestCase(TestCase):
def test_entries_without_remote_identifier(self): def test_entries_without_remote_identifier(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((mock_without_identifier, mock_stream)) as builder: with builder(mock_without_identifier, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = Post.objects.order_by("-publication_date") posts = Post.objects.order_by("-publication_date")
@ -155,9 +158,10 @@ class FeedBuilderTestCase(TestCase):
def test_entry_without_publication_date(self): def test_entry_without_publication_date(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((mock_without_publish_date, mock_stream)) as builder: with builder(mock_without_publish_date, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = Post.objects.order_by("-publication_date") posts = Post.objects.order_by("-publication_date")
@ -187,9 +191,10 @@ class FeedBuilderTestCase(TestCase):
def test_entry_without_url(self): def test_entry_without_url(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((mock_without_url, mock_stream)) as builder: with builder(mock_without_url, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = Post.objects.order_by("-publication_date") posts = Post.objects.order_by("-publication_date")
@ -213,9 +218,10 @@ class FeedBuilderTestCase(TestCase):
def test_entry_without_body(self): def test_entry_without_body(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((mock_without_body, mock_stream)) as builder: with builder(mock_without_body, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = Post.objects.order_by("-publication_date") posts = Post.objects.order_by("-publication_date")
@ -247,9 +253,10 @@ class FeedBuilderTestCase(TestCase):
def test_entry_without_author(self): def test_entry_without_author(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((mock_without_author, mock_stream)) as builder: with builder(mock_without_author, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = Post.objects.order_by("-publication_date") posts = Post.objects.order_by("-publication_date")
@ -275,9 +282,10 @@ class FeedBuilderTestCase(TestCase):
def test_empty_entries(self):
    """Building from ``mock_without_entries`` must leave no posts behind."""
    rule = FeedFactory()
    mock_stream = Mock(rule=rule)

    # The builder takes the payload and the stream as separate arguments
    # and is driven explicitly: build() first, then save().
    with FeedBuilder(mock_without_entries, mock_stream) as builder:
        builder.build()
        builder.save()

    # assertEqual instead of the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(Post.objects.count(), 0)
@ -285,7 +293,7 @@ class FeedBuilderTestCase(TestCase):
def test_update_entries(self): def test_update_entries(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
existing_first_post = FeedPostFactory.create( existing_first_post = FeedPostFactory.create(
remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7", rule=rule remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7", rule=rule
@ -295,7 +303,8 @@ class FeedBuilderTestCase(TestCase):
remote_identifier="a5479c66-8fae-11e9-8422-00163ef6bee7", rule=rule remote_identifier="a5479c66-8fae-11e9-8422-00163ef6bee7", rule=rule
) )
with builder((mock_with_update_entries, mock_stream)) as builder: with builder(mock_with_update_entries, mock_stream) as builder:
builder.build()
builder.save() builder.save()
self.assertEquals(Post.objects.count(), 3) self.assertEquals(Post.objects.count(), 3)
@ -315,9 +324,10 @@ class FeedBuilderTestCase(TestCase):
def test_html_sanitizing(self): def test_html_sanitizing(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((mock_with_html, mock_stream)) as builder: with builder(mock_with_html, mock_stream) as builder:
builder.build()
builder.save() builder.save()
post = Post.objects.get() post = Post.objects.get()
@ -337,9 +347,10 @@ class FeedBuilderTestCase(TestCase):
def test_long_author_text_is_truncated(self): def test_long_author_text_is_truncated(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((mock_with_long_author, mock_stream)) as builder: with builder(mock_with_long_author, mock_stream) as builder:
builder.build()
builder.save() builder.save()
post = Post.objects.get() post = Post.objects.get()
@ -351,9 +362,10 @@ class FeedBuilderTestCase(TestCase):
def test_long_title_text_is_truncated(self): def test_long_title_text_is_truncated(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((mock_with_long_title, mock_stream)) as builder: with builder(mock_with_long_title, mock_stream) as builder:
builder.build()
builder.save() builder.save()
post = Post.objects.get() post = Post.objects.get()
@ -366,9 +378,10 @@ class FeedBuilderTestCase(TestCase):
def test_long_title_exotic_title(self): def test_long_title_exotic_title(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((mock_with_long_exotic_title, mock_stream)) as builder: with builder(mock_with_long_exotic_title, mock_stream) as builder:
builder.build()
builder.save() builder.save()
post = Post.objects.get() post = Post.objects.get()
@ -381,9 +394,10 @@ class FeedBuilderTestCase(TestCase):
def test_content_detail_is_prioritized_if_longer(self): def test_content_detail_is_prioritized_if_longer(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((mock_with_longer_content_detail, mock_stream)) as builder: with builder(mock_with_longer_content_detail, mock_stream) as builder:
builder.build()
builder.save() builder.save()
post = Post.objects.get() post = Post.objects.get()
@ -398,9 +412,10 @@ class FeedBuilderTestCase(TestCase):
def test_content_detail_is_not_prioritized_if_shorter(self): def test_content_detail_is_not_prioritized_if_shorter(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((mock_with_shorter_content_detail, mock_stream)) as builder: with builder(mock_with_shorter_content_detail, mock_stream) as builder:
builder.build()
builder.save() builder.save()
post = Post.objects.get() post = Post.objects.get()
@ -414,9 +429,10 @@ class FeedBuilderTestCase(TestCase):
def test_content_detail_is_concatinated(self): def test_content_detail_is_concatinated(self):
builder = FeedBuilder builder = FeedBuilder
rule = FeedFactory() rule = FeedFactory()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
with builder((mock_with_multiple_content_detail, mock_stream)) as builder: with builder(mock_with_multiple_content_detail, mock_stream) as builder:
builder.build()
builder.save() builder.save()
post = Post.objects.get() post = Post.objects.get()

View file

@ -1,4 +1,4 @@
from unittest.mock import MagicMock, patch from unittest.mock import Mock, patch
from django.test import TestCase from django.test import TestCase
from django.utils.lorem_ipsum import words from django.utils.lorem_ipsum import words
@ -28,7 +28,7 @@ class FeedClientTestCase(TestCase):
def test_client_retrieves_single_rules(self): def test_client_retrieves_single_rules(self):
rule = FeedFactory.create() rule = FeedFactory.create()
mock_stream = MagicMock(rule=rule) mock_stream = Mock(rule=rule)
self.mocked_read.return_value = (simple_mock, mock_stream) self.mocked_read.return_value = (simple_mock, mock_stream)

View file

@ -1,6 +1,6 @@
from datetime import date, datetime, time from datetime import date, datetime, time
from time import struct_time from time import struct_time
from unittest.mock import MagicMock, patch from unittest.mock import Mock, patch
from django.test import TestCase from django.test import TestCase
from django.utils import timezone from django.utils import timezone
@ -56,7 +56,7 @@ class FeedCollectorTestCase(TestCase):
@freeze_time("2019-10-30 12:30:00") @freeze_time("2019-10-30 12:30:00")
def test_emtpy_batch(self): def test_emtpy_batch(self):
self.mocked_fetch.return_value = MagicMock() self.mocked_fetch.return_value = Mock()
self.mocked_parse.return_value = empty_mock self.mocked_parse.return_value = empty_mock
rule = FeedFactory() rule = FeedFactory()

View file

@ -1,4 +1,4 @@
from unittest.mock import MagicMock, patch from unittest.mock import Mock, patch
from django.test import TestCase from django.test import TestCase
@ -27,7 +27,7 @@ class FeedStreamTestCase(TestCase):
patch.stopall() patch.stopall()
def test_simple_stream(self): def test_simple_stream(self):
self.mocked_fetch.return_value = MagicMock(content=simple_mock) self.mocked_fetch.return_value = Mock(content=simple_mock)
rule = FeedFactory() rule = FeedFactory()
stream = FeedStream(rule) stream = FeedStream(rule)
@ -95,7 +95,7 @@ class FeedStreamTestCase(TestCase):
@patch("newsreader.news.collection.feed.parse") @patch("newsreader.news.collection.feed.parse")
def test_stream_raises_parse_exception(self, mocked_parse): def test_stream_raises_parse_exception(self, mocked_parse):
self.mocked_fetch.return_value = MagicMock() self.mocked_fetch.return_value = Mock()
mocked_parse.side_effect = TypeError mocked_parse.side_effect = TypeError
rule = FeedFactory() rule = FeedFactory()

View file

@ -1,5 +1,5 @@
from datetime import datetime from datetime import datetime
from unittest.mock import MagicMock from unittest.mock import Mock
from django.test import TestCase from django.test import TestCase
@ -20,9 +20,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((simple_mock, mock_stream)) as builder: with builder(simple_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -65,9 +66,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((empty_mock, mock_stream)) as builder: with builder(empty_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
self.assertEquals(Post.objects.count(), 0) self.assertEquals(Post.objects.count(), 0)
@ -76,9 +78,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((unknown_mock, mock_stream)) as builder: with builder(unknown_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
self.assertEquals(Post.objects.count(), 0) self.assertEquals(Post.objects.count(), 0)
@ -95,9 +98,10 @@ class RedditBuilderTestCase(TestCase):
) )
builder = RedditBuilder builder = RedditBuilder
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((simple_mock, mock_stream)) as builder: with builder(simple_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -132,9 +136,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((unsanitized_mock, mock_stream)) as builder: with builder(unsanitized_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -149,9 +154,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((author_mock, mock_stream)) as builder: with builder(author_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -166,9 +172,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((title_mock, mock_stream)) as builder: with builder(title_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -186,9 +193,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((duplicate_mock, mock_stream)) as builder: with builder(duplicate_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -200,13 +208,14 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
duplicate_post = RedditPostFactory( duplicate_post = RedditPostFactory(
remote_identifier="hm0qct", rule=subreddit, title="foo" remote_identifier="hm0qct", rule=subreddit, title="foo"
) )
with builder((simple_mock, mock_stream)) as builder: with builder(simple_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -231,9 +240,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((image_mock, mock_stream)) as builder: with builder(image_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -262,9 +272,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((external_image_mock, mock_stream)) as builder: with builder(external_image_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -302,9 +313,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((video_mock, mock_stream)) as builder: with builder(video_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -328,9 +340,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((external_video_mock, mock_stream)) as builder: with builder(external_video_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
post = Post.objects.get() post = Post.objects.get()
@ -354,9 +367,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((external_gifv_mock, mock_stream)) as builder: with builder(external_gifv_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
post = Post.objects.get() post = Post.objects.get()
@ -376,9 +390,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((simple_mock, mock_stream)) as builder: with builder(simple_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
post = Post.objects.get(remote_identifier="hngsj8") post = Post.objects.get(remote_identifier="hngsj8")
@ -400,9 +415,10 @@ class RedditBuilderTestCase(TestCase):
builder = RedditBuilder builder = RedditBuilder
subreddit = SubredditFactory() subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit) mock_stream = Mock(rule=subreddit)
with builder((unknown_mock, mock_stream)) as builder: with builder(unknown_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
self.assertEquals(Post.objects.count(), 0) self.assertEquals(Post.objects.count(), 0)

View file

@ -1,10 +1,9 @@
from unittest.mock import MagicMock, patch from unittest.mock import Mock, patch
from django.test import TestCase from django.test import TestCase
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from newsreader.news.collection.base import URLBuilder, WebsiteStream
from newsreader.news.collection.exceptions import ( from newsreader.news.collection.exceptions import (
StreamDeniedException, StreamDeniedException,
StreamException, StreamException,
@ -13,6 +12,7 @@ from newsreader.news.collection.exceptions import (
StreamParseException, StreamParseException,
StreamTimeOutException, StreamTimeOutException,
) )
from newsreader.news.collection.favicon import WebsiteStream, WebsiteURLBuilder
from newsreader.news.collection.tests.factories import CollectionRuleFactory from newsreader.news.collection.tests.factories import CollectionRuleFactory
from .mocks import feed_mock_without_link, simple_feed_mock, simple_mock from .mocks import feed_mock_without_link, simple_feed_mock, simple_mock
@ -20,117 +20,125 @@ from .mocks import feed_mock_without_link, simple_feed_mock, simple_mock
class WebsiteStreamTestCase(TestCase):
    """Tests for ``WebsiteStream.read`` with ``favicon.fetch`` patched out.

    Every test creates a rule whose ``website_url`` is ``WEBSITE_URL`` and
    checks that the stream fetched exactly that URL once.
    """

    # URL stored on each rule under test and expected in the fetch call.
    WEBSITE_URL = "https://www.bbc.co.uk/news/"

    def setUp(self):
        self.patched_fetch = patch("newsreader.news.collection.favicon.fetch")
        self.mocked_fetch = self.patched_fetch.start()

    def tearDown(self):
        patch.stopall()

    def _build_stream(self):
        """Return a ``WebsiteStream`` for a fresh rule pointing at WEBSITE_URL."""
        rule = CollectionRuleFactory(website_url=self.WEBSITE_URL)
        return WebsiteStream(rule)

    def _assert_read_raises(self, exception):
        """Assert ``read()`` raises *exception* after one fetch of WEBSITE_URL."""
        stream = self._build_stream()

        with self.assertRaises(exception):
            stream.read()

        self.mocked_fetch.assert_called_once_with(self.WEBSITE_URL)

    def test_simple(self):
        self.mocked_fetch.return_value = Mock(content=simple_mock)

        stream = self._build_stream()
        return_value = stream.read()

        self.mocked_fetch.assert_called_once_with(self.WEBSITE_URL)
        # assertEqual: the assertEquals alias is deprecated and was removed
        # from unittest in Python 3.12.
        self.assertEqual(
            return_value, (BeautifulSoup(simple_mock, features="lxml"), stream)
        )

    def test_raises_exception(self):
        self.mocked_fetch.side_effect = StreamException
        self._assert_read_raises(StreamException)

    def test_raises_denied_exception(self):
        self.mocked_fetch.side_effect = StreamDeniedException
        self._assert_read_raises(StreamDeniedException)

    def test_raises_stream_not_found_exception(self):
        self.mocked_fetch.side_effect = StreamNotFoundException
        self._assert_read_raises(StreamNotFoundException)

    def test_stream_raises_time_out_exception(self):
        self.mocked_fetch.side_effect = StreamTimeOutException
        self._assert_read_raises(StreamTimeOutException)

    def test_stream_raises_forbidden_exception(self):
        self.mocked_fetch.side_effect = StreamForbiddenException
        self._assert_read_raises(StreamForbiddenException)

    @patch("newsreader.news.collection.favicon.WebsiteStream.parse")
    def test_stream_raises_parse_exception(self, mocked_parse):
        # Fetch succeeds here; the failure is injected at the parse step.
        self.mocked_fetch.return_value = Mock()
        mocked_parse.side_effect = StreamParseException
        self._assert_read_raises(StreamParseException)
class WebsiteURLBuilderTestCase(TestCase):
    """Tests that ``WebsiteURLBuilder`` persists the website URL on its rule."""

    def _build_and_save(self, payload, rule):
        """Run the builder for *payload* against *rule*, then reload the rule."""
        with WebsiteURLBuilder(payload, Mock(rule=rule)) as builder:
            builder.build()
            builder.save()

        # Assertions must look at what was stored, not at the in-memory object.
        rule.refresh_from_db()

    def test_simple(self):
        initial_rule = CollectionRuleFactory()

        self._build_and_save(simple_feed_mock, initial_rule)

        # assertEqual: the assertEquals alias is deprecated and was removed
        # from unittest in Python 3.12.
        self.assertEqual(initial_rule.website_url, "https://www.bbc.co.uk/news/")

    def test_no_link(self):
        initial_rule = CollectionRuleFactory(website_url=None)

        self._build_and_save(feed_mock_without_link, initial_rule)

        self.assertIsNone(initial_rule.website_url)

    def test_no_data(self):
        initial_rule = CollectionRuleFactory(website_url=None)

        self._build_and_save(None, initial_rule)

        self.assertIsNone(initial_rule.website_url)

View file

@ -34,7 +34,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile) mock_stream = MagicMock(rule=profile)
with builder((simple_mock, mock_stream)) as builder: with builder(simple_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -83,7 +84,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile) mock_stream = MagicMock(rule=profile)
with builder((image_mock, mock_stream)) as builder: with builder(image_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -123,7 +125,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile) mock_stream = MagicMock(rule=profile)
with builder((video_mock, mock_stream)) as builder: with builder(video_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -165,7 +168,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile) mock_stream = MagicMock(rule=profile)
with builder((video_without_bitrate_mock, mock_stream)) as builder: with builder(video_without_bitrate_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -186,7 +190,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile) mock_stream = MagicMock(rule=profile)
with builder((gif_mock, mock_stream)) as builder: with builder(gif_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -211,7 +216,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile) mock_stream = MagicMock(rule=profile)
with builder((retweet_mock, mock_stream)) as builder: with builder(retweet_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -246,7 +252,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile) mock_stream = MagicMock(rule=profile)
with builder((quoted_mock, mock_stream)) as builder: with builder(quoted_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}
@ -276,7 +283,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile) mock_stream = MagicMock(rule=profile)
with builder(([], mock_stream)) as builder: with builder([], mock_stream) as builder:
builder.build()
builder.save() builder.save()
self.assertEquals(Post.objects.count(), 0) self.assertEquals(Post.objects.count(), 0)
@ -287,7 +295,8 @@ class TwitterBuilderTestCase(TestCase):
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
mock_stream = MagicMock(rule=profile) mock_stream = MagicMock(rule=profile)
with builder((unsanitized_mock, mock_stream)) as builder: with builder(unsanitized_mock, mock_stream) as builder:
builder.build()
builder.save() builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()} posts = {post.remote_identifier: post for post in Post.objects.all()}

View file

@ -8,7 +8,12 @@ import pytz
from ftfy import fix_text from ftfy import fix_text
from newsreader.news.collection.base import Builder, Client, Collector, Stream from newsreader.news.collection.base import (
PostBuilder,
PostClient,
PostCollector,
PostStream,
)
from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices
from newsreader.news.collection.utils import truncate_text from newsreader.news.collection.utils import truncate_text
from newsreader.news.core.models import Post from newsreader.news.core.models import Post
@ -20,25 +25,14 @@ TWITTER_URL = "https://twitter.com"
TWITTER_API_URL = "https://api.twitter.com/1.1" TWITTER_API_URL = "https://api.twitter.com/1.1"
class TwitterScheduler: class TwitterBuilder(PostBuilder):
pass rule_type = RuleTypeChoices.twitter
def build(self):
class TwitterBuilder(Builder):
rule__type = RuleTypeChoices.twitter
def create_posts(self, stream):
data, stream = stream
if not data:
return
self.instances = self.build(data, stream.rule)
def build(self, posts, rule):
results = {} results = {}
rule = self.stream.rule
for post in posts: for post in self.payload:
remote_identifier = post["id_str"] remote_identifier = post["id_str"]
url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}" url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"
@ -83,7 +77,7 @@ class TwitterBuilder(Builder):
results[remote_identifier] = Post(**data) results[remote_identifier] = Post(**data)
return results.values() self.instances = results.values()
def get_media_entities(self, post): def get_media_entities(self, post):
media_entities = post["extended_entities"]["media"] media_entities = post["extended_entities"]["media"]
@ -133,13 +127,17 @@ class TwitterBuilder(Builder):
return formatted_entities return formatted_entities
class TwitterStream(PostStream):
    """Twitter stream; currently adds nothing on top of ``PostStream``."""
class TwitterClient(PostClient):
    """Twitter client; currently adds nothing on top of ``PostClient``."""
class TwitterCollector(PostCollector):
    """Twitter collector; currently adds nothing on top of ``PostCollector``."""
class TwitterScheduler:
    """Placeholder for a Twitter scheduler; defines no behavior yet."""