From 150c49262895adef3613638e57eaf4fd3751fbcf Mon Sep 17 00:00:00 2001 From: Sonny Bakker Date: Sun, 13 Sep 2020 13:32:52 +0200 Subject: [PATCH] Alot of plumbing --- src/newsreader/news/collection/base.py | 107 +++++++------- src/newsreader/news/collection/choices.py | 2 +- src/newsreader/news/collection/favicon.py | 93 +++++++----- src/newsreader/news/collection/feed.py | 90 +++++------- src/newsreader/news/collection/reddit.py | 133 +++++++++--------- .../collection/tests/favicon/builder/tests.py | 32 ++++- .../collection/tests/favicon/client/tests.py | 28 ++-- .../tests/favicon/collector/tests.py | 23 +-- .../collection/tests/feed/builder/tests.py | 82 ++++++----- .../collection/tests/feed/client/tests.py | 4 +- .../collection/tests/feed/collector/tests.py | 4 +- .../collection/tests/feed/stream/tests.py | 6 +- .../collection/tests/reddit/builder/tests.py | 82 ++++++----- src/newsreader/news/collection/tests/tests.py | 96 +++++++------ .../collection/tests/twitter/builder/tests.py | 27 ++-- src/newsreader/news/collection/twitter.py | 40 +++--- 16 files changed, 462 insertions(+), 387 deletions(-) diff --git a/src/newsreader/news/collection/base.py b/src/newsreader/news/collection/base.py index df122a6..700fae5 100644 --- a/src/newsreader/news/collection/base.py +++ b/src/newsreader/news/collection/base.py @@ -1,13 +1,10 @@ import bleach -from bs4 import BeautifulSoup - from newsreader.news.collection.constants import ( WHITELISTED_ATTRIBUTES, WHITELISTED_TAGS, ) -from newsreader.news.collection.exceptions import StreamParseException -from newsreader.news.collection.utils import fetch +from newsreader.news.collection.models import CollectionRule from newsreader.news.core.models import Post @@ -33,7 +30,7 @@ class Stream: class Client: """ - Retrieves the data with streams + Retrieves the data through streams """ stream = Stream @@ -56,33 +53,24 @@ class Client: class Builder: """ - Creates the collected posts + Builds instances of various types """ instances = [] stream = None - rule_type = None + payload = None - def __init__(self, stream): + def __init__(self, payload, stream): + self.payload = payload self.stream = stream def __enter__(self): - _, stream = self.stream - - self.instances = [] - self.existing_posts = { - post.remote_identifier: post - for post in Post.objects.filter(rule=stream.rule, rule__type=self.rule_type) - } - - self.create_posts(self.stream) - return self def __exit__(self, *args, **kwargs): pass - def create_posts(self, stream): + def build(self): raise NotImplementedError def sanitize_fragment(self, fragment): @@ -97,10 +85,6 @@ class Builder: strip_comments=True, ) - def save(self): - for post in self.instances: - post.save() - class Meta: abstract = True @@ -118,46 +102,59 @@ class Collector: self.builder = builder if builder else self.builder def collect(self, rules=None): - with self.client(rules=rules) as client: - for data, stream in client: - with self.builder((data, stream)) as builder: - builder.save() + raise NotImplementedError class Meta: abstract = True -class WebsiteStream(Stream): - def __init__(self, url): - self.url = url +class PostBuilder(Builder): + rule_type = None - def read(self): - response = fetch(self.url) - - return (self.parse(response.content), self) - - def parse(self, payload): - try: - return BeautifulSoup(payload, "lxml") - except TypeError: - raise StreamParseException("Could not parse given HTML") - - -class URLBuilder(Builder): def __enter__(self): - return self + self.existing_posts = { + post.remote_identifier: post + for post in Post.objects.filter( + rule=self.stream.rule, rule__type=self.rule_type + ) + } - def build(self): - data, stream = self.stream - rule = stream.rule + return super().__enter__() - try: - url = data["feed"]["link"] - except (KeyError, TypeError): - url = None + def save(self): + for post in self.instances: + post.save() - if url: - rule.website_url = url - rule.save() + class Meta: + abstract = True - return rule, url + +class PostStream(Stream): + rule_type = None + + +class PostClient(Client): + stream = PostStream + + def __init__(self, rules=[]): + if rules: + self.rules = rules + else: + self.rules = CollectionRule.objects.enabled().filter( + type=self.stream.rule_type + ) + + def set_rule_error(self, rule, exception): + length = rule._meta.get_field("error").max_length + + rule.error = exception.message[-length:] + rule.succeeded = False + + +class PostCollector(Collector): + def collect(self, rules=None): + with self.client(rules=rules) as client: + for payload, stream in client: + with self.builder(payload, stream) as builder: + builder.build() + builder.save() diff --git a/src/newsreader/news/collection/choices.py b/src/newsreader/news/collection/choices.py index 8a129de..3fd9bef 100644 --- a/src/newsreader/news/collection/choices.py +++ b/src/newsreader/news/collection/choices.py @@ -9,6 +9,6 @@ class RuleTypeChoices(TextChoices): class TwitterPostTypeChoices(TextChoices): - photo = "photo", _("Poto") + photo = "photo", _("Photo") video = "video", _("Video") animated_gif = "animated_gif", _("GIF") diff --git a/src/newsreader/news/collection/favicon.py b/src/newsreader/news/collection/favicon.py index 44b96bf..639e7f6 100644 --- a/src/newsreader/news/collection/favicon.py +++ b/src/newsreader/news/collection/favicon.py @@ -1,16 +1,12 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from urllib.parse import urljoin, urlparse -from newsreader.news.collection.base import ( - Builder, - Client, - Collector, - Stream, - URLBuilder, - WebsiteStream, -) -from newsreader.news.collection.exceptions import StreamException +from bs4 import BeautifulSoup + +from newsreader.news.collection.base import Builder, Client, Collector, Stream +from newsreader.news.collection.exceptions import StreamException, StreamParseException from newsreader.news.collection.feed import FeedClient +from newsreader.news.collection.utils import fetch LINK_RELS = [ @@ -21,17 +17,45 @@ LINK_RELS = [ ] +class WebsiteStream(Stream): + def read(self): + response = fetch(self.rule.website_url) + + return self.parse(response.content), self + + def parse(self, payload): + try: + return BeautifulSoup(payload, features="lxml") + except TypeError: + raise StreamParseException("Could not parse given HTML") + + +class WebsiteURLBuilder(Builder): + def build(self): + try: + url = self.payload["feed"]["link"] + except (KeyError, TypeError): + url = None + + self.instances = [(self.stream, url)] if url else [] + + def save(self): + for stream, url in self.instances: + stream.rule.website_url = url + stream.rule.save() + + class FaviconBuilder(Builder): def build(self): - rule, soup = self.stream + rule = self.stream.rule - url = self.parse(soup, rule.website_url) + url = self.parse() - if url: - rule.favicon = url - rule.save() + self.instances = [(rule, url)] if url else [] + + def parse(self): + soup = self.payload - def parse(self, soup, website_url): if not soup.head: return @@ -44,9 +68,9 @@ class FaviconBuilder(Builder): parsed_url = urlparse(url) if not parsed_url.scheme and not parsed_url.netloc: - if not website_url: + if not self.stream.rule.website_url: return - return urljoin(website_url, url) + return urljoin(self.stream.rule.website_url, url) elif not parsed_url.scheme: return urljoin(f"https://{parsed_url.netloc}", parsed_url.path) @@ -73,6 +97,11 @@ class FaviconBuilder(Builder): elif icons: return icons.pop() + def save(self): + for rule, favicon_url in self.instances: + rule.favicon = favicon_url + rule.save() + class FaviconClient(Client): stream = WebsiteStream @@ -82,39 +111,35 @@ class FaviconClient(Client): def __enter__(self): with ThreadPoolExecutor(max_workers=10) as executor: - futures = { - executor.submit(stream.read): rule for rule, stream in self.streams - } + futures = [executor.submit(stream.read) for stream in self.streams] for future in as_completed(futures): - rule = futures[future] - try: - response_data, stream = future.result() + payload, stream = future.result() except StreamException: continue - yield (rule, response_data) + yield payload, stream class FaviconCollector(Collector): feed_client, favicon_client = (FeedClient, FaviconClient) - url_builder, favicon_builder = (URLBuilder, FaviconBuilder) + url_builder, favicon_builder = (WebsiteURLBuilder, FaviconBuilder) def collect(self, rules=None): streams = [] with self.feed_client(rules=rules) as client: - for data, stream in client: - with self.url_builder((data, stream)) as builder: - rule, url = builder.build() + for payload, stream in client: + with self.url_builder(payload, stream) as builder: + builder.build() + builder.save() - if not url: - continue - - streams.append((rule, WebsiteStream(url))) + if builder.instances: + streams.append(WebsiteStream(stream.rule)) with self.favicon_client(streams) as client: - for rule, data in client: - with self.favicon_builder((rule, data)) as builder: + for payload, stream in client: + with self.favicon_builder(payload, stream) as builder: builder.build() + builder.save() diff --git a/src/newsreader/news/collection/feed.py b/src/newsreader/news/collection/feed.py index ff28666..22f8dc7 100644 --- a/src/newsreader/news/collection/feed.py +++ b/src/newsreader/news/collection/feed.py @@ -10,7 +10,12 @@ import pytz from feedparser import parse -from newsreader.news.collection.base import Builder, Client, Collector, Stream +from newsreader.news.collection.base import ( + PostBuilder, + PostClient, + PostCollector, + PostStream, +) from newsreader.news.collection.choices import RuleTypeChoices from newsreader.news.collection.exceptions import ( StreamDeniedException, @@ -19,7 +24,6 @@ from newsreader.news.collection.exceptions import ( StreamParseException, StreamTimeOutException, ) -from newsreader.news.collection.models import CollectionRule from newsreader.news.collection.utils import ( build_publication_date, fetch, @@ -31,19 +35,10 @@ from newsreader.news.core.models import Post logger = logging.getLogger(__name__) -class FeedBuilder(Builder): +class FeedBuilder(PostBuilder): rule__type = RuleTypeChoices.feed - def create_posts(self, stream): - data, stream = stream - - with FeedDuplicateHandler(stream.rule) as duplicate_handler: - entries = data.get("entries", []) - - instances = self.build(entries, stream.rule) - self.instances = duplicate_handler.check(instances) - - def build(self, entries, rule): + def build(self): field_mapping = { "id": "remote_identifier", "title": "title", @@ -52,40 +47,47 @@ class FeedBuilder(Builder): "published_parsed": "publication_date", "author": "author", } + tz = pytz.timezone(self.stream.rule.timezone) + instances = [] - tz = pytz.timezone(rule.timezone) + with FeedDuplicateHandler(self.stream.rule) as duplicate_handler: + entries = self.payload.get("entries", []) - for entry in entries: - data = {"rule_id": rule.pk} + for entry in entries: + data = {"rule_id": self.stream.rule.pk} - for field, model_field in field_mapping.items(): - if not field in entry: - continue + for field, model_field in field_mapping.items(): + if not field in entry: + continue - value = truncate_text(Post, model_field, entry[field]) + value = truncate_text(Post, model_field, entry[field]) - if field == "published_parsed": - data[model_field] = build_publication_date(value, tz) - elif field == "summary": - data[model_field] = self.sanitize_fragment(value) - else: - data[model_field] = value + if field == "published_parsed": + data[model_field] = build_publication_date(value, tz) + elif field == "summary": + data[model_field] = self.sanitize_fragment(value) + else: + data[model_field] = value - if "content" in entry: - content = self.get_content(entry["content"]) - body = data.get("body", "") + if "content" in entry: + content = self.get_content(entry["content"]) + body = data.get("body", "") - if not body or len(body) < len(content): - data["body"] = content + if not body or len(body) < len(content): + data["body"] = content - yield Post(**data) + instances.append(Post(**data)) + + self.instances = duplicate_handler.check(instances) def get_content(self, items): content = "\n ".join([item.get("value") for item in items]) return self.sanitize_fragment(content) -class FeedStream(Stream): +class FeedStream(PostStream): + rule_type = RuleTypeChoices.feed + def read(self): response = fetch(self.rule.url) @@ -99,17 +101,9 @@ class FeedStream(Stream): raise StreamParseException(response=response, message=message) from e -class FeedClient(Client): +class FeedClient(PostClient): stream = FeedStream - def __init__(self, rules=[]): - if rules: - self.rules = rules - else: - self.rules = CollectionRule.objects.filter( - enabled=True, type=RuleTypeChoices.feed - ) - def __enter__(self): streams = [self.stream(rule) for rule in self.rules] @@ -120,13 +114,13 @@ class FeedClient(Client): stream = futures[future] try: - response_data = future.result() + payload = future.result() stream.rule.error = None stream.rule.succeeded = True stream.rule.last_suceeded = timezone.now() - yield response_data + yield payload except (StreamNotFoundException, StreamTimeOutException) as e: logger.warning(f"Request failed for {stream.rule.url}") @@ -142,14 +136,8 @@ class FeedClient(Client): finally: stream.rule.save() - def set_rule_error(self, rule, exception): - length = rule._meta.get_field("error").max_length - rule.error = exception.message[-length:] - rule.succeeded = False - - -class FeedCollector(Collector): +class FeedCollector(PostCollector): builder = FeedBuilder client = FeedClient diff --git a/src/newsreader/news/collection/reddit.py b/src/newsreader/news/collection/reddit.py index 65ce384..82a4d96 100644 --- a/src/newsreader/news/collection/reddit.py +++ b/src/newsreader/news/collection/reddit.py @@ -15,7 +15,12 @@ from django.utils.html import format_html import pytz import requests -from newsreader.news.collection.base import Builder, Client, Collector, Stream +from newsreader.news.collection.base import ( + PostBuilder, + PostClient, + PostCollector, + PostStream, +) from newsreader.news.collection.choices import RuleTypeChoices from newsreader.news.collection.constants import ( WHITELISTED_ATTRIBUTES, @@ -92,21 +97,17 @@ def get_reddit_access_token(code, user): return response_data["access_token"], response_data["refresh_token"] -class RedditBuilder(Builder): - rule__type = RuleTypeChoices.subreddit +class RedditBuilder(PostBuilder): + rule_type = RuleTypeChoices.subreddit - def create_posts(self, stream): - data, stream = stream - posts = [] + def build(self): + results = {} - if not "data" in data or not "children" in data["data"]: + if not "data" in self.payload or not "children" in self.payload["data"]: return - posts = data["data"]["children"] - self.instances = self.build(posts, stream.rule) - - def build(self, posts, rule): - results = {} + posts = self.payload["data"]["children"] + rule = self.stream.rule for post in posts: if not "data" in post or post["kind"] != REDDIT_POST: @@ -170,7 +171,9 @@ class RedditBuilder(Builder): parsed_date = datetime.fromtimestamp(post["data"]["created_utc"]) created_date = pytz.utc.localize(parsed_date) except (OverflowError, OSError): - logging.warning(f"Failed parsing timestamp from {url_fragment}") + logging.warning( + f"Failed parsing timestamp from {REDDIT_URL}{post_url_fragment}" + ) created_date = timezone.now() post_data = { @@ -194,52 +197,11 @@ class RedditBuilder(Builder): results[remote_identifier] = Post(**post_data) - return results.values() + self.instances = results.values() -class RedditScheduler: - max_amount = RATE_LIMIT - max_user_amount = RATE_LIMIT / 4 - - def __init__(self, subreddits=[]): - if not subreddits: - self.subreddits = CollectionRule.objects.filter( - type=RuleTypeChoices.subreddit, - user__reddit_access_token__isnull=False, - user__reddit_refresh_token__isnull=False, - enabled=True, - ).order_by("last_suceeded")[:200] - else: - self.subreddits = subreddits - - def get_scheduled_rules(self): - rule_mapping = {} - current_amount = 0 - - for subreddit in self.subreddits: - user_pk = subreddit.user.pk - - if current_amount == self.max_amount: - break - - if user_pk in rule_mapping: - max_amount_reached = len(rule_mapping[user_pk]) == self.max_user_amount - - if max_amount_reached: - continue - - rule_mapping[user_pk].append(subreddit) - current_amount += 1 - - continue - - rule_mapping[user_pk] = [subreddit] - current_amount += 1 - - return list(rule_mapping.values()) - - -class RedditStream(Stream): +class RedditStream(PostStream): + rule_type = RuleTypeChoices.subreddit headers = {} user = None @@ -261,16 +223,13 @@ class RedditStream(Stream): return response.json() except JSONDecodeError as e: raise StreamParseException( - response=response, message=f"Failed parsing json" + response=response, message="Failed parsing json" ) from e -class RedditClient(Client): +class RedditClient(PostClient): stream = RedditStream - def __init__(self, rules=[]): - self.rules = rules - def __enter__(self): streams = [[self.stream(rule) for rule in batch] for batch in self.rules] rate_limitted = False @@ -324,13 +283,49 @@ class RedditClient(Client): finally: stream.rule.save() - def set_rule_error(self, rule, exception): - length = rule._meta.get_field("error").max_length - rule.error = exception.message[-length:] - rule.succeeded = False - - -class RedditCollector(Collector): +class RedditCollector(PostCollector): builder = RedditBuilder client = RedditClient + + +class RedditScheduler: + max_amount = RATE_LIMIT + max_user_amount = RATE_LIMIT / 4 + + def __init__(self, subreddits=[]): + if not subreddits: + self.subreddits = CollectionRule.objects.filter( + type=RuleTypeChoices.subreddit, + user__reddit_access_token__isnull=False, + user__reddit_refresh_token__isnull=False, + enabled=True, + ).order_by("last_suceeded")[:200] + else: + self.subreddits = subreddits + + def get_scheduled_rules(self): + rule_mapping = {} + current_amount = 0 + + for subreddit in self.subreddits: + user_pk = subreddit.user.pk + + if current_amount == self.max_amount: + break + + if user_pk in rule_mapping: + max_amount_reached = len(rule_mapping[user_pk]) == self.max_user_amount + + if max_amount_reached: + continue + + rule_mapping[user_pk].append(subreddit) + current_amount += 1 + + continue + + rule_mapping[user_pk] = [subreddit] + current_amount += 1 + + return list(rule_mapping.values()) diff --git a/src/newsreader/news/collection/tests/favicon/builder/tests.py b/src/newsreader/news/collection/tests/favicon/builder/tests.py index e8a1a34..d21f77e 100644 --- a/src/newsreader/news/collection/tests/favicon/builder/tests.py +++ b/src/newsreader/news/collection/tests/favicon/builder/tests.py @@ -1,3 +1,5 @@ +from unittest.mock import Mock + from django.test import TestCase from newsreader.news.collection.favicon import FaviconBuilder @@ -12,8 +14,11 @@ class FaviconBuilderTestCase(TestCase): def test_simple(self): rule = CollectionRuleFactory(favicon=None) - with FaviconBuilder((rule, simple_mock)) as builder: + with FaviconBuilder(simple_mock, Mock(rule=rule)) as builder: builder.build() + builder.save() + + rule.refresh_from_db() self.assertEquals(rule.favicon, "https://www.bbc.com/favicon.ico") @@ -22,24 +27,33 @@ class FaviconBuilderTestCase(TestCase): website_url="https://www.theguardian.com/", favicon=None ) - with FaviconBuilder((rule, mock_without_url)) as builder: + with FaviconBuilder(mock_without_url, Mock(rule=rule)) as builder: builder.build() + builder.save() + + rule.refresh_from_db() self.assertEquals(rule.favicon, "https://www.theguardian.com/favicon.ico") def test_without_header(self): rule = CollectionRuleFactory(favicon=None) - with FaviconBuilder((rule, mock_without_header)) as builder: + with FaviconBuilder(mock_without_header, Mock(rule=rule)) as builder: builder.build() + builder.save() + + rule.refresh_from_db() self.assertEquals(rule.favicon, None) def test_weird_path(self): rule = CollectionRuleFactory(favicon=None) - with FaviconBuilder((rule, mock_with_weird_path)) as builder: + with FaviconBuilder(mock_with_weird_path, Mock(rule=rule)) as builder: builder.build() + builder.save() + + rule.refresh_from_db() self.assertEquals( rule.favicon, "https://www.theguardian.com/jabadaba/doe/favicon.ico" @@ -48,15 +62,21 @@ class FaviconBuilderTestCase(TestCase): def test_other_url(self): rule = CollectionRuleFactory(favicon=None) - with FaviconBuilder((rule, mock_with_other_url)) as builder: + with FaviconBuilder(mock_with_other_url, Mock(rule=rule)) as builder: builder.build() + builder.save() + + rule.refresh_from_db() self.assertEquals(rule.favicon, "https://www.theguardian.com/icon.png") def test_url_with_favicon_takes_precedence(self): rule = CollectionRuleFactory(favicon=None) - with FaviconBuilder((rule, mock_with_multiple_icons)) as builder: + with FaviconBuilder(mock_with_multiple_icons, Mock(rule=rule)) as builder: builder.build() + builder.save() + + rule.refresh_from_db() self.assertEquals(rule.favicon, "https://www.bbc.com/favicon.ico") diff --git a/src/newsreader/news/collection/tests/favicon/client/tests.py b/src/newsreader/news/collection/tests/favicon/client/tests.py index 717ee0c..85b8fa3 100644 --- a/src/newsreader/news/collection/tests/favicon/client/tests.py +++ b/src/newsreader/news/collection/tests/favicon/client/tests.py @@ -1,4 +1,4 @@ -from unittest.mock import MagicMock +from unittest.mock import Mock from django.test import TestCase @@ -19,22 +19,22 @@ class FaviconClientTestCase(TestCase): def test_simple(self): rule = CollectionRuleFactory() - stream = MagicMock(url="https://www.bbc.com") + stream = Mock(url="https://www.bbc.com", rule=rule) stream.read.return_value = (simple_mock, stream) - with FaviconClient([(rule, stream)]) as client: - for rule, data in client: - self.assertEquals(rule.pk, rule.pk) - self.assertEquals(data, simple_mock) + with FaviconClient([stream]) as client: + for payload, stream in client: + self.assertEquals(stream.rule.pk, rule.pk) + self.assertEquals(payload, simple_mock) stream.read.assert_called_once_with() def test_client_catches_stream_exception(self): rule = CollectionRuleFactory(error=None, succeeded=True) - stream = MagicMock(url="https://www.bbc.com") + stream = Mock(url="https://www.bbc.com", rule=rule) stream.read.side_effect = StreamException - with FaviconClient([(rule, stream)]) as client: + with FaviconClient([stream]) as client: for rule, data in client: pass @@ -46,10 +46,10 @@ class FaviconClientTestCase(TestCase): def test_client_catches_stream_not_found_exception(self): rule = CollectionRuleFactory(error=None, succeeded=True) - stream = MagicMock(url="https://www.bbc.com") + stream = Mock(url="https://www.bbc.com", rule=rule) stream.read.side_effect = StreamNotFoundException - with FaviconClient([(rule, stream)]) as client: + with FaviconClient([stream]) as client: for rule, data in client: pass @@ -61,10 +61,10 @@ class FaviconClientTestCase(TestCase): def test_client_catches_stream_denied_exception(self): rule = CollectionRuleFactory(error=None, succeeded=True) - stream = MagicMock(url="https://www.bbc.com") + stream = Mock(url="https://www.bbc.com", rule=rule) stream.read.side_effect = StreamDeniedException - with FaviconClient([(rule, stream)]) as client: + with FaviconClient([stream]) as client: for rule, data in client: pass @@ -76,10 +76,10 @@ class FaviconClientTestCase(TestCase): def test_client_catches_stream_timed_out(self): rule = CollectionRuleFactory(error=None, succeeded=True) - stream = MagicMock(url="https://www.bbc.com") + stream = Mock(url="https://www.bbc.com", rule=rule) stream.read.side_effect = StreamTimeOutException - with FaviconClient([(rule, stream)]) as client: + with FaviconClient([stream]) as client: for rule, data in client: pass diff --git a/src/newsreader/news/collection/tests/favicon/collector/tests.py b/src/newsreader/news/collection/tests/favicon/collector/tests.py index 44254a5..cb73a7c 100644 --- a/src/newsreader/news/collection/tests/favicon/collector/tests.py +++ b/src/newsreader/news/collection/tests/favicon/collector/tests.py @@ -1,4 +1,4 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import Mock, patch from django.test import TestCase @@ -38,8 +38,8 @@ class FaviconCollectorTestCase(TestCase): def test_simple(self): rule = CollectionRuleFactory(succeeded=True, error=None) - self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] - self.mocked_website_read.return_value = (website_mock, MagicMock()) + self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))] + self.mocked_website_read.return_value = (website_mock, Mock(rule=rule)) collector = FaviconCollector() collector.collect() @@ -54,8 +54,11 @@ class FaviconCollectorTestCase(TestCase): def test_empty_stream(self): rule = CollectionRuleFactory(succeeded=True, error=None) - self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] - self.mocked_website_read.return_value = (BeautifulSoup("", "lxml"), MagicMock()) + self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))] + self.mocked_website_read.return_value = ( + BeautifulSoup("", "lxml"), + Mock(rule=rule), + ) collector = FaviconCollector() collector.collect() @@ -70,7 +73,7 @@ class FaviconCollectorTestCase(TestCase): def test_not_found(self): rule = CollectionRuleFactory(succeeded=True, error=None) - self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] + self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))] self.mocked_website_read.side_effect = StreamNotFoundException collector = FaviconCollector() @@ -86,7 +89,7 @@ class FaviconCollectorTestCase(TestCase): def test_denied(self): rule = CollectionRuleFactory(succeeded=True, error=None) - self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] + self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))] self.mocked_website_read.side_effect = StreamDeniedException collector = FaviconCollector() @@ -102,7 +105,7 @@ class FaviconCollectorTestCase(TestCase): def test_forbidden(self): rule = CollectionRuleFactory(succeeded=True, error=None) - self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] + self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))] self.mocked_website_read.side_effect = StreamForbiddenException collector = FaviconCollector() @@ -118,7 +121,7 @@ class FaviconCollectorTestCase(TestCase): def test_timed_out(self): rule = CollectionRuleFactory(succeeded=True, error=None) - self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] + self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))] self.mocked_website_read.side_effect = StreamTimeOutException collector = FaviconCollector() @@ -134,7 +137,7 @@ class FaviconCollectorTestCase(TestCase): def test_wrong_stream_content_type(self): rule = CollectionRuleFactory(succeeded=True, error=None) - self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))] + self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))] self.mocked_website_read.side_effect = StreamParseException collector = FaviconCollector() diff --git a/src/newsreader/news/collection/tests/feed/builder/tests.py b/src/newsreader/news/collection/tests/feed/builder/tests.py index 4a6eb69..571a7cd 100644 --- a/src/newsreader/news/collection/tests/feed/builder/tests.py +++ b/src/newsreader/news/collection/tests/feed/builder/tests.py @@ -1,5 +1,5 @@ from datetime import date, datetime, time -from unittest.mock import MagicMock +from unittest.mock import Mock from django.test import TestCase from django.utils import timezone @@ -24,9 +24,10 @@ class FeedBuilderTestCase(TestCase): def test_basic_entry(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((simple_mock, mock_stream)) as builder: + with builder(simple_mock, mock_stream) as builder: + builder.build() builder.save() post = Post.objects.get() @@ -55,9 +56,10 @@ class FeedBuilderTestCase(TestCase): def test_multiple_entries(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((multiple_mock, mock_stream)) as builder: + with builder(multiple_mock, mock_stream) as builder: + builder.build() builder.save() posts = Post.objects.order_by("-publication_date") @@ -116,9 +118,10 @@ class FeedBuilderTestCase(TestCase): def test_entries_without_remote_identifier(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_without_identifier, mock_stream)) as builder: + with builder(mock_without_identifier, mock_stream) as builder: + builder.build() builder.save() posts = Post.objects.order_by("-publication_date") @@ -155,9 +158,10 @@ class FeedBuilderTestCase(TestCase): def test_entry_without_publication_date(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_without_publish_date, mock_stream)) as builder: + with builder(mock_without_publish_date, mock_stream) as builder: + builder.build() builder.save() posts = Post.objects.order_by("-publication_date") @@ -187,9 +191,10 @@ class FeedBuilderTestCase(TestCase): def test_entry_without_url(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_without_url, mock_stream)) as builder: + with builder(mock_without_url, mock_stream) as builder: + builder.build() builder.save() posts = Post.objects.order_by("-publication_date") @@ -213,9 +218,10 @@ class FeedBuilderTestCase(TestCase): def test_entry_without_body(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_without_body, mock_stream)) as builder: + with builder(mock_without_body, mock_stream) as builder: + builder.build() builder.save() posts = Post.objects.order_by("-publication_date") @@ -247,9 +253,10 @@ class FeedBuilderTestCase(TestCase): def test_entry_without_author(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_without_author, mock_stream)) as builder: + with builder(mock_without_author, mock_stream) as builder: + builder.build() builder.save() posts = Post.objects.order_by("-publication_date") @@ -275,9 +282,10 @@ class FeedBuilderTestCase(TestCase): def test_empty_entries(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_without_entries, mock_stream)) as builder: + with builder(mock_without_entries, mock_stream) as builder: + builder.build() builder.save() self.assertEquals(Post.objects.count(), 0) @@ -285,7 +293,7 @@ class FeedBuilderTestCase(TestCase): def test_update_entries(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) existing_first_post = FeedPostFactory.create( remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7", rule=rule @@ -295,7 +303,8 @@ class FeedBuilderTestCase(TestCase): remote_identifier="a5479c66-8fae-11e9-8422-00163ef6bee7", rule=rule ) - with builder((mock_with_update_entries, mock_stream)) as builder: + with builder(mock_with_update_entries, mock_stream) as builder: + builder.build() builder.save() self.assertEquals(Post.objects.count(), 3) @@ -315,9 +324,10 @@ class FeedBuilderTestCase(TestCase): def test_html_sanitizing(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_with_html, mock_stream)) as builder: + with builder(mock_with_html, mock_stream) as builder: + builder.build() builder.save() post = Post.objects.get() @@ -337,9 +347,10 @@ class FeedBuilderTestCase(TestCase): def test_long_author_text_is_truncated(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_with_long_author, mock_stream)) as builder: + with builder(mock_with_long_author, mock_stream) as builder: + builder.build() builder.save() post = Post.objects.get() @@ -351,9 +362,10 @@ class FeedBuilderTestCase(TestCase): def test_long_title_text_is_truncated(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_with_long_title, mock_stream)) as builder: + with builder(mock_with_long_title, mock_stream) as builder: + builder.build() builder.save() post = Post.objects.get() @@ -366,9 +378,10 @@ class FeedBuilderTestCase(TestCase): def test_long_title_exotic_title(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_with_long_exotic_title, mock_stream)) as builder: + with builder(mock_with_long_exotic_title, mock_stream) as builder: + builder.build() builder.save() post = Post.objects.get() @@ -381,9 +394,10 @@ class FeedBuilderTestCase(TestCase): def test_content_detail_is_prioritized_if_longer(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_with_longer_content_detail, mock_stream)) as builder: + with builder(mock_with_longer_content_detail, mock_stream) as builder: + builder.build() builder.save() post = Post.objects.get() @@ -398,9 +412,10 @@ class FeedBuilderTestCase(TestCase): def test_content_detail_is_not_prioritized_if_shorter(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_with_shorter_content_detail, mock_stream)) as builder: + with builder(mock_with_shorter_content_detail, mock_stream) as builder: + builder.build() builder.save() post = Post.objects.get() @@ -414,9 +429,10 @@ class FeedBuilderTestCase(TestCase): def test_content_detail_is_concatinated(self): builder = FeedBuilder rule = FeedFactory() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) - with builder((mock_with_multiple_content_detail, mock_stream)) as builder: + with builder(mock_with_multiple_content_detail, mock_stream) as builder: + builder.build() builder.save() post = Post.objects.get() diff --git a/src/newsreader/news/collection/tests/feed/client/tests.py b/src/newsreader/news/collection/tests/feed/client/tests.py index 24eb214..9a2365e 100644 --- a/src/newsreader/news/collection/tests/feed/client/tests.py +++ b/src/newsreader/news/collection/tests/feed/client/tests.py @@ -1,4 +1,4 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import Mock, patch from django.test import TestCase from django.utils.lorem_ipsum import words @@ -28,7 +28,7 @@ class FeedClientTestCase(TestCase): def test_client_retrieves_single_rules(self): rule = FeedFactory.create() - mock_stream = MagicMock(rule=rule) + mock_stream = Mock(rule=rule) self.mocked_read.return_value = (simple_mock, mock_stream) diff --git a/src/newsreader/news/collection/tests/feed/collector/tests.py b/src/newsreader/news/collection/tests/feed/collector/tests.py index 5a1bac1..048d618 100644 --- a/src/newsreader/news/collection/tests/feed/collector/tests.py +++ b/src/newsreader/news/collection/tests/feed/collector/tests.py @@ -1,6 +1,6 @@ from datetime import date, datetime, time from time import struct_time -from unittest.mock import MagicMock, patch +from unittest.mock import Mock, patch from django.test import TestCase from django.utils import timezone @@ -56,7 +56,7 @@ class FeedCollectorTestCase(TestCase): @freeze_time("2019-10-30 12:30:00") def test_emtpy_batch(self): - self.mocked_fetch.return_value = MagicMock() + self.mocked_fetch.return_value = Mock() self.mocked_parse.return_value = empty_mock rule = FeedFactory() diff --git a/src/newsreader/news/collection/tests/feed/stream/tests.py b/src/newsreader/news/collection/tests/feed/stream/tests.py index 82a09a3..f827c15 100644 --- a/src/newsreader/news/collection/tests/feed/stream/tests.py +++ b/src/newsreader/news/collection/tests/feed/stream/tests.py @@ -1,4 +1,4 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import Mock, patch from django.test import TestCase @@ -27,7 +27,7 @@ class FeedStreamTestCase(TestCase): patch.stopall() def test_simple_stream(self): - self.mocked_fetch.return_value = MagicMock(content=simple_mock) + self.mocked_fetch.return_value = Mock(content=simple_mock) rule = FeedFactory() stream = FeedStream(rule) @@ -95,7 +95,7 @@ class FeedStreamTestCase(TestCase): @patch("newsreader.news.collection.feed.parse") def test_stream_raises_parse_exception(self, mocked_parse): - self.mocked_fetch.return_value = MagicMock() + self.mocked_fetch.return_value = Mock() mocked_parse.side_effect = TypeError rule = FeedFactory() diff --git a/src/newsreader/news/collection/tests/reddit/builder/tests.py b/src/newsreader/news/collection/tests/reddit/builder/tests.py index 9c1a046..11cf549 100644 --- a/src/newsreader/news/collection/tests/reddit/builder/tests.py +++ b/src/newsreader/news/collection/tests/reddit/builder/tests.py @@ -1,5 +1,5 @@ from datetime import datetime -from unittest.mock import MagicMock +from unittest.mock import Mock from django.test import TestCase @@ -20,9 +20,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((simple_mock, mock_stream)) as builder: + with builder(simple_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -65,9 +66,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((empty_mock, mock_stream)) as builder: + with builder(empty_mock, mock_stream) as builder: + builder.build() builder.save() self.assertEquals(Post.objects.count(), 0) @@ -76,9 +78,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((unknown_mock, mock_stream)) as builder: + with builder(unknown_mock, mock_stream) as builder: + builder.build() builder.save() self.assertEquals(Post.objects.count(), 0) @@ -95,9 +98,10 @@ class RedditBuilderTestCase(TestCase): ) builder = RedditBuilder - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((simple_mock, mock_stream)) as builder: + with builder(simple_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -132,9 +136,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((unsanitized_mock, mock_stream)) as builder: + with builder(unsanitized_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -149,9 +154,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((author_mock, mock_stream)) as builder: + with builder(author_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -166,9 +172,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((title_mock, mock_stream)) as builder: + with builder(title_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -186,9 +193,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((duplicate_mock, mock_stream)) as builder: + with builder(duplicate_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -200,13 +208,14 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) duplicate_post = RedditPostFactory( remote_identifier="hm0qct", rule=subreddit, title="foo" ) - with builder((simple_mock, mock_stream)) as builder: + with builder(simple_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -231,9 +240,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((image_mock, mock_stream)) as builder: + with builder(image_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -262,9 +272,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((external_image_mock, mock_stream)) as builder: + with builder(external_image_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -302,9 +313,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((video_mock, mock_stream)) as builder: + with builder(video_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -328,9 +340,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((external_video_mock, mock_stream)) as builder: + with builder(external_video_mock, mock_stream) as builder: + builder.build() builder.save() post = Post.objects.get() @@ -354,9 +367,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((external_gifv_mock, mock_stream)) as builder: + with builder(external_gifv_mock, mock_stream) as builder: + builder.build() builder.save() post = Post.objects.get() @@ -376,9 +390,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((simple_mock, mock_stream)) as builder: + with builder(simple_mock, mock_stream) as builder: + builder.build() builder.save() post = Post.objects.get(remote_identifier="hngsj8") @@ -400,9 +415,10 @@ class RedditBuilderTestCase(TestCase): builder = RedditBuilder subreddit = SubredditFactory() - mock_stream = MagicMock(rule=subreddit) + mock_stream = Mock(rule=subreddit) - with builder((unknown_mock, mock_stream)) as builder: + with builder(unknown_mock, mock_stream) as builder: + builder.build() builder.save() self.assertEquals(Post.objects.count(), 0) diff --git a/src/newsreader/news/collection/tests/tests.py b/src/newsreader/news/collection/tests/tests.py index 363e0b5..c7f0bb0 100644 --- a/src/newsreader/news/collection/tests/tests.py +++ b/src/newsreader/news/collection/tests/tests.py @@ -1,10 +1,9 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import Mock, patch from django.test import TestCase from bs4 import BeautifulSoup -from newsreader.news.collection.base import URLBuilder, WebsiteStream from newsreader.news.collection.exceptions import ( StreamDeniedException, StreamException, @@ -13,6 +12,7 @@ from newsreader.news.collection.exceptions import ( StreamParseException, StreamTimeOutException, ) +from newsreader.news.collection.favicon import WebsiteStream, WebsiteURLBuilder from newsreader.news.collection.tests.factories import CollectionRuleFactory from .mocks import feed_mock_without_link, simple_feed_mock, simple_mock @@ -20,117 +20,125 @@ from .mocks import feed_mock_without_link, simple_feed_mock, simple_mock class WebsiteStreamTestCase(TestCase): def setUp(self): - self.patched_fetch = patch("newsreader.news.collection.base.fetch") + self.patched_fetch = patch("newsreader.news.collection.favicon.fetch") self.mocked_fetch = self.patched_fetch.start() def tearDown(self): patch.stopall() def test_simple(self): - self.mocked_fetch.return_value = MagicMock(content=simple_mock) + self.mocked_fetch.return_value = Mock(content=simple_mock) - rule = CollectionRuleFactory() - stream = WebsiteStream(rule.url) + rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/") + stream = WebsiteStream(rule) return_value = stream.read() - self.mocked_fetch.assert_called_once_with(rule.url) - self.assertEquals(return_value, (BeautifulSoup(simple_mock, "lxml"), stream)) + self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/") + self.assertEquals( + return_value, (BeautifulSoup(simple_mock, features="lxml"), stream) + ) def test_raises_exception(self): self.mocked_fetch.side_effect = StreamException - rule = CollectionRuleFactory() - stream = WebsiteStream(rule.url) + rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/") + stream = WebsiteStream(rule) with self.assertRaises(StreamException): stream.read() - self.mocked_fetch.assert_called_once_with(rule.url) + self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/") def test_raises_denied_exception(self): self.mocked_fetch.side_effect = StreamDeniedException - rule = CollectionRuleFactory() - stream = WebsiteStream(rule.url) + rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/") + stream = WebsiteStream(rule) with self.assertRaises(StreamDeniedException): stream.read() - self.mocked_fetch.assert_called_once_with(rule.url) + self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/") def test_raises_stream_not_found_exception(self): self.mocked_fetch.side_effect = StreamNotFoundException - rule = CollectionRuleFactory() - stream = WebsiteStream(rule.url) + rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/") + stream = WebsiteStream(rule) with self.assertRaises(StreamNotFoundException): stream.read() - self.mocked_fetch.assert_called_once_with(rule.url) + self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/") def test_stream_raises_time_out_exception(self): self.mocked_fetch.side_effect = StreamTimeOutException - rule = CollectionRuleFactory() - stream = WebsiteStream(rule.url) + rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/") + stream = WebsiteStream(rule) with self.assertRaises(StreamTimeOutException): stream.read() - self.mocked_fetch.assert_called_once_with(rule.url) + self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/") def test_stream_raises_forbidden_exception(self): self.mocked_fetch.side_effect = StreamForbiddenException - rule = CollectionRuleFactory() - stream = WebsiteStream(rule.url) + rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/") + stream = WebsiteStream(rule) with self.assertRaises(StreamForbiddenException): stream.read() - self.mocked_fetch.assert_called_once_with(rule.url) + self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/") - @patch("newsreader.news.collection.base.WebsiteStream.parse") + @patch("newsreader.news.collection.favicon.WebsiteStream.parse") def test_stream_raises_parse_exception(self, mocked_parse): - self.mocked_fetch.return_value = MagicMock() + self.mocked_fetch.return_value = Mock() mocked_parse.side_effect = StreamParseException - rule = CollectionRuleFactory() - stream = WebsiteStream(rule.url) + rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/") + stream = WebsiteStream(rule) with self.assertRaises(StreamParseException): stream.read() - self.mocked_fetch.assert_called_once_with(rule.url) + self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/") -class URLBuilderTestCase(TestCase): +class WebsiteURLBuilderTestCase(TestCase): def test_simple(self): initial_rule = CollectionRuleFactory() - with URLBuilder((simple_feed_mock, MagicMock(rule=initial_rule))) as builder: - rule, url = builder.build() + with WebsiteURLBuilder(simple_feed_mock, Mock(rule=initial_rule)) as builder: + builder.build() + builder.save() - self.assertEquals(rule.pk, initial_rule.pk) - self.assertEquals(url, "https://www.bbc.co.uk/news/") + initial_rule.refresh_from_db() + + self.assertEquals(initial_rule.website_url, "https://www.bbc.co.uk/news/") def test_no_link(self): - initial_rule = CollectionRuleFactory() + initial_rule = CollectionRuleFactory(website_url=None) - with URLBuilder( - (feed_mock_without_link, MagicMock(rule=initial_rule)) + with WebsiteURLBuilder( + feed_mock_without_link, Mock(rule=initial_rule) ) as builder: - rule, url = builder.build() + builder.build() + builder.save() - self.assertEquals(rule.pk, initial_rule.pk) - self.assertEquals(url, None) + initial_rule.refresh_from_db() + + self.assertEquals(initial_rule.website_url, None) def test_no_data(self): - initial_rule = CollectionRuleFactory() + initial_rule = CollectionRuleFactory(website_url=None) - with URLBuilder((None, MagicMock(rule=initial_rule))) as builder: - rule, url = builder.build() + with WebsiteURLBuilder(None, Mock(rule=initial_rule)) as builder: + builder.build() + builder.save() - self.assertEquals(rule.pk, initial_rule.pk) - self.assertEquals(url, None) + initial_rule.refresh_from_db() + + self.assertEquals(initial_rule.website_url, None) diff --git a/src/newsreader/news/collection/tests/twitter/builder/tests.py b/src/newsreader/news/collection/tests/twitter/builder/tests.py index 19fdce3..8e08869 100644 --- a/src/newsreader/news/collection/tests/twitter/builder/tests.py +++ b/src/newsreader/news/collection/tests/twitter/builder/tests.py @@ -34,7 +34,8 @@ class TwitterBuilderTestCase(TestCase): profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") mock_stream = MagicMock(rule=profile) - with builder((simple_mock, mock_stream)) as builder: + with builder(simple_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -83,7 +84,8 @@ class TwitterBuilderTestCase(TestCase): profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") mock_stream = MagicMock(rule=profile) - with builder((image_mock, mock_stream)) as builder: + with builder(image_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -123,7 +125,8 @@ class TwitterBuilderTestCase(TestCase): profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") mock_stream = MagicMock(rule=profile) - with builder((video_mock, mock_stream)) as builder: + with builder(video_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -165,7 +168,8 @@ class TwitterBuilderTestCase(TestCase): profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") mock_stream = MagicMock(rule=profile) - with builder((video_without_bitrate_mock, mock_stream)) as builder: + with builder(video_without_bitrate_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -186,7 +190,8 @@ class TwitterBuilderTestCase(TestCase): profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") mock_stream = MagicMock(rule=profile) - with builder((gif_mock, mock_stream)) as builder: + with builder(gif_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -211,7 +216,8 @@ class TwitterBuilderTestCase(TestCase): profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") mock_stream = MagicMock(rule=profile) - with builder((retweet_mock, mock_stream)) as builder: + with builder(retweet_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -246,7 +252,8 @@ class TwitterBuilderTestCase(TestCase): profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") mock_stream = MagicMock(rule=profile) - with builder((quoted_mock, mock_stream)) as builder: + with builder(quoted_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} @@ -276,7 +283,8 @@ class TwitterBuilderTestCase(TestCase): profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") mock_stream = MagicMock(rule=profile) - with builder(([], mock_stream)) as builder: + with builder([], mock_stream) as builder: + builder.build() builder.save() self.assertEquals(Post.objects.count(), 0) @@ -287,7 +295,8 @@ class TwitterBuilderTestCase(TestCase): profile = TwitterProfileFactory(screen_name="RobertsSpaceInd") mock_stream = MagicMock(rule=profile) - with builder((unsanitized_mock, mock_stream)) as builder: + with builder(unsanitized_mock, mock_stream) as builder: + builder.build() builder.save() posts = {post.remote_identifier: post for post in Post.objects.all()} diff --git a/src/newsreader/news/collection/twitter.py b/src/newsreader/news/collection/twitter.py index b0f08cc..3d1c54b 100644 --- a/src/newsreader/news/collection/twitter.py +++ b/src/newsreader/news/collection/twitter.py @@ -8,7 +8,12 @@ import pytz from ftfy import fix_text -from newsreader.news.collection.base import Builder, Client, Collector, Stream +from newsreader.news.collection.base import ( + PostBuilder, + PostClient, + PostCollector, + PostStream, +) from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices from newsreader.news.collection.utils import truncate_text from newsreader.news.core.models import Post @@ -20,25 +25,14 @@ TWITTER_URL = "https://twitter.com" TWITTER_API_URL = "https://api.twitter.com/1.1" -class TwitterScheduler: - pass +class TwitterBuilder(PostBuilder): + rule_type = RuleTypeChoices.twitter - -class TwitterBuilder(Builder): - rule__type = RuleTypeChoices.twitter - - def create_posts(self, stream): - data, stream = stream - - if not data: - return - - self.instances = self.build(data, stream.rule) - - def build(self, posts, rule): + def build(self): results = {} + rule = self.stream.rule - for post in posts: + for post in self.payload: remote_identifier = post["id_str"] url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}" @@ -83,7 +77,7 @@ class TwitterBuilder(Builder): results[remote_identifier] = Post(**data) - return results.values() + self.instances = results.values() def get_media_entities(self, post): media_entities = post["extended_entities"]["media"] @@ -133,13 +127,17 @@ class TwitterBuilder(Builder): return formatted_entities -class TwitterStream(Stream): +class TwitterStream(PostStream): pass -class TwitterClient(Client): +class TwitterClient(PostClient): pass -class TwitterCollector(Collector): +class TwitterCollector(PostCollector): + pass + + +class TwitterScheduler: pass