A lot of plumbing
parent 6be8862a7d
commit 150c492628
16 changed files with 462 additions and 387 deletions
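The diff below moves the shared plumbing into generic PostStream/PostClient/PostBuilder/PostCollector base classes, so the feed and Reddit collectors only declare their own stream, client and builder. A minimal usage sketch of the resulting flow, reconstructed from the PostCollector.collect() shown in the diff (class and method names are taken from the diff; the call site itself is illustrative):

    from newsreader.news.collection.feed import FeedCollector

    # FeedCollector wires builder = FeedBuilder and client = FeedClient;
    # collect() iterates the client, which yields (payload, stream) pairs,
    # and each builder is asked to build() and then save().
    FeedCollector().collect()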
@@ -1,13 +1,10 @@
import bleach

-from bs4 import BeautifulSoup
-
from newsreader.news.collection.constants import (
    WHITELISTED_ATTRIBUTES,
    WHITELISTED_TAGS,
)
-from newsreader.news.collection.exceptions import StreamParseException
-from newsreader.news.collection.utils import fetch
+from newsreader.news.collection.models import CollectionRule
from newsreader.news.core.models import Post


@@ -33,7 +30,7 @@ class Stream:

class Client:
    """
-    Retrieves the data with streams
+    Retrieves the data through streams
    """

    stream = Stream

@@ -56,33 +53,24 @@ class Client:

class Builder:
    """
-    Creates the collected posts
+    Builds instances of various types
    """

    instances = []
    stream = None
-    rule_type = None
+    payload = None

-    def __init__(self, stream):
+    def __init__(self, payload, stream):
+        self.payload = payload
        self.stream = stream

    def __enter__(self):
-        _, stream = self.stream
-
-        self.instances = []
-        self.existing_posts = {
-            post.remote_identifier: post
-            for post in Post.objects.filter(rule=stream.rule, rule__type=self.rule_type)
-        }
-
-        self.create_posts(self.stream)
-
        return self

    def __exit__(self, *args, **kwargs):
        pass

-    def create_posts(self, stream):
+    def build(self):
        raise NotImplementedError

    def sanitize_fragment(self, fragment):

@@ -97,10 +85,6 @@ class Builder:
            strip_comments=True,
        )

-    def save(self):
-        for post in self.instances:
-            post.save()
-
    class Meta:
        abstract = True

@@ -118,46 +102,59 @@ class Collector:
        self.builder = builder if builder else self.builder

    def collect(self, rules=None):
-        with self.client(rules=rules) as client:
-            for data, stream in client:
-                with self.builder((data, stream)) as builder:
-                    builder.save()
+        raise NotImplementedError

    class Meta:
        abstract = True


-class WebsiteStream(Stream):
-    def __init__(self, url):
-        self.url = url
-
-    def read(self):
-        response = fetch(self.url)
-
-        return (self.parse(response.content), self)
-
-    def parse(self, payload):
-        try:
-            return BeautifulSoup(payload, "lxml")
-        except TypeError:
-            raise StreamParseException("Could not parse given HTML")
-
-
-class URLBuilder(Builder):
-    def __enter__(self):
-        return self
-
-    def build(self):
-        data, stream = self.stream
-        rule = stream.rule
-
-        try:
-            url = data["feed"]["link"]
-        except (KeyError, TypeError):
-            url = None
-
-        if url:
-            rule.website_url = url
-            rule.save()
-
-        return rule, url
+class PostBuilder(Builder):
+    rule_type = None
+
+    def __enter__(self):
+        self.existing_posts = {
+            post.remote_identifier: post
+            for post in Post.objects.filter(
+                rule=self.stream.rule, rule__type=self.rule_type
+            )
+        }
+
+        return super().__enter__()
+
+    def save(self):
+        for post in self.instances:
+            post.save()
+
+    class Meta:
+        abstract = True
+
+
+class PostStream(Stream):
+    rule_type = None
+
+
+class PostClient(Client):
+    stream = PostStream
+
+    def __init__(self, rules=[]):
+        if rules:
+            self.rules = rules
+        else:
+            self.rules = CollectionRule.objects.enabled().filter(
+                type=self.stream.rule_type
+            )
+
+    def set_rule_error(self, rule, exception):
+        length = rule._meta.get_field("error").max_length
+
+        rule.error = exception.message[-length:]
+        rule.succeeded = False
+
+
+class PostCollector(Collector):
+    def collect(self, rules=None):
+        with self.client(rules=rules) as client:
+            for payload, stream in client:
+                with self.builder(payload, stream) as builder:
+                    builder.build()
+                    builder.save()
@@ -9,6 +9,6 @@ class RuleTypeChoices(TextChoices):


class TwitterPostTypeChoices(TextChoices):
-    photo = "photo", _("Poto")
+    photo = "photo", _("Photo")
    video = "video", _("Video")
    animated_gif = "animated_gif", _("GIF")
@@ -1,16 +1,12 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urljoin, urlparse

-from newsreader.news.collection.base import (
-    Builder,
-    Client,
-    Collector,
-    Stream,
-    URLBuilder,
-    WebsiteStream,
-)
-from newsreader.news.collection.exceptions import StreamException
+from bs4 import BeautifulSoup
+
+from newsreader.news.collection.base import Builder, Client, Collector, Stream
+from newsreader.news.collection.exceptions import StreamException, StreamParseException
from newsreader.news.collection.feed import FeedClient
+from newsreader.news.collection.utils import fetch


LINK_RELS = [

@@ -21,17 +17,45 @@ LINK_RELS = [
]


+class WebsiteStream(Stream):
+    def read(self):
+        response = fetch(self.rule.website_url)
+
+        return self.parse(response.content), self
+
+    def parse(self, payload):
+        try:
+            return BeautifulSoup(payload, features="lxml")
+        except TypeError:
+            raise StreamParseException("Could not parse given HTML")
+
+
+class WebsiteURLBuilder(Builder):
+    def build(self):
+        try:
+            url = self.payload["feed"]["link"]
+        except (KeyError, TypeError):
+            url = None
+
+        self.instances = [(self.stream, url)] if url else []
+
+    def save(self):
+        for stream, url in self.instances:
+            stream.rule.website_url = url
+            stream.rule.save()
+
+
class FaviconBuilder(Builder):
    def build(self):
-        rule, soup = self.stream
+        rule = self.stream.rule

-        url = self.parse(soup, rule.website_url)
+        url = self.parse()

-        if url:
-            rule.favicon = url
-            rule.save()
+        self.instances = [(rule, url)] if url else []

-    def parse(self, soup, website_url):
+    def parse(self):
+        soup = self.payload
+
        if not soup.head:
            return

@@ -44,9 +68,9 @@ class FaviconBuilder(Builder):
        parsed_url = urlparse(url)

        if not parsed_url.scheme and not parsed_url.netloc:
-            if not website_url:
+            if not self.stream.rule.website_url:
                return
-            return urljoin(website_url, url)
+            return urljoin(self.stream.rule.website_url, url)
        elif not parsed_url.scheme:
            return urljoin(f"https://{parsed_url.netloc}", parsed_url.path)

@@ -73,6 +97,11 @@ class FaviconBuilder(Builder):
        elif icons:
            return icons.pop()

+    def save(self):
+        for rule, favicon_url in self.instances:
+            rule.favicon = favicon_url
+            rule.save()
+

class FaviconClient(Client):
    stream = WebsiteStream

@@ -82,39 +111,35 @@ class FaviconClient(Client):

    def __enter__(self):
        with ThreadPoolExecutor(max_workers=10) as executor:
-            futures = {
-                executor.submit(stream.read): rule for rule, stream in self.streams
-            }
+            futures = [executor.submit(stream.read) for stream in self.streams]

            for future in as_completed(futures):
-                rule = futures[future]
-
                try:
-                    response_data, stream = future.result()
+                    payload, stream = future.result()
                except StreamException:
                    continue

-                yield (rule, response_data)
+                yield payload, stream


class FaviconCollector(Collector):
    feed_client, favicon_client = (FeedClient, FaviconClient)
-    url_builder, favicon_builder = (URLBuilder, FaviconBuilder)
+    url_builder, favicon_builder = (WebsiteURLBuilder, FaviconBuilder)

    def collect(self, rules=None):
        streams = []

        with self.feed_client(rules=rules) as client:
-            for data, stream in client:
-                with self.url_builder((data, stream)) as builder:
-                    rule, url = builder.build()
+            for payload, stream in client:
+                with self.url_builder(payload, stream) as builder:
+                    builder.build()
+                    builder.save()

-                if not url:
-                    continue
-
-                streams.append((rule, WebsiteStream(url)))
+                if builder.instances:
+                    streams.append(WebsiteStream(stream.rule))

        with self.favicon_client(streams) as client:
-            for rule, data in client:
-                with self.favicon_builder((rule, data)) as builder:
+            for payload, stream in client:
+                with self.favicon_builder(payload, stream) as builder:
                    builder.build()
+                    builder.save()
@@ -10,7 +10,12 @@ import pytz

from feedparser import parse

-from newsreader.news.collection.base import Builder, Client, Collector, Stream
+from newsreader.news.collection.base import (
+    PostBuilder,
+    PostClient,
+    PostCollector,
+    PostStream,
+)
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.exceptions import (
    StreamDeniedException,

@@ -19,7 +24,6 @@ from newsreader.news.collection.exceptions import (
    StreamParseException,
    StreamTimeOutException,
)
-from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.utils import (
    build_publication_date,
    fetch,

@@ -31,19 +35,10 @@ from newsreader.news.core.models import Post
logger = logging.getLogger(__name__)


-class FeedBuilder(Builder):
+class FeedBuilder(PostBuilder):
    rule__type = RuleTypeChoices.feed

-    def create_posts(self, stream):
-        data, stream = stream
-
-        with FeedDuplicateHandler(stream.rule) as duplicate_handler:
-            entries = data.get("entries", [])
-
-            instances = self.build(entries, stream.rule)
-            self.instances = duplicate_handler.check(instances)
-
-    def build(self, entries, rule):
+    def build(self):
        field_mapping = {
            "id": "remote_identifier",
            "title": "title",

@@ -52,11 +47,14 @@ class FeedBuilder(Builder):
            "published_parsed": "publication_date",
            "author": "author",
        }
+        tz = pytz.timezone(self.stream.rule.timezone)
+        instances = []

-        tz = pytz.timezone(rule.timezone)
+        with FeedDuplicateHandler(self.stream.rule) as duplicate_handler:
+            entries = self.payload.get("entries", [])

            for entry in entries:
-                data = {"rule_id": rule.pk}
+                data = {"rule_id": self.stream.rule.pk}

                for field, model_field in field_mapping.items():
                    if not field in entry:

@@ -78,14 +76,18 @@ class FeedBuilder(Builder):
                if not body or len(body) < len(content):
                    data["body"] = content

-            yield Post(**data)
+                instances.append(Post(**data))
+
+            self.instances = duplicate_handler.check(instances)

    def get_content(self, items):
        content = "\n ".join([item.get("value") for item in items])
        return self.sanitize_fragment(content)


-class FeedStream(Stream):
+class FeedStream(PostStream):
+    rule_type = RuleTypeChoices.feed
+
    def read(self):
        response = fetch(self.rule.url)

@@ -99,17 +101,9 @@ class FeedStream(Stream):
            raise StreamParseException(response=response, message=message) from e


-class FeedClient(Client):
+class FeedClient(PostClient):
    stream = FeedStream

-    def __init__(self, rules=[]):
-        if rules:
-            self.rules = rules
-        else:
-            self.rules = CollectionRule.objects.filter(
-                enabled=True, type=RuleTypeChoices.feed
-            )
-
    def __enter__(self):
        streams = [self.stream(rule) for rule in self.rules]

@@ -120,13 +114,13 @@ class FeedClient(Client):
                stream = futures[future]

                try:
-                    response_data = future.result()
+                    payload = future.result()

                    stream.rule.error = None
                    stream.rule.succeeded = True
                    stream.rule.last_suceeded = timezone.now()

-                    yield response_data
+                    yield payload
                except (StreamNotFoundException, StreamTimeOutException) as e:
                    logger.warning(f"Request failed for {stream.rule.url}")

@@ -142,14 +136,8 @@ class FeedClient(Client):
                finally:
                    stream.rule.save()

-    def set_rule_error(self, rule, exception):
-        length = rule._meta.get_field("error").max_length
-
-        rule.error = exception.message[-length:]
-        rule.succeeded = False


-class FeedCollector(Collector):
+class FeedCollector(PostCollector):
    builder = FeedBuilder
    client = FeedClient
@@ -15,7 +15,12 @@ from django.utils.html import format_html
import pytz
import requests

-from newsreader.news.collection.base import Builder, Client, Collector, Stream
+from newsreader.news.collection.base import (
+    PostBuilder,
+    PostClient,
+    PostCollector,
+    PostStream,
+)
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.constants import (
    WHITELISTED_ATTRIBUTES,

@@ -92,21 +97,17 @@ def get_reddit_access_token(code, user):
    return response_data["access_token"], response_data["refresh_token"]


-class RedditBuilder(Builder):
-    rule__type = RuleTypeChoices.subreddit
+class RedditBuilder(PostBuilder):
+    rule_type = RuleTypeChoices.subreddit

-    def create_posts(self, stream):
-        data, stream = stream
-        posts = []
+    def build(self):
+        results = {}

-        if not "data" in data or not "children" in data["data"]:
+        if not "data" in self.payload or not "children" in self.payload["data"]:
            return

-        posts = data["data"]["children"]
-        self.instances = self.build(posts, stream.rule)
-
-    def build(self, posts, rule):
-        results = {}
+        posts = self.payload["data"]["children"]
+        rule = self.stream.rule

        for post in posts:
            if not "data" in post or post["kind"] != REDDIT_POST:

@@ -170,7 +171,9 @@ class RedditBuilder(Builder):
                parsed_date = datetime.fromtimestamp(post["data"]["created_utc"])
                created_date = pytz.utc.localize(parsed_date)
            except (OverflowError, OSError):
-                logging.warning(f"Failed parsing timestamp from {url_fragment}")
+                logging.warning(
+                    f"Failed parsing timestamp from {REDDIT_URL}{post_url_fragment}"
+                )
                created_date = timezone.now()

            post_data = {

@@ -194,52 +197,11 @@ class RedditBuilder(Builder):

            results[remote_identifier] = Post(**post_data)

-        return results.values()
+        self.instances = results.values()


-class RedditScheduler:
-    max_amount = RATE_LIMIT
-    max_user_amount = RATE_LIMIT / 4
-
-    def __init__(self, subreddits=[]):
-        if not subreddits:
-            self.subreddits = CollectionRule.objects.filter(
-                type=RuleTypeChoices.subreddit,
-                user__reddit_access_token__isnull=False,
-                user__reddit_refresh_token__isnull=False,
-                enabled=True,
-            ).order_by("last_suceeded")[:200]
-        else:
-            self.subreddits = subreddits
-
-    def get_scheduled_rules(self):
-        rule_mapping = {}
-        current_amount = 0
-
-        for subreddit in self.subreddits:
-            user_pk = subreddit.user.pk
-
-            if current_amount == self.max_amount:
-                break
-
-            if user_pk in rule_mapping:
-                max_amount_reached = len(rule_mapping[user_pk]) == self.max_user_amount
-
-                if max_amount_reached:
-                    continue
-
-                rule_mapping[user_pk].append(subreddit)
-                current_amount += 1
-
-                continue
-
-            rule_mapping[user_pk] = [subreddit]
-            current_amount += 1
-
-        return list(rule_mapping.values())
-
-
-class RedditStream(Stream):
+class RedditStream(PostStream):
+    rule_type = RuleTypeChoices.subreddit
+
    headers = {}
    user = None

@@ -261,16 +223,13 @@ class RedditStream(Stream):
            return response.json()
        except JSONDecodeError as e:
            raise StreamParseException(
-                response=response, message=f"Failed parsing json"
+                response=response, message="Failed parsing json"
            ) from e


-class RedditClient(Client):
+class RedditClient(PostClient):
    stream = RedditStream

-    def __init__(self, rules=[]):
-        self.rules = rules
-
    def __enter__(self):
        streams = [[self.stream(rule) for rule in batch] for batch in self.rules]
        rate_limitted = False

@@ -324,13 +283,49 @@ class RedditClient(Client):
                finally:
                    stream.rule.save()

-    def set_rule_error(self, rule, exception):
-        length = rule._meta.get_field("error").max_length
-
-        rule.error = exception.message[-length:]
-        rule.succeeded = False


-class RedditCollector(Collector):
+class RedditCollector(PostCollector):
    builder = RedditBuilder
    client = RedditClient
+
+
+class RedditScheduler:
+    max_amount = RATE_LIMIT
+    max_user_amount = RATE_LIMIT / 4
+
+    def __init__(self, subreddits=[]):
+        if not subreddits:
+            self.subreddits = CollectionRule.objects.filter(
+                type=RuleTypeChoices.subreddit,
+                user__reddit_access_token__isnull=False,
+                user__reddit_refresh_token__isnull=False,
+                enabled=True,
+            ).order_by("last_suceeded")[:200]
+        else:
+            self.subreddits = subreddits
+
+    def get_scheduled_rules(self):
+        rule_mapping = {}
+        current_amount = 0
+
+        for subreddit in self.subreddits:
+            user_pk = subreddit.user.pk
+
+            if current_amount == self.max_amount:
+                break
+
+            if user_pk in rule_mapping:
+                max_amount_reached = len(rule_mapping[user_pk]) == self.max_user_amount
+
+                if max_amount_reached:
+                    continue
+
+                rule_mapping[user_pk].append(subreddit)
+                current_amount += 1
+
+                continue
+
+            rule_mapping[user_pk] = [subreddit]
+            current_amount += 1
+
+        return list(rule_mapping.values())
@@ -1,3 +1,5 @@
+from unittest.mock import Mock
+
from django.test import TestCase

from newsreader.news.collection.favicon import FaviconBuilder

@@ -12,8 +14,11 @@ class FaviconBuilderTestCase(TestCase):
    def test_simple(self):
        rule = CollectionRuleFactory(favicon=None)

-        with FaviconBuilder((rule, simple_mock)) as builder:
+        with FaviconBuilder(simple_mock, Mock(rule=rule)) as builder:
            builder.build()
+            builder.save()
+
+        rule.refresh_from_db()

        self.assertEquals(rule.favicon, "https://www.bbc.com/favicon.ico")

@@ -22,24 +27,33 @@ class FaviconBuilderTestCase(TestCase):
            website_url="https://www.theguardian.com/", favicon=None
        )

-        with FaviconBuilder((rule, mock_without_url)) as builder:
+        with FaviconBuilder(mock_without_url, Mock(rule=rule)) as builder:
            builder.build()
+            builder.save()
+
+        rule.refresh_from_db()

        self.assertEquals(rule.favicon, "https://www.theguardian.com/favicon.ico")

    def test_without_header(self):
        rule = CollectionRuleFactory(favicon=None)

-        with FaviconBuilder((rule, mock_without_header)) as builder:
+        with FaviconBuilder(mock_without_header, Mock(rule=rule)) as builder:
            builder.build()
+            builder.save()
+
+        rule.refresh_from_db()

        self.assertEquals(rule.favicon, None)

    def test_weird_path(self):
        rule = CollectionRuleFactory(favicon=None)

-        with FaviconBuilder((rule, mock_with_weird_path)) as builder:
+        with FaviconBuilder(mock_with_weird_path, Mock(rule=rule)) as builder:
            builder.build()
+            builder.save()
+
+        rule.refresh_from_db()

        self.assertEquals(
            rule.favicon, "https://www.theguardian.com/jabadaba/doe/favicon.ico"

@@ -48,15 +62,21 @@ class FaviconBuilderTestCase(TestCase):
    def test_other_url(self):
        rule = CollectionRuleFactory(favicon=None)

-        with FaviconBuilder((rule, mock_with_other_url)) as builder:
+        with FaviconBuilder(mock_with_other_url, Mock(rule=rule)) as builder:
            builder.build()
+            builder.save()
+
+        rule.refresh_from_db()

        self.assertEquals(rule.favicon, "https://www.theguardian.com/icon.png")

    def test_url_with_favicon_takes_precedence(self):
        rule = CollectionRuleFactory(favicon=None)

-        with FaviconBuilder((rule, mock_with_multiple_icons)) as builder:
+        with FaviconBuilder(mock_with_multiple_icons, Mock(rule=rule)) as builder:
            builder.build()
+            builder.save()
+
+        rule.refresh_from_db()

        self.assertEquals(rule.favicon, "https://www.bbc.com/favicon.ico")
@@ -1,4 +1,4 @@
-from unittest.mock import MagicMock
+from unittest.mock import Mock

from django.test import TestCase


@@ -19,22 +19,22 @@ class FaviconClientTestCase(TestCase):

    def test_simple(self):
        rule = CollectionRuleFactory()
-        stream = MagicMock(url="https://www.bbc.com")
+        stream = Mock(url="https://www.bbc.com", rule=rule)
        stream.read.return_value = (simple_mock, stream)

-        with FaviconClient([(rule, stream)]) as client:
-            for rule, data in client:
-                self.assertEquals(rule.pk, rule.pk)
-                self.assertEquals(data, simple_mock)
+        with FaviconClient([stream]) as client:
+            for payload, stream in client:
+                self.assertEquals(stream.rule.pk, rule.pk)
+                self.assertEquals(payload, simple_mock)

        stream.read.assert_called_once_with()

    def test_client_catches_stream_exception(self):
        rule = CollectionRuleFactory(error=None, succeeded=True)
-        stream = MagicMock(url="https://www.bbc.com")
+        stream = Mock(url="https://www.bbc.com", rule=rule)
        stream.read.side_effect = StreamException

-        with FaviconClient([(rule, stream)]) as client:
+        with FaviconClient([stream]) as client:
            for rule, data in client:
                pass

@@ -46,10 +46,10 @@ class FaviconClientTestCase(TestCase):

    def test_client_catches_stream_not_found_exception(self):
        rule = CollectionRuleFactory(error=None, succeeded=True)
-        stream = MagicMock(url="https://www.bbc.com")
+        stream = Mock(url="https://www.bbc.com", rule=rule)
        stream.read.side_effect = StreamNotFoundException

-        with FaviconClient([(rule, stream)]) as client:
+        with FaviconClient([stream]) as client:
            for rule, data in client:
                pass

@@ -61,10 +61,10 @@ class FaviconClientTestCase(TestCase):

    def test_client_catches_stream_denied_exception(self):
        rule = CollectionRuleFactory(error=None, succeeded=True)
-        stream = MagicMock(url="https://www.bbc.com")
+        stream = Mock(url="https://www.bbc.com", rule=rule)
        stream.read.side_effect = StreamDeniedException

-        with FaviconClient([(rule, stream)]) as client:
+        with FaviconClient([stream]) as client:
            for rule, data in client:
                pass

@@ -76,10 +76,10 @@ class FaviconClientTestCase(TestCase):

    def test_client_catches_stream_timed_out(self):
        rule = CollectionRuleFactory(error=None, succeeded=True)
-        stream = MagicMock(url="https://www.bbc.com")
+        stream = Mock(url="https://www.bbc.com", rule=rule)
        stream.read.side_effect = StreamTimeOutException

-        with FaviconClient([(rule, stream)]) as client:
+        with FaviconClient([stream]) as client:
            for rule, data in client:
                pass
@@ -1,4 +1,4 @@
-from unittest.mock import MagicMock, patch
+from unittest.mock import Mock, patch

from django.test import TestCase


@@ -38,8 +38,8 @@ class FaviconCollectorTestCase(TestCase):
    def test_simple(self):
        rule = CollectionRuleFactory(succeeded=True, error=None)

-        self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
-        self.mocked_website_read.return_value = (website_mock, MagicMock())
+        self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
+        self.mocked_website_read.return_value = (website_mock, Mock(rule=rule))

        collector = FaviconCollector()
        collector.collect()

@@ -54,8 +54,11 @@ class FaviconCollectorTestCase(TestCase):
    def test_empty_stream(self):
        rule = CollectionRuleFactory(succeeded=True, error=None)

-        self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
-        self.mocked_website_read.return_value = (BeautifulSoup("", "lxml"), MagicMock())
+        self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
+        self.mocked_website_read.return_value = (
+            BeautifulSoup("", "lxml"),
+            Mock(rule=rule),
+        )

        collector = FaviconCollector()
        collector.collect()

@@ -70,7 +73,7 @@ class FaviconCollectorTestCase(TestCase):
    def test_not_found(self):
        rule = CollectionRuleFactory(succeeded=True, error=None)

-        self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
+        self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
        self.mocked_website_read.side_effect = StreamNotFoundException

        collector = FaviconCollector()

@@ -86,7 +89,7 @@ class FaviconCollectorTestCase(TestCase):
    def test_denied(self):
        rule = CollectionRuleFactory(succeeded=True, error=None)

-        self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
+        self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
        self.mocked_website_read.side_effect = StreamDeniedException

        collector = FaviconCollector()

@@ -102,7 +105,7 @@ class FaviconCollectorTestCase(TestCase):
    def test_forbidden(self):
        rule = CollectionRuleFactory(succeeded=True, error=None)

-        self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
+        self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
        self.mocked_website_read.side_effect = StreamForbiddenException

        collector = FaviconCollector()

@@ -118,7 +121,7 @@ class FaviconCollectorTestCase(TestCase):
    def test_timed_out(self):
        rule = CollectionRuleFactory(succeeded=True, error=None)

-        self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
+        self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
        self.mocked_website_read.side_effect = StreamTimeOutException

        collector = FaviconCollector()

@@ -134,7 +137,7 @@ class FaviconCollectorTestCase(TestCase):
    def test_wrong_stream_content_type(self):
        rule = CollectionRuleFactory(succeeded=True, error=None)

-        self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
+        self.mocked_feed_client.return_value = [(feed_mock, Mock(rule=rule))]
        self.mocked_website_read.side_effect = StreamParseException

        collector = FaviconCollector()
@@ -1,5 +1,5 @@
from datetime import date, datetime, time
-from unittest.mock import MagicMock
+from unittest.mock import Mock

from django.test import TestCase
from django.utils import timezone

@@ -24,9 +24,10 @@ class FeedBuilderTestCase(TestCase):
    def test_basic_entry(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((simple_mock, mock_stream)) as builder:
+        with builder(simple_mock, mock_stream) as builder:
+            builder.build()
            builder.save()

        post = Post.objects.get()

@@ -55,9 +56,10 @@ class FeedBuilderTestCase(TestCase):
    def test_multiple_entries(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((multiple_mock, mock_stream)) as builder:
+        with builder(multiple_mock, mock_stream) as builder:
+            builder.build()
            builder.save()

        posts = Post.objects.order_by("-publication_date")

@@ -116,9 +118,10 @@ class FeedBuilderTestCase(TestCase):
    def test_entries_without_remote_identifier(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_without_identifier, mock_stream)) as builder:
+        with builder(mock_without_identifier, mock_stream) as builder:
+            builder.build()
            builder.save()

        posts = Post.objects.order_by("-publication_date")

@@ -155,9 +158,10 @@ class FeedBuilderTestCase(TestCase):
    def test_entry_without_publication_date(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_without_publish_date, mock_stream)) as builder:
+        with builder(mock_without_publish_date, mock_stream) as builder:
+            builder.build()
            builder.save()

        posts = Post.objects.order_by("-publication_date")

@@ -187,9 +191,10 @@ class FeedBuilderTestCase(TestCase):
    def test_entry_without_url(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_without_url, mock_stream)) as builder:
+        with builder(mock_without_url, mock_stream) as builder:
+            builder.build()
            builder.save()

        posts = Post.objects.order_by("-publication_date")

@@ -213,9 +218,10 @@ class FeedBuilderTestCase(TestCase):
    def test_entry_without_body(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_without_body, mock_stream)) as builder:
+        with builder(mock_without_body, mock_stream) as builder:
+            builder.build()
            builder.save()

        posts = Post.objects.order_by("-publication_date")

@@ -247,9 +253,10 @@ class FeedBuilderTestCase(TestCase):
    def test_entry_without_author(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_without_author, mock_stream)) as builder:
+        with builder(mock_without_author, mock_stream) as builder:
+            builder.build()
            builder.save()

        posts = Post.objects.order_by("-publication_date")

@@ -275,9 +282,10 @@ class FeedBuilderTestCase(TestCase):
    def test_empty_entries(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_without_entries, mock_stream)) as builder:
+        with builder(mock_without_entries, mock_stream) as builder:
+            builder.build()
            builder.save()

        self.assertEquals(Post.objects.count(), 0)

@@ -285,7 +293,7 @@ class FeedBuilderTestCase(TestCase):
    def test_update_entries(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

        existing_first_post = FeedPostFactory.create(
            remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7", rule=rule

@@ -295,7 +303,8 @@ class FeedBuilderTestCase(TestCase):
            remote_identifier="a5479c66-8fae-11e9-8422-00163ef6bee7", rule=rule
        )

-        with builder((mock_with_update_entries, mock_stream)) as builder:
+        with builder(mock_with_update_entries, mock_stream) as builder:
+            builder.build()
            builder.save()

        self.assertEquals(Post.objects.count(), 3)

@@ -315,9 +324,10 @@ class FeedBuilderTestCase(TestCase):
    def test_html_sanitizing(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_with_html, mock_stream)) as builder:
+        with builder(mock_with_html, mock_stream) as builder:
+            builder.build()
            builder.save()

        post = Post.objects.get()

@@ -337,9 +347,10 @@ class FeedBuilderTestCase(TestCase):
    def test_long_author_text_is_truncated(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_with_long_author, mock_stream)) as builder:
+        with builder(mock_with_long_author, mock_stream) as builder:
+            builder.build()
            builder.save()

        post = Post.objects.get()

@@ -351,9 +362,10 @@ class FeedBuilderTestCase(TestCase):
    def test_long_title_text_is_truncated(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_with_long_title, mock_stream)) as builder:
+        with builder(mock_with_long_title, mock_stream) as builder:
+            builder.build()
            builder.save()

        post = Post.objects.get()

@@ -366,9 +378,10 @@ class FeedBuilderTestCase(TestCase):
    def test_long_title_exotic_title(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_with_long_exotic_title, mock_stream)) as builder:
+        with builder(mock_with_long_exotic_title, mock_stream) as builder:
+            builder.build()
            builder.save()

        post = Post.objects.get()

@@ -381,9 +394,10 @@ class FeedBuilderTestCase(TestCase):
    def test_content_detail_is_prioritized_if_longer(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_with_longer_content_detail, mock_stream)) as builder:
+        with builder(mock_with_longer_content_detail, mock_stream) as builder:
+            builder.build()
            builder.save()

        post = Post.objects.get()

@@ -398,9 +412,10 @@ class FeedBuilderTestCase(TestCase):
    def test_content_detail_is_not_prioritized_if_shorter(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_with_shorter_content_detail, mock_stream)) as builder:
+        with builder(mock_with_shorter_content_detail, mock_stream) as builder:
+            builder.build()
            builder.save()

        post = Post.objects.get()

@@ -414,9 +429,10 @@ class FeedBuilderTestCase(TestCase):
    def test_content_detail_is_concatinated(self):
        builder = FeedBuilder
        rule = FeedFactory()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

-        with builder((mock_with_multiple_content_detail, mock_stream)) as builder:
+        with builder(mock_with_multiple_content_detail, mock_stream) as builder:
+            builder.build()
            builder.save()

        post = Post.objects.get()
@@ -1,4 +1,4 @@
-from unittest.mock import MagicMock, patch
+from unittest.mock import Mock, patch

from django.test import TestCase
from django.utils.lorem_ipsum import words

@@ -28,7 +28,7 @@ class FeedClientTestCase(TestCase):

    def test_client_retrieves_single_rules(self):
        rule = FeedFactory.create()
-        mock_stream = MagicMock(rule=rule)
+        mock_stream = Mock(rule=rule)

        self.mocked_read.return_value = (simple_mock, mock_stream)

@@ -1,6 +1,6 @@
from datetime import date, datetime, time
from time import struct_time
-from unittest.mock import MagicMock, patch
+from unittest.mock import Mock, patch

from django.test import TestCase
from django.utils import timezone

@@ -56,7 +56,7 @@ class FeedCollectorTestCase(TestCase):

    @freeze_time("2019-10-30 12:30:00")
    def test_emtpy_batch(self):
-        self.mocked_fetch.return_value = MagicMock()
+        self.mocked_fetch.return_value = Mock()
        self.mocked_parse.return_value = empty_mock
        rule = FeedFactory()

@@ -1,4 +1,4 @@
-from unittest.mock import MagicMock, patch
+from unittest.mock import Mock, patch

from django.test import TestCase


@@ -27,7 +27,7 @@ class FeedStreamTestCase(TestCase):
        patch.stopall()

    def test_simple_stream(self):
-        self.mocked_fetch.return_value = MagicMock(content=simple_mock)
+        self.mocked_fetch.return_value = Mock(content=simple_mock)

        rule = FeedFactory()
        stream = FeedStream(rule)

@@ -95,7 +95,7 @@ class FeedStreamTestCase(TestCase):

    @patch("newsreader.news.collection.feed.parse")
    def test_stream_raises_parse_exception(self, mocked_parse):
-        self.mocked_fetch.return_value = MagicMock()
+        self.mocked_fetch.return_value = Mock()
        mocked_parse.side_effect = TypeError

        rule = FeedFactory()
@ -1,5 +1,5 @@
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
|
||||||
|
|
@ -20,9 +20,10 @@ class RedditBuilderTestCase(TestCase):
|
||||||
builder = RedditBuilder
|
builder = RedditBuilder
|
||||||
|
|
||||||
subreddit = SubredditFactory()
|
subreddit = SubredditFactory()
|
||||||
mock_stream = MagicMock(rule=subreddit)
|
mock_stream = Mock(rule=subreddit)
|
||||||
|
|
||||||
with builder((simple_mock, mock_stream)) as builder:
|
with builder(simple_mock, mock_stream) as builder:
|
||||||
|
builder.build()
|
||||||
builder.save()
|
builder.save()
|
||||||
|
|
||||||
posts = {post.remote_identifier: post for post in Post.objects.all()}
|
posts = {post.remote_identifier: post for post in Post.objects.all()}
|
||||||
|
|
@ -65,9 +66,10 @@ class RedditBuilderTestCase(TestCase):
|
||||||
builder = RedditBuilder
|
builder = RedditBuilder
|
||||||
|
|
||||||
subreddit = SubredditFactory()
|
subreddit = SubredditFactory()
|
||||||
mock_stream = MagicMock(rule=subreddit)
|
mock_stream = Mock(rule=subreddit)
|
||||||
|
|
||||||
with builder((empty_mock, mock_stream)) as builder:
|
with builder(empty_mock, mock_stream) as builder:
|
||||||
|
builder.build()
|
||||||
builder.save()
|
builder.save()
|
||||||
|
|
||||||
self.assertEquals(Post.objects.count(), 0)
|
self.assertEquals(Post.objects.count(), 0)
|
||||||
|
|
@ -76,9 +78,10 @@ class RedditBuilderTestCase(TestCase):
|
||||||
builder = RedditBuilder
|
builder = RedditBuilder
|
||||||
|
|
||||||
subreddit = SubredditFactory()
|
subreddit = SubredditFactory()
|
||||||
mock_stream = MagicMock(rule=subreddit)
|
mock_stream = Mock(rule=subreddit)
|
||||||
|
|
||||||
with builder((unknown_mock, mock_stream)) as builder:
|
with builder(unknown_mock, mock_stream) as builder:
|
||||||
|
builder.build()
|
||||||
builder.save()
|
builder.save()
|
||||||
|
|
||||||
self.assertEquals(Post.objects.count(), 0)
|
self.assertEquals(Post.objects.count(), 0)
|
||||||
|
|
@ -95,9 +98,10 @@ class RedditBuilderTestCase(TestCase):
|
||||||
)
|
)
|
||||||
|
|
||||||
builder = RedditBuilder
|
builder = RedditBuilder
|
||||||
mock_stream = MagicMock(rule=subreddit)
|
mock_stream = Mock(rule=subreddit)
|
||||||
|
|
||||||
with builder((simple_mock, mock_stream)) as builder:
|
with builder(simple_mock, mock_stream) as builder:
|
||||||
|
builder.build()
|
||||||
builder.save()
|
builder.save()
|
||||||
|
|
||||||
posts = {post.remote_identifier: post for post in Post.objects.all()}
|
posts = {post.remote_identifier: post for post in Post.objects.all()}
|
||||||
|
|
@ -132,9 +136,10 @@ class RedditBuilderTestCase(TestCase):
|
||||||
builder = RedditBuilder
|
builder = RedditBuilder
|
||||||
|
|
||||||
subreddit = SubredditFactory()
|
subreddit = SubredditFactory()
|
||||||
mock_stream = MagicMock(rule=subreddit)
|
mock_stream = Mock(rule=subreddit)
|
||||||
|
|
||||||
with builder((unsanitized_mock, mock_stream)) as builder:
|
with builder(unsanitized_mock, mock_stream) as builder:
|
||||||
|
builder.build()
|
||||||
builder.save()
|
builder.save()
|
||||||
|
|
||||||
posts = {post.remote_identifier: post for post in Post.objects.all()}
|
posts = {post.remote_identifier: post for post in Post.objects.all()}
|
||||||
|
|
@ -149,9 +154,10 @@ class RedditBuilderTestCase(TestCase):
|
||||||
builder = RedditBuilder
|
builder = RedditBuilder
|
||||||
|
|
||||||
subreddit = SubredditFactory()
|
subreddit = SubredditFactory()
|
||||||
mock_stream = MagicMock(rule=subreddit)
|
mock_stream = Mock(rule=subreddit)
|
||||||
|
|
||||||
with builder((author_mock, mock_stream)) as builder:
|
with builder(author_mock, mock_stream) as builder:
|
||||||
|
builder.build()
|
||||||
builder.save()
|
builder.save()
|
||||||
|
|
||||||
posts = {post.remote_identifier: post for post in Post.objects.all()}
|
posts = {post.remote_identifier: post for post in Post.objects.all()}
|
||||||
|
|
@@ -166,9 +172,10 @@ class RedditBuilderTestCase(TestCase):
         builder = RedditBuilder
 
         subreddit = SubredditFactory()
-        mock_stream = MagicMock(rule=subreddit)
+        mock_stream = Mock(rule=subreddit)
 
-        with builder((title_mock, mock_stream)) as builder:
+        with builder(title_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -186,9 +193,10 @@ class RedditBuilderTestCase(TestCase):
         builder = RedditBuilder
 
         subreddit = SubredditFactory()
-        mock_stream = MagicMock(rule=subreddit)
+        mock_stream = Mock(rule=subreddit)
 
-        with builder((duplicate_mock, mock_stream)) as builder:
+        with builder(duplicate_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -200,13 +208,14 @@ class RedditBuilderTestCase(TestCase):
         builder = RedditBuilder
 
         subreddit = SubredditFactory()
-        mock_stream = MagicMock(rule=subreddit)
+        mock_stream = Mock(rule=subreddit)
 
         duplicate_post = RedditPostFactory(
             remote_identifier="hm0qct", rule=subreddit, title="foo"
         )
 
-        with builder((simple_mock, mock_stream)) as builder:
+        with builder(simple_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -231,9 +240,10 @@ class RedditBuilderTestCase(TestCase):
         builder = RedditBuilder
 
         subreddit = SubredditFactory()
-        mock_stream = MagicMock(rule=subreddit)
+        mock_stream = Mock(rule=subreddit)
 
-        with builder((image_mock, mock_stream)) as builder:
+        with builder(image_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -262,9 +272,10 @@ class RedditBuilderTestCase(TestCase):
         builder = RedditBuilder
 
         subreddit = SubredditFactory()
-        mock_stream = MagicMock(rule=subreddit)
+        mock_stream = Mock(rule=subreddit)
 
-        with builder((external_image_mock, mock_stream)) as builder:
+        with builder(external_image_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -302,9 +313,10 @@ class RedditBuilderTestCase(TestCase):
         builder = RedditBuilder
 
         subreddit = SubredditFactory()
-        mock_stream = MagicMock(rule=subreddit)
+        mock_stream = Mock(rule=subreddit)
 
-        with builder((video_mock, mock_stream)) as builder:
+        with builder(video_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -328,9 +340,10 @@ class RedditBuilderTestCase(TestCase):
         builder = RedditBuilder
 
         subreddit = SubredditFactory()
-        mock_stream = MagicMock(rule=subreddit)
+        mock_stream = Mock(rule=subreddit)
 
-        with builder((external_video_mock, mock_stream)) as builder:
+        with builder(external_video_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         post = Post.objects.get()
@@ -354,9 +367,10 @@ class RedditBuilderTestCase(TestCase):
         builder = RedditBuilder
 
         subreddit = SubredditFactory()
-        mock_stream = MagicMock(rule=subreddit)
+        mock_stream = Mock(rule=subreddit)
 
-        with builder((external_gifv_mock, mock_stream)) as builder:
+        with builder(external_gifv_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         post = Post.objects.get()
@@ -376,9 +390,10 @@ class RedditBuilderTestCase(TestCase):
         builder = RedditBuilder
 
         subreddit = SubredditFactory()
-        mock_stream = MagicMock(rule=subreddit)
+        mock_stream = Mock(rule=subreddit)
 
-        with builder((simple_mock, mock_stream)) as builder:
+        with builder(simple_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         post = Post.objects.get(remote_identifier="hngsj8")
@@ -400,9 +415,10 @@ class RedditBuilderTestCase(TestCase):
         builder = RedditBuilder
 
         subreddit = SubredditFactory()
-        mock_stream = MagicMock(rule=subreddit)
+        mock_stream = Mock(rule=subreddit)
 
-        with builder((unknown_mock, mock_stream)) as builder:
+        with builder(unknown_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         self.assertEquals(Post.objects.count(), 0)
@@ -1,10 +1,9 @@
-from unittest.mock import MagicMock, patch
+from unittest.mock import Mock, patch
 
 from django.test import TestCase
 
 from bs4 import BeautifulSoup
 
-from newsreader.news.collection.base import URLBuilder, WebsiteStream
 from newsreader.news.collection.exceptions import (
     StreamDeniedException,
     StreamException,
@@ -13,6 +12,7 @@ from newsreader.news.collection.exceptions import (
     StreamParseException,
     StreamTimeOutException,
 )
+from newsreader.news.collection.favicon import WebsiteStream, WebsiteURLBuilder
 from newsreader.news.collection.tests.factories import CollectionRuleFactory
 
 from .mocks import feed_mock_without_link, simple_feed_mock, simple_mock
@@ -20,117 +20,125 @@ from .mocks import feed_mock_without_link, simple_feed_mock, simple_mock
 
 class WebsiteStreamTestCase(TestCase):
     def setUp(self):
-        self.patched_fetch = patch("newsreader.news.collection.base.fetch")
+        self.patched_fetch = patch("newsreader.news.collection.favicon.fetch")
         self.mocked_fetch = self.patched_fetch.start()
 
     def tearDown(self):
         patch.stopall()
 
     def test_simple(self):
-        self.mocked_fetch.return_value = MagicMock(content=simple_mock)
+        self.mocked_fetch.return_value = Mock(content=simple_mock)
 
-        rule = CollectionRuleFactory()
-        stream = WebsiteStream(rule.url)
+        rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
+        stream = WebsiteStream(rule)
         return_value = stream.read()
 
-        self.mocked_fetch.assert_called_once_with(rule.url)
-        self.assertEquals(return_value, (BeautifulSoup(simple_mock, "lxml"), stream))
+        self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
+        self.assertEquals(
+            return_value, (BeautifulSoup(simple_mock, features="lxml"), stream)
+        )
 
     def test_raises_exception(self):
         self.mocked_fetch.side_effect = StreamException
 
-        rule = CollectionRuleFactory()
-        stream = WebsiteStream(rule.url)
+        rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
+        stream = WebsiteStream(rule)
 
         with self.assertRaises(StreamException):
             stream.read()
 
-        self.mocked_fetch.assert_called_once_with(rule.url)
+        self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
 
     def test_raises_denied_exception(self):
         self.mocked_fetch.side_effect = StreamDeniedException
 
-        rule = CollectionRuleFactory()
-        stream = WebsiteStream(rule.url)
+        rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
+        stream = WebsiteStream(rule)
 
         with self.assertRaises(StreamDeniedException):
             stream.read()
 
-        self.mocked_fetch.assert_called_once_with(rule.url)
+        self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
 
     def test_raises_stream_not_found_exception(self):
         self.mocked_fetch.side_effect = StreamNotFoundException
 
-        rule = CollectionRuleFactory()
-        stream = WebsiteStream(rule.url)
+        rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
+        stream = WebsiteStream(rule)
 
         with self.assertRaises(StreamNotFoundException):
             stream.read()
 
-        self.mocked_fetch.assert_called_once_with(rule.url)
+        self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
 
     def test_stream_raises_time_out_exception(self):
         self.mocked_fetch.side_effect = StreamTimeOutException
 
-        rule = CollectionRuleFactory()
-        stream = WebsiteStream(rule.url)
+        rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
+        stream = WebsiteStream(rule)
 
         with self.assertRaises(StreamTimeOutException):
             stream.read()
 
-        self.mocked_fetch.assert_called_once_with(rule.url)
+        self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
 
     def test_stream_raises_forbidden_exception(self):
         self.mocked_fetch.side_effect = StreamForbiddenException
 
-        rule = CollectionRuleFactory()
-        stream = WebsiteStream(rule.url)
+        rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
+        stream = WebsiteStream(rule)
 
         with self.assertRaises(StreamForbiddenException):
             stream.read()
 
-        self.mocked_fetch.assert_called_once_with(rule.url)
+        self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
 
-    @patch("newsreader.news.collection.base.WebsiteStream.parse")
+    @patch("newsreader.news.collection.favicon.WebsiteStream.parse")
     def test_stream_raises_parse_exception(self, mocked_parse):
-        self.mocked_fetch.return_value = MagicMock()
+        self.mocked_fetch.return_value = Mock()
         mocked_parse.side_effect = StreamParseException
 
-        rule = CollectionRuleFactory()
-        stream = WebsiteStream(rule.url)
+        rule = CollectionRuleFactory(website_url="https://www.bbc.co.uk/news/")
+        stream = WebsiteStream(rule)
 
         with self.assertRaises(StreamParseException):
             stream.read()
 
-        self.mocked_fetch.assert_called_once_with(rule.url)
+        self.mocked_fetch.assert_called_once_with("https://www.bbc.co.uk/news/")
 
 
-class URLBuilderTestCase(TestCase):
+class WebsiteURLBuilderTestCase(TestCase):
     def test_simple(self):
         initial_rule = CollectionRuleFactory()
 
-        with URLBuilder((simple_feed_mock, MagicMock(rule=initial_rule))) as builder:
-            rule, url = builder.build()
+        with WebsiteURLBuilder(simple_feed_mock, Mock(rule=initial_rule)) as builder:
+            builder.build()
+            builder.save()
 
-        self.assertEquals(rule.pk, initial_rule.pk)
-        self.assertEquals(url, "https://www.bbc.co.uk/news/")
+        initial_rule.refresh_from_db()
+
+        self.assertEquals(initial_rule.website_url, "https://www.bbc.co.uk/news/")
 
     def test_no_link(self):
-        initial_rule = CollectionRuleFactory()
+        initial_rule = CollectionRuleFactory(website_url=None)
 
-        with URLBuilder(
-            (feed_mock_without_link, MagicMock(rule=initial_rule))
+        with WebsiteURLBuilder(
+            feed_mock_without_link, Mock(rule=initial_rule)
         ) as builder:
-            rule, url = builder.build()
+            builder.build()
+            builder.save()
 
-        self.assertEquals(rule.pk, initial_rule.pk)
-        self.assertEquals(url, None)
+        initial_rule.refresh_from_db()
+
+        self.assertEquals(initial_rule.website_url, None)
 
     def test_no_data(self):
-        initial_rule = CollectionRuleFactory()
+        initial_rule = CollectionRuleFactory(website_url=None)
 
-        with URLBuilder((None, MagicMock(rule=initial_rule))) as builder:
-            rule, url = builder.build()
+        with WebsiteURLBuilder(None, Mock(rule=initial_rule)) as builder:
+            builder.build()
+            builder.save()
 
-        self.assertEquals(rule.pk, initial_rule.pk)
-        self.assertEquals(url, None)
+        initial_rule.refresh_from_db()
+
+        self.assertEquals(initial_rule.website_url, None)
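The WebsiteURLBuilder tests above show the new URL-discovery flow: instead of build() returning a (rule, url) tuple, the discovered link is written onto the rule and read back from the database after save(). A minimal sketch of that flow, with feed_data and stream as illustrative placeholders (stream only needs to expose .rule, as the mocked streams in the tests do):

    # Sketch only; the wiring is inferred from the tests above, not copied
    # from the new favicon module.
    with WebsiteURLBuilder(feed_data, stream) as builder:
        builder.build()
        builder.save()

    rule.refresh_from_db()
    # rule.website_url now holds e.g. "https://www.bbc.co.uk/news/",
    # or None when the feed carries no link.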
@@ -34,7 +34,8 @@ class TwitterBuilderTestCase(TestCase):
         profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
         mock_stream = MagicMock(rule=profile)
 
-        with builder((simple_mock, mock_stream)) as builder:
+        with builder(simple_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -83,7 +84,8 @@ class TwitterBuilderTestCase(TestCase):
         profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
         mock_stream = MagicMock(rule=profile)
 
-        with builder((image_mock, mock_stream)) as builder:
+        with builder(image_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -123,7 +125,8 @@ class TwitterBuilderTestCase(TestCase):
         profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
         mock_stream = MagicMock(rule=profile)
 
-        with builder((video_mock, mock_stream)) as builder:
+        with builder(video_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -165,7 +168,8 @@ class TwitterBuilderTestCase(TestCase):
         profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
         mock_stream = MagicMock(rule=profile)
 
-        with builder((video_without_bitrate_mock, mock_stream)) as builder:
+        with builder(video_without_bitrate_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -186,7 +190,8 @@ class TwitterBuilderTestCase(TestCase):
         profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
         mock_stream = MagicMock(rule=profile)
 
-        with builder((gif_mock, mock_stream)) as builder:
+        with builder(gif_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -211,7 +216,8 @@ class TwitterBuilderTestCase(TestCase):
         profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
         mock_stream = MagicMock(rule=profile)
 
-        with builder((retweet_mock, mock_stream)) as builder:
+        with builder(retweet_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -246,7 +252,8 @@ class TwitterBuilderTestCase(TestCase):
         profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
         mock_stream = MagicMock(rule=profile)
 
-        with builder((quoted_mock, mock_stream)) as builder:
+        with builder(quoted_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -276,7 +283,8 @@ class TwitterBuilderTestCase(TestCase):
         profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
         mock_stream = MagicMock(rule=profile)
 
-        with builder(([], mock_stream)) as builder:
+        with builder([], mock_stream) as builder:
+            builder.build()
             builder.save()
 
         self.assertEquals(Post.objects.count(), 0)
@@ -287,7 +295,8 @@ class TwitterBuilderTestCase(TestCase):
         profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
         mock_stream = MagicMock(rule=profile)
 
-        with builder((unsanitized_mock, mock_stream)) as builder:
+        with builder(unsanitized_mock, mock_stream) as builder:
+            builder.build()
             builder.save()
 
         posts = {post.remote_identifier: post for post in Post.objects.all()}
@@ -8,7 +8,12 @@ import pytz
 
 from ftfy import fix_text
 
-from newsreader.news.collection.base import Builder, Client, Collector, Stream
+from newsreader.news.collection.base import (
+    PostBuilder,
+    PostClient,
+    PostCollector,
+    PostStream,
+)
 from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices
 from newsreader.news.collection.utils import truncate_text
 from newsreader.news.core.models import Post
@@ -20,25 +25,14 @@ TWITTER_URL = "https://twitter.com"
 TWITTER_API_URL = "https://api.twitter.com/1.1"
 
 
-class TwitterScheduler:
-    pass
+class TwitterBuilder(PostBuilder):
+    rule_type = RuleTypeChoices.twitter
 
-
-class TwitterBuilder(Builder):
-    rule__type = RuleTypeChoices.twitter
-
-    def create_posts(self, stream):
-        data, stream = stream
-
-        if not data:
-            return
-
-        self.instances = self.build(data, stream.rule)
-
-    def build(self, posts, rule):
+    def build(self):
         results = {}
+        rule = self.stream.rule
 
-        for post in posts:
+        for post in self.payload:
             remote_identifier = post["id_str"]
             url = f"{TWITTER_URL}/{rule.screen_name}/{remote_identifier}"
 
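TwitterBuilder.build() now reads its input from self.payload and the rule from self.stream.rule, and stores the resulting Post objects on self.instances instead of returning them; save(), which is defined outside this hunk, presumably persists that collection. A hypothetical PostBuilder subclass following the same contract (only the field names rule and remote_identifier come from this commit, everything else is illustrative):

    # Hypothetical sketch, inferred from TwitterBuilder above; not part of
    # this commit.
    class ExampleBuilder(PostBuilder):
        rule_type = RuleTypeChoices.twitter

        def build(self):
            rule = self.stream.rule
            self.instances = [
                Post(rule=rule, remote_identifier=item["id_str"])
                for item in self.payload
            ]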
@@ -83,7 +77,7 @@ class TwitterBuilder(Builder):
 
             results[remote_identifier] = Post(**data)
 
-        return results.values()
+        self.instances = results.values()
 
     def get_media_entities(self, post):
         media_entities = post["extended_entities"]["media"]
@@ -133,13 +127,17 @@ class TwitterBuilder(Builder):
         return formatted_entities
 
 
-class TwitterStream(Stream):
+class TwitterStream(PostStream):
     pass
 
 
-class TwitterClient(Client):
+class TwitterClient(PostClient):
     pass
 
 
-class TwitterCollector(Collector):
+class TwitterCollector(PostCollector):
+    pass
+
+
+class TwitterScheduler:
     pass