Favicon fetcher
This commit is contained in:
parent
c75de1c469
commit
48a9b25545
33 changed files with 1238 additions and 228 deletions
|
|
@ -4,20 +4,9 @@ from newsreader.news.collection.models import CollectionRule
|
|||
|
||||
|
||||
class CollectionRuleAdmin(admin.ModelAdmin):
|
||||
fields = (
|
||||
"url",
|
||||
"name",
|
||||
"timezone",
|
||||
"category",
|
||||
)
|
||||
fields = ("url", "name", "timezone", "category", "favicon")
|
||||
|
||||
list_display = (
|
||||
"name",
|
||||
"category",
|
||||
"url",
|
||||
"last_suceeded",
|
||||
"succeeded",
|
||||
)
|
||||
list_display = ("name", "category", "url", "last_suceeded", "succeeded")
|
||||
|
||||
|
||||
admin.site.register(CollectionRule, CollectionRuleAdmin)
|
||||
|
|
|
|||
|
|
@ -2,9 +2,13 @@ from typing import ContextManager, Dict, List, Optional, Tuple
|
|||
|
||||
import requests
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from django.utils import timezone
|
||||
|
||||
from newsreader.news.collection.exceptions import StreamParseException
|
||||
from newsreader.news.collection.models import CollectionRule
|
||||
from newsreader.news.collection.utils import fetch
|
||||
|
||||
|
||||
class Stream:
|
||||
|
|
@ -12,9 +16,7 @@ class Stream:
|
|||
self.rule = rule
|
||||
|
||||
def read(self) -> Tuple:
|
||||
url = self.rule.url
|
||||
response = requests.get(url)
|
||||
return (self.parse(response.content), self)
|
||||
raise NotImplementedError
|
||||
|
||||
def parse(self, payload: bytes) -> Dict:
|
||||
raise NotImplementedError
|
||||
|
|
@ -45,7 +47,7 @@ class Client:
|
|||
class Builder:
|
||||
instances = []
|
||||
|
||||
def __init__(self, stream: Stream) -> None:
|
||||
def __init__(self, stream: Tuple) -> None:
|
||||
self.stream = stream
|
||||
|
||||
def __enter__(self) -> ContextManager:
|
||||
|
|
@ -81,3 +83,39 @@ class Collector:
|
|||
|
||||
class Meta:
|
||||
abstract = True
|
||||
|
||||
|
||||
class WebsiteStream(Stream):
|
||||
def __init__(self, url: str) -> None:
|
||||
self.url = url
|
||||
|
||||
def read(self) -> Tuple:
|
||||
response = fetch(self.url)
|
||||
|
||||
return (self.parse(response.content), self)
|
||||
|
||||
def parse(self, payload: bytes) -> BeautifulSoup:
|
||||
try:
|
||||
return BeautifulSoup(payload, "lxml")
|
||||
except TypeError:
|
||||
raise StreamParseException("Could not parse given HTML")
|
||||
|
||||
|
||||
class URLBuilder(Builder):
|
||||
def __enter__(self) -> ContextManager:
|
||||
return self
|
||||
|
||||
def build(self) -> Tuple:
|
||||
data, stream = self.stream
|
||||
rule = stream.rule
|
||||
|
||||
try:
|
||||
url = data["feed"]["link"]
|
||||
except (KeyError, TypeError):
|
||||
url = None
|
||||
|
||||
if url:
|
||||
rule.website_url = url
|
||||
rule.save()
|
||||
|
||||
return rule, url
|
||||
|
|
|
|||
113
src/newsreader/news/collection/favicon.py
Normal file
113
src/newsreader/news/collection/favicon.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import ContextManager, List, Optional
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from newsreader.news.collection.base import (
|
||||
Builder,
|
||||
Client,
|
||||
Collector,
|
||||
Stream,
|
||||
URLBuilder,
|
||||
WebsiteStream,
|
||||
)
|
||||
from newsreader.news.collection.exceptions import StreamException
|
||||
from newsreader.news.collection.feed import FeedClient
|
||||
|
||||
LINK_RELS = ["icon", "shortcut icon", "apple-touch-icon", "apple-touch-icon-precomposed"]
|
||||
|
||||
|
||||
class FaviconBuilder(Builder):
|
||||
def build(self) -> None:
|
||||
rule, soup = self.stream
|
||||
|
||||
url = self.parse(soup, rule.website_url)
|
||||
|
||||
if url:
|
||||
rule.favicon = url
|
||||
rule.save()
|
||||
|
||||
def parse(self, soup, website_url) -> Optional[str]:
|
||||
if not soup.head:
|
||||
return
|
||||
|
||||
links = soup.head.find_all("link")
|
||||
url = self.parse_links(links)
|
||||
|
||||
if not url:
|
||||
return
|
||||
|
||||
parsed_url = urlparse(url)
|
||||
|
||||
if not parsed_url.scheme and not parsed_url.netloc:
|
||||
if not website_url:
|
||||
return
|
||||
return urljoin(website_url, url)
|
||||
elif not parsed_url.scheme:
|
||||
return urljoin(f"https://{parsed_url.netloc}", parsed_url.path)
|
||||
|
||||
return url
|
||||
|
||||
def parse_links(self, links: List) -> Optional[str]:
|
||||
favicons = set()
|
||||
icons = set()
|
||||
|
||||
for link in links:
|
||||
if not "href" in link.attrs:
|
||||
continue
|
||||
|
||||
if "favicon" in link["href"]:
|
||||
favicons.add(link["href"].lower())
|
||||
|
||||
if "rel" in link.attrs:
|
||||
for rel in link["rel"]:
|
||||
if rel in LINK_RELS:
|
||||
icons.add(link["href"].lower())
|
||||
|
||||
if favicons:
|
||||
return favicons.pop()
|
||||
elif icons:
|
||||
return icons.pop()
|
||||
|
||||
|
||||
class FaviconClient(Client):
|
||||
stream = WebsiteStream
|
||||
|
||||
def __init__(self, streams: List) -> None:
|
||||
self.streams = streams
|
||||
|
||||
def __enter__(self) -> ContextManager:
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
futures = {executor.submit(stream.read): rule for rule, stream in self.streams}
|
||||
|
||||
for future in as_completed(futures):
|
||||
rule = futures[future]
|
||||
|
||||
try:
|
||||
response_data, stream = future.result()
|
||||
except StreamException:
|
||||
continue
|
||||
|
||||
yield (rule, response_data)
|
||||
|
||||
|
||||
class FaviconCollector(Collector):
|
||||
feed_client, favicon_client = (FeedClient, FaviconClient)
|
||||
url_builder, favicon_builder = (URLBuilder, FaviconBuilder)
|
||||
|
||||
def collect(self, rules: Optional[List] = None) -> None:
|
||||
streams = []
|
||||
|
||||
with self.feed_client(rules=rules) as client:
|
||||
for data, stream in client:
|
||||
with self.url_builder((data, stream)) as builder:
|
||||
rule, url = builder.build()
|
||||
|
||||
if not url:
|
||||
continue
|
||||
|
||||
streams.append((rule, WebsiteStream(url)))
|
||||
|
||||
with self.favicon_client(streams) as client:
|
||||
for rule, data in client:
|
||||
with self.favicon_builder((rule, data)) as builder:
|
||||
builder.build()
|
||||
|
|
@ -3,7 +3,6 @@ from typing import ContextManager, Dict, Generator, List, Optional, Tuple
|
|||
|
||||
import bleach
|
||||
import pytz
|
||||
import requests
|
||||
|
||||
from feedparser import parse
|
||||
|
||||
|
|
@ -19,7 +18,7 @@ from newsreader.news.collection.exceptions import (
|
|||
)
|
||||
from newsreader.news.collection.models import CollectionRule
|
||||
from newsreader.news.collection.response_handler import ResponseHandler
|
||||
from newsreader.news.collection.utils import build_publication_date
|
||||
from newsreader.news.collection.utils import build_publication_date, fetch
|
||||
from newsreader.news.posts.models import Post
|
||||
|
||||
|
||||
|
|
@ -92,10 +91,7 @@ class FeedBuilder(Builder):
|
|||
class FeedStream(Stream):
|
||||
def read(self) -> Tuple:
|
||||
url = self.rule.url
|
||||
response = requests.get(url)
|
||||
|
||||
with ResponseHandler(response) as response_handler:
|
||||
response_handler.handle_response()
|
||||
response = fetch(url)
|
||||
|
||||
return (self.parse(response.content), self)
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
from django.core.management.base import BaseCommand
|
||||
|
||||
from newsreader.news.collection.favicon import FaviconCollector
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = "Fetch favicons for collection rules"
|
||||
|
||||
def handle(self, *args, **options):
|
||||
collector = FaviconCollector()
|
||||
collector.collect()
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
# Generated by Django 2.2 on 2019-06-23 18:37
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [("collection", "0006_collectionrule_error")]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name="collectionrule",
|
||||
name="favicon",
|
||||
field=models.ImageField(default="favicons/default-favicon.ico", upload_to="favicons/"),
|
||||
)
|
||||
]
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
# Generated by Django 2.2 on 2019-06-23 18:47
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [("collection", "0007_auto_20190623_1837")]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name="collectionrule",
|
||||
name="favicon",
|
||||
field=models.URLField(blank=True, null=True),
|
||||
)
|
||||
]
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
# Generated by Django 2.2 on 2019-06-27 21:27
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [("collection", "0008_auto_20190623_1847")]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name="collectionrule",
|
||||
name="website_url",
|
||||
field=models.URLField(blank=True, editable=False, null=True),
|
||||
)
|
||||
]
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
# Generated by Django 2.2 on 2019-06-28 21:42
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [("collection", "0009_collectionrule_website_url")]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name="collectionrule", name="url", field=models.URLField(max_length=1024)
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name="collectionrule",
|
||||
name="website_url",
|
||||
field=models.URLField(blank=True, editable=False, max_length=1024, null=True),
|
||||
),
|
||||
]
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
import pytz
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import models
|
||||
from django.utils.translation import gettext as _
|
||||
|
||||
|
|
@ -8,8 +9,9 @@ class CollectionRule(models.Model):
|
|||
name = models.CharField(max_length=100)
|
||||
source = models.CharField(max_length=100)
|
||||
|
||||
url = models.URLField()
|
||||
favicon = models.ImageField(blank=True, null=True)
|
||||
url = models.URLField(max_length=1024)
|
||||
website_url = models.URLField(max_length=1024, editable=False, blank=True, null=True)
|
||||
favicon = models.URLField(blank=True, null=True)
|
||||
|
||||
timezone = models.CharField(
|
||||
choices=((timezone, timezone) for timezone in pytz.all_timezones),
|
||||
|
|
@ -23,7 +25,7 @@ class CollectionRule(models.Model):
|
|||
null=True,
|
||||
verbose_name=_("Category"),
|
||||
help_text=_("Posts from this rule will be tagged with this category"),
|
||||
on_delete=models.SET_NULL
|
||||
on_delete=models.SET_NULL,
|
||||
)
|
||||
|
||||
last_suceeded = models.DateTimeField(blank=True, null=True)
|
||||
|
|
|
|||
|
|
@ -1 +1,4 @@
|
|||
from .favicon import *
|
||||
from .feed import *
|
||||
from .tests import *
|
||||
from .utils import *
|
||||
|
|
|
|||
|
|
@ -10,3 +10,4 @@ class CollectionRuleFactory(factory.django.DjangoModelFactory):
|
|||
name = factory.Sequence(lambda n: "CollectionRule-{}".format(n))
|
||||
source = factory.Faker("name")
|
||||
url = factory.Faker("url")
|
||||
website_url = factory.Faker("url")
|
||||
|
|
|
|||
3
src/newsreader/news/collection/tests/favicon/__init__.py
Normal file
3
src/newsreader/news/collection/tests/favicon/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .builder import *
|
||||
from .client import *
|
||||
from .collector import *
|
||||
|
|
@ -0,0 +1 @@
|
|||
from .tests import *
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
from bs4 import BeautifulSoup
|
||||
|
||||
simple_mock = BeautifulSoup(
|
||||
"""
|
||||
<html>
|
||||
<head>
|
||||
<link rel='shortcut icon' href='https://www.bbc.com/favicon.ico' />
|
||||
</head>
|
||||
<body>
|
||||
<article />
|
||||
</body>
|
||||
</html>
|
||||
""",
|
||||
"lxml",
|
||||
)
|
||||
|
||||
mock_without_url = BeautifulSoup(
|
||||
"""
|
||||
<html>
|
||||
<head>
|
||||
<link rel='shortcut icon' href='favicon.ico' />
|
||||
</head>
|
||||
<body>
|
||||
<article />
|
||||
</body>
|
||||
</html>
|
||||
""",
|
||||
"lxml",
|
||||
)
|
||||
|
||||
mock_without_header = BeautifulSoup(
|
||||
"""
|
||||
<html>
|
||||
<body>
|
||||
<article />
|
||||
</body>
|
||||
</html>
|
||||
""",
|
||||
"lxml",
|
||||
)
|
||||
|
||||
mock_with_weird_path = BeautifulSoup(
|
||||
"""
|
||||
<html>
|
||||
<head>
|
||||
<link rel='shortcut icon' href='//www.theguardian.com/jabadaba/doe/favicon.ico' />
|
||||
</head>
|
||||
<body>
|
||||
<article />
|
||||
</body>
|
||||
</html>
|
||||
""",
|
||||
"lxml",
|
||||
)
|
||||
|
||||
mock_with_other_url = BeautifulSoup(
|
||||
"""
|
||||
<html>
|
||||
<head>
|
||||
<link rel='stylesheet' href='https://www.theguardian.com/main.css' />
|
||||
<link rel='icon' href='https://www.theguardian.com/icon.png' />
|
||||
</head>
|
||||
<body>
|
||||
<article />
|
||||
</body>
|
||||
</html>
|
||||
""",
|
||||
"lxml",
|
||||
)
|
||||
|
||||
mock_with_multiple_icons = BeautifulSoup(
|
||||
"""
|
||||
<html>
|
||||
<head>
|
||||
<link rel='stylesheet' href='https://www.theguardian.com/main.css' />
|
||||
<link rel='icon' href='https://www.theguardian.com/icon.png' />
|
||||
<link rel='shortcut icon' href='https://www.theguardian.com/icon.png' />
|
||||
<link rel='apple-touch-icon' href='https://www.theguardian.com/icon.png' />
|
||||
<link rel='apple-touch-icon-precomposed' href='https://www.theguardian.com/icon.png' />
|
||||
<link rel='shortcut icon' href='https://www.bbc.com/favicon.ico' />
|
||||
</head>
|
||||
<body>
|
||||
<article />
|
||||
</body>
|
||||
</html>
|
||||
""",
|
||||
"lxml",
|
||||
)
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
from freezegun import freeze_time
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from newsreader.news.collection.favicon import FaviconBuilder
|
||||
from newsreader.news.collection.tests.factories import CollectionRuleFactory
|
||||
from newsreader.news.collection.tests.favicon.builder.mocks import *
|
||||
|
||||
|
||||
class FaviconBuilderTestCase(TestCase):
|
||||
def setUp(self):
|
||||
self.maxDiff = None
|
||||
|
||||
def test_simple(self):
|
||||
rule = CollectionRuleFactory(favicon=None)
|
||||
|
||||
with FaviconBuilder((rule, simple_mock)) as builder:
|
||||
builder.build()
|
||||
|
||||
self.assertEquals(rule.favicon, "https://www.bbc.com/favicon.ico")
|
||||
|
||||
def test_without_url(self):
|
||||
rule = CollectionRuleFactory(website_url="https://www.theguardian.com/", favicon=None)
|
||||
|
||||
with FaviconBuilder((rule, mock_without_url)) as builder:
|
||||
builder.build()
|
||||
|
||||
self.assertEquals(rule.favicon, "https://www.theguardian.com/favicon.ico")
|
||||
|
||||
def test_without_header(self):
|
||||
rule = CollectionRuleFactory(favicon=None)
|
||||
|
||||
with FaviconBuilder((rule, mock_without_header)) as builder:
|
||||
builder.build()
|
||||
|
||||
self.assertEquals(rule.favicon, None)
|
||||
|
||||
def test_weird_path(self):
|
||||
rule = CollectionRuleFactory(favicon=None)
|
||||
|
||||
with FaviconBuilder((rule, mock_with_weird_path)) as builder:
|
||||
builder.build()
|
||||
|
||||
self.assertEquals(rule.favicon, "https://www.theguardian.com/jabadaba/doe/favicon.ico")
|
||||
|
||||
def test_other_url(self):
|
||||
rule = CollectionRuleFactory(favicon=None)
|
||||
|
||||
with FaviconBuilder((rule, mock_with_other_url)) as builder:
|
||||
builder.build()
|
||||
|
||||
self.assertEquals(rule.favicon, "https://www.theguardian.com/icon.png")
|
||||
|
||||
def test_url_with_favicon_takes_precedence(self):
|
||||
rule = CollectionRuleFactory(favicon=None)
|
||||
|
||||
with FaviconBuilder((rule, mock_with_multiple_icons)) as builder:
|
||||
builder.build()
|
||||
|
||||
self.assertEquals(rule.favicon, "https://www.bbc.com/favicon.ico")
|
||||
|
|
@ -0,0 +1 @@
|
|||
from .tests import *
|
||||
12
src/newsreader/news/collection/tests/favicon/client/mocks.py
Normal file
12
src/newsreader/news/collection/tests/favicon/client/mocks.py
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
from bs4 import BeautifulSoup
|
||||
|
||||
simple_mock = BeautifulSoup(
|
||||
"""
|
||||
<html>
|
||||
<body>
|
||||
<article />
|
||||
</body>
|
||||
</htm>
|
||||
""",
|
||||
"lxml",
|
||||
)
|
||||
91
src/newsreader/news/collection/tests/favicon/client/tests.py
Normal file
91
src/newsreader/news/collection/tests/favicon/client/tests.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
from unittest.mock import MagicMock
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from newsreader.news.collection.base import WebsiteStream
|
||||
from newsreader.news.collection.exceptions import (
|
||||
StreamDeniedException,
|
||||
StreamException,
|
||||
StreamNotFoundException,
|
||||
StreamTimeOutException,
|
||||
)
|
||||
from newsreader.news.collection.favicon import FaviconClient
|
||||
from newsreader.news.collection.tests.factories import CollectionRuleFactory
|
||||
from newsreader.news.collection.tests.favicon.client.mocks import simple_mock
|
||||
|
||||
|
||||
class FaviconClientTestCase(TestCase):
|
||||
def setUp(self):
|
||||
self.maxDiff = None
|
||||
|
||||
def test_simple(self):
|
||||
rule = CollectionRuleFactory()
|
||||
stream = MagicMock(url="https://www.bbc.com")
|
||||
stream.read.return_value = (simple_mock, stream)
|
||||
|
||||
with FaviconClient([(rule, stream)]) as client:
|
||||
for rule, data in client:
|
||||
self.assertEquals(rule.pk, rule.pk)
|
||||
self.assertEquals(data, simple_mock)
|
||||
|
||||
stream.read.assert_called_once_with()
|
||||
|
||||
def test_client_catches_stream_exception(self):
|
||||
rule = CollectionRuleFactory(error=None, succeeded=True)
|
||||
stream = MagicMock(url="https://www.bbc.com")
|
||||
stream.read.side_effect = StreamException
|
||||
|
||||
with FaviconClient([(rule, stream)]) as client:
|
||||
for rule, data in client:
|
||||
pass
|
||||
|
||||
stream.read.assert_called_once_with()
|
||||
|
||||
# The favicon client does not set CollectionRule errors
|
||||
self.assertEquals(rule.succeeded, True)
|
||||
self.assertEquals(rule.error, None)
|
||||
|
||||
def test_client_catches_stream_not_found_exception(self):
|
||||
rule = CollectionRuleFactory(error=None, succeeded=True)
|
||||
stream = MagicMock(url="https://www.bbc.com")
|
||||
stream.read.side_effect = StreamNotFoundException
|
||||
|
||||
with FaviconClient([(rule, stream)]) as client:
|
||||
for rule, data in client:
|
||||
pass
|
||||
|
||||
stream.read.assert_called_once_with()
|
||||
|
||||
# The favicon client does not set CollectionRule errors
|
||||
self.assertEquals(rule.succeeded, True)
|
||||
self.assertEquals(rule.error, None)
|
||||
|
||||
def test_client_catches_stream_denied_exception(self):
|
||||
rule = CollectionRuleFactory(error=None, succeeded=True)
|
||||
stream = MagicMock(url="https://www.bbc.com")
|
||||
stream.read.side_effect = StreamDeniedException
|
||||
|
||||
with FaviconClient([(rule, stream)]) as client:
|
||||
for rule, data in client:
|
||||
pass
|
||||
|
||||
stream.read.assert_called_once_with()
|
||||
|
||||
# The favicon client does not set CollectionRule errors
|
||||
self.assertEquals(rule.succeeded, True)
|
||||
self.assertEquals(rule.error, None)
|
||||
|
||||
def test_client_catches_stream_timed_out(self):
|
||||
rule = CollectionRuleFactory(error=None, succeeded=True)
|
||||
stream = MagicMock(url="https://www.bbc.com")
|
||||
stream.read.side_effect = StreamTimeOutException
|
||||
|
||||
with FaviconClient([(rule, stream)]) as client:
|
||||
for rule, data in client:
|
||||
pass
|
||||
|
||||
stream.read.assert_called_once_with()
|
||||
|
||||
# The favicon client does not set CollectionRule errors
|
||||
self.assertEquals(rule.succeeded, True)
|
||||
self.assertEquals(rule.error, None)
|
||||
|
|
@ -0,0 +1 @@
|
|||
from .tests import *
|
||||
159
src/newsreader/news/collection/tests/favicon/collector/mocks.py
Normal file
159
src/newsreader/news/collection/tests/favicon/collector/mocks.py
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
from time import struct_time
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
feed_mock = {
|
||||
"bozo": 0,
|
||||
"encoding": "utf-8",
|
||||
"entries": [
|
||||
{
|
||||
"guidislink": False,
|
||||
"href": "",
|
||||
"id": "https://www.bbc.co.uk/news/world-us-canada-48338168",
|
||||
"link": "https://www.bbc.co.uk/news/world-us-canada-48338168",
|
||||
"links": [
|
||||
{
|
||||
"href": "https://www.bbc.co.uk/news/world-us-canada-48338168",
|
||||
"rel": "alternate",
|
||||
"type": "text/html",
|
||||
}
|
||||
],
|
||||
"media_thumbnail": [
|
||||
{
|
||||
"height": "1152",
|
||||
"url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg",
|
||||
"width": "2048",
|
||||
}
|
||||
],
|
||||
"published": "Mon, 20 May 2019 16:07:37 GMT",
|
||||
"published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
|
||||
"summary": "Foreign Minister Mohammad Javad Zarif says the US "
|
||||
"president should try showing Iranians some respect.",
|
||||
"summary_detail": {
|
||||
"base": "http://feeds.bbci.co.uk/news/rss.xml",
|
||||
"language": None,
|
||||
"type": "text/html",
|
||||
"value": "Foreign Minister Mohammad Javad "
|
||||
"Zarif says the US president should "
|
||||
"try showing Iranians some "
|
||||
"respect.",
|
||||
},
|
||||
"title": "Trump's genocidal taunts will not end Iran - Zarif",
|
||||
"title_detail": {
|
||||
"base": "http://feeds.bbci.co.uk/news/rss.xml",
|
||||
"language": None,
|
||||
"type": "text/plain",
|
||||
"value": "Trump's genocidal taunts will not " "end Iran - Zarif",
|
||||
},
|
||||
},
|
||||
{
|
||||
"guidislink": False,
|
||||
"href": "",
|
||||
"id": "https://www.bbc.co.uk/news/technology-48334739",
|
||||
"link": "https://www.bbc.co.uk/news/technology-48334739",
|
||||
"links": [
|
||||
{
|
||||
"href": "https://www.bbc.co.uk/news/technology-48334739",
|
||||
"rel": "alternate",
|
||||
"type": "text/html",
|
||||
}
|
||||
],
|
||||
"media_thumbnail": [
|
||||
{
|
||||
"height": "432",
|
||||
"url": "http://c.files.bbci.co.uk/4789/production/_107031381_mediaitem107028670.jpg",
|
||||
"width": "768",
|
||||
}
|
||||
],
|
||||
"published": "Mon, 20 May 2019 12:19:19 GMT",
|
||||
"published_parsed": struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0)),
|
||||
"summary": "Google's move to end business ties with Huawei will "
|
||||
"affect current devices and future purchases.",
|
||||
"summary_detail": {
|
||||
"base": "http://feeds.bbci.co.uk/news/rss.xml",
|
||||
"language": None,
|
||||
"type": "text/html",
|
||||
"value": "Google's move to end business ties "
|
||||
"with Huawei will affect current "
|
||||
"devices and future purchases.",
|
||||
},
|
||||
"title": "Huawei's Android loss: How it affects you",
|
||||
"title_detail": {
|
||||
"base": "http://feeds.bbci.co.uk/news/rss.xml",
|
||||
"language": None,
|
||||
"type": "text/plain",
|
||||
"value": "Huawei's Android loss: How it " "affects you",
|
||||
},
|
||||
},
|
||||
{
|
||||
"guidislink": False,
|
||||
"href": "",
|
||||
"id": "https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
|
||||
"link": "https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
|
||||
"links": [
|
||||
{
|
||||
"href": "https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
|
||||
"rel": "alternate",
|
||||
"type": "text/html",
|
||||
}
|
||||
],
|
||||
"media_thumbnail": [
|
||||
{
|
||||
"height": "549",
|
||||
"url": "http://c.files.bbci.co.uk/11D67/production/_107036037_lgbtheadjpg.jpg",
|
||||
"width": "976",
|
||||
}
|
||||
],
|
||||
"published": "Mon, 20 May 2019 16:32:38 GMT",
|
||||
"published_parsed": struct_time((2019, 5, 20, 16, 32, 38, 0, 140, 0)),
|
||||
"summary": "Police are investigating the messages while an MP "
|
||||
"calls for a protest exclusion zone to protect "
|
||||
"children.",
|
||||
"summary_detail": {
|
||||
"base": "http://feeds.bbci.co.uk/news/rss.xml",
|
||||
"language": None,
|
||||
"type": "text/html",
|
||||
"value": "Police are investigating the "
|
||||
"messages while an MP calls for a "
|
||||
"protest exclusion zone to protect "
|
||||
"children.",
|
||||
},
|
||||
"title": "Birmingham head teacher threatened over LGBT lessons",
|
||||
"title_detail": {
|
||||
"base": "http://feeds.bbci.co.uk/news/rss.xml",
|
||||
"language": None,
|
||||
"type": "text/plain",
|
||||
"value": "Birmingham head teacher threatened " "over LGBT lessons",
|
||||
},
|
||||
},
|
||||
],
|
||||
"feed": {
|
||||
"image": {
|
||||
"href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
|
||||
"link": "https://www.bbc.co.uk/news/",
|
||||
"title": "BBC News - Home",
|
||||
"language": "en-gb",
|
||||
"link": "https://www.bbc.co.uk/news/",
|
||||
},
|
||||
"link": "https://www.bbc.co.uk/news/",
|
||||
"links": [{"href": "https://www.bbc.co.uk/news/", "rel": "alternate", "type": "text/html"}],
|
||||
"title": "BBC News - Home",
|
||||
},
|
||||
"href": "http://feeds.bbci.co.uk/news/rss.xml",
|
||||
"status": 200,
|
||||
"version": "rss20",
|
||||
}
|
||||
|
||||
website_mock = BeautifulSoup(
|
||||
"""
|
||||
<html>
|
||||
<head>
|
||||
<link rel="shortcut icon" href="https://www.bbc.co.uk/news/favicon.ico" />
|
||||
</head>
|
||||
<body>
|
||||
<article />
|
||||
</body>
|
||||
</html>
|
||||
""",
|
||||
"lxml",
|
||||
)
|
||||
147
src/newsreader/news/collection/tests/favicon/collector/tests.py
Normal file
147
src/newsreader/news/collection/tests/favicon/collector/tests.py
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytz
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from .mocks import feed_mock, website_mock
|
||||
|
||||
from django.test import TestCase
|
||||
from django.utils import timezone
|
||||
|
||||
from newsreader.news.collection.exceptions import (
|
||||
StreamDeniedException,
|
||||
StreamException,
|
||||
StreamForbiddenException,
|
||||
StreamNotFoundException,
|
||||
StreamParseException,
|
||||
StreamTimeOutException,
|
||||
)
|
||||
from newsreader.news.collection.favicon import FaviconCollector
|
||||
from newsreader.news.collection.tests.factories import CollectionRuleFactory
|
||||
|
||||
|
||||
class FaviconCollectorTestCase(TestCase):
|
||||
def setUp(self):
|
||||
self.maxDiff = None
|
||||
|
||||
self.patched_feed_client = patch("newsreader.news.collection.favicon.FeedClient.__enter__")
|
||||
self.mocked_feed_client = self.patched_feed_client.start()
|
||||
|
||||
self.patched_website_read = patch("newsreader.news.collection.favicon.WebsiteStream.read")
|
||||
self.mocked_website_read = self.patched_website_read.start()
|
||||
|
||||
def tearDown(self):
|
||||
patch.stopall()
|
||||
|
||||
def test_simple(self):
|
||||
rule = CollectionRuleFactory(succeeded=True, error=None)
|
||||
|
||||
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
|
||||
self.mocked_website_read.return_value = (website_mock, MagicMock())
|
||||
|
||||
collector = FaviconCollector()
|
||||
collector.collect()
|
||||
|
||||
rule.refresh_from_db()
|
||||
|
||||
self.assertEquals(rule.succeeded, True)
|
||||
self.assertEquals(rule.error, None)
|
||||
self.assertEquals(rule.website_url, "https://www.bbc.co.uk/news/")
|
||||
self.assertEquals(rule.favicon, "https://www.bbc.co.uk/news/favicon.ico")
|
||||
|
||||
def test_empty_stream(self):
|
||||
rule = CollectionRuleFactory(succeeded=True, error=None)
|
||||
|
||||
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
|
||||
self.mocked_website_read.return_value = (BeautifulSoup("", "lxml"), MagicMock())
|
||||
|
||||
collector = FaviconCollector()
|
||||
collector.collect()
|
||||
|
||||
rule.refresh_from_db()
|
||||
|
||||
self.assertEquals(rule.succeeded, True)
|
||||
self.assertEquals(rule.error, None)
|
||||
self.assertEquals(rule.website_url, "https://www.bbc.co.uk/news/")
|
||||
self.assertEquals(rule.favicon, None)
|
||||
|
||||
def test_not_found(self):
|
||||
rule = CollectionRuleFactory(succeeded=True, error=None)
|
||||
|
||||
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
|
||||
self.mocked_website_read.side_effect = StreamNotFoundException
|
||||
|
||||
collector = FaviconCollector()
|
||||
collector.collect()
|
||||
|
||||
rule.refresh_from_db()
|
||||
|
||||
self.assertEquals(rule.succeeded, True)
|
||||
self.assertEquals(rule.error, None)
|
||||
self.assertEquals(rule.website_url, "https://www.bbc.co.uk/news/")
|
||||
self.assertEquals(rule.favicon, None)
|
||||
|
||||
def test_denied(self):
|
||||
rule = CollectionRuleFactory(succeeded=True, error=None)
|
||||
|
||||
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
|
||||
self.mocked_website_read.side_effect = StreamDeniedException
|
||||
|
||||
collector = FaviconCollector()
|
||||
collector.collect()
|
||||
|
||||
rule.refresh_from_db()
|
||||
|
||||
self.assertEquals(rule.succeeded, True)
|
||||
self.assertEquals(rule.error, None)
|
||||
self.assertEquals(rule.website_url, "https://www.bbc.co.uk/news/")
|
||||
self.assertEquals(rule.favicon, None)
|
||||
|
||||
def test_forbidden(self):
|
||||
rule = CollectionRuleFactory(succeeded=True, error=None)
|
||||
|
||||
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
|
||||
self.mocked_website_read.side_effect = StreamForbiddenException
|
||||
|
||||
collector = FaviconCollector()
|
||||
collector.collect()
|
||||
|
||||
rule.refresh_from_db()
|
||||
|
||||
self.assertEquals(rule.succeeded, True)
|
||||
self.assertEquals(rule.error, None)
|
||||
self.assertEquals(rule.website_url, "https://www.bbc.co.uk/news/")
|
||||
self.assertEquals(rule.favicon, None)
|
||||
|
||||
def test_timed_out(self):
|
||||
rule = CollectionRuleFactory(succeeded=True, error=None)
|
||||
|
||||
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
|
||||
self.mocked_website_read.side_effect = StreamTimeOutException
|
||||
|
||||
collector = FaviconCollector()
|
||||
collector.collect()
|
||||
|
||||
rule.refresh_from_db()
|
||||
|
||||
self.assertEquals(rule.succeeded, True)
|
||||
self.assertEquals(rule.error, None)
|
||||
self.assertEquals(rule.website_url, "https://www.bbc.co.uk/news/")
|
||||
self.assertEquals(rule.favicon, None)
|
||||
|
||||
def test_wrong_stream_content_type(self):
|
||||
rule = CollectionRuleFactory(succeeded=True, error=None)
|
||||
|
||||
self.mocked_feed_client.return_value = [(feed_mock, MagicMock(rule=rule))]
|
||||
self.mocked_website_read.side_effect = StreamParseException
|
||||
|
||||
collector = FaviconCollector()
|
||||
collector.collect()
|
||||
|
||||
rule.refresh_from_db()
|
||||
|
||||
self.assertEquals(rule.succeeded, True)
|
||||
self.assertEquals(rule.error, None)
|
||||
self.assertEquals(rule.website_url, "https://www.bbc.co.uk/news/")
|
||||
self.assertEquals(rule.favicon, None)
|
||||
|
|
@ -5,26 +5,27 @@ import pytz
|
|||
|
||||
from freezegun import freeze_time
|
||||
|
||||
from .mocks import *
|
||||
|
||||
from django.test import TestCase
|
||||
from django.utils import timezone
|
||||
|
||||
from newsreader.news.collection.feed import FeedBuilder
|
||||
from newsreader.news.collection.tests.factories import CollectionRuleFactory
|
||||
from newsreader.news.collection.tests.feed.builder.mocks import *
|
||||
from newsreader.news.posts.models import Post
|
||||
from newsreader.news.posts.tests.factories import PostFactory
|
||||
|
||||
|
||||
class FeedBuilderTestCase(TestCase):
|
||||
def setUp(self):
|
||||
pass
|
||||
self.maxDiff = None
|
||||
|
||||
def test_basic_entry(self):
|
||||
builder = FeedBuilder
|
||||
rule = CollectionRuleFactory()
|
||||
mock_stream = MagicMock(rule=rule)
|
||||
|
||||
with builder((simple_mock, mock_stream,)) as builder:
|
||||
with builder((simple_mock, mock_stream)) as builder:
|
||||
builder.save()
|
||||
|
||||
post = Post.objects.get()
|
||||
|
|
@ -36,26 +37,19 @@ class FeedBuilderTestCase(TestCase):
|
|||
self.assertEquals(Post.objects.count(), 1)
|
||||
|
||||
self.assertEquals(
|
||||
post.remote_identifier,
|
||||
"https://www.bbc.co.uk/news/world-us-canada-48338168"
|
||||
post.remote_identifier, "https://www.bbc.co.uk/news/world-us-canada-48338168"
|
||||
)
|
||||
|
||||
self.assertEquals(
|
||||
post.url,
|
||||
"https://www.bbc.co.uk/news/world-us-canada-48338168"
|
||||
)
|
||||
self.assertEquals(post.url, "https://www.bbc.co.uk/news/world-us-canada-48338168")
|
||||
|
||||
self.assertEquals(
|
||||
post.title,
|
||||
"Trump's 'genocidal taunts' will not end Iran - Zarif"
|
||||
)
|
||||
self.assertEquals(post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif")
|
||||
|
||||
def test_multiple_entries(self):
|
||||
builder = FeedBuilder
|
||||
rule = CollectionRuleFactory()
|
||||
mock_stream = MagicMock(rule=rule)
|
||||
|
||||
with builder((multiple_mock, mock_stream,)) as builder:
|
||||
with builder((multiple_mock, mock_stream)) as builder:
|
||||
builder.save()
|
||||
|
||||
posts = Post.objects.order_by("id")
|
||||
|
|
@ -70,19 +64,12 @@ class FeedBuilderTestCase(TestCase):
|
|||
self.assertEquals(first_post.publication_date, aware_date)
|
||||
|
||||
self.assertEquals(
|
||||
first_post.remote_identifier,
|
||||
"https://www.bbc.co.uk/news/world-us-canada-48338168"
|
||||
first_post.remote_identifier, "https://www.bbc.co.uk/news/world-us-canada-48338168"
|
||||
)
|
||||
|
||||
self.assertEquals(
|
||||
first_post.url,
|
||||
"https://www.bbc.co.uk/news/world-us-canada-48338168"
|
||||
)
|
||||
self.assertEquals(first_post.url, "https://www.bbc.co.uk/news/world-us-canada-48338168")
|
||||
|
||||
self.assertEquals(
|
||||
first_post.title,
|
||||
"Trump's 'genocidal taunts' will not end Iran - Zarif"
|
||||
)
|
||||
self.assertEquals(first_post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif")
|
||||
|
||||
d = datetime.combine(date(2019, 5, 20), time(hour=12, minute=19, second=19))
|
||||
aware_date = pytz.utc.localize(d)
|
||||
|
|
@ -90,26 +77,19 @@ class FeedBuilderTestCase(TestCase):
|
|||
self.assertEquals(second_post.publication_date, aware_date)
|
||||
|
||||
self.assertEquals(
|
||||
second_post.remote_identifier,
|
||||
"https://www.bbc.co.uk/news/technology-48334739"
|
||||
second_post.remote_identifier, "https://www.bbc.co.uk/news/technology-48334739"
|
||||
)
|
||||
|
||||
self.assertEquals(
|
||||
second_post.url,
|
||||
"https://www.bbc.co.uk/news/technology-48334739"
|
||||
)
|
||||
self.assertEquals(second_post.url, "https://www.bbc.co.uk/news/technology-48334739")
|
||||
|
||||
self.assertEquals(
|
||||
second_post.title,
|
||||
"Huawei's Android loss: How it affects you"
|
||||
)
|
||||
self.assertEquals(second_post.title, "Huawei's Android loss: How it affects you")
|
||||
|
||||
def test_entry_without_remote_identifier(self):
|
||||
builder = FeedBuilder
|
||||
rule = CollectionRuleFactory()
|
||||
mock_stream = MagicMock(rule=rule)
|
||||
|
||||
with builder((mock_without_identifier, mock_stream,)) as builder:
|
||||
with builder((mock_without_identifier, mock_stream)) as builder:
|
||||
builder.save()
|
||||
|
||||
posts = Post.objects.order_by("id")
|
||||
|
|
@ -124,15 +104,9 @@ class FeedBuilderTestCase(TestCase):
|
|||
|
||||
self.assertEquals(first_post.remote_identifier, None)
|
||||
|
||||
self.assertEquals(
|
||||
first_post.url,
|
||||
"https://www.bbc.co.uk/news/world-us-canada-48338168"
|
||||
)
|
||||
self.assertEquals(first_post.url, "https://www.bbc.co.uk/news/world-us-canada-48338168")
|
||||
|
||||
self.assertEquals(
|
||||
first_post.title,
|
||||
"Trump's 'genocidal taunts' will not end Iran - Zarif"
|
||||
)
|
||||
self.assertEquals(first_post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif")
|
||||
|
||||
@freeze_time("2019-10-30 12:30:00")
|
||||
def test_entry_without_publication_date(self):
|
||||
|
|
@ -140,7 +114,7 @@ class FeedBuilderTestCase(TestCase):
|
|||
rule = CollectionRuleFactory()
|
||||
mock_stream = MagicMock(rule=rule)
|
||||
|
||||
with builder((mock_without_publish_date, mock_stream,)) as builder:
|
||||
with builder((mock_without_publish_date, mock_stream)) as builder:
|
||||
builder.save()
|
||||
|
||||
posts = Post.objects.order_by("id")
|
||||
|
|
@ -151,14 +125,12 @@ class FeedBuilderTestCase(TestCase):
|
|||
|
||||
self.assertEquals(first_post.created, timezone.now())
|
||||
self.assertEquals(
|
||||
first_post.remote_identifier,
|
||||
'https://www.bbc.co.uk/news/world-us-canada-48338168'
|
||||
first_post.remote_identifier, "https://www.bbc.co.uk/news/world-us-canada-48338168"
|
||||
)
|
||||
|
||||
self.assertEquals(second_post.created, timezone.now())
|
||||
self.assertEquals(
|
||||
second_post.remote_identifier,
|
||||
'https://www.bbc.co.uk/news/technology-48334739'
|
||||
second_post.remote_identifier, "https://www.bbc.co.uk/news/technology-48334739"
|
||||
)
|
||||
|
||||
@freeze_time("2019-10-30 12:30:00")
|
||||
|
|
@ -167,7 +139,7 @@ class FeedBuilderTestCase(TestCase):
|
|||
rule = CollectionRuleFactory()
|
||||
mock_stream = MagicMock(rule=rule)
|
||||
|
||||
with builder((mock_without_url, mock_stream,)) as builder:
|
||||
with builder((mock_without_url, mock_stream)) as builder:
|
||||
builder.save()
|
||||
|
||||
posts = Post.objects.order_by("id")
|
||||
|
|
@ -178,14 +150,12 @@ class FeedBuilderTestCase(TestCase):
|
|||
|
||||
self.assertEquals(first_post.created, timezone.now())
|
||||
self.assertEquals(
|
||||
first_post.remote_identifier,
|
||||
'https://www.bbc.co.uk/news/world-us-canada-48338168'
|
||||
first_post.remote_identifier, "https://www.bbc.co.uk/news/world-us-canada-48338168"
|
||||
)
|
||||
|
||||
self.assertEquals(second_post.created, timezone.now())
|
||||
self.assertEquals(
|
||||
second_post.remote_identifier,
|
||||
'https://www.bbc.co.uk/news/technology-48334739'
|
||||
second_post.remote_identifier, "https://www.bbc.co.uk/news/technology-48334739"
|
||||
)
|
||||
|
||||
@freeze_time("2019-10-30 12:30:00")
|
||||
|
|
@ -194,7 +164,7 @@ class FeedBuilderTestCase(TestCase):
|
|||
rule = CollectionRuleFactory()
|
||||
mock_stream = MagicMock(rule=rule)
|
||||
|
||||
with builder((mock_without_body, mock_stream,)) as builder:
|
||||
with builder((mock_without_body, mock_stream)) as builder:
|
||||
builder.save()
|
||||
|
||||
posts = Post.objects.order_by("id")
|
||||
|
|
@ -205,14 +175,13 @@ class FeedBuilderTestCase(TestCase):
|
|||
|
||||
self.assertEquals(first_post.created, timezone.now())
|
||||
self.assertEquals(
|
||||
first_post.remote_identifier,
|
||||
'https://www.bbc.co.uk/news/world-us-canada-48338168'
|
||||
first_post.remote_identifier, "https://www.bbc.co.uk/news/world-us-canada-48338168"
|
||||
)
|
||||
|
||||
self.assertEquals(second_post.created, timezone.now())
|
||||
self.assertEquals(
|
||||
second_post.remote_identifier,
|
||||
'https://www.bbc.co.uk/news/uk-england-birmingham-48339080'
|
||||
"https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
|
||||
)
|
||||
|
||||
@freeze_time("2019-10-30 12:30:00")
|
||||
|
|
@ -221,7 +190,7 @@ class FeedBuilderTestCase(TestCase):
|
|||
rule = CollectionRuleFactory()
|
||||
mock_stream = MagicMock(rule=rule)
|
||||
|
||||
with builder((mock_without_author, mock_stream,)) as builder:
|
||||
with builder((mock_without_author, mock_stream)) as builder:
|
||||
builder.save()
|
||||
|
||||
posts = Post.objects.order_by("id")
|
||||
|
|
@ -232,14 +201,12 @@ class FeedBuilderTestCase(TestCase):
|
|||
|
||||
self.assertEquals(first_post.created, timezone.now())
|
||||
self.assertEquals(
|
||||
first_post.remote_identifier,
|
||||
'https://www.bbc.co.uk/news/world-us-canada-48338168'
|
||||
first_post.remote_identifier, "https://www.bbc.co.uk/news/world-us-canada-48338168"
|
||||
)
|
||||
|
||||
self.assertEquals(second_post.created, timezone.now())
|
||||
self.assertEquals(
|
||||
second_post.remote_identifier,
|
||||
'https://www.bbc.co.uk/news/technology-48334739'
|
||||
second_post.remote_identifier, "https://www.bbc.co.uk/news/technology-48334739"
|
||||
)
|
||||
|
||||
def test_empty_entries(self):
|
||||
|
|
@ -247,7 +214,7 @@ class FeedBuilderTestCase(TestCase):
|
|||
rule = CollectionRuleFactory()
|
||||
mock_stream = MagicMock(rule=rule)
|
||||
|
||||
with builder((mock_without_entries, mock_stream,)) as builder:
|
||||
with builder((mock_without_entries, mock_stream)) as builder:
|
||||
builder.save()
|
||||
|
||||
self.assertEquals(Post.objects.count(), 0)
|
||||
|
|
@ -265,7 +232,7 @@ class FeedBuilderTestCase(TestCase):
|
|||
remote_identifier="a5479c66-8fae-11e9-8422-00163ef6bee7", rule=rule
|
||||
)
|
||||
|
||||
with builder((mock_with_update_entries, mock_stream,)) as builder:
|
||||
with builder((mock_with_update_entries, mock_stream)) as builder:
|
||||
builder.save()
|
||||
|
||||
self.assertEquals(Post.objects.count(), 3)
|
||||
|
|
@ -274,21 +241,17 @@ class FeedBuilderTestCase(TestCase):
|
|||
existing_second_post.refresh_from_db()
|
||||
|
||||
self.assertEquals(
|
||||
existing_first_post.title,
|
||||
"Trump's 'genocidal taunts' will not end Iran - Zarif"
|
||||
existing_first_post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif"
|
||||
)
|
||||
|
||||
self.assertEquals(
|
||||
existing_second_post.title,
|
||||
"Huawei's Android loss: How it affects you"
|
||||
)
|
||||
self.assertEquals(existing_second_post.title, "Huawei's Android loss: How it affects you")
|
||||
|
||||
def test_html_sanitizing(self):
|
||||
builder = FeedBuilder
|
||||
rule = CollectionRuleFactory()
|
||||
mock_stream = MagicMock(rule=rule)
|
||||
|
||||
with builder((mock_with_html, mock_stream,)) as builder:
|
||||
with builder((mock_with_html, mock_stream)) as builder:
|
||||
builder.save()
|
||||
|
||||
post = Post.objects.get()
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from .mocks import simple_mock
|
||||
|
||||
from django.test import TestCase
|
||||
from django.utils import timezone
|
||||
|
||||
|
|
@ -7,15 +9,17 @@ from newsreader.news.collection.exceptions import (
|
|||
StreamDeniedException,
|
||||
StreamException,
|
||||
StreamNotFoundException,
|
||||
StreamParseException,
|
||||
StreamTimeOutException,
|
||||
)
|
||||
from newsreader.news.collection.feed import FeedClient
|
||||
from newsreader.news.collection.tests.factories import CollectionRuleFactory
|
||||
from newsreader.news.collection.tests.feed.client.mocks import simple_mock
|
||||
|
||||
|
||||
class FeedClientTestCase(TestCase):
|
||||
def setUp(self):
|
||||
self.maxDiff = None
|
||||
|
||||
self.patched_read = patch("newsreader.news.collection.feed.FeedStream.read")
|
||||
self.mocked_read = self.patched_read.start()
|
||||
|
||||
|
|
@ -85,3 +89,16 @@ class FeedClientTestCase(TestCase):
|
|||
self.assertEquals(stream.rule.succeeded, False)
|
||||
|
||||
self.mocked_read.assert_called_once_with()
|
||||
|
||||
def test_client_catches_stream_parse_exception(self):
|
||||
rule = CollectionRuleFactory.create()
|
||||
mock_stream = MagicMock(rule=rule)
|
||||
self.mocked_read.side_effect = StreamParseException("Stream has wrong contents")
|
||||
|
||||
with FeedClient([rule]) as client:
|
||||
for data, stream in client:
|
||||
self.assertEquals(data, {"entries": []})
|
||||
self.assertEquals(stream.rule.error, "Stream has wrong contents")
|
||||
self.assertEquals(stream.rule.succeeded, False)
|
||||
|
||||
self.mocked_read.assert_called_once_with()
|
||||
|
|
|
|||
|
|
@ -6,17 +6,26 @@ import pytz
|
|||
|
||||
from freezegun import freeze_time
|
||||
|
||||
from django.test import TestCase
|
||||
from django.utils import timezone
|
||||
|
||||
from newsreader.news.collection.feed import FeedCollector
|
||||
from newsreader.news.collection.tests.factories import CollectionRuleFactory
|
||||
from newsreader.news.collection.tests.feed.collector.mocks import (
|
||||
from .mocks import (
|
||||
duplicate_mock,
|
||||
empty_mock,
|
||||
multiple_mock,
|
||||
multiple_update_mock,
|
||||
)
|
||||
|
||||
from django.test import TestCase
|
||||
from django.utils import timezone
|
||||
|
||||
from newsreader.news.collection.exceptions import (
|
||||
StreamDeniedException,
|
||||
StreamException,
|
||||
StreamForbiddenException,
|
||||
StreamNotFoundException,
|
||||
StreamParseException,
|
||||
StreamTimeOutException,
|
||||
)
|
||||
from newsreader.news.collection.feed import FeedCollector
|
||||
from newsreader.news.collection.tests.factories import CollectionRuleFactory
|
||||
from newsreader.news.collection.utils import build_publication_date
|
||||
from newsreader.news.posts.models import Post
|
||||
from newsreader.news.posts.tests.factories import PostFactory
|
||||
|
|
@ -24,14 +33,12 @@ from newsreader.news.posts.tests.factories import PostFactory
|
|||
|
||||
class FeedCollectorTestCase(TestCase):
|
||||
def setUp(self):
|
||||
self.patched_get = patch(
|
||||
'newsreader.news.collection.feed.requests.get'
|
||||
)
|
||||
self.mocked_get = self.patched_get.start()
|
||||
self.maxDiff = None
|
||||
|
||||
self.patched_parse = patch(
|
||||
'newsreader.news.collection.feed.FeedStream.parse'
|
||||
)
|
||||
self.patched_get = patch("newsreader.news.collection.feed.fetch")
|
||||
self.mocked_fetch = self.patched_get.start()
|
||||
|
||||
self.patched_parse = patch("newsreader.news.collection.feed.FeedStream.parse")
|
||||
self.mocked_parse = self.patched_parse.start()
|
||||
|
||||
def tearDown(self):
|
||||
|
|
@ -54,7 +61,7 @@ class FeedCollectorTestCase(TestCase):
|
|||
|
||||
@freeze_time("2019-10-30 12:30:00")
|
||||
def test_emtpy_batch(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=200)
|
||||
self.mocked_fetch.return_value = MagicMock()
|
||||
self.mocked_parse.return_value = empty_mock
|
||||
rule = CollectionRuleFactory()
|
||||
|
||||
|
|
@ -69,7 +76,7 @@ class FeedCollectorTestCase(TestCase):
|
|||
self.assertEquals(rule.last_suceeded, timezone.now())
|
||||
|
||||
def test_not_found(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=404)
|
||||
self.mocked_fetch.side_effect = StreamNotFoundException
|
||||
rule = CollectionRuleFactory()
|
||||
|
||||
collector = FeedCollector()
|
||||
|
|
@ -82,7 +89,7 @@ class FeedCollectorTestCase(TestCase):
|
|||
self.assertEquals(rule.error, "Stream not found")
|
||||
|
||||
def test_denied(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=404)
|
||||
self.mocked_fetch.side_effect = StreamDeniedException
|
||||
last_suceeded = timezone.make_aware(
|
||||
datetime.combine(date=date(2019, 10, 30), time=time(12, 30))
|
||||
)
|
||||
|
|
@ -95,11 +102,11 @@ class FeedCollectorTestCase(TestCase):
|
|||
|
||||
self.assertEquals(Post.objects.count(), 0)
|
||||
self.assertEquals(rule.succeeded, False)
|
||||
self.assertEquals(rule.error, "Stream not found")
|
||||
self.assertEquals(rule.error, "Stream does not have sufficient permissions")
|
||||
self.assertEquals(rule.last_suceeded, last_suceeded)
|
||||
|
||||
def test_forbidden(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=403)
|
||||
self.mocked_fetch.side_effect = StreamForbiddenException
|
||||
last_suceeded = timezone.make_aware(
|
||||
datetime.combine(date=date(2019, 10, 30), time=time(12, 30))
|
||||
)
|
||||
|
|
@ -116,7 +123,7 @@ class FeedCollectorTestCase(TestCase):
|
|||
self.assertEquals(rule.last_suceeded, last_suceeded)
|
||||
|
||||
def test_timed_out(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=408)
|
||||
self.mocked_fetch.side_effect = StreamTimeOutException
|
||||
last_suceeded = timezone.make_aware(
|
||||
datetime.combine(date=date(2019, 10, 30), time=time(12, 30))
|
||||
)
|
||||
|
|
@ -138,8 +145,7 @@ class FeedCollectorTestCase(TestCase):
|
|||
rule = CollectionRuleFactory()
|
||||
|
||||
_, aware_datetime = build_publication_date(
|
||||
struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
|
||||
pytz.utc
|
||||
struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), pytz.utc
|
||||
)
|
||||
|
||||
first_post = PostFactory(
|
||||
|
|
@ -148,12 +154,11 @@ class FeedCollectorTestCase(TestCase):
|
|||
body="Foreign Minister Mohammad Javad Zarif says the US "
|
||||
"president should try showing Iranians some respect.",
|
||||
publication_date=aware_datetime,
|
||||
rule=rule
|
||||
rule=rule,
|
||||
)
|
||||
|
||||
_, aware_datetime = build_publication_date(
|
||||
struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0,)),
|
||||
pytz.utc
|
||||
struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0)), pytz.utc
|
||||
)
|
||||
|
||||
second_post = PostFactory(
|
||||
|
|
@ -162,22 +167,21 @@ class FeedCollectorTestCase(TestCase):
|
|||
body="Google's move to end business ties with Huawei will "
|
||||
"affect current devices and future purchases.",
|
||||
publication_date=aware_datetime,
|
||||
rule=rule
|
||||
rule=rule,
|
||||
)
|
||||
|
||||
_, aware_datetime = build_publication_date(
|
||||
struct_time((2019, 5, 20, 16, 32, 38, 0, 140, 0)),
|
||||
pytz.utc
|
||||
struct_time((2019, 5, 20, 16, 32, 38, 0, 140, 0)), pytz.utc
|
||||
)
|
||||
|
||||
third_post = PostFactory(
|
||||
url="https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
|
||||
title="Birmingham head teacher threatened over LGBT lessons",
|
||||
body="Police are investigating the messages while an MP "
|
||||
"calls for a protest exclusion zone \"to protect "
|
||||
"children\".",
|
||||
'calls for a protest exclusion zone "to protect '
|
||||
'children".',
|
||||
publication_date=aware_datetime,
|
||||
rule=rule
|
||||
rule=rule,
|
||||
)
|
||||
|
||||
collector = FeedCollector()
|
||||
|
|
@ -201,7 +205,7 @@ class FeedCollectorTestCase(TestCase):
|
|||
title="Trump",
|
||||
body="Foreign Minister Mohammad Javad Zarif",
|
||||
publication_date=timezone.now(),
|
||||
rule=rule
|
||||
rule=rule,
|
||||
)
|
||||
|
||||
second_post = PostFactory(
|
||||
|
|
@ -210,7 +214,7 @@ class FeedCollectorTestCase(TestCase):
|
|||
title="Huawei's Android loss: How it affects you",
|
||||
body="Google's move to end business ties with Huawei will",
|
||||
publication_date=timezone.now(),
|
||||
rule=rule
|
||||
rule=rule,
|
||||
)
|
||||
|
||||
third_post = PostFactory(
|
||||
|
|
@ -219,7 +223,7 @@ class FeedCollectorTestCase(TestCase):
|
|||
title="Birmingham head teacher threatened over LGBT lessons",
|
||||
body="Police are investigating the messages while an MP",
|
||||
publication_date=timezone.now(),
|
||||
rule=rule
|
||||
rule=rule,
|
||||
)
|
||||
|
||||
collector = FeedCollector()
|
||||
|
|
@ -235,17 +239,8 @@ class FeedCollectorTestCase(TestCase):
|
|||
self.assertEquals(rule.last_suceeded, timezone.now())
|
||||
self.assertEquals(rule.error, None)
|
||||
|
||||
self.assertEquals(
|
||||
first_post.title,
|
||||
"Trump's 'genocidal taunts' will not end Iran - Zarif"
|
||||
)
|
||||
self.assertEquals(first_post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif")
|
||||
|
||||
self.assertEquals(
|
||||
second_post.title,
|
||||
"Huawei's Android loss: How it affects you"
|
||||
)
|
||||
self.assertEquals(second_post.title, "Huawei's Android loss: How it affects you")
|
||||
|
||||
self.assertEquals(
|
||||
third_post.title,
|
||||
'Birmingham head teacher threatened over LGBT lessons'
|
||||
)
|
||||
self.assertEquals(third_post.title, "Birmingham head teacher threatened over LGBT lessons")
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from newsreader.news.posts.tests.factories import PostFactory
|
|||
|
||||
class FeedDuplicateHandlerTestCase(TestCase):
|
||||
def setUp(self):
|
||||
pass
|
||||
self.maxDiff = None
|
||||
|
||||
def test_duplicate_entries_with_remote_identifiers(self):
|
||||
rule = CollectionRuleFactory()
|
||||
|
|
@ -19,7 +19,7 @@ class FeedDuplicateHandlerTestCase(TestCase):
|
|||
new_post = PostFactory.build(
|
||||
remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7",
|
||||
title="title got updated",
|
||||
rule=rule
|
||||
rule=rule,
|
||||
)
|
||||
|
||||
with FeedDuplicateHandler(rule) as duplicate_handler:
|
||||
|
|
@ -45,7 +45,7 @@ class FeedDuplicateHandlerTestCase(TestCase):
|
|||
body="Google's move to end business ties with Huawei will affect current devices",
|
||||
publication_date=publication_date,
|
||||
remote_identifier=None,
|
||||
rule=rule
|
||||
rule=rule,
|
||||
)
|
||||
new_post = PostFactory.build(
|
||||
url="https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
|
||||
|
|
@ -53,7 +53,7 @@ class FeedDuplicateHandlerTestCase(TestCase):
|
|||
body="Google's move to end business ties with Huawei will affect current devices",
|
||||
publication_date=publication_date,
|
||||
remote_identifier=None,
|
||||
rule=rule
|
||||
rule=rule,
|
||||
)
|
||||
|
||||
with FeedDuplicateHandler(rule) as duplicate_handler:
|
||||
|
|
|
|||
|
|
@ -1,61 +1,62 @@
|
|||
from time import struct_time
|
||||
|
||||
simple_mock = {
|
||||
'bozo': 0,
|
||||
'encoding': 'utf-8',
|
||||
'entries': [{
|
||||
'guidislink': False,
|
||||
'href': '',
|
||||
'id': 'https://www.bbc.co.uk/news/world-us-canada-48338168',
|
||||
'link': 'https://www.bbc.co.uk/news/world-us-canada-48338168',
|
||||
'links': [{
|
||||
'href': 'https://www.bbc.co.uk/news/world-us-canada-48338168',
|
||||
'rel': 'alternate',
|
||||
'type': 'text/html'
|
||||
}],
|
||||
'media_thumbnail': [{
|
||||
'height': '1152',
|
||||
'url': 'http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg',
|
||||
'width': '2048'
|
||||
}],
|
||||
'published': 'Mon, 20 May 2019 16:07:37 GMT',
|
||||
'published_parsed': struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
|
||||
'summary': 'Foreign Minister Mohammad Javad Zarif says the US '
|
||||
'president should try showing Iranians some respect.',
|
||||
'summary_detail': {
|
||||
'base': 'http://feeds.bbci.co.uk/news/rss.xml',
|
||||
'language': None,
|
||||
'type': 'text/html',
|
||||
'value': 'Foreign Minister Mohammad Javad '
|
||||
'Zarif says the US president should '
|
||||
'try showing Iranians some '
|
||||
'respect.'
|
||||
},
|
||||
'title': "Trump's 'genocidal taunts' will not end Iran - Zarif",
|
||||
'title_detail': {
|
||||
'base': 'http://feeds.bbci.co.uk/news/rss.xml',
|
||||
'language': None,
|
||||
'type': 'text/plain',
|
||||
'value': "Trump's 'genocidal taunts' will not "
|
||||
'end Iran - Zarif'
|
||||
"bozo": 1,
|
||||
"encoding": "utf-8",
|
||||
"entries": [
|
||||
{
|
||||
"guidislink": False,
|
||||
"href": "",
|
||||
"id": "https://www.bbc.co.uk/news/world-us-canada-48338168",
|
||||
"link": "https://www.bbc.co.uk/news/world-us-canada-48338168",
|
||||
"links": [
|
||||
{
|
||||
"href": "https://www.bbc.co.uk/news/world-us-canada-48338168",
|
||||
"rel": "alternate",
|
||||
"type": "text/html",
|
||||
}
|
||||
}],
|
||||
'feed': {
|
||||
'image': {
|
||||
'href': 'https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif',
|
||||
'link': 'https://www.bbc.co.uk/news/',
|
||||
'title': 'BBC News - Home',
|
||||
'language': 'en-gb',
|
||||
'link': 'https://www.bbc.co.uk/news/'
|
||||
},
|
||||
'links': [{
|
||||
'href': 'https://www.bbc.co.uk/news/',
|
||||
'rel': 'alternate',
|
||||
'type': 'text/html'
|
||||
}],
|
||||
'title': 'BBC News - Home',
|
||||
},
|
||||
'href': 'http://feeds.bbci.co.uk/news/rss.xml',
|
||||
'status': 200,
|
||||
'version': 'rss20'
|
||||
],
|
||||
"media_thumbnail": [
|
||||
{
|
||||
"height": "1152",
|
||||
"url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg",
|
||||
"width": "2048",
|
||||
}
|
||||
],
|
||||
"published": "Mon, 20 May 2019 16:07:37 GMT",
|
||||
"published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
|
||||
"summary": "Foreign Minister Mohammad Javad Zarif says the US "
|
||||
"president should try showing Iranians some respect.",
|
||||
"summary_detail": {
|
||||
"base": "http://feeds.bbci.co.uk/news/rss.xml",
|
||||
"language": None,
|
||||
"type": "text/html",
|
||||
"value": "Foreign Minister Mohammad Javad "
|
||||
"Zarif says the US president should "
|
||||
"try showing Iranians some "
|
||||
"respect.",
|
||||
},
|
||||
"title": "Trump's 'genocidal taunts' will not end Iran - Zarif",
|
||||
"title_detail": {
|
||||
"base": "http://feeds.bbci.co.uk/news/rss.xml",
|
||||
"language": None,
|
||||
"type": "text/plain",
|
||||
"value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif",
|
||||
},
|
||||
}
|
||||
],
|
||||
"feed": {
|
||||
"image": {
|
||||
"href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
|
||||
"link": "https://www.bbc.co.uk/news/",
|
||||
"title": "BBC News - Home",
|
||||
"language": "en-gb",
|
||||
"link": "https://www.bbc.co.uk/news/",
|
||||
},
|
||||
"links": [{"href": "https://www.bbc.co.uk/news/", "rel": "alternate", "type": "text/html"}],
|
||||
"title": "BBC News - Home",
|
||||
},
|
||||
"href": "http://feeds.bbci.co.uk/news/rss.xml",
|
||||
"status": 200,
|
||||
"version": "rss20",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from .mocks import simple_mock
|
||||
|
||||
from django.test import TestCase
|
||||
from django.utils import timezone
|
||||
|
||||
|
|
@ -13,36 +15,32 @@ from newsreader.news.collection.exceptions import (
|
|||
)
|
||||
from newsreader.news.collection.feed import FeedStream
|
||||
from newsreader.news.collection.tests.factories import CollectionRuleFactory
|
||||
from newsreader.news.collection.tests.feed.stream.mocks import simple_mock
|
||||
|
||||
|
||||
class FeedStreamTestCase(TestCase):
|
||||
def setUp(self):
|
||||
self.patched_get = patch(
|
||||
'newsreader.news.collection.feed.requests.get'
|
||||
)
|
||||
self.mocked_get = self.patched_get.start()
|
||||
self.maxDiff = None
|
||||
|
||||
self.patched_parse = patch(
|
||||
'newsreader.news.collection.feed.FeedStream.parse'
|
||||
)
|
||||
self.mocked_parse = self.patched_parse.start()
|
||||
self.patched_fetch = patch("newsreader.news.collection.feed.fetch")
|
||||
self.mocked_fetch = self.patched_fetch.start()
|
||||
|
||||
def tearDown(self):
|
||||
patch.stopall()
|
||||
|
||||
def test_simple_stream(self):
|
||||
self.mocked_parse.return_value = simple_mock
|
||||
self.mocked_fetch.return_value = MagicMock(content=simple_mock)
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = FeedStream(rule)
|
||||
return_value = stream.read()
|
||||
|
||||
self.mocked_get.assert_called_once_with(rule.url)
|
||||
self.assertEquals(return_value, (simple_mock, stream))
|
||||
data, stream = stream.read()
|
||||
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
self.assertEquals(data["entries"], data["entries"])
|
||||
self.assertEquals(stream, stream)
|
||||
|
||||
def test_stream_raises_exception(self):
|
||||
self.mocked_parse.side_effect = StreamException
|
||||
self.mocked_fetch.side_effect = StreamException
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = FeedStream(rule)
|
||||
|
|
@ -50,10 +48,10 @@ class FeedStreamTestCase(TestCase):
|
|||
with self.assertRaises(StreamException):
|
||||
stream.read()
|
||||
|
||||
self.mocked_get.assert_called_once_with(rule.url)
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
|
||||
def test_stream_raises_denied_exception(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=401)
|
||||
self.mocked_fetch.side_effect = StreamDeniedException
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = FeedStream(rule)
|
||||
|
|
@ -61,10 +59,10 @@ class FeedStreamTestCase(TestCase):
|
|||
with self.assertRaises(StreamDeniedException):
|
||||
stream.read()
|
||||
|
||||
self.mocked_get.assert_called_once_with(rule.url)
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
|
||||
def test_stream_raises_not_found_exception(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=404)
|
||||
self.mocked_fetch.side_effect = StreamNotFoundException
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = FeedStream(rule)
|
||||
|
|
@ -72,10 +70,10 @@ class FeedStreamTestCase(TestCase):
|
|||
with self.assertRaises(StreamNotFoundException):
|
||||
stream.read()
|
||||
|
||||
self.mocked_get.assert_called_once_with(rule.url)
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
|
||||
def test_stream_raises_time_out_exception(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=408)
|
||||
self.mocked_fetch.side_effect = StreamTimeOutException
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = FeedStream(rule)
|
||||
|
|
@ -83,10 +81,10 @@ class FeedStreamTestCase(TestCase):
|
|||
with self.assertRaises(StreamTimeOutException):
|
||||
stream.read()
|
||||
|
||||
self.mocked_get.assert_called_once_with(rule.url)
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
|
||||
def test_stream_raises_forbidden_exception(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=403)
|
||||
self.mocked_fetch.side_effect = StreamForbiddenException
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = FeedStream(rule)
|
||||
|
|
@ -94,13 +92,12 @@ class FeedStreamTestCase(TestCase):
|
|||
with self.assertRaises(StreamForbiddenException):
|
||||
stream.read()
|
||||
|
||||
self.mocked_get.assert_called_once_with(rule.url)
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
|
||||
@patch("newsreader.news.collection.feed.parse")
|
||||
def test_stream_raises_parse_exception(self, mocked_parse):
|
||||
self.mocked_get.return_value = MagicMock(status_code=200)
|
||||
self.mocked_fetch.return_value = MagicMock()
|
||||
mocked_parse.side_effect = TypeError
|
||||
self.patched_parse.stop()
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = FeedStream(rule)
|
||||
|
|
|
|||
47
src/newsreader/news/collection/tests/mocks.py
Normal file
47
src/newsreader/news/collection/tests/mocks.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
simple_mock = """
|
||||
<html>
|
||||
<body>
|
||||
<article>
|
||||
<h1>Clickbait</h1>
|
||||
</article>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
simple_feed_mock = {
|
||||
"bozo": 0,
|
||||
"encoding": "utf-8",
|
||||
"feed": {
|
||||
"image": {
|
||||
"href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
|
||||
"link": "https://www.bbc.co.uk/news/",
|
||||
"title": "BBC News - Home",
|
||||
"language": "en-gb",
|
||||
"link": "https://www.bbc.co.uk/news/",
|
||||
},
|
||||
"link": "https://www.bbc.co.uk/news/",
|
||||
"links": [{"href": "https://www.bbc.co.uk/news/", "rel": "alternate", "type": "text/html"}],
|
||||
"title": "BBC News - Home",
|
||||
},
|
||||
"href": "http://feeds.bbci.co.uk/news/rss.xml",
|
||||
"status": 200,
|
||||
"version": "rss20",
|
||||
}
|
||||
|
||||
feed_mock_without_link = {
|
||||
"bozo": 0,
|
||||
"encoding": "utf-8",
|
||||
"feed": {
|
||||
"image": {
|
||||
"href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
|
||||
"link": "https://www.bbc.co.uk/news/",
|
||||
"title": "BBC News - Home",
|
||||
"language": "en-gb",
|
||||
"link": "https://www.bbc.co.uk/news/",
|
||||
},
|
||||
"title": "BBC News - Home",
|
||||
},
|
||||
"href": "http://feeds.bbci.co.uk/news/rss.xml",
|
||||
"status": 200,
|
||||
"version": "rss20",
|
||||
}
|
||||
134
src/newsreader/news/collection/tests/tests.py
Normal file
134
src/newsreader/news/collection/tests/tests.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from .mocks import feed_mock_without_link, simple_feed_mock, simple_mock
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from newsreader.news.collection.base import URLBuilder, WebsiteStream
|
||||
from newsreader.news.collection.exceptions import (
|
||||
StreamDeniedException,
|
||||
StreamException,
|
||||
StreamForbiddenException,
|
||||
StreamNotFoundException,
|
||||
StreamParseException,
|
||||
StreamTimeOutException,
|
||||
)
|
||||
from newsreader.news.collection.tests.factories import CollectionRuleFactory
|
||||
|
||||
|
||||
class WebsiteStreamTestCase(TestCase):
|
||||
def setUp(self):
|
||||
self.patched_fetch = patch("newsreader.news.collection.base.fetch")
|
||||
self.mocked_fetch = self.patched_fetch.start()
|
||||
|
||||
def tearDown(self):
|
||||
patch.stopall()
|
||||
|
||||
def test_simple(self):
|
||||
self.mocked_fetch.return_value = MagicMock(content=simple_mock)
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = WebsiteStream(rule.url)
|
||||
return_value = stream.read()
|
||||
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
self.assertEquals(return_value, (BeautifulSoup(simple_mock, "lxml"), stream))
|
||||
|
||||
def test_raises_exception(self):
|
||||
self.mocked_fetch.side_effect = StreamException
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = WebsiteStream(rule.url)
|
||||
|
||||
with self.assertRaises(StreamException):
|
||||
stream.read()
|
||||
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
|
||||
def test_raises_denied_exception(self):
|
||||
self.mocked_fetch.side_effect = StreamDeniedException
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = WebsiteStream(rule.url)
|
||||
|
||||
with self.assertRaises(StreamDeniedException):
|
||||
stream.read()
|
||||
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
|
||||
def test_raises_stream_not_found_exception(self):
|
||||
self.mocked_fetch.side_effect = StreamNotFoundException
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = WebsiteStream(rule.url)
|
||||
|
||||
with self.assertRaises(StreamNotFoundException):
|
||||
stream.read()
|
||||
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
|
||||
def test_stream_raises_time_out_exception(self):
|
||||
self.mocked_fetch.side_effect = StreamTimeOutException
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = WebsiteStream(rule.url)
|
||||
|
||||
with self.assertRaises(StreamTimeOutException):
|
||||
stream.read()
|
||||
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
|
||||
def test_stream_raises_forbidden_exception(self):
|
||||
self.mocked_fetch.side_effect = StreamForbiddenException
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = WebsiteStream(rule.url)
|
||||
|
||||
with self.assertRaises(StreamForbiddenException):
|
||||
stream.read()
|
||||
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
|
||||
@patch("newsreader.news.collection.base.WebsiteStream.parse")
|
||||
def test_stream_raises_parse_exception(self, mocked_parse):
|
||||
self.mocked_fetch.return_value = MagicMock()
|
||||
mocked_parse.side_effect = StreamParseException
|
||||
|
||||
rule = CollectionRuleFactory()
|
||||
stream = WebsiteStream(rule.url)
|
||||
|
||||
with self.assertRaises(StreamParseException):
|
||||
stream.read()
|
||||
|
||||
self.mocked_fetch.assert_called_once_with(rule.url)
|
||||
|
||||
|
||||
class URLBuilderTestCase(TestCase):
|
||||
def test_simple(self):
|
||||
initial_rule = CollectionRuleFactory()
|
||||
|
||||
with URLBuilder((simple_feed_mock, MagicMock(rule=initial_rule))) as builder:
|
||||
rule, url = builder.build()
|
||||
|
||||
self.assertEquals(rule.pk, initial_rule.pk)
|
||||
self.assertEquals(url, "https://www.bbc.co.uk/news/")
|
||||
|
||||
def test_no_link(self):
|
||||
initial_rule = CollectionRuleFactory()
|
||||
|
||||
with URLBuilder((feed_mock_without_link, MagicMock(rule=initial_rule))) as builder:
|
||||
rule, url = builder.build()
|
||||
|
||||
self.assertEquals(rule.pk, initial_rule.pk)
|
||||
self.assertEquals(url, None)
|
||||
|
||||
def test_no_data(self):
|
||||
initial_rule = CollectionRuleFactory()
|
||||
|
||||
with URLBuilder((None, MagicMock(rule=initial_rule))) as builder:
|
||||
rule, url = builder.build()
|
||||
|
||||
self.assertEquals(rule.pk, initial_rule.pk)
|
||||
self.assertEquals(url, None)
|
||||
1
src/newsreader/news/collection/tests/utils/__init__.py
Normal file
1
src/newsreader/news/collection/tests/utils/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
from .tests import *
|
||||
57
src/newsreader/news/collection/tests/utils/tests.py
Normal file
57
src/newsreader/news/collection/tests/utils/tests.py
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from newsreader.news.collection.exceptions import (
|
||||
StreamDeniedException,
|
||||
StreamForbiddenException,
|
||||
StreamNotFoundException,
|
||||
StreamTimeOutException,
|
||||
)
|
||||
from newsreader.news.collection.utils import fetch
|
||||
|
||||
|
||||
class FetchTestCase(TestCase):
|
||||
def setUp(self):
|
||||
self.patched_get = patch("newsreader.news.collection.utils.requests.get")
|
||||
self.mocked_get = self.patched_get.start()
|
||||
|
||||
def test_simple(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=200, content="content")
|
||||
|
||||
url = "https://www.bbc.co.uk/news"
|
||||
response = fetch(url)
|
||||
|
||||
self.assertEquals(response.content, "content")
|
||||
|
||||
def test_raises_not_found(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=404)
|
||||
|
||||
url = "https://www.bbc.co.uk/news"
|
||||
|
||||
with self.assertRaises(StreamNotFoundException):
|
||||
fetch(url)
|
||||
|
||||
def test_raises_denied(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=401)
|
||||
|
||||
url = "https://www.bbc.co.uk/news"
|
||||
|
||||
with self.assertRaises(StreamDeniedException):
|
||||
fetch(url)
|
||||
|
||||
def test_raises_forbidden(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=403)
|
||||
|
||||
url = "https://www.bbc.co.uk/news"
|
||||
|
||||
with self.assertRaises(StreamForbiddenException):
|
||||
fetch(url)
|
||||
|
||||
def test_raises_timed_out(self):
|
||||
self.mocked_get.return_value = MagicMock(status_code=408)
|
||||
|
||||
url = "https://www.bbc.co.uk/news"
|
||||
|
||||
with self.assertRaises(StreamTimeOutException):
|
||||
fetch(url)
|
||||
|
|
@ -1,9 +1,15 @@
|
|||
from datetime import datetime, tzinfo
|
||||
from time import mktime, struct_time
|
||||
from typing import Tuple
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from requests.models import Response
|
||||
|
||||
from django.utils import timezone
|
||||
|
||||
from newsreader.news.collection.response_handler import ResponseHandler
|
||||
|
||||
|
||||
def build_publication_date(dt: struct_time, tz: tzinfo) -> Tuple:
|
||||
try:
|
||||
|
|
@ -12,3 +18,12 @@ def build_publication_date(dt: struct_time, tz: tzinfo) -> Tuple:
|
|||
except TypeError:
|
||||
return False, None
|
||||
return True, published_parsed
|
||||
|
||||
|
||||
def fetch(url: str) -> Optional[Response]:
|
||||
response = requests.get(url)
|
||||
|
||||
with ResponseHandler(response) as response_handler:
|
||||
response_handler.handle_response()
|
||||
|
||||
return response
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue