newsreader/src/newsreader/news/collection/tests/feed/collector/tests.py
2020-09-13 13:32:52 +02:00

267 lines
9 KiB
Python

from datetime import date, datetime, time
from time import struct_time
from unittest.mock import Mock, patch
from django.test import TestCase
from django.utils import timezone
import pytz
from freezegun import freeze_time
from newsreader.news.collection.exceptions import (
StreamDeniedException,
StreamException,
StreamForbiddenException,
StreamNotFoundException,
StreamParseException,
StreamTimeOutException,
)
from newsreader.news.collection.feed import FeedCollector
from newsreader.news.collection.tests.factories import FeedFactory
from newsreader.news.collection.utils import build_publication_date
from newsreader.news.core.models import Post
from newsreader.news.core.tests.factories import FeedPostFactory
from .mocks import duplicate_mock, empty_mock, multiple_mock, multiple_update_mock
class FeedCollectorTestCase(TestCase):
    """Exercise ``FeedCollector.collect`` with the network and parser mocked.

    ``fetch`` and ``FeedStream.parse`` (as looked up from
    ``newsreader.news.collection.feed``) are patched for every test, so each
    test controls what the collector sees through ``self.mocked_fetch`` and
    ``self.mocked_parse``.
    """

    def setUp(self):
        # Show full diffs on assertion failures.
        self.maxDiff = None

        self.patched_get = patch("newsreader.news.collection.feed.fetch")
        self.mocked_fetch = self.patched_get.start()

        self.patched_parse = patch("newsreader.news.collection.feed.FeedStream.parse")
        self.mocked_parse = self.patched_parse.start()

    def tearDown(self):
        # Stops every patch started in setUp.
        patch.stopall()

    @staticmethod
    def _previous_success():
        """Return the aware datetime used as a pre-existing ``last_suceeded``."""
        return timezone.make_aware(
            datetime.combine(date=date(2019, 10, 30), time=time(12, 30))
        )

    @freeze_time("2019-10-30 12:30:00")
    def test_simple_batch(self):
        """Parsing ``multiple_mock`` stores three posts and marks success."""
        self.mocked_parse.return_value = multiple_mock
        rule = FeedFactory()

        collector = FeedCollector()
        collector.collect()

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 3)
        self.assertEqual(rule.succeeded, True)
        self.assertEqual(rule.last_suceeded, timezone.now())
        self.assertIsNone(rule.error)

    @freeze_time("2019-10-30 12:30:00")
    def test_emtpy_batch(self):
        """Parsing ``empty_mock`` stores no posts but still marks success."""
        self.mocked_fetch.return_value = Mock()
        self.mocked_parse.return_value = empty_mock
        rule = FeedFactory()

        collector = FeedCollector()
        collector.collect()

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 0)
        self.assertEqual(rule.succeeded, True)
        self.assertIsNone(rule.error)
        self.assertEqual(rule.last_suceeded, timezone.now())

    def test_not_found(self):
        """A ``StreamNotFoundException`` from fetch is recorded on the rule."""
        self.mocked_fetch.side_effect = StreamNotFoundException
        rule = FeedFactory()

        collector = FeedCollector()
        collector.collect()

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 0)
        self.assertEqual(rule.succeeded, False)
        self.assertEqual(rule.error, "Stream not found")

    def test_denied(self):
        """A ``StreamDeniedException`` fails the rule, keeping ``last_suceeded``."""
        self.mocked_fetch.side_effect = StreamDeniedException
        last_suceeded = self._previous_success()
        rule = FeedFactory(last_suceeded=last_suceeded)

        collector = FeedCollector()
        collector.collect()

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 0)
        self.assertEqual(rule.succeeded, False)
        self.assertEqual(rule.error, "Stream does not have sufficient permissions")
        self.assertEqual(rule.last_suceeded, last_suceeded)

    def test_forbidden(self):
        """A ``StreamForbiddenException`` fails the rule, keeping ``last_suceeded``."""
        self.mocked_fetch.side_effect = StreamForbiddenException
        last_suceeded = self._previous_success()
        rule = FeedFactory(last_suceeded=last_suceeded)

        collector = FeedCollector()
        collector.collect()

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 0)
        self.assertEqual(rule.succeeded, False)
        self.assertEqual(rule.error, "Stream forbidden")
        self.assertEqual(rule.last_suceeded, last_suceeded)

    def test_timed_out(self):
        """A ``StreamTimeOutException`` fails the rule, keeping ``last_suceeded``."""
        self.mocked_fetch.side_effect = StreamTimeOutException
        last_suceeded = self._previous_success()
        rule = FeedFactory(last_suceeded=last_suceeded)

        collector = FeedCollector()
        collector.collect()

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 0)
        self.assertEqual(rule.succeeded, False)
        self.assertEqual(rule.error, "Stream timed out")
        self.assertEqual(rule.last_suceeded, last_suceeded)

    @freeze_time("2019-10-30 12:30:00")
    def test_duplicates(self):
        """Collecting ``duplicate_mock`` does not add to three existing posts.

        The pre-created posts presumably mirror the entries in
        ``duplicate_mock`` (defined in ``.mocks``, not visible here); the test
        only asserts that the post count is still three afterwards.
        """
        self.mocked_parse.return_value = duplicate_mock
        rule = FeedFactory()

        # The factories are called for their database side effect only.
        FeedPostFactory(
            url="https://www.bbc.co.uk/news/world-us-canada-48338168",
            title="Trump's 'genocidal taunts' will not end Iran - Zarif",
            body="Foreign Minister Mohammad Javad Zarif says the US "
            "president should try showing Iranians some respect.",
            publication_date=build_publication_date(
                struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), pytz.utc
            ),
            rule=rule,
        )
        FeedPostFactory(
            url="https://www.bbc.co.uk/news/technology-48334739",
            title="Huawei's Android loss: How it affects you",
            body="Google's move to end business ties with Huawei will "
            "affect current devices and future purchases.",
            publication_date=build_publication_date(
                struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0)), pytz.utc
            ),
            rule=rule,
        )
        FeedPostFactory(
            url="https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
            title="Birmingham head teacher threatened over LGBT lessons",
            body="Police are investigating the messages while an MP "
            'calls for a protest exclusion zone "to protect '
            'children".',
            publication_date=build_publication_date(
                struct_time((2019, 5, 20, 16, 32, 38, 0, 140, 0)), pytz.utc
            ),
            rule=rule,
        )

        collector = FeedCollector()
        collector.collect(rules=[rule])

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 3)
        self.assertEqual(rule.succeeded, True)
        self.assertEqual(rule.last_suceeded, timezone.now())
        self.assertIsNone(rule.error)

    @freeze_time("2019-02-22 12:30:00")
    def test_items_with_identifiers_get_updated(self):
        """Existing posts with a ``remote_identifier`` are updated in place.

        The first post starts with the truncated title ``"Trump"`` and is
        expected to carry the full title after collecting
        ``multiple_update_mock``; the post count stays at three.
        """
        self.mocked_parse.return_value = multiple_update_mock
        rule = FeedFactory()

        first_post = FeedPostFactory(
            remote_identifier="https://www.bbc.co.uk/news/world-us-canada-48338168",
            url="https://www.bbc.co.uk/",
            title="Trump",
            body="Foreign Minister Mohammad Javad Zarif",
            publication_date=timezone.now(),
            rule=rule,
        )
        second_post = FeedPostFactory(
            remote_identifier="https://www.bbc.co.uk/news/technology-48334739",
            url="https://www.bbc.co.uk/",
            title="Huawei's Android loss: How it affects you",
            body="Google's move to end business ties with Huawei will",
            publication_date=timezone.now(),
            rule=rule,
        )
        third_post = FeedPostFactory(
            remote_identifier="https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
            url="https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
            title="Birmingham head teacher threatened over LGBT lessons",
            body="Police are investigating the messages while an MP",
            publication_date=timezone.now(),
            rule=rule,
        )

        collector = FeedCollector()
        collector.collect(rules=[rule])

        rule.refresh_from_db()
        first_post.refresh_from_db()
        second_post.refresh_from_db()
        third_post.refresh_from_db()

        self.assertEqual(Post.objects.count(), 3)
        self.assertEqual(rule.succeeded, True)
        self.assertEqual(rule.last_suceeded, timezone.now())
        self.assertIsNone(rule.error)

        self.assertEqual(
            first_post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif"
        )
        self.assertEqual(
            second_post.title, "Huawei's Android loss: How it affects you"
        )
        self.assertEqual(
            third_post.title, "Birmingham head teacher threatened over LGBT lessons"
        )

    @freeze_time("2019-02-22 12:30:00")
    def test_disabled_rules(self):
        """Only the enabled rule is collected; the disabled one is untouched."""
        rules = (FeedFactory(enabled=False), FeedFactory(enabled=True))
        self.mocked_parse.return_value = multiple_mock

        collector = FeedCollector()
        collector.collect()

        for rule in rules:
            rule.refresh_from_db()

        disabled_rule, enabled_rule = rules

        # Three posts in total, i.e. a single rule's worth of multiple_mock.
        self.assertEqual(Post.objects.count(), 3)

        self.assertEqual(enabled_rule.succeeded, True)
        self.assertEqual(enabled_rule.last_suceeded, timezone.now())
        self.assertIsNone(enabled_rule.error)

        self.assertIsNone(disabled_rule.last_suceeded)
        self.assertEqual(disabled_rule.succeeded, False)