267 lines
9 KiB
Python
267 lines
9 KiB
Python
from datetime import date, datetime, time
|
|
from time import struct_time
|
|
from unittest.mock import Mock, patch
|
|
|
|
from django.test import TestCase
|
|
from django.utils import timezone
|
|
|
|
import pytz
|
|
|
|
from freezegun import freeze_time
|
|
|
|
from newsreader.news.collection.exceptions import (
|
|
StreamDeniedException,
|
|
StreamException,
|
|
StreamForbiddenException,
|
|
StreamNotFoundException,
|
|
StreamParseException,
|
|
StreamTimeOutException,
|
|
)
|
|
from newsreader.news.collection.feed import FeedCollector
|
|
from newsreader.news.collection.tests.factories import FeedFactory
|
|
from newsreader.news.collection.utils import build_publication_date
|
|
from newsreader.news.core.models import Post
|
|
from newsreader.news.core.tests.factories import FeedPostFactory
|
|
|
|
from .mocks import duplicate_mock, empty_mock, multiple_mock, multiple_update_mock
|
|
|
|
|
|
class FeedCollectorTestCase(TestCase):
    """Tests for ``FeedCollector.collect`` with fetching and parsing mocked.

    ``newsreader.news.collection.feed.fetch`` and ``FeedStream.parse`` are
    patched for every test, so each test only configures what those two
    mocks return or raise and then inspects the rule and the stored posts.
    """

    def setUp(self):
        """Start the ``fetch`` and ``FeedStream.parse`` patches for a test."""
        self.maxDiff = None

        # Registered before any patch is started: cleanups still run when
        # setUp fails halfway, whereas tearDown would be skipped and leave
        # the already started patches active for later tests.
        self.addCleanup(patch.stopall)

        self.patched_get = patch("newsreader.news.collection.feed.fetch")
        self.mocked_fetch = self.patched_get.start()

        self.patched_parse = patch("newsreader.news.collection.feed.FeedStream.parse")
        self.mocked_parse = self.patched_parse.start()

    @staticmethod
    def _previous_success():
        """Return the aware 2019-10-30 12:30 datetime used as a prior success."""
        return timezone.make_aware(
            datetime.combine(date=date(2019, 10, 30), time=time(12, 30))
        )

    @freeze_time("2019-10-30 12:30:00")
    def test_simple_batch(self):
        """Collecting ``multiple_mock`` stores three posts and marks success."""
        self.mocked_parse.return_value = multiple_mock
        rule = FeedFactory()

        collector = FeedCollector()
        collector.collect()

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 3)
        self.assertEqual(rule.succeeded, True)
        self.assertEqual(rule.last_suceeded, timezone.now())
        self.assertIsNone(rule.error)

    # NOTE: the method name keeps its original spelling so existing test ids
    # and runner filters continue to match.
    @freeze_time("2019-10-30 12:30:00")
    def test_emtpy_batch(self):
        """Collecting ``empty_mock`` stores no posts but still marks success."""
        self.mocked_fetch.return_value = Mock()
        self.mocked_parse.return_value = empty_mock
        rule = FeedFactory()

        collector = FeedCollector()
        collector.collect()

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 0)
        self.assertEqual(rule.succeeded, True)
        self.assertIsNone(rule.error)
        self.assertEqual(rule.last_suceeded, timezone.now())

    def test_not_found(self):
        """``StreamNotFoundException`` from fetch is recorded on the rule."""
        self.mocked_fetch.side_effect = StreamNotFoundException
        rule = FeedFactory()

        collector = FeedCollector()
        collector.collect()

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 0)
        self.assertEqual(rule.succeeded, False)
        self.assertEqual(rule.error, "Stream not found")

    def test_denied(self):
        """``StreamDeniedException`` fails the rule, keeping ``last_suceeded``."""
        self.mocked_fetch.side_effect = StreamDeniedException
        last_suceeded = self._previous_success()
        rule = FeedFactory(last_suceeded=last_suceeded)

        collector = FeedCollector()
        collector.collect()

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 0)
        self.assertEqual(rule.succeeded, False)
        self.assertEqual(rule.error, "Stream does not have sufficient permissions")
        self.assertEqual(rule.last_suceeded, last_suceeded)

    def test_forbidden(self):
        """``StreamForbiddenException`` fails the rule, keeping ``last_suceeded``."""
        self.mocked_fetch.side_effect = StreamForbiddenException
        last_suceeded = self._previous_success()
        rule = FeedFactory(last_suceeded=last_suceeded)

        collector = FeedCollector()
        collector.collect()

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 0)
        self.assertEqual(rule.succeeded, False)
        self.assertEqual(rule.error, "Stream forbidden")
        self.assertEqual(rule.last_suceeded, last_suceeded)

    def test_timed_out(self):
        """``StreamTimeOutException`` fails the rule, keeping ``last_suceeded``."""
        self.mocked_fetch.side_effect = StreamTimeOutException
        last_suceeded = self._previous_success()
        rule = FeedFactory(last_suceeded=last_suceeded)

        collector = FeedCollector()
        collector.collect()

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 0)
        self.assertEqual(rule.succeeded, False)
        self.assertEqual(rule.error, "Stream timed out")
        self.assertEqual(rule.last_suceeded, last_suceeded)

    @freeze_time("2019-10-30 12:30:00")
    def test_duplicates(self):
        """Collecting ``duplicate_mock`` over three stored posts adds none."""
        self.mocked_parse.return_value = duplicate_mock
        rule = FeedFactory()

        # Three posts already stored for the rule before collecting; the
        # post count is expected to remain 3 afterwards.
        FeedPostFactory(
            url="https://www.bbc.co.uk/news/world-us-canada-48338168",
            title="Trump's 'genocidal taunts' will not end Iran - Zarif",
            body="Foreign Minister Mohammad Javad Zarif says the US "
            "president should try showing Iranians some respect.",
            publication_date=build_publication_date(
                struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), pytz.utc
            ),
            rule=rule,
        )

        FeedPostFactory(
            url="https://www.bbc.co.uk/news/technology-48334739",
            title="Huawei's Android loss: How it affects you",
            body="Google's move to end business ties with Huawei will "
            "affect current devices and future purchases.",
            publication_date=build_publication_date(
                struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0)), pytz.utc
            ),
            rule=rule,
        )

        FeedPostFactory(
            url="https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
            title="Birmingham head teacher threatened over LGBT lessons",
            body="Police are investigating the messages while an MP "
            'calls for a protest exclusion zone "to protect '
            'children".',
            publication_date=build_publication_date(
                struct_time((2019, 5, 20, 16, 32, 38, 0, 140, 0)), pytz.utc
            ),
            rule=rule,
        )

        collector = FeedCollector()
        collector.collect(rules=[rule])

        rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 3)
        self.assertEqual(rule.succeeded, True)
        self.assertEqual(rule.last_suceeded, timezone.now())
        self.assertIsNone(rule.error)

    @freeze_time("2019-02-22 12:30:00")
    def test_items_with_identifiers_get_updated(self):
        """Posts stored with a ``remote_identifier`` are updated, not duplicated."""
        self.mocked_parse.return_value = multiple_update_mock
        rule = FeedFactory()

        first_post = FeedPostFactory(
            remote_identifier="https://www.bbc.co.uk/news/world-us-canada-48338168",
            url="https://www.bbc.co.uk/",
            title="Trump",
            body="Foreign Minister Mohammad Javad Zarif",
            publication_date=timezone.now(),
            rule=rule,
        )

        second_post = FeedPostFactory(
            remote_identifier="https://www.bbc.co.uk/news/technology-48334739",
            url="https://www.bbc.co.uk/",
            title="Huawei's Android loss: How it affects you",
            body="Google's move to end business ties with Huawei will",
            publication_date=timezone.now(),
            rule=rule,
        )

        third_post = FeedPostFactory(
            remote_identifier="https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
            url="https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
            title="Birmingham head teacher threatened over LGBT lessons",
            body="Police are investigating the messages while an MP",
            publication_date=timezone.now(),
            rule=rule,
        )

        collector = FeedCollector()
        collector.collect(rules=[rule])

        rule.refresh_from_db()
        first_post.refresh_from_db()
        second_post.refresh_from_db()
        third_post.refresh_from_db()

        self.assertEqual(Post.objects.count(), 3)
        self.assertEqual(rule.succeeded, True)
        self.assertEqual(rule.last_suceeded, timezone.now())
        self.assertIsNone(rule.error)

        self.assertEqual(
            first_post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif"
        )
        self.assertEqual(
            second_post.title, "Huawei's Android loss: How it affects you"
        )
        self.assertEqual(
            third_post.title, "Birmingham head teacher threatened over LGBT lessons"
        )

    @freeze_time("2019-02-22 12:30:00")
    def test_disabled_rules(self):
        """A rule created with ``enabled=False`` is left untouched by ``collect``."""
        disabled_rule = FeedFactory(enabled=False)
        enabled_rule = FeedFactory(enabled=True)

        self.mocked_parse.return_value = multiple_mock

        collector = FeedCollector()
        collector.collect()

        disabled_rule.refresh_from_db()
        enabled_rule.refresh_from_db()

        self.assertEqual(Post.objects.count(), 3)
        self.assertEqual(enabled_rule.succeeded, True)
        self.assertEqual(enabled_rule.last_suceeded, timezone.now())
        self.assertIsNone(enabled_rule.error)

        self.assertIsNone(disabled_rule.last_suceeded)
        self.assertEqual(disabled_rule.succeeded, False)
|