diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4ae3f02..a45b8a7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -83,4 +83,4 @@ python linting: script: - isort -rc src/ --check-only - black -l 88 --check src/ - - autoflake -rc src/ + - autoflake --check --remove-all-unused-imports --ignore-init-module-imports --recursive src/ diff --git a/src/newsreader/accounts/tests/test_activation.py b/src/newsreader/accounts/tests/test_activation.py index b5a9943..45d0909 100644 --- a/src/newsreader/accounts/tests/test_activation.py +++ b/src/newsreader/accounts/tests/test_activation.py @@ -1,16 +1,13 @@ import datetime from django.conf import settings -from django.core import mail from django.test import TestCase -from django.test.utils import override_settings from django.urls import reverse from django.utils.translation import gettext as _ from registration.models import RegistrationProfile from newsreader.accounts.models import User -from newsreader.accounts.tests.factories import UserFactory class ActivationTestCase(TestCase): diff --git a/src/newsreader/accounts/tests/test_password_reset.py b/src/newsreader/accounts/tests/test_password_reset.py index 1f818c8..c7871d5 100644 --- a/src/newsreader/accounts/tests/test_password_reset.py +++ b/src/newsreader/accounts/tests/test_password_reset.py @@ -1,14 +1,10 @@ from typing import Dict -from django.contrib.auth.tokens import default_token_generator as token_generator from django.core import mail from django.test import TestCase from django.urls import reverse -from django.utils.encoding import force_bytes -from django.utils.http import urlsafe_base64_encode from django.utils.translation import gettext as _ -from newsreader.accounts.models import User from newsreader.accounts.tests.factories import UserFactory diff --git a/src/newsreader/accounts/tests/test_resend_activation.py b/src/newsreader/accounts/tests/test_resend_activation.py index a18df2a..0209f94 100644 --- a/src/newsreader/accounts/tests/test_resend_activation.py +++ b/src/newsreader/accounts/tests/test_resend_activation.py @@ -1,13 +1,10 @@ -from django.conf import settings from django.core import mail from django.test import TransactionTestCase as TestCase -from django.test.utils import override_settings from django.urls import reverse from django.utils.translation import gettext as _ from registration.models import RegistrationProfile -from newsreader.accounts.models import User from newsreader.accounts.tests.factories import RegistrationProfileFactory, UserFactory diff --git a/src/newsreader/accounts/tests/tests.py b/src/newsreader/accounts/tests/tests.py index b87e8fd..e28dbd3 100644 --- a/src/newsreader/accounts/tests/tests.py +++ b/src/newsreader/accounts/tests/tests.py @@ -1,6 +1,6 @@ from django.test import TestCase -from django_celery_beat.models import IntervalSchedule, PeriodicTask +from django_celery_beat.models import PeriodicTask from newsreader.accounts.models import User diff --git a/src/newsreader/accounts/urls.py b/src/newsreader/accounts/urls.py index 7b869c7..8605233 100644 --- a/src/newsreader/accounts/urls.py +++ b/src/newsreader/accounts/urls.py @@ -1,4 +1,4 @@ -from django.urls import include, path +from django.urls import path from newsreader.accounts.views import ( ActivationCompleteView, diff --git a/src/newsreader/conf/dev.py b/src/newsreader/conf/dev.py index 9d77952..16cf03b 100644 --- a/src/newsreader/conf/dev.py +++ b/src/newsreader/conf/dev.py @@ -1,4 +1,4 @@ -from .base import * +from .base import * # isort:skip DEBUG = True diff --git a/src/newsreader/conf/docker.py b/src/newsreader/conf/docker.py index fe20d06..5643937 100644 --- a/src/newsreader/conf/docker.py +++ b/src/newsreader/conf/docker.py @@ -1,5 +1,4 @@ -from .dev import * - +from .dev import * # isort:skip # Celery # https://docs.celeryproject.org/en/latest/userguide/configuration.html diff --git a/src/newsreader/conf/gitlab.py b/src/newsreader/conf/gitlab.py index 3108245..ee11c59 100644 --- a/src/newsreader/conf/gitlab.py +++ b/src/newsreader/conf/gitlab.py @@ -1,4 +1,4 @@ -from .base import * # noqa +from .base import * # isort:skip DEBUG = True diff --git a/src/newsreader/news/collection/base.py b/src/newsreader/news/collection/base.py index ea8e015..519f4f8 100644 --- a/src/newsreader/news/collection/base.py +++ b/src/newsreader/news/collection/base.py @@ -1,5 +1,3 @@ -from django.db.models.query import QuerySet - from bs4 import BeautifulSoup from newsreader.news.collection.exceptions import StreamParseException diff --git a/src/newsreader/news/collection/endpoints.py b/src/newsreader/news/collection/endpoints.py index 0a13766..7f2ede0 100644 --- a/src/newsreader/news/collection/endpoints.py +++ b/src/newsreader/news/collection/endpoints.py @@ -6,7 +6,6 @@ from rest_framework.generics import ( get_object_or_404, ) from rest_framework.response import Response -from rest_framework.serializers import Serializer from newsreader.core.pagination import LargeResultSetPagination, ResultSetPagination from newsreader.news.collection.models import CollectionRule diff --git a/src/newsreader/news/collection/feed.py b/src/newsreader/news/collection/feed.py index 0e8b258..46a7a3b 100644 --- a/src/newsreader/news/collection/feed.py +++ b/src/newsreader/news/collection/feed.py @@ -1,5 +1,8 @@ +import logging + from concurrent.futures import ThreadPoolExecutor, as_completed +from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist from django.db.models.fields import CharField, TextField from django.template.defaultfilters import truncatechars from django.utils import timezone @@ -21,11 +24,13 @@ from newsreader.news.collection.exceptions import ( StreamParseException, StreamTimeOutException, ) -from newsreader.news.collection.models import CollectionRule from newsreader.news.collection.utils import build_publication_date, fetch from newsreader.news.core.models import Post +logger = logging.getLogger(__name__) + + class FeedBuilder(Builder): instances = [] @@ -164,7 +169,8 @@ class FeedClient(Client): yield response_data except StreamException as e: - stream.rule.error = e.message + length = stream.rule._meta.get_field("error").max_length + stream.rule.error = e.message[-length:] stream.rule.succeeded = False yield ({"entries": []}, stream) @@ -195,8 +201,8 @@ class FeedDuplicateHandler: if instance.remote_identifier in self.existing_identifiers: existing_post = self.handle_duplicate(instance) - if existing_post: - yield existing_post + yield existing_post + continue elif not instance.remote_identifier and self.in_database(instance): continue @@ -229,7 +235,16 @@ class FeedDuplicateHandler: remote_identifier=instance.remote_identifier ) except ObjectDoesNotExist: - return + logger.error( + f"Duplicate handler tried retrieving post {instance.remote_identifier} but failed doing so." + ) + return instance + except MultipleObjectsReturned: + existing_instances = self.queryset.filter( + remote_identifier=instance.remote_identifier + ).order_by("-publication_date") + existing_instance = existing_instances.last() + existing_instances.exclude(pk=existing_instance.pk).delete() for field in instance._meta.get_fields(): getattr(existing_instance, field.name, object()) diff --git a/src/newsreader/news/collection/migrations/0005_auto_20200303_1932.py b/src/newsreader/news/collection/migrations/0005_auto_20200303_1932.py new file mode 100644 index 0000000..cdd3e32 --- /dev/null +++ b/src/newsreader/news/collection/migrations/0005_auto_20200303_1932.py @@ -0,0 +1,16 @@ +# Generated by Django 2.2 on 2020-03-03 19:32 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [("collection", "0004_auto_20190714_1422")] + + operations = [ + migrations.AlterField( + model_name="collectionrule", + name="error", + field=models.CharField(blank=True, max_length=1024, null=True), + ) + ] diff --git a/src/newsreader/news/collection/models.py b/src/newsreader/news/collection/models.py index 04ea596..8552ebf 100644 --- a/src/newsreader/news/collection/models.py +++ b/src/newsreader/news/collection/models.py @@ -33,7 +33,7 @@ class CollectionRule(TimeStampedModel): last_suceeded = models.DateTimeField(blank=True, null=True) succeeded = models.BooleanField(default=False) - error = models.CharField(max_length=255, blank=True, null=True) + error = models.CharField(max_length=1024, blank=True, null=True) user = models.ForeignKey("accounts.User", _("Owner"), related_name="rules") diff --git a/src/newsreader/news/collection/tests/endpoints/rule/detail/tests.py b/src/newsreader/news/collection/tests/endpoints/rule/detail/tests.py index a489d55..1c281d9 100644 --- a/src/newsreader/news/collection/tests/endpoints/rule/detail/tests.py +++ b/src/newsreader/news/collection/tests/endpoints/rule/detail/tests.py @@ -1,6 +1,6 @@ import json -from django.test import Client, TestCase +from django.test import TestCase from django.urls import reverse from newsreader.accounts.tests.factories import UserFactory diff --git a/src/newsreader/news/collection/tests/endpoints/rule/list/tests.py b/src/newsreader/news/collection/tests/endpoints/rule/list/tests.py index 32d8a0d..0e2a269 100644 --- a/src/newsreader/news/collection/tests/endpoints/rule/list/tests.py +++ b/src/newsreader/news/collection/tests/endpoints/rule/list/tests.py @@ -2,7 +2,7 @@ import json from datetime import date, datetime, time -from django.test import Client, TestCase +from django.test import TestCase from django.urls import reverse import pytz diff --git a/src/newsreader/news/collection/tests/feed/client/tests.py b/src/newsreader/news/collection/tests/feed/client/tests.py index 9c11cbd..dd3c1e4 100644 --- a/src/newsreader/news/collection/tests/feed/client/tests.py +++ b/src/newsreader/news/collection/tests/feed/client/tests.py @@ -1,6 +1,7 @@ from unittest.mock import MagicMock, patch from django.test import TestCase +from django.utils.lorem_ipsum import words from newsreader.news.collection.exceptions import ( StreamDeniedException, @@ -101,3 +102,29 @@ class FeedClientTestCase(TestCase): self.assertEquals(stream.rule.succeeded, False) self.mocked_read.assert_called_once_with() + + def test_client_catches_stream_parse_exception(self): + rule = CollectionRuleFactory.create() + mock_stream = MagicMock(rule=rule) + self.mocked_read.side_effect = StreamParseException("Stream has wrong contents") + + with FeedClient([rule]) as client: + for data, stream in client: + self.assertEquals(data, {"entries": []}) + self.assertEquals(stream.rule.error, "Stream has wrong contents") + self.assertEquals(stream.rule.succeeded, False) + + self.mocked_read.assert_called_once_with() + + def test_client_catches_long_exception_text(self): + rule = CollectionRuleFactory.create() + mock_stream = MagicMock(rule=rule) + self.mocked_read.side_effect = StreamParseException(words(1000)) + + with FeedClient([rule]) as client: + for data, stream in client: + self.assertEquals(data, {"entries": []}) + self.assertEquals(len(stream.rule.error), 1024) + self.assertEquals(stream.rule.succeeded, False) + + self.mocked_read.assert_called_once_with() diff --git a/src/newsreader/news/collection/tests/feed/duplicate_handler/tests.py b/src/newsreader/news/collection/tests/feed/duplicate_handler/tests.py index 18b6a99..b794f3e 100644 --- a/src/newsreader/news/collection/tests/feed/duplicate_handler/tests.py +++ b/src/newsreader/news/collection/tests/feed/duplicate_handler/tests.py @@ -33,7 +33,7 @@ class FeedDuplicateHandlerTestCase(TestCase): self.assertTrue(post.title != existing_post.title) def test_duplicate_entries_in_recent_database(self): - PostFactory.create_batch(size=20) + PostFactory.create_batch(size=10) publication_date = timezone.now() @@ -60,3 +60,24 @@ class FeedDuplicateHandlerTestCase(TestCase): posts = list(posts_gen) self.assertEquals(len(posts), 0) + + def test_multiple_existing_entries_with_identifier(self): + timezone.now() + rule = CollectionRuleFactory() + + PostFactory.create_batch( + remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7", rule=rule, size=5 + ) + + new_post = PostFactory.build( + remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7", + title="This is a new one", + rule=rule, + ) + + with FeedDuplicateHandler(rule) as duplicate_handler: + posts_gen = duplicate_handler.check([new_post]) + posts = list(posts_gen) + + self.assertEquals(len(posts), 1) + self.assertEquals(posts[0].title, new_post.title) diff --git a/src/newsreader/news/collection/tests/test_views.py b/src/newsreader/news/collection/tests/test_views.py index 7515b98..a9b07d0 100644 --- a/src/newsreader/news/collection/tests/test_views.py +++ b/src/newsreader/news/collection/tests/test_views.py @@ -1,7 +1,7 @@ import os from django.conf import settings -from django.test import Client, TestCase +from django.test import TestCase from django.urls import reverse from django.utils.translation import gettext_lazy as _ diff --git a/src/newsreader/news/collection/utils.py b/src/newsreader/news/collection/utils.py index 261dc14..0aa096f 100644 --- a/src/newsreader/news/collection/utils.py +++ b/src/newsreader/news/collection/utils.py @@ -5,7 +5,6 @@ from django.utils import timezone import requests from requests.exceptions import RequestException -from requests.models import Response from newsreader.news.collection.response_handler import ResponseHandler diff --git a/src/newsreader/news/core/endpoints.py b/src/newsreader/news/core/endpoints.py index 91736a9..acc4ab2 100644 --- a/src/newsreader/news/core/endpoints.py +++ b/src/newsreader/news/core/endpoints.py @@ -13,8 +13,7 @@ from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response from newsreader.accounts.permissions import IsPostOwner -from newsreader.core.pagination import LargeResultSetPagination, ResultSetPagination -from newsreader.news.collection.models import CollectionRule +from newsreader.core.pagination import LargeResultSetPagination from newsreader.news.collection.serializers import RuleSerializer from newsreader.news.core.filters import ReadFilter from newsreader.news.core.models import Category, Post diff --git a/src/newsreader/news/core/serializers.py b/src/newsreader/news/core/serializers.py index e6c7a08..d4353c9 100644 --- a/src/newsreader/news/core/serializers.py +++ b/src/newsreader/news/core/serializers.py @@ -1,6 +1,5 @@ from rest_framework import serializers -from newsreader.news import collection from newsreader.news.core.models import Category, Post diff --git a/src/newsreader/news/core/tests/endpoints/category/detail/tests.py b/src/newsreader/news/core/tests/endpoints/category/detail/tests.py index 05b4e92..2bd6bcb 100644 --- a/src/newsreader/news/core/tests/endpoints/category/detail/tests.py +++ b/src/newsreader/news/core/tests/endpoints/category/detail/tests.py @@ -1,6 +1,6 @@ import json -from django.test import Client, TestCase +from django.test import TestCase from django.urls import reverse from newsreader.accounts.tests.factories import UserFactory diff --git a/src/newsreader/news/core/tests/endpoints/category/list/tests.py b/src/newsreader/news/core/tests/endpoints/category/list/tests.py index 043e805..d44f204 100644 --- a/src/newsreader/news/core/tests/endpoints/category/list/tests.py +++ b/src/newsreader/news/core/tests/endpoints/category/list/tests.py @@ -2,7 +2,7 @@ import json from datetime import date, datetime, time -from django.test import Client, TestCase +from django.test import TestCase from django.urls import reverse import pytz diff --git a/src/newsreader/news/core/tests/endpoints/post/detail/tests.py b/src/newsreader/news/core/tests/endpoints/post/detail/tests.py index f012cc2..7c8c31e 100644 --- a/src/newsreader/news/core/tests/endpoints/post/detail/tests.py +++ b/src/newsreader/news/core/tests/endpoints/post/detail/tests.py @@ -1,6 +1,6 @@ import json -from django.test import Client, TestCase +from django.test import TestCase from django.urls import reverse from newsreader.accounts.tests.factories import UserFactory diff --git a/src/newsreader/news/core/tests/endpoints/post/list/tests.py b/src/newsreader/news/core/tests/endpoints/post/list/tests.py index ba7aba9..f3639bf 100644 --- a/src/newsreader/news/core/tests/endpoints/post/list/tests.py +++ b/src/newsreader/news/core/tests/endpoints/post/list/tests.py @@ -1,6 +1,6 @@ from datetime import date, datetime, time -from django.test import Client, TestCase +from django.test import TestCase from django.urls import reverse import pytz diff --git a/src/newsreader/news/core/tests/test_views.py b/src/newsreader/news/core/tests/test_views.py index 12861fb..e4bf458 100644 --- a/src/newsreader/news/core/tests/test_views.py +++ b/src/newsreader/news/core/tests/test_views.py @@ -1,4 +1,4 @@ -from django.test import Client, TestCase +from django.test import TestCase from django.urls import reverse from newsreader.accounts.tests.factories import UserFactory