Fix data errors
This commit is contained in:
parent
afc3c11775
commit
533561ba1e
27 changed files with 101 additions and 39 deletions
|
|
@ -83,4 +83,4 @@ python linting:
|
|||
script:
|
||||
- isort -rc src/ --check-only
|
||||
- black -l 88 --check src/
|
||||
- autoflake -rc src/
|
||||
- autoflake --check --remove-all-unused-imports --ignore-init-module-imports --recursive src/
|
||||
|
|
|
|||
|
|
@ -1,16 +1,13 @@
|
|||
import datetime
|
||||
|
||||
from django.conf import settings
|
||||
from django.core import mail
|
||||
from django.test import TestCase
|
||||
from django.test.utils import override_settings
|
||||
from django.urls import reverse
|
||||
from django.utils.translation import gettext as _
|
||||
|
||||
from registration.models import RegistrationProfile
|
||||
|
||||
from newsreader.accounts.models import User
|
||||
from newsreader.accounts.tests.factories import UserFactory
|
||||
|
||||
|
||||
class ActivationTestCase(TestCase):
|
||||
|
|
|
|||
|
|
@ -1,14 +1,10 @@
|
|||
from typing import Dict
|
||||
|
||||
from django.contrib.auth.tokens import default_token_generator as token_generator
|
||||
from django.core import mail
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
from django.utils.encoding import force_bytes
|
||||
from django.utils.http import urlsafe_base64_encode
|
||||
from django.utils.translation import gettext as _
|
||||
|
||||
from newsreader.accounts.models import User
|
||||
from newsreader.accounts.tests.factories import UserFactory
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,10 @@
|
|||
from django.conf import settings
|
||||
from django.core import mail
|
||||
from django.test import TransactionTestCase as TestCase
|
||||
from django.test.utils import override_settings
|
||||
from django.urls import reverse
|
||||
from django.utils.translation import gettext as _
|
||||
|
||||
from registration.models import RegistrationProfile
|
||||
|
||||
from newsreader.accounts.models import User
|
||||
from newsreader.accounts.tests.factories import RegistrationProfileFactory, UserFactory
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from django.test import TestCase
|
||||
|
||||
from django_celery_beat.models import IntervalSchedule, PeriodicTask
|
||||
from django_celery_beat.models import PeriodicTask
|
||||
|
||||
from newsreader.accounts.models import User
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from django.urls import include, path
|
||||
from django.urls import path
|
||||
|
||||
from newsreader.accounts.views import (
|
||||
ActivationCompleteView,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from .base import *
|
||||
from .base import * # isort:skip
|
||||
|
||||
|
||||
DEBUG = True
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
from .dev import *
|
||||
|
||||
from .dev import * # isort:skip
|
||||
|
||||
# Celery
|
||||
# https://docs.celeryproject.org/en/latest/userguide/configuration.html
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from .base import * # noqa
|
||||
from .base import * # isort:skip
|
||||
|
||||
|
||||
DEBUG = True
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
from django.db.models.query import QuerySet
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from newsreader.news.collection.exceptions import StreamParseException
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ from rest_framework.generics import (
|
|||
get_object_or_404,
|
||||
)
|
||||
from rest_framework.response import Response
|
||||
from rest_framework.serializers import Serializer
|
||||
|
||||
from newsreader.core.pagination import LargeResultSetPagination, ResultSetPagination
|
||||
from newsreader.news.collection.models import CollectionRule
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
import logging
|
||||
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist
|
||||
from django.db.models.fields import CharField, TextField
|
||||
from django.template.defaultfilters import truncatechars
|
||||
from django.utils import timezone
|
||||
|
|
@ -21,11 +24,13 @@ from newsreader.news.collection.exceptions import (
|
|||
StreamParseException,
|
||||
StreamTimeOutException,
|
||||
)
|
||||
from newsreader.news.collection.models import CollectionRule
|
||||
from newsreader.news.collection.utils import build_publication_date, fetch
|
||||
from newsreader.news.core.models import Post
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FeedBuilder(Builder):
|
||||
instances = []
|
||||
|
||||
|
|
@ -164,7 +169,8 @@ class FeedClient(Client):
|
|||
|
||||
yield response_data
|
||||
except StreamException as e:
|
||||
stream.rule.error = e.message
|
||||
length = stream.rule._meta.get_field("error").max_length
|
||||
stream.rule.error = e.message[-length:]
|
||||
stream.rule.succeeded = False
|
||||
|
||||
yield ({"entries": []}, stream)
|
||||
|
|
@ -195,8 +201,8 @@ class FeedDuplicateHandler:
|
|||
if instance.remote_identifier in self.existing_identifiers:
|
||||
existing_post = self.handle_duplicate(instance)
|
||||
|
||||
if existing_post:
|
||||
yield existing_post
|
||||
|
||||
continue
|
||||
elif not instance.remote_identifier and self.in_database(instance):
|
||||
continue
|
||||
|
|
@ -229,7 +235,16 @@ class FeedDuplicateHandler:
|
|||
remote_identifier=instance.remote_identifier
|
||||
)
|
||||
except ObjectDoesNotExist:
|
||||
return
|
||||
logger.error(
|
||||
f"Duplicate handler tried retrieving post {instance.remote_identifier} but failed doing so."
|
||||
)
|
||||
return instance
|
||||
except MultipleObjectsReturned:
|
||||
existing_instances = self.queryset.filter(
|
||||
remote_identifier=instance.remote_identifier
|
||||
).order_by("-publication_date")
|
||||
existing_instance = existing_instances.last()
|
||||
existing_instances.exclude(pk=existing_instance.pk).delete()
|
||||
|
||||
for field in instance._meta.get_fields():
|
||||
getattr(existing_instance, field.name, object())
|
||||
|
|
|
|||
|
|
@ -0,0 +1,16 @@
|
|||
# Generated by Django 2.2 on 2020-03-03 19:32
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Alter the ``error`` field of ``collectionrule`` to allow up to 1024 characters."""

    # Must be applied after the previous migration of the ``collection`` app.
    dependencies = [
        ("collection", "0004_auto_20190714_1422"),
    ]

    operations = [
        migrations.AlterField(
            model_name="collectionrule",
            name="error",
            field=models.CharField(max_length=1024, blank=True, null=True),
        ),
    ]
|
||||
|
|
@ -33,7 +33,7 @@ class CollectionRule(TimeStampedModel):
|
|||
|
||||
last_suceeded = models.DateTimeField(blank=True, null=True)
|
||||
succeeded = models.BooleanField(default=False)
|
||||
error = models.CharField(max_length=255, blank=True, null=True)
|
||||
error = models.CharField(max_length=1024, blank=True, null=True)
|
||||
|
||||
user = models.ForeignKey("accounts.User", _("Owner"), related_name="rules")
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import json
|
||||
|
||||
from django.test import Client, TestCase
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
|
||||
from newsreader.accounts.tests.factories import UserFactory
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import json
|
|||
|
||||
from datetime import date, datetime, time
|
||||
|
||||
from django.test import Client, TestCase
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
|
||||
import pytz
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from django.test import TestCase
|
||||
from django.utils.lorem_ipsum import words
|
||||
|
||||
from newsreader.news.collection.exceptions import (
|
||||
StreamDeniedException,
|
||||
|
|
@ -101,3 +102,29 @@ class FeedClientTestCase(TestCase):
|
|||
self.assertEquals(stream.rule.succeeded, False)
|
||||
|
||||
self.mocked_read.assert_called_once_with()
|
||||
|
||||
def test_client_catches_stream_parse_exception(self):
    """A ``StreamParseException`` raised while reading is caught by the client.

    The client is expected to yield an empty entry list and to record the
    exception message and a failed state on the stream's rule.
    """
    rule = CollectionRuleFactory.create()
    self.mocked_read.side_effect = StreamParseException("Stream has wrong contents")

    with FeedClient([rule]) as client:
        # NOTE(review): these assertions only run if the client yields at
        # least one item; an empty iteration would pass silently.
        for data, stream in client:
            self.assertEqual(data, {"entries": []})
            self.assertEqual(stream.rule.error, "Stream has wrong contents")
            self.assertEqual(stream.rule.succeeded, False)

    self.mocked_read.assert_called_once_with()
|
||||
|
||||
def test_client_catches_long_exception_text(self):
    """An exception carrying a very long message is stored as 1024 characters.

    The message is built from 1000 lorem-ipsum words; the test asserts that
    the error recorded on the rule has a length of exactly 1024.
    """
    rule = CollectionRuleFactory.create()
    self.mocked_read.side_effect = StreamParseException(words(1000))

    with FeedClient([rule]) as client:
        # NOTE(review): these assertions only run if the client yields at
        # least one item; an empty iteration would pass silently.
        for data, stream in client:
            self.assertEqual(data, {"entries": []})
            self.assertEqual(len(stream.rule.error), 1024)
            self.assertEqual(stream.rule.succeeded, False)

    self.mocked_read.assert_called_once_with()
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ class FeedDuplicateHandlerTestCase(TestCase):
|
|||
self.assertTrue(post.title != existing_post.title)
|
||||
|
||||
def test_duplicate_entries_in_recent_database(self):
|
||||
PostFactory.create_batch(size=20)
|
||||
PostFactory.create_batch(size=10)
|
||||
|
||||
publication_date = timezone.now()
|
||||
|
||||
|
|
@ -60,3 +60,24 @@ class FeedDuplicateHandlerTestCase(TestCase):
|
|||
posts = list(posts_gen)
|
||||
|
||||
self.assertEquals(len(posts), 0)
|
||||
|
||||
def test_multiple_existing_entries_with_identifier(self):
    """Several stored posts sharing one remote identifier yield a single post.

    Five posts with the same ``remote_identifier`` are created for one rule,
    then an unsaved post with that identifier and a new title is checked.
    The handler is expected to yield exactly one post carrying the new title.
    """
    remote_identifier = "28f79ae4-8f9a-11e9-b143-00163ef6bee7"
    rule = CollectionRuleFactory()

    PostFactory.create_batch(remote_identifier=remote_identifier, rule=rule, size=5)

    new_post = PostFactory.build(
        remote_identifier=remote_identifier,
        title="This is a new one",
        rule=rule,
    )

    with FeedDuplicateHandler(rule) as duplicate_handler:
        # Consume the generator while the handler's context is still active.
        posts = list(duplicate_handler.check([new_post]))

    self.assertEqual(len(posts), 1)
    self.assertEqual(posts[0].title, new_post.title)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import os
|
||||
|
||||
from django.conf import settings
|
||||
from django.test import Client, TestCase
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ from django.utils import timezone
|
|||
import requests
|
||||
|
||||
from requests.exceptions import RequestException
|
||||
from requests.models import Response
|
||||
|
||||
from newsreader.news.collection.response_handler import ResponseHandler
|
||||
|
||||
|
|
|
|||
|
|
@ -13,8 +13,7 @@ from rest_framework.permissions import IsAuthenticated
|
|||
from rest_framework.response import Response
|
||||
|
||||
from newsreader.accounts.permissions import IsPostOwner
|
||||
from newsreader.core.pagination import LargeResultSetPagination, ResultSetPagination
|
||||
from newsreader.news.collection.models import CollectionRule
|
||||
from newsreader.core.pagination import LargeResultSetPagination
|
||||
from newsreader.news.collection.serializers import RuleSerializer
|
||||
from newsreader.news.core.filters import ReadFilter
|
||||
from newsreader.news.core.models import Category, Post
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
from rest_framework import serializers
|
||||
|
||||
from newsreader.news import collection
|
||||
from newsreader.news.core.models import Category, Post
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import json
|
||||
|
||||
from django.test import Client, TestCase
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
|
||||
from newsreader.accounts.tests.factories import UserFactory
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import json
|
|||
|
||||
from datetime import date, datetime, time
|
||||
|
||||
from django.test import Client, TestCase
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
|
||||
import pytz
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import json
|
||||
|
||||
from django.test import Client, TestCase
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
|
||||
from newsreader.accounts.tests.factories import UserFactory
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from datetime import date, datetime, time
|
||||
|
||||
from django.test import Client, TestCase
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
|
||||
import pytz
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from django.test import Client, TestCase
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
|
||||
from newsreader.accounts.tests.factories import UserFactory
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue