Add reddit integration

This commit is contained in:
sonny 2020-07-12 20:10:57 +02:00
parent 6f30571dd1
commit 6ce013d0d4
65 changed files with 8949 additions and 372 deletions

View file

@ -31,6 +31,8 @@ services:
- DJANGO_SETTINGS_MODULE=newsreader.conf.docker
depends_on:
- rabbitmq
volumes:
- .:/app
django:
build:
context: .
@ -45,6 +47,8 @@ services:
volumes:
- .:/app
- static-files:/app/src/newsreader/static
stdin_open: true
tty: true
webpack:
build:
context: .

View file

@ -1,9 +1,19 @@
from django import forms
from django.contrib import admin
from django.utils.translation import ugettext as _
from newsreader.accounts.models import User
class UserAdminForm(forms.ModelForm):
    # Admin form for the User model; widens the text inputs so the long
    # Reddit OAuth token values are readable/editable in the admin.
    class Meta:
        widgets = {
            "email": forms.EmailInput(attrs={"size": "50"}),
            "reddit_access_token": forms.TextInput(attrs={"size": "90"}),
            "reddit_refresh_token": forms.TextInput(attrs={"size": "90"}),
        }
class UserAdmin(admin.ModelAdmin):
list_display = ("email", "last_name", "date_joined", "is_active")
list_filter = ("is_active", "is_staff", "is_superuser")
@ -11,17 +21,20 @@ class UserAdmin(admin.ModelAdmin):
search_fields = ["email", "last_name", "first_name"]
readonly_fields = ("last_login", "date_joined")
form = UserAdminForm
fieldsets = (
(
_("User settings"),
{"fields": ("email", "first_name", "last_name", "is_active")},
),
(
_("Reddit settings"),
{"fields": ("reddit_access_token", "reddit_refresh_token")},
),
(
_("Permission settings"),
{
"classes": ("collapse",),
"fields": ("is_staff", "is_superuser", "groups", "user_permissions"),
},
{"classes": ("collapse",), "fields": ("is_staff", "is_superuser")},
),
(_("Misc settings"), {"fields": ("date_joined", "last_login")}),
)

View file

@ -0,0 +1,21 @@
# Generated by Django 3.0.5 on 2020-06-03 20:30
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds the OAuth token fields used by the Reddit integration. Both are
    # blank/null because a user only receives tokens after explicitly
    # linking a Reddit account.

    dependencies = [("accounts", "0009_auto_20200524_1218")]

    operations = [
        migrations.AddField(
            model_name="user",
            name="reddit_access_token",
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
        migrations.AddField(
            model_name="user",
            name="reddit_refresh_token",
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
    ]

View file

@ -50,6 +50,9 @@ class User(AbstractUser):
verbose_name="collection task",
)
reddit_refresh_token = models.CharField(max_length=255, blank=True, null=True)
reddit_access_token = models.CharField(max_length=255, blank=True, null=True)
username = None
objects = UserManager()
@ -69,7 +72,7 @@ class User(AbstractUser):
enabled=True,
interval=task_interval,
name=f"{self.email}-collection-task",
task="newsreader.news.collection.tasks.FeedTask",
task="FeedTask",
args=json.dumps([self.pk]),
)

View file

@ -13,6 +13,18 @@
</a>
{% include "components/form/confirm-button.html" %}
{% if reddit_authorization_url %}
<a class="link button button--reddit" href="{{ reddit_authorization_url }}">
{% trans "Authorize Reddit account" %}
</a>
{% endif %}
{% if reddit_refresh_url %}
<a class="link button button--reddit" href="{{ reddit_refresh_url }}">
{% trans "Refresh Reddit access token" %}
</a>
{% endif %}
</fieldset>
</section>
{% endblock actions %}

View file

@ -0,0 +1,17 @@
{% extends "base.html" %}
{# Landing page for the Reddit OAuth callback: the view passes either an #}
{# "error" string or both tokens; anything else renders a neutral page.  #}
{% block content %}
<main id="settings--page" class="main">
    <section class="section text-section">
        {% if error %}
            <h1 class="h1">Reddit authorization failed</h1>
            <p>{{ error }}</p>
        {% elif access_token and refresh_token %}
            <h1 class="h1">Reddit account is linked</h1>
            <p>Your reddit account was successfully linked.</p>
        {% endif %}
        <p><a href="{% url 'accounts:settings' %}">Return to settings page</a></p>
    </section>
</main>
{% endblock %}

View file

@ -0,0 +1,161 @@
from unittest.mock import patch
from urllib.parse import urlencode
from uuid import uuid4
from django.core.cache import cache
from django.test import TestCase
from django.urls import reverse
from newsreader.accounts.models import User
from newsreader.accounts.tests.factories import UserFactory
from newsreader.news.collection.exceptions import StreamTooManyException
class SettingsViewTestCase(TestCase):
    """Tests for the settings page, including the Reddit authorize link.

    Uses ``assertEqual`` rather than the deprecated ``assertEquals`` alias
    (removed in Python 3.12).
    """

    def setUp(self):
        self.user = UserFactory(email="test@test.nl", password="test")
        self.client.force_login(self.user)
        self.url = reverse("accounts:settings")

    def test_simple(self):
        response = self.client.get(self.url)

        self.assertEqual(response.status_code, 200)
        self.assertContains(response, "Authorize Reddit account")

    def test_user_credential_change(self):
        response = self.client.post(
            reverse("accounts:settings"),
            {"first_name": "First name", "last_name": "Last name"},
        )
        user = User.objects.get()

        self.assertRedirects(response, reverse("accounts:settings"))
        self.assertEqual(user.first_name, "First name")
        self.assertEqual(user.last_name, "Last name")

    def test_linked_reddit_account(self):
        # Once a refresh token is stored, the authorize button disappears.
        self.user.reddit_refresh_token = "test"
        self.user.save()

        response = self.client.get(self.url)

        self.assertEqual(response.status_code, 200)
        self.assertNotContains(response, "Authorize Reddit account")
class RedditTemplateViewTestCase(TestCase):
    """Tests for the Reddit OAuth callback view.

    The token-exchange HTTP call is mocked out; the cache key
    ``<email>-reddit-auth`` carries the expected OAuth state.
    Uses ``assertEqual`` rather than the deprecated ``assertEquals`` alias
    (removed in Python 3.12).
    """

    def setUp(self):
        self.user = UserFactory(email="test@test.nl", password="test")
        self.client.force_login(self.user)
        self.base_url = reverse("accounts:reddit-template")
        self.state = str(uuid4())
        self.patch = patch("newsreader.news.collection.reddit.post")
        self.mocked_post = self.patch.start()

    def tearDown(self):
        patch.stopall()

    def test_simple(self):
        # No query parameters at all: render the neutral page.
        response = self.client.get(self.base_url)

        self.assertEqual(response.status_code, 200)
        self.assertContains(response, "Return to settings page")

    def test_successful_authorization(self):
        self.mocked_post.return_value.json.return_value = {
            "access_token": "1001010412",
            "refresh_token": "134510143",
        }
        cache.set(f"{self.user.email}-reddit-auth", self.state)

        params = {"state": self.state, "code": "Valid code"}
        url = f"{self.base_url}?{urlencode(params)}"
        response = self.client.get(url)

        self.mocked_post.assert_called_once()
        self.assertEqual(response.status_code, 200)
        self.assertContains(response, "Your reddit account was successfully linked.")

        self.user.refresh_from_db()
        self.assertEqual(self.user.reddit_access_token, "1001010412")
        self.assertEqual(self.user.reddit_refresh_token, "134510143")
        # The one-shot state must be consumed on success.
        self.assertEqual(cache.get(f"{self.user.email}-reddit-auth"), None)

    def test_error(self):
        params = {"error": "Denied authorization"}
        url = f"{self.base_url}?{urlencode(params)}"
        response = self.client.get(url)

        self.assertEqual(response.status_code, 200)
        self.assertContains(response, "Denied authorization")

    def test_invalid_state(self):
        # Cached state differs from the state Reddit sent back.
        cache.set(f"{self.user.email}-reddit-auth", str(uuid4()))

        params = {"code": "Valid code", "state": "Invalid state"}
        url = f"{self.base_url}?{urlencode(params)}"
        response = self.client.get(url)

        self.assertEqual(response.status_code, 200)
        self.assertContains(
            response, "The saved state for Reddit authorization did not match"
        )

    def test_stream_error(self):
        self.mocked_post.side_effect = StreamTooManyException
        cache.set(f"{self.user.email}-reddit-auth", self.state)

        params = {"state": self.state, "code": "Valid code"}
        url = f"{self.base_url}?{urlencode(params)}"
        response = self.client.get(url)

        self.mocked_post.assert_called_once()
        self.assertEqual(response.status_code, 200)
        self.assertContains(response, "Too many requests")

        # On failure nothing is persisted and the state stays cached.
        self.user.refresh_from_db()
        self.assertEqual(self.user.reddit_access_token, None)
        self.assertEqual(self.user.reddit_refresh_token, None)
        self.assertEqual(cache.get(f"{self.user.email}-reddit-auth"), self.state)

    def test_unexpected_json(self):
        self.mocked_post.return_value.json.return_value = {"message": "Happy eastern"}
        cache.set(f"{self.user.email}-reddit-auth", self.state)

        params = {"state": self.state, "code": "Valid code"}
        url = f"{self.base_url}?{urlencode(params)}"
        response = self.client.get(url)

        self.mocked_post.assert_called_once()
        self.assertEqual(response.status_code, 200)
        self.assertContains(response, "Access and refresh token not found in response")

        self.user.refresh_from_db()
        self.assertEqual(self.user.reddit_access_token, None)
        self.assertEqual(self.user.reddit_refresh_token, None)
        self.assertEqual(cache.get(f"{self.user.email}-reddit-auth"), self.state)

View file

@ -1,29 +0,0 @@
from django.test import TestCase
from django.urls import reverse
from newsreader.accounts.models import User
from newsreader.accounts.tests.factories import UserFactory
class UserSettingsViewTestCase(TestCase):
    """Tests for the settings view (superseded by SettingsViewTestCase).

    Uses ``assertEqual`` rather than the deprecated ``assertEquals`` alias
    (removed in Python 3.12).
    """

    def setUp(self):
        self.user = UserFactory(password="test")
        self.client.force_login(self.user)

    def test_simple(self):
        response = self.client.get(reverse("accounts:settings"))

        self.assertEqual(response.status_code, 200)

    def test_user_credential_change(self):
        response = self.client.post(
            reverse("accounts:settings"),
            {"first_name": "First name", "last_name": "Last name"},
        )
        user = User.objects.get()

        self.assertRedirects(response, reverse("accounts:settings"))
        self.assertEqual(user.first_name, "First name")
        self.assertEqual(user.last_name, "Last name")

View file

@ -12,6 +12,8 @@ from newsreader.accounts.views import (
PasswordResetConfirmView,
PasswordResetDoneView,
PasswordResetView,
RedditTemplateView,
RedditTokenRedirectView,
RegistrationClosedView,
RegistrationCompleteView,
RegistrationView,
@ -61,4 +63,14 @@ urlpatterns = [
name="password-change",
),
path("settings/", login_required(SettingsView.as_view()), name="settings"),
path(
"settings/reddit/callback/",
login_required(RedditTemplateView.as_view()),
name="reddit-template",
),
path(
"settings/reddit/refresh/",
login_required(RedditTokenRedirectView.as_view()),
name="reddit-refresh",
),
]

View file

@ -1,13 +1,22 @@
from django.contrib import messages
from django.contrib.auth import views as django_views
from django.core.cache import cache
from django.shortcuts import render
from django.urls import reverse_lazy
from django.views.generic import TemplateView
from django.utils.translation import gettext as _
from django.views.generic import RedirectView, TemplateView
from django.views.generic.edit import FormView, ModelFormMixin
from registration.backends.default import views as registration_views
from newsreader.accounts.forms import UserSettingsForm
from newsreader.accounts.models import User
from newsreader.news.collection.exceptions import StreamException
from newsreader.news.collection.reddit import (
get_reddit_access_token,
get_reddit_authorization_url,
)
from newsreader.news.collection.tasks import RedditTokenTask
class LoginView(django_views.LoginView):
@ -111,5 +120,91 @@ class SettingsView(ModelFormMixin, FormView):
def get_object(self, **kwargs):
    # The settings form always edits the currently authenticated user.
    return self.request.user
def get_context_data(self, **kwargs):
    """Expose the Reddit authorize/refresh URLs to the settings template.

    - ``reddit_refresh_url`` is offered only when a refresh token exists,
      the access token is missing, and no refresh task is already pending
      (tracked via the ``<email>-reddit-refresh`` cache key).
    - ``reddit_authorization_url`` is offered only while no refresh token
      is stored, i.e. the account is not linked yet.
    """
    user = self.request.user
    reddit_authorization_url = None
    reddit_refresh_url = None
    reddit_task_active = cache.get(f"{user.email}-reddit-refresh")

    if (
        user.reddit_refresh_token
        and not user.reddit_access_token
        and not reddit_task_active
    ):
        reddit_refresh_url = reverse_lazy("accounts:reddit-refresh")

    if not user.reddit_refresh_token:
        reddit_authorization_url = get_reddit_authorization_url(user)

    return {
        **super().get_context_data(**kwargs),
        "reddit_authorization_url": reddit_authorization_url,
        "reddit_refresh_url": reddit_refresh_url,
    }
def get_form_kwargs(self):
    # Bind the model form to the logged-in user instance.
    return {**super().get_form_kwargs(), "instance": self.request.user}
class RedditTemplateView(TemplateView):
    """Landing page for the Reddit OAuth callback.

    Depending on the query parameters Reddit redirected back with, this
    renders an error message, a success message (after exchanging the
    code for tokens), or a neutral page.
    """

    template_name = "accounts/views/reddit.html"

    def get(self, request, *args, **kwargs):
        context = self.get_context_data(**kwargs)
        params = request.GET
        error = params.get("error")
        state = params.get("state")
        code = params.get("code")

        def render_with(**extra):
            # Helper: render the template with extra context merged in.
            return self.render_to_response({**context, **extra})

        if error:
            return render_with(error=error)
        if not (code and state):
            return self.render_to_response(context)

        # The state we cached when building the authorization URL must
        # round-trip unchanged (CSRF protection for the OAuth flow).
        cached_state = cache.get(f"{request.user.email}-reddit-auth")
        if state != cached_state:
            return render_with(
                error="The saved state for Reddit authorization did not match"
            )

        try:
            access_token, refresh_token = get_reddit_access_token(code, request.user)
            return render_with(
                access_token=access_token, refresh_token=refresh_token
            )
        except StreamException as e:
            return render_with(error=str(e))
        except KeyError:
            return render_with(error="Access and refresh token not found in response")
class RedditTokenRedirectView(RedirectView):
    """Queue a background refresh of the Reddit access token, then
    redirect back to the settings page.

    A cache flag (``<email>-reddit-refresh``, 300s TTL) prevents queueing
    more than one refresh task at a time per user.
    """

    url = reverse_lazy("accounts:settings")

    def get(self, request, *args, **kwargs):
        redirect = super().get(request, *args, **kwargs)
        user = request.user
        cache_key = f"{user.email}-reddit-refresh"

        if cache.get(cache_key):
            # A refresh is already pending; refuse to queue another.
            messages.error(request, _("Unable to retrieve token"))
            return redirect

        RedditTokenTask.delay(user.pk)
        messages.success(request, _("Access token is being retrieved"))
        cache.set(cache_key, 1, 300)
        return redirect

View file

@ -11,8 +11,8 @@ DJANGO_PROJECT_DIR = os.path.join(BASE_DIR, "src", "newsreader")
# SECURITY WARNING: don"t run with debug turned on in production!
DEBUG = True
ALLOWED_HOSTS = ["127.0.0.1"]
INTERNAL_IPS = ["127.0.0.1"]
ALLOWED_HOSTS = ["127.0.0.1", "localhost"]
INTERNAL_IPS = ["127.0.0.1", "localhost"]
# Application definition
INSTALLED_APPS = [
@ -162,7 +162,13 @@ LOGGING = {
"level": "INFO",
"propagate": False,
},
"celery.task": {"handlers": ["syslog", "console"], "level": "INFO"},
"celery": {"handlers": ["syslog", "console"], "level": "INFO"},
"celery.task": {
"handlers": ["syslog", "console"],
"level": "INFO",
"propagate": False,
},
"newsreader": {"handlers": ["syslog", "console"], "level": "INFO"},
},
}
@ -205,6 +211,12 @@ STATICFILES_FINDERS = [
DEFAULT_FROM_EMAIL = "newsreader@rss.fudiggity.nl"
# Project settings
# Reddit integration
REDDIT_CLIENT_ID = "CLIENT_ID"
REDDIT_CLIENT_SECRET = "CLIENT_SECRET"
REDDIT_REDIRECT_URL = "http://127.0.0.1:8000/accounts/settings/reddit/callback/"
# Third party settings
AXES_HANDLER = "axes.handlers.cache.AxesCacheHandler"
AXES_CACHE = "axes"

View file

@ -45,6 +45,11 @@ TEMPLATES = [
}
]
# Reddit integration
REDDIT_CLIENT_ID = os.environ["REDDIT_CLIENT_ID"]
REDDIT_CLIENT_SECRET = os.environ["REDDIT_CLIENT_SECRET"]
REDDIT_REDIRECT_URL = "https://rss.fudiggity.nl/settings/reddit/callback/"
# Third party settings
AXES_HANDLER = "axes.handlers.database.AxesDatabaseHandler"

View file

@ -2,7 +2,7 @@ import React from 'react';
const Card = props => {
return (
<div className="card">
<div id={`${props.id}`} className="card">
<div className="card__header">{props.header}</div>
<div className="card__content">{props.content}</div>
<div className="card__footer">{props.footer}</div>

View file

@ -1,18 +1,23 @@
from bs4 import BeautifulSoup
from newsreader.news.collection.exceptions import StreamParseException
from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.utils import fetch
class Stream:
"""
Contains the data and makes it available for processing
"""
rule = None
def __init__(self, rule):
self.rule = rule
def read(self):
raise NotImplementedError
def parse(self, payload):
def parse(self, response):
raise NotImplementedError
class Meta:
@ -20,9 +25,13 @@ class Stream:
class Client:
"""
Retrieves the data with streams
"""
stream = Stream
def __init__(self, rules=None):
def __init__(self, rules=None):
    """Use the given rules, or fall back to all enabled collection rules.

    The default is ``None`` instead of a mutable ``[]`` (shared-state
    anti-pattern); any falsy value selects the default queryset, so
    callers passing nothing behave exactly as before.
    """
    self.rules = rules if rules else CollectionRule.objects.enabled()
def __enter__(self):
@ -39,7 +48,12 @@ class Client:
class Builder:
"""
Creates the collected posts
"""
instances = []
stream = None
def __init__(self, stream):
self.stream = stream
@ -62,6 +76,10 @@ class Builder:
class Collector:
"""
Glue between client, streams and builder
"""
client = None
builder = None

View file

@ -0,0 +1,7 @@
from django.db.models import TextChoices
from django.utils.translation import gettext as _
class RuleTypeChoices(TextChoices):
    # Discriminates how a CollectionRule is collected: a classic feed or a
    # Reddit subreddit.
    feed = "feed", _("Feed")
    subreddit = "subreddit", _("Subreddit")

View file

@ -1,7 +1,8 @@
class StreamException(Exception):
message = "Stream exception"
def __init__(self, message=None):
def __init__(self, response=None, message=None):
self.response = response
self.message = message if message else self.message
def __str__(self):
@ -28,5 +29,9 @@ class StreamParseException(StreamException):
message = "Stream could not be parsed"
class StreamConnectionError(StreamException):
class StreamConnectionException(StreamException):
    # Raised when the underlying HTTP connection could not be established
    # (mapped from requests' ConnectionError in the response handler).
    message = "A connection to the stream could not be made"
class StreamTooManyException(StreamException):
    # Raised for HTTP 429 (rate-limited) responses.
    message = "Too many requests"

View file

@ -4,8 +4,6 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import timedelta
from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist
from django.db.models.fields import CharField, TextField
from django.template.defaultfilters import truncatechars
from django.utils import timezone
import bleach
@ -14,6 +12,7 @@ import pytz
from feedparser import parse
from newsreader.news.collection.base import Builder, Client, Collector, Stream
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.constants import (
WHITELISTED_ATTRIBUTES,
WHITELISTED_TAGS,
@ -25,7 +24,12 @@ from newsreader.news.collection.exceptions import (
StreamParseException,
StreamTimeOutException,
)
from newsreader.news.collection.utils import build_publication_date, fetch
from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.utils import (
build_publication_date,
fetch,
truncate_text,
)
from newsreader.news.core.models import Post
@ -37,10 +41,13 @@ class FeedBuilder(Builder):
def __enter__(self):
_, stream = self.stream
self.instances = []
self.existing_posts = {
post.remote_identifier: post
for post in Post.objects.filter(rule=stream.rule)
for post in Post.objects.filter(
rule=stream.rule, rule__type=RuleTypeChoices.feed
)
}
return super().__enter__()
@ -73,7 +80,7 @@ class FeedBuilder(Builder):
if not field in entry:
continue
value = self.truncate_text(model_field, entry[field])
value = truncate_text(Post, model_field, entry[field])
if field == "published_parsed":
data[model_field] = build_publication_date(value, tz)
@ -103,21 +110,6 @@ class FeedBuilder(Builder):
strip_comments=True,
)
def truncate_text(self, field_name, value):
field = Post._meta.get_field(field_name)
max_length = field.max_length
cls = type(field)
if not value or not max_length:
return value
elif not bool(issubclass(cls, CharField) or issubclass(cls, TextField)):
return value
if len(value) > max_length:
return truncatechars(value, max_length)
return value
def get_content(self, items):
content = "\n ".join([item.get("value") for item in items])
return self.sanitize_fragment(content)
@ -129,21 +121,29 @@ class FeedBuilder(Builder):
class FeedStream(Stream):
def read(self):
url = self.rule.url
response = fetch(url)
response = fetch(self.rule.url)
return (self.parse(response.content), self)
return self.parse(response), self
def parse(self, payload):
def parse(self, response):
try:
return parse(payload)
return parse(response.content)
except TypeError as e:
raise StreamParseException("Could not parse feed") from e
message = "Could not parse feed"
raise StreamParseException(response=response, message=message) from e
class FeedClient(Client):
stream = FeedStream
def __init__(self, rules=None):
    """Collect from the given rules, defaulting to all enabled feed rules.

    The default is ``None`` instead of a mutable ``[]`` (shared-state
    anti-pattern); any falsy value selects the enabled feed-type rules,
    so callers passing nothing behave exactly as before.
    """
    if rules:
        self.rules = rules
    else:
        self.rules = CollectionRule.objects.filter(
            enabled=True, type=RuleTypeChoices.feed
        )
def __enter__(self):
streams = [self.stream(rule) for rule in self.rules]

View file

@ -1,18 +1,29 @@
from django import forms
from django.utils.safestring import mark_safe
from django.utils.translation import gettext_lazy as _
import pytz
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.models import CollectionRule
from newsreader.news.core.models import Category
def get_reddit_help_text():
    """Help text for the subreddit URL field, containing an example link.

    Fix: the anchor previously used ``className`` (a React/JSX attribute)
    which browsers ignore in plain HTML; the standard ``class`` attribute
    is required for the link styling to apply.
    """
    return mark_safe(
        "Only subreddits are supported. For example: "
        "<a class='link' target='_blank' rel='noopener noreferrer'"
        " href='https://www.reddit.com/r/aww'>https://www.reddit.com/r/aww</a>"
    )
class CollectionRuleForm(forms.ModelForm):
category = forms.ModelChoiceField(required=False, queryset=Category.objects.all())
timezone = forms.ChoiceField(
widget=forms.Select(attrs={"size": len(pytz.all_timezones)}),
choices=((timezone, timezone) for timezone in pytz.all_timezones),
help_text=_("The timezone which the feed uses"),
initial=pytz.utc,
)
def __init__(self, *args, **kwargs):
@ -20,8 +31,7 @@ class CollectionRuleForm(forms.ModelForm):
super().__init__(*args, **kwargs)
if self.user:
self.fields["category"].queryset = Category.objects.filter(user=self.user)
self.fields["category"].queryset = Category.objects.filter(user=self.user)
def save(self, commit=True):
instance = super().save(commit=False)
@ -49,6 +59,32 @@ class CollectionRuleBulkForm(forms.Form):
self.fields["rules"].queryset = CollectionRule.objects.filter(user=user)
class SubRedditRuleForm(CollectionRuleForm):
    # Variant of CollectionRuleForm for subreddit rules: the timezone field
    # is removed (the rule is stamped with UTC on save) and the URL is
    # normalised to end in ".json".
    url = forms.URLField(max_length=1024, help_text=get_reddit_help_text)
    timezone = None

    def save(self, commit=True):
        # Let the parent build (but not persist) the instance, then stamp
        # the subreddit-specific attributes before saving.
        instance = super().save(commit=False)
        instance.type = RuleTypeChoices.subreddit
        instance.timezone = str(pytz.utc)
        instance.user = self.user

        # Appending ".json" selects Reddit's JSON representation of the
        # subreddit listing.
        if not instance.url.endswith(".json"):
            instance.url = f"{instance.url}.json"

        if commit:
            instance.save()
            self.save_m2m()
        return instance

    class Meta:
        model = CollectionRule
        fields = ("name", "url", "favicon", "category")
class OPMLImportForm(forms.Form):
file = forms.FileField(allow_empty_file=False)
skip_existing = forms.BooleanField(initial=False, required=False)

View file

@ -0,0 +1,20 @@
# Generated by Django 3.0.5 on 2020-06-03 20:30
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds the "type" discriminator to CollectionRule so feed rules and
    # subreddit rules can be collected by different pipelines; existing
    # rows default to "feed".

    dependencies = [("collection", "0007_collectionrule_enabled")]

    operations = [
        migrations.AddField(
            model_name="collectionrule",
            name="type",
            field=models.CharField(
                choices=[("feed", "Feed"), ("subreddit", "Subreddit")],
                default="feed",
                max_length=20,
            ),
        )
    ]

View file

@ -1,9 +1,11 @@
from django.db import models
from django.urls import reverse
from django.utils.translation import gettext as _
import pytz
from newsreader.core.models import TimeStampedModel
from newsreader.news.collection.choices import RuleTypeChoices
class CollectionRuleQuerySet(models.QuerySet):
@ -13,6 +15,9 @@ class CollectionRuleQuerySet(models.QuerySet):
class CollectionRule(TimeStampedModel):
name = models.CharField(max_length=100)
type = models.CharField(
max_length=20, choices=RuleTypeChoices.choices, default=RuleTypeChoices.feed
)
url = models.URLField(max_length=1024)
website_url = models.URLField(
@ -23,7 +28,7 @@ class CollectionRule(TimeStampedModel):
timezone = models.CharField(
choices=((timezone, timezone) for timezone in pytz.all_timezones),
max_length=100,
default="UTC",
default=str(pytz.utc),
)
category = models.ForeignKey(
@ -38,7 +43,9 @@ class CollectionRule(TimeStampedModel):
last_suceeded = models.DateTimeField(blank=True, null=True)
succeeded = models.BooleanField(default=False)
error = models.CharField(max_length=1024, blank=True, null=True)
enabled = models.BooleanField(
default=True, help_text=_("Wether or not to collect items from this feed")
)
@ -54,3 +61,10 @@ class CollectionRule(TimeStampedModel):
def __str__(self):
return self.name
@property
def update_url(self):
    """Return the URL of the edit view matching this rule's type."""
    viewname = (
        "news:collection:subreddit-update"
        if self.type == RuleTypeChoices.subreddit
        else "news:collection:rule-update"
    )
    return reverse(viewname, kwargs={"pk": self.pk})

View file

@ -0,0 +1,307 @@
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta
from html import unescape
from json.decoder import JSONDecodeError
from urllib.parse import urlencode
from uuid import uuid4
from django.conf import settings
from django.core.cache import cache
from django.utils import timezone
import bleach
import pytz
import requests
from newsreader.news.collection.base import Builder, Client, Collector, Stream
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.constants import (
WHITELISTED_ATTRIBUTES,
WHITELISTED_TAGS,
)
from newsreader.news.collection.exceptions import (
StreamDeniedException,
StreamException,
StreamParseException,
StreamTooManyException,
)
from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.tasks import RedditTokenTask
from newsreader.news.collection.utils import fetch, post, truncate_text
from newsreader.news.core.models import Post
logger = logging.getLogger(__name__)
REDDIT_URL = "https://www.reddit.com"
REDDIT_API_URL = "https://oauth.reddit.com"
RATE_LIMIT = 60
RATE_LIMIT_DURATION = timedelta(seconds=60)
def get_reddit_authorization_url(user):
    """Build the Reddit OAuth2 authorization URL for *user*.

    A random state token is cached under ``<email>-reddit-auth`` so the
    callback view can verify that the redirect round-tripped untampered.
    """
    state = str(uuid4())
    cache.set(f"{user.email}-reddit-auth", state)

    query = urlencode(
        {
            "client_id": settings.REDDIT_CLIENT_ID,
            "redirect_uri": settings.REDDIT_REDIRECT_URL,
            "state": state,
            "response_type": "code",
            "duration": "permanent",
            "scope": "identity,mysubreddits,save,read",
        }
    )
    return f"{REDDIT_URL}/api/v1/authorize?{query}"
def get_reddit_access_token(code, user):
    """Exchange an OAuth2 authorization *code* for tokens and store them.

    Persists both tokens on *user*, clears the one-shot state cache entry,
    and returns ``(access_token, refresh_token)``. A ``KeyError``
    propagates when the response JSON lacks either token.
    """
    auth = requests.auth.HTTPBasicAuth(
        settings.REDDIT_CLIENT_ID, settings.REDDIT_CLIENT_SECRET
    )
    payload = {
        "redirect_uri": settings.REDDIT_REDIRECT_URL,
        "grant_type": "authorization_code",
        "code": code,
    }
    response = post(f"{REDDIT_URL}/api/v1/access_token", data=payload, auth=auth)
    tokens = response.json()

    access_token = tokens["access_token"]
    refresh_token = tokens["refresh_token"]
    user.reddit_access_token = access_token
    user.reddit_refresh_token = refresh_token
    user.save()

    # The state token is single-use; discard it once linking succeeded.
    cache.delete(f"{user.email}-reddit-auth")
    return access_token, refresh_token
class RedditBuilder(Builder):
    """Builds Post instances from a subreddit JSON listing.

    Posts already stored for the rule (matched on ``remote_identifier``)
    are updated in place when the remote entry is newer; everything else
    becomes a new Post.
    """

    def __enter__(self):
        _, stream = self.stream
        self.instances = []
        # Map remote id -> existing Post so build() can update rather than
        # duplicate.
        self.existing_posts = {
            post.remote_identifier: post
            for post in Post.objects.filter(
                rule=stream.rule, rule__type=RuleTypeChoices.subreddit
            )
        }
        return super().__enter__()

    def create_posts(self, stream):
        data, stream = stream
        posts = []

        # Defensive: Reddit listings nest entries under data.children.
        if "data" not in data or "children" not in data["data"]:
            return

        posts = data["data"]["children"]
        self.instances = self.build(posts, stream.rule)

    def build(self, posts, rule):
        """Yield new or updated Post instances for the listing entries."""
        for post in posts:
            if "data" not in post:
                continue

            remote_identifier = post["data"]["id"]
            title = truncate_text(Post, "title", post["data"]["title"])
            author = truncate_text(Post, "author", post["data"]["author"])
            url_fragment = f"{post['data']['permalink']}"

            # selftext_html arrives HTML-escaped; unescape then sanitize.
            uncleaned_body = post["data"]["selftext_html"]
            unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
            body = (
                bleach.clean(
                    unescaped_body,
                    tags=WHITELISTED_TAGS,
                    attributes=WHITELISTED_ATTRIBUTES,
                    strip=True,
                    strip_comments=True,
                )
                if unescaped_body
                else ""
            )

            try:
                parsed_date = datetime.fromtimestamp(post["data"]["created_utc"])
                created_date = pytz.utc.localize(parsed_date)
            except (OverflowError, OSError):
                # Use the module-level logger (not the root logger via
                # logging.warning) so the warning is routed like the rest of
                # this module's log output.
                logger.warning(f"Failed parsing timestamp from {url_fragment}")
                created_date = timezone.now()

            data = {
                "remote_identifier": remote_identifier,
                "title": title,
                "body": body,
                "author": author,
                "url": f"{REDDIT_URL}{url_fragment}",
                "publication_date": created_date,
                "rule": rule,
            }

            if remote_identifier in self.existing_posts:
                existing_post = self.existing_posts[remote_identifier]
                # Only touch the stored post when the remote entry is newer.
                if created_date > existing_post.publication_date:
                    for key, value in data.items():
                        setattr(existing_post, key, value)
                    yield existing_post
                continue

            yield Post(**data)

    def save(self):
        # Persist everything build() produced.
        for post in self.instances:
            post.save()
class RedditScheduler:
    """Select which subreddit rules to collect within Reddit's rate limit.

    At most ``max_amount`` rules are scheduled per run, and at most
    ``max_user_amount`` per user, since requests are made with each user's
    own access token.
    """

    max_amount = RATE_LIMIT
    max_user_amount = RATE_LIMIT / 4

    def __init__(self, subreddits=None):
        """Use the given rules, or the default queryset.

        The default is ``None`` instead of a mutable ``[]`` (shared-state
        anti-pattern); any falsy value selects the default queryset —
        enabled subreddit rules of users with linked Reddit accounts,
        oldest-collected first, capped at 200 — exactly as before.
        """
        if subreddits:
            self.subreddits = subreddits
        else:
            self.subreddits = CollectionRule.objects.filter(
                type=RuleTypeChoices.subreddit,
                user__reddit_access_token__isnull=False,
                user__reddit_refresh_token__isnull=False,
                enabled=True,
            ).order_by("last_suceeded")[:200]

    def get_scheduled_rules(self):
        """Return a list of per-user batches of rules, rate-limit capped."""
        rule_mapping = {}
        current_amount = 0

        for subreddit in self.subreddits:
            user_pk = subreddit.user.pk

            if current_amount == self.max_amount:
                break

            if user_pk in rule_mapping:
                # Skip rules beyond this user's per-run quota.
                max_amount_reached = len(rule_mapping[user_pk]) == self.max_user_amount
                if max_amount_reached:
                    continue

                rule_mapping[user_pk].append(subreddit)
                current_amount += 1
                continue

            rule_mapping[user_pk] = [subreddit]
            current_amount += 1

        return list(rule_mapping.values())
class RedditStream(Stream):
    """Stream that reads a subreddit's JSON listing, authenticated with the
    rule owner's OAuth bearer token."""

    headers = {}
    user = None

    def __init__(self, rule):
        super().__init__(rule)
        self.user = self.rule.user
        # Plain string key (was a pointless zero-placeholder f-string).
        self.headers = {
            "Authorization": f"bearer {self.rule.user.reddit_access_token}"
        }

    def read(self):
        """Fetch and parse the subreddit listing; returns (data, self)."""
        response = fetch(self.rule.url, headers=self.headers)
        return self.parse(response), self

    def parse(self, response):
        """Decode the response JSON, wrapping decode failures."""
        try:
            return response.json()
        except JSONDecodeError as e:
            raise StreamParseException(
                response=response, message="Failed parsing json"
            ) from e
class RedditClient(Client):
    """Reads batches of subreddit streams concurrently.

    ``rules`` is a list of per-user batches (see RedditScheduler); each
    batch is fanned out over a thread pool. Collection aborts early when
    Reddit rate-limits us, and a user's remaining rules in a batch are
    skipped when their access token has expired.
    """

    stream = RedditStream

    def __init__(self, rules=None):
        # None instead of a mutable [] default (shared-state anti-pattern);
        # absent/None means "no rules".
        self.rules = rules if rules is not None else []

    def __enter__(self):
        streams = [[self.stream(rule) for rule in batch] for batch in self.rules]
        rate_limitted = False

        with ThreadPoolExecutor(max_workers=10) as executor:
            for batch in streams:
                # Check BEFORE submitting: previously the next batch's
                # requests were fired (executor.submit) and then discarded,
                # hitting the rate-limited API for nothing.
                if rate_limitted:
                    break

                futures = {executor.submit(stream.read): stream for stream in batch}

                for future in as_completed(futures):
                    stream = futures[future]
                    try:
                        response_data = future.result()
                        stream.rule.error = None
                        stream.rule.succeeded = True
                        stream.rule.last_suceeded = timezone.now()
                        yield response_data
                    except StreamDeniedException as e:
                        # Token expired: drop it, queue a refresh, and
                        # abandon the rest of this user's batch.
                        logger.exception(
                            f"Access token expired for user {stream.user.pk}"
                        )
                        stream.rule.user.reddit_access_token = None
                        stream.rule.user.save()
                        self.set_rule_error(stream.rule, e)
                        RedditTokenTask.delay(stream.rule.user.pk)
                        break
                    except StreamTooManyException as e:
                        logger.exception("Ratelimit hit, aborting batched subreddits")
                        self.set_rule_error(stream.rule, e)
                        rate_limitted = True
                        break
                    except StreamException as e:
                        logger.exception(
                            "Stream failed reading content from " f"{stream.rule.url}"
                        )
                        self.set_rule_error(stream.rule, e)
                        continue
                    finally:
                        # Persist success or failure state on every rule.
                        stream.rule.save()

    def set_rule_error(self, rule, exception):
        # Keep the tail of the message, capped at the field's max_length.
        length = rule._meta.get_field("error").max_length
        rule.error = exception.message[-length:]
        rule.succeeded = False
class RedditCollector(Collector):
    # Wires the Reddit-specific builder and client into the generic
    # Collector pipeline.
    builder = RedditBuilder
    client = RedditClient

View file

@ -1,12 +1,13 @@
from requests.exceptions import ConnectionError as RequestConnectionError
from newsreader.news.collection.exceptions import (
StreamConnectionError,
StreamConnectionException,
StreamDeniedException,
StreamException,
StreamForbiddenException,
StreamNotFoundException,
StreamTimeOutException,
StreamTooManyException,
)
@ -16,9 +17,10 @@ class ResponseHandler:
401: StreamDeniedException,
403: StreamForbiddenException,
408: StreamTimeOutException,
429: StreamTooManyException,
}
exception_mapping = {RequestConnectionError: StreamConnectionError}
exception_mapping = {RequestConnectionError: StreamConnectionException}
def __enter__(self):
return self
@ -27,16 +29,20 @@ class ResponseHandler:
status_code = response.status_code
if status_code in self.status_code_mapping:
raise self.status_code_mapping[status_code]
exception = self.status_code_mapping[status_code]
raise exception(response)
def map_exception(self, exception):
if isinstance(exception, StreamException):
raise exception
def handle_exception(self, exception):
try:
stream_exception = self.exception_mapping[type(exception)]
except KeyError:
stream_exception = StreamException
message = getattr(exception, "message", str(exception))
raise stream_exception(message=message) from exception
raise stream_exception(exception.response, message=message) from exception
def __exit__(self, *args, **kwargs):
pass

View file

@ -1,11 +1,15 @@
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
import requests
from celery.exceptions import Reject
from celery.utils.log import get_task_logger
from newsreader.accounts.models import User
from newsreader.celery import app
from newsreader.news.collection.feed import FeedCollector
from newsreader.news.collection.utils import post
from newsreader.utils.celery import MemCacheLock
@ -13,7 +17,7 @@ logger = get_task_logger(__name__)
class FeedTask(app.Task):
name = "newsreader.news.collection.tasks.FeedTask"
name = "FeedTask"
ignore_result = True
def run(self, user_pk):
@ -41,4 +45,74 @@ class FeedTask(app.Task):
raise Reject(reason="Task already running", requeue=False)
class RedditTask(app.Task):
    """Celery task that collects posts for all scheduled subreddit rules.

    Guarded by a memcache lock so only one instance runs at a time across
    workers; a concurrent invocation is rejected without requeueing.
    """

    name = "RedditTask"
    ignore_result = True  # results are persisted via the collector; no return value

    def run(self):
        # Imported inside run — NOTE(review): presumably to avoid an import
        # cycle at module load time; confirm against the reddit module.
        from newsreader.news.collection.reddit import RedditCollector, RedditScheduler

        with MemCacheLock("reddit-task", self.app.oid) as acquired:
            if acquired:
                # Plain strings: the originals were f-strings with no
                # placeholders (lint F541), which adds noise for no benefit.
                logger.info("Running reddit task")

                scheduler = RedditScheduler()
                subreddits = scheduler.get_scheduled_rules()

                collector = RedditCollector()
                collector.collect(rules=subreddits)
            else:
                logger.warning("Cancelling task due to existing lock")
                raise Reject(reason="Task already running", requeue=False)
class RedditTokenTask(app.Task):
    """Celery task that refreshes a user's reddit OAuth access token.

    Exchanges the user's stored refresh token for a fresh access token at
    reddit's token endpoint. If the exchange fails, the refresh token is
    cleared so the user has to re-link their reddit account.
    """

    name = "RedditTokenTask"
    ignore_result = True

    def run(self, user_pk):
        from newsreader.news.collection.reddit import REDDIT_URL
        # Bug fix: StreamException is referenced in the except clause below
        # but is not among this module's top-level imports, which made the
        # error path raise NameError instead of handling the failure.
        # Imported lazily here, matching the REDDIT_URL import style above.
        from newsreader.news.collection.exceptions import StreamException

        try:
            user = User.objects.get(pk=user_pk)
        except ObjectDoesNotExist:
            message = f"User {user_pk} does not exist"
            logger.exception(message)
            raise Reject(reason=message, requeue=False)

        if not user.reddit_refresh_token:
            raise Reject(reason=f"User {user_pk} has no refresh token", requeue=False)

        # Reddit requires HTTP basic auth with the app's client credentials
        # on the token endpoint.
        client_auth = requests.auth.HTTPBasicAuth(
            settings.REDDIT_CLIENT_ID, settings.REDDIT_CLIENT_SECRET
        )

        try:
            response = post(
                f"{REDDIT_URL}/api/v1/access_token",
                data={
                    "grant_type": "refresh_token",
                    "refresh_token": user.reddit_refresh_token,
                },
                auth=client_auth,
            )
        except StreamException:
            logger.exception(
                f"Failed refreshing reddit access token for user {user_pk}"
            )
            # Treat the refresh token as invalid so the user is prompted to
            # re-authorise rather than failing repeatedly.
            user.reddit_refresh_token = None
            user.save()
            return

        response_data = response.json()
        user.reddit_access_token = response_data["access_token"]
        user.save()
# Register instances with the Celery app and rebind the module-level names,
# so importers get ready-to-call task objects rather than the bare classes.
FeedTask = app.register_task(FeedTask())
RedditTask = app.register_task(RedditTask())
RedditTokenTask = app.register_task(RedditTokenTask())

View file

@ -15,6 +15,7 @@
<div class="form__actions">
<a class="link button button--confirm" href="{% url "news:collection:rule-create" %}">{% trans "Add a rule" %}</a>
<a class="link button button--confirm" href="{% url "news:collection:subreddit-create" %}">{% trans "Add a subreddit" %}</a>
<a class="link button button--confirm" href="{% url "news:collection:import" %}">{% trans "Import rules" %}</a>
</div>
</section>
@ -48,7 +49,7 @@
<td class="table__item rules-table__item" title="{{ rule.succeeded }}">{{ rule.succeeded }}</td>
<td class="table__item rules-table__item" title="{{ rule.enabled }}">{{ rule.enabled }}</td>
<td class="table__item rules-table__item">
<a class="link" href="{% url "news:collection:rule-update" rule.pk %}"><i class="gg-pen"></i></a>
<a class="link" href="{{ rule.update_url }}"><i class="gg-pen"></i></a>
</td>
</tr>
{% endfor %}

View file

@ -0,0 +1,9 @@
{% extends "base.html" %}
{% load static %}

{% block content %}
<main id="subreddit--page" class="main">
    {% url "news:collection:rules" as cancel_url %}
    {# Fixed typo in the confirm button label: "subrredit" -> "subreddit" #}
    {% include "components/form/form.html" with form=form title="Add a subreddit" cancel_url=cancel_url confirm_text="Add subreddit" %}
</main>
{% endblock %}

View file

@ -0,0 +1,9 @@
{% extends "base.html" %}
{% load static %}
{% block content %}
{# Edit page for an existing subreddit collection rule; cancel returns to the rules overview. #}
<main id="subreddit--page" class="main">
{% url "news:collection:rules" as cancel_url %}
{% include "components/form/form.html" with form=form title="Update subreddit" cancel_url=cancel_url confirm_text="Save subreddit" %}
</main>
{% endblock %}

View file

@ -1,7 +1,9 @@
import factory
from newsreader.accounts.tests.factories import UserFactory
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.reddit import REDDIT_URL
class CollectionRuleFactory(factory.django.DjangoModelFactory):
@ -17,3 +19,12 @@ class CollectionRuleFactory(factory.django.DjangoModelFactory):
class Meta:
model = CollectionRule
class FeedFactory(CollectionRuleFactory):
    # Collection rule pinned to the RSS/Atom feed type. The attribute name
    # `type` mirrors the model field, hence the builtin shadowing.
    type = RuleTypeChoices.feed
class SubredditFactory(CollectionRuleFactory):
    # Collection rule pinned to the subreddit type, with website_url fixed
    # to the reddit base URL used by the reddit collector.
    type = RuleTypeChoices.subreddit
    website_url = REDDIT_URL

View file

@ -9,7 +9,7 @@ import pytz
from freezegun import freeze_time
from newsreader.news.collection.feed import FeedBuilder
from newsreader.news.collection.tests.factories import CollectionRuleFactory
from newsreader.news.collection.tests.factories import FeedFactory
from newsreader.news.core.models import Post
from newsreader.news.core.tests.factories import PostFactory
@ -23,7 +23,7 @@ class FeedBuilderTestCase(TestCase):
def test_basic_entry(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((simple_mock, mock_stream)) as builder:
@ -54,7 +54,7 @@ class FeedBuilderTestCase(TestCase):
def test_multiple_entries(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((multiple_mock, mock_stream)) as builder:
@ -115,7 +115,7 @@ class FeedBuilderTestCase(TestCase):
def test_entries_without_remote_identifier(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((mock_without_identifier, mock_stream)) as builder:
@ -154,7 +154,7 @@ class FeedBuilderTestCase(TestCase):
def test_entry_without_publication_date(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((mock_without_publish_date, mock_stream)) as builder:
@ -186,7 +186,7 @@ class FeedBuilderTestCase(TestCase):
def test_entry_without_url(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((mock_without_url, mock_stream)) as builder:
@ -212,7 +212,7 @@ class FeedBuilderTestCase(TestCase):
def test_entry_without_body(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((mock_without_body, mock_stream)) as builder:
@ -246,7 +246,7 @@ class FeedBuilderTestCase(TestCase):
def test_entry_without_author(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((mock_without_author, mock_stream)) as builder:
@ -274,7 +274,7 @@ class FeedBuilderTestCase(TestCase):
def test_empty_entries(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((mock_without_entries, mock_stream)) as builder:
@ -284,7 +284,7 @@ class FeedBuilderTestCase(TestCase):
def test_update_entries(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
existing_first_post = PostFactory.create(
@ -314,7 +314,7 @@ class FeedBuilderTestCase(TestCase):
def test_html_sanitizing(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((mock_with_html, mock_stream)) as builder:
@ -336,7 +336,7 @@ class FeedBuilderTestCase(TestCase):
def test_long_author_text_is_truncated(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((mock_with_long_author, mock_stream)) as builder:
@ -350,7 +350,7 @@ class FeedBuilderTestCase(TestCase):
def test_long_title_text_is_truncated(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((mock_with_long_title, mock_stream)) as builder:
@ -364,7 +364,7 @@ class FeedBuilderTestCase(TestCase):
def test_content_detail_is_prioritized_if_longer(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((mock_with_longer_content_detail, mock_stream)) as builder:
@ -381,7 +381,7 @@ class FeedBuilderTestCase(TestCase):
def test_content_detail_is_not_prioritized_if_shorter(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((mock_with_shorter_content_detail, mock_stream)) as builder:
@ -397,7 +397,7 @@ class FeedBuilderTestCase(TestCase):
def test_content_detail_is_concatinated(self):
builder = FeedBuilder
rule = CollectionRuleFactory()
rule = FeedFactory()
mock_stream = MagicMock(rule=rule)
with builder((mock_with_multiple_content_detail, mock_stream)) as builder:

View file

@ -11,7 +11,7 @@ from newsreader.news.collection.exceptions import (
StreamTimeOutException,
)
from newsreader.news.collection.feed import FeedClient
from newsreader.news.collection.tests.factories import CollectionRuleFactory
from newsreader.news.collection.tests.factories import FeedFactory
from .mocks import simple_mock
@ -27,8 +27,9 @@ class FeedClientTestCase(TestCase):
patch.stopall()
def test_client_retrieves_single_rules(self):
rule = CollectionRuleFactory.create()
rule = FeedFactory.create()
mock_stream = MagicMock(rule=rule)
self.mocked_read.return_value = (simple_mock, mock_stream)
with FeedClient([rule]) as client:
@ -39,9 +40,10 @@ class FeedClientTestCase(TestCase):
self.mocked_read.assert_called_once_with()
def test_client_catches_stream_exception(self):
rule = CollectionRuleFactory.create()
rule = FeedFactory.create()
mock_stream = MagicMock(rule=rule)
self.mocked_read.side_effect = StreamException("Stream exception")
self.mocked_read.side_effect = StreamException(message="Stream exception")
with FeedClient([rule]) as client:
for data, stream in client:
@ -52,9 +54,12 @@ class FeedClientTestCase(TestCase):
self.mocked_read.assert_called_once_with()
def test_client_catches_stream_not_found_exception(self):
rule = CollectionRuleFactory.create()
rule = FeedFactory.create()
mock_stream = MagicMock(rule=rule)
self.mocked_read.side_effect = StreamNotFoundException("Stream not found")
self.mocked_read.side_effect = StreamNotFoundException(
message="Stream not found"
)
with FeedClient([rule]) as client:
for data, stream in client:
@ -65,9 +70,10 @@ class FeedClientTestCase(TestCase):
self.mocked_read.assert_called_once_with()
def test_client_catches_stream_denied_exception(self):
rule = CollectionRuleFactory.create()
rule = FeedFactory.create()
mock_stream = MagicMock(rule=rule)
self.mocked_read.side_effect = StreamDeniedException("Stream denied")
self.mocked_read.side_effect = StreamDeniedException(message="Stream denied")
with FeedClient([rule]) as client:
for data, stream in client:
@ -78,9 +84,12 @@ class FeedClientTestCase(TestCase):
self.mocked_read.assert_called_once_with()
def test_client_catches_stream_timed_out(self):
rule = CollectionRuleFactory.create()
rule = FeedFactory.create()
mock_stream = MagicMock(rule=rule)
self.mocked_read.side_effect = StreamTimeOutException("Stream timed out")
self.mocked_read.side_effect = StreamTimeOutException(
message="Stream timed out"
)
with FeedClient([rule]) as client:
for data, stream in client:
@ -91,22 +100,12 @@ class FeedClientTestCase(TestCase):
self.mocked_read.assert_called_once_with()
def test_client_catches_stream_parse_exception(self):
rule = CollectionRuleFactory.create()
rule = FeedFactory.create()
mock_stream = MagicMock(rule=rule)
self.mocked_read.side_effect = StreamParseException("Stream has wrong contents")
with FeedClient([rule]) as client:
for data, stream in client:
self.assertEquals(data, {"entries": []})
self.assertEquals(stream.rule.error, "Stream has wrong contents")
self.assertEquals(stream.rule.succeeded, False)
self.mocked_read.assert_called_once_with()
def test_client_catches_stream_parse_exception(self):
rule = CollectionRuleFactory.create()
mock_stream = MagicMock(rule=rule)
self.mocked_read.side_effect = StreamParseException("Stream has wrong contents")
self.mocked_read.side_effect = StreamParseException(
message="Stream has wrong contents"
)
with FeedClient([rule]) as client:
for data, stream in client:
@ -117,9 +116,10 @@ class FeedClientTestCase(TestCase):
self.mocked_read.assert_called_once_with()
def test_client_catches_long_exception_text(self):
rule = CollectionRuleFactory.create()
rule = FeedFactory.create()
mock_stream = MagicMock(rule=rule)
self.mocked_read.side_effect = StreamParseException(words(1000))
self.mocked_read.side_effect = StreamParseException(message=words(1000))
with FeedClient([rule]) as client:
for data, stream in client:

View file

@ -18,7 +18,7 @@ from newsreader.news.collection.exceptions import (
StreamTimeOutException,
)
from newsreader.news.collection.feed import FeedCollector
from newsreader.news.collection.tests.factories import CollectionRuleFactory
from newsreader.news.collection.tests.factories import FeedFactory
from newsreader.news.collection.utils import build_publication_date
from newsreader.news.core.models import Post
from newsreader.news.core.tests.factories import PostFactory
@ -42,7 +42,7 @@ class FeedCollectorTestCase(TestCase):
@freeze_time("2019-10-30 12:30:00")
def test_simple_batch(self):
self.mocked_parse.return_value = multiple_mock
rule = CollectionRuleFactory()
rule = FeedFactory()
collector = FeedCollector()
collector.collect()
@ -58,7 +58,7 @@ class FeedCollectorTestCase(TestCase):
def test_emtpy_batch(self):
self.mocked_fetch.return_value = MagicMock()
self.mocked_parse.return_value = empty_mock
rule = CollectionRuleFactory()
rule = FeedFactory()
collector = FeedCollector()
collector.collect()
@ -72,7 +72,7 @@ class FeedCollectorTestCase(TestCase):
def test_not_found(self):
self.mocked_fetch.side_effect = StreamNotFoundException
rule = CollectionRuleFactory()
rule = FeedFactory()
collector = FeedCollector()
collector.collect()
@ -88,7 +88,7 @@ class FeedCollectorTestCase(TestCase):
last_suceeded = timezone.make_aware(
datetime.combine(date=date(2019, 10, 30), time=time(12, 30))
)
rule = CollectionRuleFactory(last_suceeded=last_suceeded)
rule = FeedFactory(last_suceeded=last_suceeded)
collector = FeedCollector()
collector.collect()
@ -105,7 +105,7 @@ class FeedCollectorTestCase(TestCase):
last_suceeded = timezone.make_aware(
datetime.combine(date=date(2019, 10, 30), time=time(12, 30))
)
rule = CollectionRuleFactory(last_suceeded=last_suceeded)
rule = FeedFactory(last_suceeded=last_suceeded)
collector = FeedCollector()
collector.collect()
@ -122,7 +122,7 @@ class FeedCollectorTestCase(TestCase):
last_suceeded = timezone.make_aware(
datetime.combine(date=date(2019, 10, 30), time=time(12, 30))
)
rule = CollectionRuleFactory(last_suceeded=last_suceeded)
rule = FeedFactory(last_suceeded=last_suceeded)
collector = FeedCollector()
collector.collect()
@ -137,7 +137,7 @@ class FeedCollectorTestCase(TestCase):
@freeze_time("2019-10-30 12:30:00")
def test_duplicates(self):
self.mocked_parse.return_value = duplicate_mock
rule = CollectionRuleFactory()
rule = FeedFactory()
aware_datetime = build_publication_date(
struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), pytz.utc
@ -192,7 +192,7 @@ class FeedCollectorTestCase(TestCase):
@freeze_time("2019-02-22 12:30:00")
def test_items_with_identifiers_get_updated(self):
self.mocked_parse.return_value = multiple_update_mock
rule = CollectionRuleFactory()
rule = FeedFactory()
first_post = PostFactory(
remote_identifier="https://www.bbc.co.uk/news/world-us-canada-48338168",
@ -248,10 +248,7 @@ class FeedCollectorTestCase(TestCase):
@freeze_time("2019-02-22 12:30:00")
def test_disabled_rules(self):
rules = (
CollectionRuleFactory(enabled=False),
CollectionRuleFactory(enabled=True),
)
rules = (FeedFactory(enabled=False), FeedFactory(enabled=True))
self.mocked_parse.return_value = multiple_mock

View file

@ -6,7 +6,7 @@ from django.utils import timezone
from freezegun import freeze_time
from newsreader.news.collection.feed import FeedDuplicateHandler
from newsreader.news.collection.tests.factories import CollectionRuleFactory
from newsreader.news.collection.tests.factories import FeedFactory
from newsreader.news.core.models import Post
from newsreader.news.core.tests.factories import PostFactory
@ -17,7 +17,7 @@ class FeedDuplicateHandlerTestCase(TestCase):
self.maxDiff = None
def test_duplicate_entries_with_remote_identifiers(self):
rule = CollectionRuleFactory()
rule = FeedFactory()
existing_post = PostFactory.create(
remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7", rule=rule
@ -52,7 +52,7 @@ class FeedDuplicateHandlerTestCase(TestCase):
self.assertEquals(post.read, False)
def test_duplicate_entries_with_different_remote_identifiers(self):
rule = CollectionRuleFactory()
rule = FeedFactory()
existing_post = PostFactory(
remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7",
@ -98,7 +98,7 @@ class FeedDuplicateHandlerTestCase(TestCase):
self.assertEquals(post.read, False)
def test_duplicate_entries_in_recent_database(self):
rule = CollectionRuleFactory()
rule = FeedFactory()
existing_post = PostFactory(
url="https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
@ -145,7 +145,7 @@ class FeedDuplicateHandlerTestCase(TestCase):
self.assertEquals(post.read, False)
def test_multiple_existing_entries_with_identifier(self):
rule = CollectionRuleFactory()
rule = FeedFactory()
PostFactory.create_batch(
remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7", rule=rule, size=5
@ -187,7 +187,7 @@ class FeedDuplicateHandlerTestCase(TestCase):
self.assertEquals(post.read, False)
def test_duplicate_entries_outside_time_slot(self):
rule = CollectionRuleFactory()
rule = FeedFactory()
existing_post = PostFactory(
url="https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
@ -234,7 +234,7 @@ class FeedDuplicateHandlerTestCase(TestCase):
self.assertEquals(post.read, False)
def test_duplicate_entries_in_collected_entries(self):
rule = CollectionRuleFactory()
rule = FeedFactory()
post_1 = PostFactory.build(
title="title got updated",
body="body",

View file

@ -1,59 +1,174 @@
from time import struct_time
simple_mock = {
"bozo": 1,
simple_mock = bytes(
"""<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet title="XSL_formatting" type="text/xsl" href="/shared/bsp/xsl/rss/nolsol.xsl"?>
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<title><![CDATA[BBC News - Home]]></title>
<description><![CDATA[BBC News - Home]]></description>
<link>https://www.bbc.co.uk/news/</link>
<image>
<url>https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif</url>
<title>BBC News - Home</title>
<link>https://www.bbc.co.uk/news/</link>
</image>
<generator>RSS for Node</generator>
<lastBuildDate>Sun, 12 Jul 2020 17:21:20 GMT</lastBuildDate>
<copyright><![CDATA[Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/2/hi/help/rss/4498287.stm for terms and conditions of reuse.]]></copyright>
<language><![CDATA[en-gb]]></language>
<ttl>15</ttl>
<item>
<title><![CDATA[Coronavirus: I trust people's sense on face masks - Gove]]></title>
<description><![CDATA[Minister Michael Gove says he does not think face coverings should be mandatory in shops in England.]]></description>
<link>https://www.bbc.co.uk/news/uk-53381000</link>
<guid isPermaLink="true">https://www.bbc.co.uk/news/uk-53381000</guid>
<pubDate>Sun, 12 Jul 2020 16:15:03 GMT</pubDate>
</item>
<item>
<title><![CDATA[Farm outbreak leads 200 to self isolate ]]></title>
<description><![CDATA[Up to 200 vegetable pickers and packers will remain on the farm in Herefordshire while isolating.]]></description>
<link>https://www.bbc.co.uk/news/uk-england-hereford-worcester-53381802</link>
<guid isPermaLink="true">https://www.bbc.co.uk/news/uk-england-hereford-worcester-53381802</guid>
<pubDate>Sun, 12 Jul 2020 17:19:31 GMT</pubDate>
</item>
<item>
<title><![CDATA[English Channel search operation after migrant crossings]]></title>
<description><![CDATA[Several boats are spotted as the home secretary visits France for talks on tackling people smuggling.]]></description>
<link>https://www.bbc.co.uk/news/uk-53382563</link>
<guid isPermaLink="true">https://www.bbc.co.uk/news/uk-53382563</guid>
<pubDate>Sun, 12 Jul 2020 15:47:17 GMT</pubDate>
</item>
</channel>
</rss>""",
"utf-8",
)
simple_mock_parsed = {
"bozo": 0,
"encoding": "utf-8",
"entries": [
{
"guidislink": False,
"href": "",
"id": "https://www.bbc.co.uk/news/world-us-canada-48338168",
"link": "https://www.bbc.co.uk/news/world-us-canada-48338168",
"id": "https://www.bbc.co.uk/news/uk-53381000",
"link": "https://www.bbc.co.uk/news/uk-53381000",
"links": [
{
"href": "https://www.bbc.co.uk/news/world-us-canada-48338168",
"href": "https://www.bbc.co.uk/news/uk-53381000",
"rel": "alternate",
"type": "text/html",
}
],
"media_thumbnail": [
{
"height": "1152",
"url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg",
"width": "2048",
}
],
"published": "Mon, 20 May 2019 16:07:37 GMT",
"published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
"summary": "Foreign Minister Mohammad Javad Zarif says the US "
"president should try showing Iranians some respect.",
"published": "Sun, 12 Jul 2020 16:15:03 GMT",
"published_parsed": struct_time((2020, 7, 12, 16, 15, 3, 6, 194, 0)),
"summary": "Minister Michael Gove says he does not think face "
"coverings should be mandatory in shops in England.",
"summary_detail": {
"base": "http://feeds.bbci.co.uk/news/rss.xml",
"base": "",
"language": None,
"type": "text/html",
"value": "Foreign Minister Mohammad Javad "
"Zarif says the US president should "
"try showing Iranians some "
"respect.",
"value": "Minister Michael Gove says he does "
"not think face coverings should be "
"mandatory in shops in England.",
},
"title": "Trump's 'genocidal taunts' will not end Iran - Zarif",
"title": "Coronavirus: I trust people's sense on face masks - " "Gove",
"title_detail": {
"base": "http://feeds.bbci.co.uk/news/rss.xml",
"base": "",
"language": None,
"type": "text/plain",
"value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif",
"value": "Coronavirus: I trust people's sense " "on face masks - Gove",
},
}
},
{
"guidislink": False,
"id": "https://www.bbc.co.uk/news/uk-england-hereford-worcester-53381802",
"link": "https://www.bbc.co.uk/news/uk-england-hereford-worcester-53381802",
"links": [
{
"href": "https://www.bbc.co.uk/news/uk-england-hereford-worcester-53381802",
"rel": "alternate",
"type": "text/html",
}
],
"published": "Sun, 12 Jul 2020 17:19:31 GMT",
"published_parsed": struct_time((2020, 7, 12, 17, 19, 31, 6, 194, 0)),
"summary": "Up to 200 vegetable pickers and packers will remain "
"on the farm in Herefordshire while isolating.",
"summary_detail": {
"base": "",
"language": None,
"type": "text/html",
"value": "Up to 200 vegetable pickers and "
"packers will remain on the farm in "
"Herefordshire while isolating.",
},
"title": "Farm outbreak leads 200 to self isolate",
"title_detail": {
"base": "",
"language": None,
"type": "text/plain",
"value": "Farm outbreak leads 200 to self " "isolate",
},
},
{
"guidislink": False,
"id": "https://www.bbc.co.uk/news/uk-53382563",
"link": "https://www.bbc.co.uk/news/uk-53382563",
"links": [
{
"href": "https://www.bbc.co.uk/news/uk-53382563",
"rel": "alternate",
"type": "text/html",
}
],
"published": "Sun, 12 Jul 2020 15:47:17 GMT",
"published_parsed": struct_time((2020, 7, 12, 15, 47, 17, 6, 194, 0)),
"summary": "Several boats are spotted as the home secretary "
"visits France for talks on tackling people "
"smuggling.",
"summary_detail": {
"base": "",
"language": None,
"type": "text/html",
"value": "Several boats are spotted as the "
"home secretary visits France for "
"talks on tackling people "
"smuggling.",
},
"title": "English Channel search operation after migrant " "crossings",
"title_detail": {
"base": "",
"language": None,
"type": "text/plain",
"value": "English Channel search operation " "after migrant crossings",
},
},
],
"feed": {
"generator": "RSS for Node",
"generator_detail": {"name": "RSS for Node"},
"image": {
"href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
"link": "https://www.bbc.co.uk/news/",
"links": [
{
"href": "https://www.bbc.co.uk/news/",
"rel": "alternate",
"type": "text/html",
}
],
"title": "BBC News - Home",
"language": "en-gb",
"link": "https://www.bbc.co.uk/news/",
"title_detail": {
"base": "",
"language": None,
"type": "text/plain",
"value": "BBC News - Home",
},
},
"language": "en-gb",
"link": "https://www.bbc.co.uk/news/",
"links": [
{
"href": "https://www.bbc.co.uk/news/",
@ -61,9 +176,41 @@ simple_mock = {
"type": "text/html",
}
],
"rights": "Copyright: (C) British Broadcasting Corporation, see "
"http://news.bbc.co.uk/2/hi/help/rss/4498287.stm for terms "
"and conditions of reuse.",
"rights_detail": {
"base": "",
"language": None,
"type": "text/plain",
"value": "Copyright: (C) British Broadcasting "
"Corporation, see "
"http://news.bbc.co.uk/2/hi/help/rss/4498287.stm "
"for terms and conditions of reuse.",
},
"subtitle": "BBC News - Home",
"subtitle_detail": {
"base": "",
"language": None,
"type": "text/html",
"value": "BBC News - Home",
},
"title": "BBC News - Home",
"title_detail": {
"base": "",
"language": None,
"type": "text/plain",
"value": "BBC News - Home",
},
"ttl": "15",
"updated": "Sun, 12 Jul 2020 17:21:20 GMT",
"updated_parsed": struct_time((2020, 7, 12, 17, 21, 20, 6, 194, 0)),
},
"namespaces": {
"": "http://www.w3.org/2005/Atom",
"content": "http://purl.org/rss/1.0/modules/content/",
"dc": "http://purl.org/dc/elements/1.1/",
"media": "http://search.yahoo.com/mrss/",
},
"href": "http://feeds.bbci.co.uk/news/rss.xml",
"status": 200,
"version": "rss20",
}

View file

@ -11,9 +11,9 @@ from newsreader.news.collection.exceptions import (
StreamTimeOutException,
)
from newsreader.news.collection.feed import FeedStream
from newsreader.news.collection.tests.factories import CollectionRuleFactory
from newsreader.news.collection.tests.factories import FeedFactory
from .mocks import simple_mock
from .mocks import simple_mock, simple_mock_parsed
class FeedStreamTestCase(TestCase):
@ -29,19 +29,19 @@ class FeedStreamTestCase(TestCase):
def test_simple_stream(self):
self.mocked_fetch.return_value = MagicMock(content=simple_mock)
rule = CollectionRuleFactory()
rule = FeedFactory()
stream = FeedStream(rule)
data, stream = stream.read()
self.mocked_fetch.assert_called_once_with(rule.url)
self.assertEquals(data["entries"], data["entries"])
self.assertEquals(stream, stream)
self.assertEquals(data, simple_mock_parsed)
self.assertEquals(stream.rule, rule)
def test_stream_raises_exception(self):
self.mocked_fetch.side_effect = StreamException
rule = CollectionRuleFactory()
rule = FeedFactory()
stream = FeedStream(rule)
with self.assertRaises(StreamException):
@ -52,7 +52,7 @@ class FeedStreamTestCase(TestCase):
def test_stream_raises_denied_exception(self):
self.mocked_fetch.side_effect = StreamDeniedException
rule = CollectionRuleFactory()
rule = FeedFactory()
stream = FeedStream(rule)
with self.assertRaises(StreamDeniedException):
@ -63,7 +63,7 @@ class FeedStreamTestCase(TestCase):
def test_stream_raises_not_found_exception(self):
self.mocked_fetch.side_effect = StreamNotFoundException
rule = CollectionRuleFactory()
rule = FeedFactory()
stream = FeedStream(rule)
with self.assertRaises(StreamNotFoundException):
@ -74,7 +74,7 @@ class FeedStreamTestCase(TestCase):
def test_stream_raises_time_out_exception(self):
self.mocked_fetch.side_effect = StreamTimeOutException
rule = CollectionRuleFactory()
rule = FeedFactory()
stream = FeedStream(rule)
with self.assertRaises(StreamTimeOutException):
@ -85,7 +85,7 @@ class FeedStreamTestCase(TestCase):
def test_stream_raises_forbidden_exception(self):
self.mocked_fetch.side_effect = StreamForbiddenException
rule = CollectionRuleFactory()
rule = FeedFactory()
stream = FeedStream(rule)
with self.assertRaises(StreamForbiddenException):
@ -98,7 +98,7 @@ class FeedStreamTestCase(TestCase):
self.mocked_fetch.return_value = MagicMock()
mocked_parse.side_effect = TypeError
rule = CollectionRuleFactory()
rule = FeedFactory()
stream = FeedStream(rule)
with self.assertRaises(StreamParseException):

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,185 @@
from datetime import datetime
from unittest.mock import MagicMock
from django.test import TestCase
import pytz
from newsreader.news.collection.reddit import RedditBuilder
from newsreader.news.collection.tests.factories import SubredditFactory
from newsreader.news.collection.tests.reddit.builder.mocks import (
author_mock,
empty_mock,
simple_mock,
title_mock,
unknown_mock,
unsanitized_mock,
)
from newsreader.news.core.models import Post
from newsreader.news.core.tests.factories import PostFactory
class RedditBuilderTestCase(TestCase):
def setUp(self):
self.maxDiff = None
def test_simple_mock(self):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
with builder((simple_mock, mock_stream)) as builder:
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
self.assertCountEqual(
("hm0qct", "hna75r", "hngs71", "hngsj8", "hnd7cy"), posts.keys()
)
post = posts["hm0qct"]
self.assertEquals(post.rule, subreddit)
self.assertEquals(
post.title,
"Linux Experiences/Rants or Education/Certifications thread - July 06, 2020",
)
self.assertIn(
" This megathread is also to hear opinions from anyone just starting out"
" with Linux or those that have used Linux (GNU or otherwise) for a long",
post.body,
)
self.assertIn(
"<p>For those looking for certifications please use this megathread to ask about how"
" to get certified whether it&#39;s for the business world or for your own satisfaction."
' Be sure to check out <a href="/r/linuxadmin">r/linuxadmin</a> for more discussion in the'
" SysAdmin world!</p>",
post.body,
)
self.assertEquals(post.author, "AutoModerator")
self.assertEquals(
post.url,
"https://www.reddit.com/r/linux/comments/hm0qct/linux_experiencesrants_or_educationcertifications/",
)
self.assertEquals(
post.publication_date, pytz.utc.localize(datetime(2020, 7, 6, 6, 11, 22))
)
def test_empty_data(self):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
with builder((empty_mock, mock_stream)) as builder:
builder.save()
self.assertEquals(Post.objects.count(), 0)
def test_unknown_mock(self):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
with builder((unknown_mock, mock_stream)) as builder:
builder.save()
self.assertEquals(Post.objects.count(), 0)
def test_update_posts(self):
subreddit = SubredditFactory()
existing_publication_date = pytz.utc.localize(datetime(2020, 7, 8, 14, 0, 0))
existing_post = PostFactory(
remote_identifier="hngsj8",
publication_date=existing_publication_date,
author="Old author",
title="Old title",
body="Old body",
url="https://bbc.com/",
rule=subreddit,
)
builder = RedditBuilder
mock_stream = MagicMock(rule=subreddit)
with builder((simple_mock, mock_stream)) as builder:
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
self.assertCountEqual(
("hm0qct", "hna75r", "hngs71", "hngsj8", "hnd7cy"), posts.keys()
)
existing_post.refresh_from_db()
self.assertEquals(existing_post.remote_identifier, "hngsj8")
self.assertEquals(existing_post.author, "nixcraft")
self.assertEquals(existing_post.title, "KeePassXC 2.6.0 released")
self.assertEquals(existing_post.body, "")
self.assertEquals(
existing_post.publication_date,
pytz.utc.localize(datetime(2020, 7, 8, 15, 11, 6)),
)
self.assertEquals(
existing_post.url,
"https://www.reddit.com/r/linux/comments/hngsj8/" "keepassxc_260_released/",
)
def test_html_sanitizing(self):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
with builder((unsanitized_mock, mock_stream)) as builder:
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
self.assertCountEqual(("hnd7cy",), posts.keys())
post = posts["hnd7cy"]
self.assertEquals(post.body, "<article></article>")
def test_long_author_text_is_truncated(self):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
with builder((author_mock, mock_stream)) as builder:
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
self.assertCountEqual(("hnd7cy",), posts.keys())
post = posts["hnd7cy"]
self.assertEquals(post.author, "TheQuantumZeroTheQuantumZeroTheQuantumZ…")
def test_long_title_text_is_truncated(self):
builder = RedditBuilder
subreddit = SubredditFactory()
mock_stream = MagicMock(rule=subreddit)
with builder((title_mock, mock_stream)) as builder:
builder.save()
posts = {post.remote_identifier: post for post in Post.objects.all()}
self.assertCountEqual(("hnd7cy",), posts.keys())
post = posts["hnd7cy"]
self.assertEquals(
post.title,
'Board statement on the LibreOffice 7.0 RC "Personal EditionBoard statement on the LibreOffice 7.0 RC "Personal Edition" label" labelBoard statement on the LibreOffice 7.0 RC "PersBoard statement on t…',
)

View file

@ -0,0 +1,160 @@
# Note that some response data is truncated
# Fixture: a Reddit API "Listing" payload for r/linux containing two stickied
# AutoModerator self-posts. Field values are verbatim from a real response;
# several fields present in real responses are omitted or truncated here.
simple_mock = {
    "data": {
        "after": "t3_hjywyf",
        "before": None,
        "children": [
            # First child: weekly questions/hardware thread (t3_hj34ck).
            {
                "data": {
                    "approved_at_utc": None,
                    "approved_by": None,
                    "archived": False,
                    "author": "AutoModerator",
                    "banned_at_utc": None,
                    "banned_by": None,
                    "category": None,
                    "content_categories": None,
                    "created": 1593605471.0,
                    "created_utc": 1593576671.0,
                    "discussion_type": None,
                    "distinguished": "moderator",
                    "domain": "self.linux",
                    "edited": False,
                    "hidden": False,
                    "id": "hj34ck",
                    "locked": False,
                    "name": "t3_hj34ck",
                    "permalink": "/r/linux/comments/hj34ck/weekly_questions_and_hardware_thread_july_01_2020/",
                    "pinned": False,
                    # Raw markdown source of the post body.
                    "selftext": "Welcome to r/linux! If you're "
                    "new to Linux or trying to get "
                    "started this thread is for you. "
                    "Get help here or as always, "
                    "check out r/linuxquestions or "
                    "r/linux4noobs\n"
                    "\n"
                    "This megathread is for all your "
                    "question needs. As we don't "
                    "allow questions on r/linux "
                    "outside of this megathread, "
                    "please consider using "
                    "r/linuxquestions or "
                    "r/linux4noobs for the best "
                    "solution to your problem.\n"
                    "\n"
                    "Ask your hardware requests here "
                    "too or try r/linuxhardware!",
                    # HTML-entity-escaped rendered body, as Reddit serves it.
                    "selftext_html": "&lt;!-- SC_OFF "
                    "--&gt;&lt;div "
                    'class="md"&gt;&lt;p&gt;Welcome '
                    "to &lt;a "
                    'href="/r/linux"&gt;r/linux&lt;/a&gt;! '
                    "If you&amp;#39;re new to "
                    "Linux or trying to get "
                    "started this thread is for "
                    "you. Get help here or as "
                    "always, check out &lt;a "
                    'href="/r/linuxquestions"&gt;r/linuxquestions&lt;/a&gt; '
                    "or &lt;a "
                    'href="/r/linux4noobs"&gt;r/linux4noobs&lt;/a&gt;&lt;/p&gt;\n'
                    "\n"
                    "&lt;p&gt;This megathread is "
                    "for all your question "
                    "needs. As we don&amp;#39;t "
                    "allow questions on &lt;a "
                    'href="/r/linux"&gt;r/linux&lt;/a&gt; '
                    "outside of this megathread, "
                    "please consider using &lt;a "
                    'href="/r/linuxquestions"&gt;r/linuxquestions&lt;/a&gt; '
                    "or &lt;a "
                    'href="/r/linux4noobs"&gt;r/linux4noobs&lt;/a&gt; '
                    "for the best solution to "
                    "your problem.&lt;/p&gt;\n"
                    "\n"
                    "&lt;p&gt;Ask your hardware "
                    "requests here too or try "
                    "&lt;a "
                    'href="/r/linuxhardware"&gt;r/linuxhardware&lt;/a&gt;!&lt;/p&gt;\n'
                    "&lt;/div&gt;&lt;!-- SC_ON "
                    "--&gt;",
                    "spoiler": False,
                    "stickied": True,
                    "subreddit": "linux",
                    "subreddit_id": "t5_2qh1a",
                    "subreddit_name_prefixed": "r/linux",
                    "title": "Weekly Questions and Hardware " "Thread - July 01, 2020",
                    "url": "https://www.reddit.com/r/linux/comments/hj34ck/weekly_questions_and_hardware_thread_july_01_2020/",
                    "visited": False,
                },
                "kind": "t3",
            },
            # Second child: weekend fluff thread (t3_hkmu0t).
            {
                "data": {
                    "archived": False,
                    "author": "AutoModerator",
                    "banned_at_utc": None,
                    "banned_by": None,
                    "category": None,
                    "created": 1593824903.0,
                    "created_utc": 1593796103.0,
                    "discussion_type": None,
                    "domain": "self.linux",
                    "edited": False,
                    "hidden": False,
                    "id": "hkmu0t",
                    "name": "t3_hkmu0t",
                    "permalink": "/r/linux/comments/hkmu0t/weekend_fluff_linux_in_the_wild_thread_july_03/",
                    "pinned": False,
                    "saved": False,
                    "selftext": "Welcome to the weekend! This "
                    "stickied thread is for you to "
                    "post pictures of your ubuntu "
                    "2006 install disk, slackware "
                    "floppies, on-topic memes or "
                    "more.\n"
                    "\n"
                    "When it's not the weekend, be "
                    "sure to check out "
                    "r/WildLinuxAppears or "
                    "r/linuxmemes!",
                    "selftext_html": "&lt;!-- SC_OFF "
                    "--&gt;&lt;div "
                    'class="md"&gt;&lt;p&gt;Welcome '
                    "to the weekend! This "
                    "stickied thread is for you "
                    "to post pictures of your "
                    "ubuntu 2006 install disk, "
                    "slackware floppies, "
                    "on-topic memes or "
                    "more.&lt;/p&gt;\n"
                    "\n"
                    "&lt;p&gt;When it&amp;#39;s "
                    "not the weekend, be sure to "
                    "check out &lt;a "
                    'href="/r/WildLinuxAppears"&gt;r/WildLinuxAppears&lt;/a&gt; '
                    "or &lt;a "
                    'href="/r/linuxmemes"&gt;r/linuxmemes&lt;/a&gt;!&lt;/p&gt;\n'
                    "&lt;/div&gt;&lt;!-- SC_ON "
                    "--&gt;",
                    "spoiler": False,
                    "stickied": True,
                    "subreddit": "linux",
                    "subreddit_id": "t5_2qh1a",
                    "subreddit_name_prefixed": "r/linux",
                    "subreddit_subscribers": 542073,
                    "subreddit_type": "public",
                    "thumbnail": "",
                    "title": "Weekend Fluff / Linux in the Wild "
                    "Thread - July 03, 2020",
                    "url": "https://www.reddit.com/r/linux/comments/hkmu0t/weekend_fluff_linux_in_the_wild_thread_july_03/",
                    "visited": False,
                },
                "kind": "t3",
            },
        ],
        "dist": 27,
        "modhash": None,
    },
    "kind": "Listing",
}

View file

@ -0,0 +1,164 @@
from unittest.mock import MagicMock, patch
from uuid import uuid4
from django.test import TestCase
from django.utils.lorem_ipsum import words
from newsreader.accounts.tests.factories import UserFactory
from newsreader.news.collection.exceptions import (
StreamDeniedException,
StreamException,
StreamNotFoundException,
StreamParseException,
StreamTimeOutException,
StreamTooManyException,
)
from newsreader.news.collection.reddit import RedditClient
from newsreader.news.collection.tests.factories import SubredditFactory
from .mocks import simple_mock
class RedditClientTestCase(TestCase):
    """Tests for RedditClient iteration, with RedditStream.read mocked out.

    Each test drives the client over a single subreddit batch and checks
    either the yielded (data, stream) pair or how a stream exception is
    recorded on the rule.
    """

    def setUp(self):
        self.maxDiff = None
        # Intercept the network-facing read so no HTTP calls happen.
        self.patched_read = patch("newsreader.news.collection.reddit.RedditStream.read")
        self.mocked_read = self.patched_read.start()

    def tearDown(self):
        patch.stopall()

    def test_client_retrieves_single_rules(self):
        # Happy path: the client yields exactly what the stream read returned.
        subreddit = SubredditFactory()
        mock_stream = MagicMock(rule=subreddit)
        self.mocked_read.return_value = (simple_mock, mock_stream)
        with RedditClient([[subreddit]]) as client:
            for data, stream in client:
                with self.subTest(data=data, stream=stream):
                    self.assertEquals(data, simple_mock)
                    self.assertEquals(stream, mock_stream)
        self.mocked_read.assert_called_once_with()

    def test_client_catches_stream_exception(self):
        # NOTE(review): the loop body asserts `stream` is None and then
        # dereferences `stream.rule.error` — mutually exclusive. If the
        # client yields nothing after catching the exception, this body is
        # dead code and only the assert_called_once check below verifies
        # anything; confirm RedditClient's yield behaviour and tighten.
        subreddit = SubredditFactory()
        self.mocked_read.side_effect = StreamException(message="Stream exception")
        with RedditClient([[subreddit]]) as client:
            for data, stream in client:
                with self.subTest(data=data, stream=stream):
                    self.assertEquals(data, None)
                    self.assertEquals(stream, None)
                    self.assertEquals(stream.rule.error, "Stream exception")
                    self.assertEquals(stream.rule.succeeded, False)
        self.mocked_read.assert_called_once_with()

    def test_client_catches_stream_not_found_exception(self):
        # NOTE(review): same contradictory (likely dead) loop-body
        # assertions as test_client_catches_stream_exception.
        subreddit = SubredditFactory.create()
        self.mocked_read.side_effect = StreamNotFoundException(
            message="Stream not found"
        )
        with RedditClient([[subreddit]]) as client:
            for data, stream in client:
                with self.subTest(data=data, stream=stream):
                    self.assertEquals(data, None)
                    self.assertEquals(stream, None)
                    self.assertEquals(stream.rule.error, "Stream not found")
                    self.assertEquals(stream.rule.succeeded, False)
        self.mocked_read.assert_called_once_with()

    @patch("newsreader.news.collection.reddit.RedditTokenTask")
    def test_client_catches_stream_denied_exception(self, mocked_task):
        # A denied stream must clear the user's access token, record the
        # failure on the rule, and schedule a token-refresh task.
        user = UserFactory(
            reddit_access_token=str(uuid4()), reddit_refresh_token=str(uuid4())
        )
        subreddit = SubredditFactory(user=user)
        self.mocked_read.side_effect = StreamDeniedException(message="Token expired")
        with RedditClient([(subreddit,)]) as client:
            results = [(data, stream) for data, stream in client]
        self.mocked_read.assert_called_once_with()
        mocked_task.delay.assert_called_once_with(user.pk)
        # Nothing is yielded for a denied stream.
        self.assertEquals(len(results), 0)
        user.refresh_from_db()
        subreddit.refresh_from_db()
        self.assertEquals(user.reddit_access_token, None)
        self.assertEquals(subreddit.succeeded, False)
        self.assertEquals(subreddit.error, "Token expired")

    def test_client_catches_stream_timed_out_exception(self):
        # NOTE(review): same contradictory (likely dead) loop-body
        # assertions as test_client_catches_stream_exception.
        subreddit = SubredditFactory()
        self.mocked_read.side_effect = StreamTimeOutException(
            message="Stream timed out"
        )
        with RedditClient([[subreddit]]) as client:
            for data, stream in client:
                with self.subTest(data=data, stream=stream):
                    self.assertEquals(data, None)
                    self.assertEquals(stream, None)
                    self.assertEquals(stream.rule.error, "Stream timed out")
                    self.assertEquals(stream.rule.succeeded, False)
        self.mocked_read.assert_called_once_with()

    def test_client_catches_stream_too_many_exception(self):
        # NOTE(review): same contradictory (likely dead) loop-body
        # assertions as test_client_catches_stream_exception.
        subreddit = SubredditFactory()
        self.mocked_read.side_effect = StreamTooManyException
        with RedditClient([[subreddit]]) as client:
            for data, stream in client:
                with self.subTest(data=data, stream=stream):
                    self.assertEquals(data, None)
                    self.assertEquals(stream, None)
                    self.assertEquals(stream.rule.error, "Too many requests")
                    self.assertEquals(stream.rule.succeeded, False)
        self.mocked_read.assert_called_once_with()

    def test_client_catches_stream_parse_exception(self):
        # NOTE(review): same contradictory (likely dead) loop-body
        # assertions as test_client_catches_stream_exception.
        subreddit = SubredditFactory()
        self.mocked_read.side_effect = StreamParseException(
            message="Stream could not be parsed"
        )
        with RedditClient([[subreddit]]) as client:
            for data, stream in client:
                with self.subTest(data=data, stream=stream):
                    self.assertEquals(data, None)
                    self.assertEquals(stream, None)
                    self.assertEquals(stream.rule.error, "Stream could not be parsed")
                    self.assertEquals(stream.rule.succeeded, False)
        self.mocked_read.assert_called_once_with()

    def test_client_catches_long_exception_text(self):
        # Error text stored on the rule is capped at 1024 characters.
        # NOTE(review): `mock_stream` is unused, and the loop body asserts
        # `stream` is None then dereferences it — same dead-loop smell as
        # the other exception tests above.
        subreddit = SubredditFactory()
        mock_stream = MagicMock(rule=subreddit)
        self.mocked_read.side_effect = StreamParseException(message=words(1000))
        with RedditClient([[subreddit]]) as client:
            for data, stream in client:
                self.assertEquals(data, None)
                self.assertEquals(stream, None)
                self.assertEquals(len(stream.rule.error), 1024)
                self.assertEquals(stream.rule.succeeded, False)
        self.mocked_read.assert_called_once_with()

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,204 @@
from datetime import datetime
from unittest.mock import patch
from uuid import uuid4
from django.test import TestCase
from django.utils import timezone
import pytz
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.exceptions import (
StreamDeniedException,
StreamForbiddenException,
StreamNotFoundException,
StreamTimeOutException,
)
from newsreader.news.collection.reddit import RedditCollector
from newsreader.news.collection.tests.factories import SubredditFactory
from newsreader.news.collection.tests.reddit.collector.mocks import (
empty_mock,
simple_mock_1,
simple_mock_2,
)
from newsreader.news.core.models import Post
class RedditCollectorTestCase(TestCase):
def setUp(self):
self.maxDiff = None
self.patched_get = patch("newsreader.news.collection.reddit.fetch")
self.mocked_fetch = self.patched_get.start()
self.patched_parse = patch(
"newsreader.news.collection.reddit.RedditStream.parse"
)
self.mocked_parse = self.patched_parse.start()
def tearDown(self):
patch.stopall()
def test_simple_batch(self):
self.mocked_parse.side_effect = (simple_mock_1, simple_mock_2)
rules = (
(subreddit,)
for subreddit in SubredditFactory.create_batch(
user__reddit_access_token=str(uuid4()),
user__reddit_refresh_token=str(uuid4()),
enabled=True,
size=2,
)
)
collector = RedditCollector()
collector.collect(rules=rules)
self.assertCountEqual(
Post.objects.values_list("remote_identifier", flat=True),
(
"hm6byg",
"hpkhgj",
"hph00n",
"hp9mlw",
"hpjn8x",
"gdfaip",
"hmd2ez",
"hpr28u",
"hpps6f",
"hp7uqe",
),
)
for subreddit in rules:
with self.subTest(subreddit=subreddit):
self.assertEquals(subreddit.succeeded, True)
self.assertEquals(subreddit.last_suceeded, timezone.now())
self.assertEquals(subreddit.error, None)
post = Post.objects.get(
remote_identifier="hph00n", rule__type=RuleTypeChoices.subreddit
)
self.assertEquals(
post.publication_date, pytz.utc.localize(datetime(2020, 7, 11, 22, 23, 24))
)
self.assertEquals(post.author, "HannahB888")
self.assertEquals(
post.title, "Drake Interplanetary Smartkey thing that I made!"
)
self.assertEquals(
post.url,
"https://www.reddit.com/r/starcitizen/comments/hph00n/drake_interplanetary_smartkey_thing_that_i_made/",
)
post = Post.objects.get(
remote_identifier="hpr28u", rule__type=RuleTypeChoices.subreddit
)
self.assertEquals(
post.publication_date, pytz.utc.localize(datetime(2020, 7, 12, 10, 29, 10))
)
self.assertEquals(post.author, "Sebaron")
self.assertEquals(
post.title,
"I am a medical student, and I recently programmed an open-source eye-tracker for brain research",
)
self.assertEquals(
post.url,
"https://www.reddit.com/r/Python/comments/hpr28u/i_am_a_medical_student_and_i_recently_programmed/",
)
def test_empty_batch(self):
self.mocked_parse.side_effect = (empty_mock, empty_mock)
rules = (
(subreddit,)
for subreddit in SubredditFactory.create_batch(
user__reddit_access_token=str(uuid4()),
user__reddit_refresh_token=str(uuid4()),
enabled=True,
size=2,
)
)
collector = RedditCollector()
collector.collect(rules=rules)
self.assertEquals(Post.objects.count(), 0)
for subreddit in rules:
with self.subTest(subreddit=subreddit):
self.assertEquals(subreddit.succeeded, True)
self.assertEquals(subreddit.last_suceeded, timezone.now())
self.assertEquals(subreddit.error, None)
def test_not_found(self):
self.mocked_fetch.side_effect = StreamNotFoundException
rule = SubredditFactory(
user__reddit_access_token=str(uuid4()),
user__reddit_refresh_token=str(uuid4()),
enabled=True,
)
collector = RedditCollector()
collector.collect(rules=((rule,),))
self.assertEquals(Post.objects.count(), 0)
self.assertEquals(rule.succeeded, False)
self.assertEquals(rule.error, "Stream not found")
@patch("newsreader.news.collection.reddit.RedditTokenTask")
def test_denied(self, mocked_task):
self.mocked_fetch.side_effect = StreamDeniedException
rule = SubredditFactory(
user__reddit_access_token=str(uuid4()),
user__reddit_refresh_token=str(uuid4()),
enabled=True,
)
collector = RedditCollector()
collector.collect(rules=((rule,),))
self.assertEquals(Post.objects.count(), 0)
self.assertEquals(rule.succeeded, False)
self.assertEquals(rule.error, "Stream does not have sufficient permissions")
mocked_task.delay.assert_called_once_with(rule.user.pk)
def test_forbidden(self):
self.mocked_fetch.side_effect = StreamForbiddenException
rule = SubredditFactory(
user__reddit_access_token=str(uuid4()),
user__reddit_refresh_token=str(uuid4()),
enabled=True,
)
collector = RedditCollector()
collector.collect(rules=((rule,),))
self.assertEquals(Post.objects.count(), 0)
self.assertEquals(rule.succeeded, False)
self.assertEquals(rule.error, "Stream forbidden")
def test_timed_out(self):
self.mocked_fetch.side_effect = StreamTimeOutException
rule = SubredditFactory(
user__reddit_access_token=str(uuid4()),
user__reddit_refresh_token=str(uuid4()),
enabled=True,
)
collector = RedditCollector()
collector.collect(rules=((rule,),))
self.assertEquals(Post.objects.count(), 0)
self.assertEquals(rule.succeeded, False)
self.assertEquals(rule.error, "Stream timed out")

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,144 @@
from json.decoder import JSONDecodeError
from unittest.mock import patch
from uuid import uuid4
from django.test import TestCase
from newsreader.accounts.tests.factories import UserFactory
from newsreader.news.collection.exceptions import (
StreamDeniedException,
StreamException,
StreamForbiddenException,
StreamNotFoundException,
StreamParseException,
StreamTimeOutException,
)
from newsreader.news.collection.reddit import RedditStream
from newsreader.news.collection.tests.factories import SubredditFactory
from newsreader.news.collection.tests.reddit.stream.mocks import simple_mock
class RedditStreamTestCase(TestCase):
    """Unit tests for ``RedditStream.read`` with the HTTP layer mocked.

    Every test verifies that ``fetch`` is called once with the subreddit URL
    and the owning user's OAuth bearer token, and that transport/parse
    failures propagate as the matching stream exception.
    """

    def setUp(self):
        self.maxDiff = None
        self.patched_fetch = patch("newsreader.news.collection.reddit.fetch")
        self.mocked_fetch = self.patched_fetch.start()

    def tearDown(self):
        patch.stopall()

    def _make_stream(self):
        """Return ``(stream, subreddit, access_token)`` for a fresh user."""
        access_token = str(uuid4())
        user = UserFactory(reddit_access_token=access_token)
        subreddit = SubredditFactory(user=user)
        return RedditStream(subreddit), subreddit, access_token

    def _assert_fetch_called(self, subreddit, access_token):
        """The stream must have fetched the subreddit URL with the token."""
        self.mocked_fetch.assert_called_once_with(
            subreddit.url, headers={"Authorization": f"bearer {access_token}"}
        )

    def _assert_read_raises(self, exception_class):
        """``read`` must propagate ``exception_class`` raised by ``fetch``.

        Shared body for the six exception tests below (previously six
        copy-pasted near-identical test methods).
        """
        self.mocked_fetch.side_effect = exception_class
        stream, subreddit, access_token = self._make_stream()
        with self.assertRaises(exception_class):
            stream.read()
        self._assert_fetch_called(subreddit, access_token)

    def test_simple_stream(self):
        self.mocked_fetch.return_value.json.return_value = simple_mock
        stream, subreddit, access_token = self._make_stream()
        data, returned_stream = stream.read()
        self.assertEqual(data, simple_mock)
        # The original test rebound `stream` and then compared it with
        # itself (always true). Assert the intended contract instead:
        # read() returns the stream it was called on.
        self.assertEqual(returned_stream, stream)
        self._assert_fetch_called(subreddit, access_token)

    def test_stream_raises_exception(self):
        self._assert_read_raises(StreamException)

    def test_stream_raises_denied_exception(self):
        self._assert_read_raises(StreamDeniedException)

    def test_stream_raises_not_found_exception(self):
        self._assert_read_raises(StreamNotFoundException)

    def test_stream_raises_time_out_exception(self):
        self._assert_read_raises(StreamTimeOutException)

    def test_stream_raises_forbidden_exception(self):
        self._assert_read_raises(StreamForbiddenException)

    def test_stream_raises_parse_exception(self):
        # Invalid JSON in the response must surface as StreamParseException.
        self.mocked_fetch.return_value.json.side_effect = JSONDecodeError(
            "No json found", "{}", 5
        )
        stream, subreddit, access_token = self._make_stream()
        with self.assertRaises(StreamParseException):
            stream.read()
        self._assert_fetch_called(subreddit, access_token)

View file

@ -0,0 +1,142 @@
from datetime import timedelta
from django.test import TestCase
from django.utils import timezone
from freezegun import freeze_time
from newsreader.accounts.tests.factories import UserFactory
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.reddit import RedditScheduler
from newsreader.news.collection.tests.factories import CollectionRuleFactory
@freeze_time("2019-10-30 12:30:00")
class RedditSchedulerTestCase(TestCase):
    """Tests for RedditScheduler.get_scheduled_rules.

    Time is frozen so the relative ``last_suceeded`` ordering (oldest first,
    presumably) is deterministic. The scheduler appears to group rules into
    per-user batches and cap both the total and per-user rule counts —
    confirmed only indirectly by these tests.
    """

    def test_simple(self):
        """Each user's enabled subreddit rules land in that user's batch."""
        user_1 = UserFactory(
            reddit_access_token="1231414", reddit_refresh_token="5235262"
        )
        user_2 = UserFactory(
            reddit_access_token="3414777", reddit_refresh_token="3423425"
        )
        user_1_rules = [
            CollectionRuleFactory(
                user=user_1,
                type=RuleTypeChoices.subreddit,
                last_suceeded=timezone.now() - timedelta(days=4),
                enabled=True,
            ),
            CollectionRuleFactory(
                user=user_1,
                type=RuleTypeChoices.subreddit,
                last_suceeded=timezone.now() - timedelta(days=3),
                enabled=True,
            ),
            CollectionRuleFactory(
                user=user_1,
                type=RuleTypeChoices.subreddit,
                last_suceeded=timezone.now() - timedelta(days=2),
                enabled=True,
            ),
        ]
        user_2_rules = [
            CollectionRuleFactory(
                user=user_2,
                type=RuleTypeChoices.subreddit,
                last_suceeded=timezone.now() - timedelta(days=4),
                enabled=True,
            ),
            CollectionRuleFactory(
                user=user_2,
                type=RuleTypeChoices.subreddit,
                last_suceeded=timezone.now() - timedelta(days=3),
                enabled=True,
            ),
            CollectionRuleFactory(
                user=user_2,
                type=RuleTypeChoices.subreddit,
                last_suceeded=timezone.now() - timedelta(days=2),
                enabled=True,
            ),
        ]
        scheduler = RedditScheduler()
        scheduled_subreddits = scheduler.get_scheduled_rules()
        # Batch 0 belongs to user_1, batch 1 to user_2 (ordering assumed
        # stable here — depends on scheduler implementation).
        user_1_batch = [subreddit.pk for subreddit in scheduled_subreddits[0]]
        self.assertIn(user_1_rules[0].pk, user_1_batch)
        self.assertIn(user_1_rules[1].pk, user_1_batch)
        self.assertIn(user_1_rules[2].pk, user_1_batch)
        user_2_batch = [subreddit.pk for subreddit in scheduled_subreddits[1]]
        self.assertIn(user_2_rules[0].pk, user_2_batch)
        self.assertIn(user_2_rules[1].pk, user_2_batch)
        self.assertIn(user_2_rules[2].pk, user_2_batch)

    def test_max_amount(self):
        """The total number of scheduled rules is capped.

        75 rules (5 users x 15); the least-recently-succeeded ones should be
        scheduled and the overflow dropped.
        """
        users = UserFactory.create_batch(
            reddit_access_token="1231414", reddit_refresh_token="5235262", size=5
        )
        nested_rules = [
            CollectionRuleFactory.create_batch(
                name=f"rule-{index}",
                type=RuleTypeChoices.subreddit,
                # Larger index => older last_suceeded => higher priority.
                last_suceeded=timezone.now() - timedelta(seconds=index),
                enabled=True,
                user=user,
                size=15,
            )
            for index, user in enumerate(users)
        ]
        rules = [rule for rule_list in nested_rules for rule in rule_list]
        scheduler = RedditScheduler()
        scheduled_subreddits = [
            subreddit.pk
            for batch in scheduler.get_scheduled_rules()
            for subreddit in batch
        ]
        # NOTE(review): rules[15] is covered by neither slice below
        # (excluded checks [0:15], included checks [16:76]) — possible
        # off-by-one; confirm the intended cutoff.
        for rule in rules[16:76]:
            with self.subTest(rule=rule):
                self.assertIn(rule.pk, scheduled_subreddits)
        for rule in rules[0:15]:
            with self.subTest(rule=rule):
                self.assertNotIn(rule.pk, scheduled_subreddits)

    def test_max_user_amount(self):
        """At most 15 rules per user are scheduled; the newest-succeeded
        (lowest-priority) rule is the one dropped."""
        user = UserFactory(
            reddit_access_token="1231414", reddit_refresh_token="5235262"
        )
        rules = [
            CollectionRuleFactory(
                name=f"rule-{index}",
                type=RuleTypeChoices.subreddit,
                last_suceeded=timezone.now() - timedelta(seconds=index),
                enabled=True,
                user=user,
            )
            for index in range(1, 17)
        ]
        scheduler = RedditScheduler()
        scheduled_subreddits = [
            subreddit.pk
            for batch in scheduler.get_scheduled_rules()
            for subreddit in batch
        ]
        for rule in rules[1:16]:
            with self.subTest(rule=rule):
                self.assertIn(rule.pk, scheduled_subreddits)
        self.assertNotIn(rules[0].pk, scheduled_subreddits)

View file

@ -6,97 +6,118 @@ from requests.exceptions import ConnectionError as RequestConnectionError
from requests.exceptions import HTTPError, RequestException, SSLError, TooManyRedirects
from newsreader.news.collection.exceptions import (
StreamConnectionError,
StreamConnectionException,
StreamDeniedException,
StreamException,
StreamForbiddenException,
StreamNotFoundException,
StreamTimeOutException,
StreamTooManyException,
)
from newsreader.news.collection.utils import fetch
from newsreader.news.collection.utils import fetch, post
class FetchTestCase(TestCase):
def setUp(self):
self.patched_get = patch("newsreader.news.collection.utils.requests.get")
self.mocked_get = self.patched_get.start()
class HelperFunctionTestCase:
def test_simple(self):
self.mocked_get.return_value = MagicMock(status_code=200, content="content")
self.mocked_method.return_value = MagicMock(status_code=200, content="content")
url = "https://www.bbc.co.uk/news"
response = fetch(url)
response = self.method(url)
self.assertEquals(response.content, "content")
def test_raises_not_found(self):
self.mocked_get.return_value = MagicMock(status_code=404)
self.mocked_method.return_value = MagicMock(status_code=404)
url = "https://www.bbc.co.uk/news"
with self.assertRaises(StreamNotFoundException):
fetch(url)
self.method(url)
def test_raises_denied(self):
self.mocked_get.return_value = MagicMock(status_code=401)
self.mocked_method.return_value = MagicMock(status_code=401)
url = "https://www.bbc.co.uk/news"
with self.assertRaises(StreamDeniedException):
fetch(url)
self.method(url)
def test_raises_forbidden(self):
self.mocked_get.return_value = MagicMock(status_code=403)
self.mocked_method.return_value = MagicMock(status_code=403)
url = "https://www.bbc.co.uk/news"
with self.assertRaises(StreamForbiddenException):
fetch(url)
self.method(url)
def test_raises_timed_out(self):
self.mocked_get.return_value = MagicMock(status_code=408)
self.mocked_method.return_value = MagicMock(status_code=408)
url = "https://www.bbc.co.uk/news"
with self.assertRaises(StreamTimeOutException):
fetch(url)
self.method(url)
def test_raises_stream_error_on_ssl_error(self):
self.mocked_get.side_effect = SSLError
self.mocked_method.side_effect = SSLError
url = "https://www.bbc.co.uk/news"
with self.assertRaises(StreamException):
fetch(url)
self.method(url)
def test_raises_stream_error_on_connection_error(self):
self.mocked_get.side_effect = RequestConnectionError
self.mocked_method.side_effect = RequestConnectionError
url = "https://www.bbc.co.uk/news"
with self.assertRaises(StreamConnectionError):
fetch(url)
with self.assertRaises(StreamConnectionException):
self.method(url)
def test_raises_stream_error_on_http_error(self):
self.mocked_get.side_effect = HTTPError
self.mocked_method.side_effect = HTTPError
url = "https://www.bbc.co.uk/news"
with self.assertRaises(StreamException):
fetch(url)
self.method(url)
def test_raises_stream_error_on_request_exception(self):
self.mocked_get.side_effect = RequestException
self.mocked_method.side_effect = RequestException
url = "https://www.bbc.co.uk/news"
with self.assertRaises(StreamException):
fetch(url)
self.method(url)
def test_raises_stream_error_on_too_many_redirects(self):
self.mocked_get.side_effect = TooManyRedirects
self.mocked_method.side_effect = TooManyRedirects
url = "https://www.bbc.co.uk/news"
with self.assertRaises(StreamException):
fetch(url)
self.method(url)
def test_raises_stream_error_on_too_many_requests(self):
self.mocked_method.return_value = MagicMock(status_code=429)
url = "https://www.bbc.co.uk/news"
with self.assertRaises(StreamTooManyException):
self.method(url)
class FetchTestCase(HelperFunctionTestCase, TestCase):
def setUp(self):
self.patch = patch("newsreader.news.collection.utils.requests.get")
self.mocked_method = self.patch.start()
self.method = fetch
class PostTestCase(HelperFunctionTestCase, TestCase):
def setUp(self):
self.patch = patch("newsreader.news.collection.utils.requests.post")
self.mocked_method = self.patch.start()
self.method = post

View file

@ -0,0 +1,69 @@
from django.urls import reverse
from newsreader.accounts.tests.factories import UserFactory
from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.tests.factories import CollectionRuleFactory
from newsreader.news.core.tests.factories import CategoryFactory
class CollectionRuleViewTestCase:
    """Shared assertions for collection-rule create/update views.

    Mixin for a Django ``TestCase`` subclass (relies on ``self.client`` and
    ``self.assert*``). Subclasses must set ``self.url`` and fill
    ``self.form_data`` with valid values in their own ``setUp``.
    """

    def setUp(self):
        self.user = UserFactory(password="test")
        self.client.force_login(self.user)
        self.category = CategoryFactory(user=self.user)
        # Placeholder values; subclasses overwrite with real form input.
        self.form_data = {"name": "", "category": "", "url": "", "timezone": ""}

    def test_simple(self):
        """The view renders for a logged-in user."""
        response = self.client.get(self.url)
        self.assertEquals(response.status_code, 200)

    def test_no_category(self):
        """Submitting without a category saves the rule with category=None."""
        self.form_data.update(category="")
        response = self.client.post(self.url, self.form_data)
        self.assertEquals(response.status_code, 302)
        rule = CollectionRule.objects.get()
        self.assertEquals(rule.category, None)

    def test_categories_only_from_user(self):
        """Other users' categories never appear in the rendered form."""
        other_user = UserFactory()
        other_categories = CategoryFactory.create_batch(size=4, user=other_user)
        response = self.client.get(self.url)
        for category in other_categories:
            self.assertNotContains(response, category.name)

    def test_category_of_other_user(self):
        """Posting to another user's rule 404s and leaves it untouched."""
        # NOTE(review): this hard-codes the update-view URL, so when mixed
        # into the create-view test case it still exercises the update
        # endpoint — probably intended, but worth confirming.
        other_user = UserFactory()
        other_rule = CollectionRuleFactory(name="other rule", user=other_user)
        self.form_data.update(
            name="new name",
            category=other_rule.category,
            url=other_rule.url,
            timezone=other_rule.timezone,
        )
        other_url = reverse("news:collection:rule-update", args=[other_rule.pk])
        response = self.client.post(other_url, self.form_data)
        self.assertEquals(response.status_code, 404)
        other_rule.refresh_from_db()
        self.assertEquals(other_rule.name, "other rule")

    def test_with_other_user_rules(self):
        """Selecting another user's category is rejected by form validation."""
        other_user = UserFactory()
        other_categories = CategoryFactory.create_batch(size=4, user=other_user)
        self.form_data.update(category=other_categories[2].pk)
        response = self.client.post(self.url, self.form_data)
        self.assertContains(response, "not one of the available choices")

View file

@ -4,7 +4,7 @@ from django.utils.translation import gettext_lazy as _
from newsreader.accounts.tests.factories import UserFactory
from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.tests.factories import CollectionRuleFactory
from newsreader.news.collection.tests.factories import FeedFactory
class CollectionRuleBulkViewTestCase:
@ -21,9 +21,7 @@ class CollectionRuleBulkEnableViewTestCase(CollectionRuleBulkViewTestCase, TestC
self.url = reverse("news:collection:rules-enable")
self.rules = CollectionRuleFactory.create_batch(
size=5, user=self.user, enabled=False
)
self.rules = FeedFactory.create_batch(size=5, user=self.user, enabled=False)
def test_simple(self):
response = self.client.post(
@ -55,9 +53,7 @@ class CollectionRuleBulkEnableViewTestCase(CollectionRuleBulkViewTestCase, TestC
def test_rule_from_other_user(self):
other_user = UserFactory()
other_rules = CollectionRuleFactory.create_batch(
size=5, user=other_user, enabled=False
)
other_rules = FeedFactory.create_batch(size=5, user=other_user, enabled=False)
response = self.client.post(
self.url,
@ -100,9 +96,7 @@ class CollectionRuleBulkDisableViewTestCase(CollectionRuleBulkViewTestCase, Test
self.url = reverse("news:collection:rules-disable")
self.rules = CollectionRuleFactory.create_batch(
size=5, user=self.user, enabled=True
)
self.rules = FeedFactory.create_batch(size=5, user=self.user, enabled=True)
def test_simple(self):
response = self.client.post(
@ -134,9 +128,7 @@ class CollectionRuleBulkDisableViewTestCase(CollectionRuleBulkViewTestCase, Test
def test_rule_from_other_user(self):
other_user = UserFactory()
other_rules = CollectionRuleFactory.create_batch(
size=5, user=other_user, enabled=True
)
other_rules = FeedFactory.create_batch(size=5, user=other_user, enabled=True)
response = self.client.post(
self.url,
@ -179,7 +171,7 @@ class CollectionRuleBulkDeleteViewTestCase(CollectionRuleBulkViewTestCase, TestC
self.url = reverse("news:collection:rules-delete")
self.rules = CollectionRuleFactory.create_batch(size=5, user=self.user)
self.rules = FeedFactory.create_batch(size=5, user=self.user)
def test_simple(self):
response = self.client.post(
@ -207,9 +199,7 @@ class CollectionRuleBulkDeleteViewTestCase(CollectionRuleBulkViewTestCase, TestC
def test_rule_from_other_user(self):
other_user = UserFactory()
other_rules = CollectionRuleFactory.create_batch(
size=5, user=other_user, enabled=True
)
other_rules = FeedFactory.create_batch(size=5, user=other_user, enabled=True)
response = self.client.post(
self.url,

View file

@ -3,80 +3,18 @@ from django.urls import reverse
import pytz
from newsreader.accounts.tests.factories import UserFactory
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.tests.factories import CollectionRuleFactory
from newsreader.news.collection.tests.factories import FeedFactory
from newsreader.news.collection.tests.views.base import CollectionRuleViewTestCase
from newsreader.news.core.tests.factories import CategoryFactory
class CollectionRuleViewTestCase:
    # Shared base for collection-rule view tests. Subclasses are expected to
    # set ``self.url`` (and extend ``self.form_data``) in their own setUp(),
    # since this class references ``self.url`` without defining it.

    def setUp(self):
        # Authenticated user plus one category owned by that user.
        self.user = UserFactory(password="test")
        self.client.force_login(self.user)
        self.category = CategoryFactory(user=self.user)
        # Blank defaults; concrete test cases fill in valid values.
        self.form_data = {"name": "", "category": "", "url": "", "timezone": ""}

    def test_simple(self):
        # The view renders successfully for a logged-in user.
        response = self.client.get(self.url)
        self.assertEquals(response.status_code, 200)

    def test_no_category(self):
        # Submitting an empty category is allowed: the rule is uncategorised.
        self.form_data.update(category="")
        response = self.client.post(self.url, self.form_data)
        self.assertEquals(response.status_code, 302)
        rule = CollectionRule.objects.get()
        self.assertEquals(rule.category, None)

    def test_categories_only_from_user(self):
        # Categories belonging to other users must not appear on the page.
        other_user = UserFactory()
        other_categories = CategoryFactory.create_batch(size=4, user=other_user)
        response = self.client.get(self.url)
        for category in other_categories:
            self.assertNotContains(response, category.name)

    def test_category_of_other_user(self):
        # Posting to another user's rule-update URL must 404 and leave the
        # other user's rule untouched.
        other_user = UserFactory()
        other_rule = CollectionRuleFactory(name="other rule", user=other_user)
        self.form_data.update(
            name="new name",
            category=other_rule.category,
            url=other_rule.url,
            timezone=other_rule.timezone,
        )
        other_url = reverse("rule-update", args=[other_rule.pk])
        response = self.client.post(other_url, self.form_data)
        self.assertEquals(response.status_code, 404)
        other_rule.refresh_from_db()
        self.assertEquals(other_rule.name, "other rule")

    def test_with_other_user_rules(self):
        # A category pk owned by someone else is rejected by form validation.
        other_user = UserFactory()
        other_categories = CategoryFactory.create_batch(size=4, user=other_user)
        self.form_data.update(category=other_categories[2].pk)
        response = self.client.post(self.url, self.form_data)
        self.assertContains(response, "not one of the available choices")
class CollectionRuleCreateViewTestCase(CollectionRuleViewTestCase, TestCase):
def setUp(self):
super().setUp()
self.url = reverse("rule-create")
self.url = reverse("news:collection:rule-create")
self.form_data.update(
name="new rule",
@ -92,6 +30,7 @@ class CollectionRuleCreateViewTestCase(CollectionRuleViewTestCase, TestCase):
rule = CollectionRule.objects.get(name="new rule")
self.assertEquals(rule.type, RuleTypeChoices.feed)
self.assertEquals(rule.url, "https://www.rss.com/rss")
self.assertEquals(rule.timezone, str(pytz.utc))
self.assertEquals(rule.favicon, None)
@ -103,10 +42,10 @@ class CollectionRuleUpdateViewTestCase(CollectionRuleViewTestCase, TestCase):
def setUp(self):
super().setUp()
self.rule = CollectionRuleFactory(
self.rule = FeedFactory(
name="collection rule", user=self.user, category=self.category
)
self.url = reverse("rule-update", args=[self.rule.pk])
self.url = reverse("news:collection:rule-update", kwargs={"pk": self.rule.pk})
self.form_data.update(
name=self.rule.name,
@ -146,3 +85,17 @@ class CollectionRuleUpdateViewTestCase(CollectionRuleViewTestCase, TestCase):
self.rule.refresh_from_db()
self.assertEquals(self.rule.category, None)
def test_rules_only(self):
rule = FeedFactory(
name="Python",
url="https://reddit.com/r/python",
user=self.user,
category=self.category,
type=RuleTypeChoices.subreddit,
)
url = reverse("news:collection:rule-update", kwargs={"pk": rule.pk})
response = self.client.get(url)
self.assertEquals(response.status_code, 404)

View file

@ -7,7 +7,7 @@ from django.utils.translation import gettext_lazy as _
from newsreader.accounts.tests.factories import UserFactory
from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.tests.factories import CollectionRuleFactory
from newsreader.news.collection.tests.factories import FeedFactory
class OPMLImportTestCase(TestCase):
@ -16,7 +16,7 @@ class OPMLImportTestCase(TestCase):
self.client.force_login(self.user)
self.form_data = {"file": "", "skip_existing": False}
self.url = reverse("import")
self.url = reverse("news:collection:import")
def _get_file_path(self, name):
file_dir = os.path.join(settings.DJANGO_PROJECT_DIR, "utils", "tests", "files")
@ -30,22 +30,16 @@ class OPMLImportTestCase(TestCase):
response = self.client.post(self.url, self.form_data)
self.assertRedirects(response, reverse("rules"))
self.assertRedirects(response, reverse("news:collection:rules"))
rules = CollectionRule.objects.all()
self.assertEquals(len(rules), 4)
def test_existing_rules(self):
CollectionRuleFactory(
url="http://www.engadget.com/rss-full.xml", user=self.user
)
CollectionRuleFactory(url="https://news.ycombinator.com/rss", user=self.user)
CollectionRuleFactory(
url="http://feeds.feedburner.com/Techcrunch", user=self.user
)
CollectionRuleFactory(
url="http://feeds.feedburner.com/tweakers/nieuws", user=self.user
)
FeedFactory(url="http://www.engadget.com/rss-full.xml", user=self.user)
FeedFactory(url="https://news.ycombinator.com/rss", user=self.user)
FeedFactory(url="http://feeds.feedburner.com/Techcrunch", user=self.user)
FeedFactory(url="http://feeds.feedburner.com/tweakers/nieuws", user=self.user)
file_path = self._get_file_path("feeds.opml")
@ -54,22 +48,16 @@ class OPMLImportTestCase(TestCase):
response = self.client.post(self.url, self.form_data)
self.assertRedirects(response, reverse("rules"))
self.assertRedirects(response, reverse("news:collection:rules"))
rules = CollectionRule.objects.all()
self.assertEquals(len(rules), 8)
def test_skip_existing_rules(self):
CollectionRuleFactory(
url="http://www.engadget.com/rss-full.xml", user=self.user
)
CollectionRuleFactory(url="https://news.ycombinator.com/rss", user=self.user)
CollectionRuleFactory(
url="http://feeds.feedburner.com/Techcrunch", user=self.user
)
CollectionRuleFactory(
url="http://feeds.feedburner.com/tweakers/nieuws", user=self.user
)
FeedFactory(url="http://www.engadget.com/rss-full.xml", user=self.user)
FeedFactory(url="https://news.ycombinator.com/rss", user=self.user)
FeedFactory(url="http://feeds.feedburner.com/Techcrunch", user=self.user)
FeedFactory(url="http://feeds.feedburner.com/tweakers/nieuws", user=self.user)
file_path = self._get_file_path("feeds.opml")
@ -136,7 +124,7 @@ class OPMLImportTestCase(TestCase):
response = self.client.post(self.url, self.form_data)
self.assertRedirects(response, reverse("rules"))
self.assertRedirects(response, reverse("news:collection:rules"))
rules = CollectionRule.objects.all()
self.assertEquals(len(rules), 2)

View file

@ -0,0 +1,113 @@
from django.test import TestCase
from django.urls import reverse
import pytz
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.models import CollectionRule
from newsreader.news.collection.reddit import REDDIT_URL
from newsreader.news.collection.tests.factories import SubredditFactory
from newsreader.news.collection.tests.views.base import CollectionRuleViewTestCase
from newsreader.news.core.tests.factories import CategoryFactory
class SubRedditCreateViewTestCase(CollectionRuleViewTestCase, TestCase):
    """Tests for the subreddit-create view."""

    def setUp(self):
        super().setUp()
        self.url = reverse("news:collection:subreddit-create")
        # Minimal valid payload for creating a subreddit rule.
        self.form_data = {}
        self.form_data["name"] = "new rule"
        self.form_data["url"] = "https://www.reddit.com/r/aww"
        self.form_data["category"] = str(self.category.pk)

    def test_creation(self):
        """A valid POST creates a subreddit rule with a normalised .json URL."""
        response = self.client.post(self.url, self.form_data)
        self.assertEquals(response.status_code, 302)

        rule = CollectionRule.objects.get(name="new rule")
        expected = {
            "type": RuleTypeChoices.subreddit,
            "url": "https://www.reddit.com/r/aww.json",
            "timezone": str(pytz.utc),
            "favicon": None,
        }
        for attribute, value in expected.items():
            self.assertEquals(getattr(rule, attribute), value)
        self.assertEquals(rule.category.pk, self.category.pk)
        self.assertEquals(rule.user.pk, self.user.pk)
class SubRedditUpdateViewTestCase(CollectionRuleViewTestCase, TestCase):
    """Tests for the subreddit-update view."""

    def setUp(self):
        super().setUp()
        self.rule = SubredditFactory(
            name="Python",
            url=f"{REDDIT_URL}/r/python.json",
            user=self.user,
            category=self.category,
            type=RuleTypeChoices.subreddit,
        )
        self.url = reverse(
            "news:collection:subreddit-update", kwargs={"pk": self.rule.pk}
        )
        self.form_data = {
            "name": self.rule.name,
            "url": self.rule.url,
            "category": str(self.category.pk),
            "timezone": pytz.utc,
        }

    def test_name_change(self):
        """Renaming the rule persists the new name."""
        self.form_data["name"] = "Python 2"
        response = self.client.post(self.url, self.form_data)
        self.assertEquals(response.status_code, 302)

        self.rule.refresh_from_db()
        self.assertEquals(self.rule.name, "Python 2")

    def test_category_change(self):
        """Moving the rule to another of the user's categories persists."""
        replacement = CategoryFactory(user=self.user)
        self.form_data["category"] = replacement.pk
        response = self.client.post(self.url, self.form_data)
        self.assertEquals(response.status_code, 302)

        self.rule.refresh_from_db()
        self.assertEquals(self.rule.category.pk, replacement.pk)

    def test_subreddit_rules_only(self):
        """Rules that are not subreddit-typed are not reachable via this view."""
        feed_rule = SubredditFactory(
            name="Fake subreddit",
            url="https://leddit.com/r/python",
            user=self.user,
            category=self.category,
            type=RuleTypeChoices.feed,
        )
        feed_url = reverse(
            "news:collection:subreddit-update", kwargs={"pk": feed_rule.pk}
        )
        response = self.client.get(feed_url)
        self.assertEquals(response.status_code, 404)

    def test_url_change(self):
        """Changing the URL re-normalises it to the .json endpoint."""
        self.form_data["name"] = "aww"
        self.form_data["url"] = f"{REDDIT_URL}/r/aww"
        response = self.client.post(self.url, self.form_data)
        self.assertEquals(response.status_code, 302)

        updated = CollectionRule.objects.get(name="aww")
        self.assertEquals(updated.type, RuleTypeChoices.subreddit)
        self.assertEquals(updated.url, f"{REDDIT_URL}/r/aww.json")
        self.assertEquals(updated.timezone, str(pytz.utc))
        self.assertEquals(updated.favicon, None)
        self.assertEquals(updated.category.pk, self.category.pk)
        self.assertEquals(updated.user.pk, self.user.pk)

View file

@ -15,6 +15,8 @@ from newsreader.news.collection.views import (
CollectionRuleListView,
CollectionRuleUpdateView,
OPMLImportView,
SubRedditCreateView,
SubRedditUpdateView,
)
@ -52,5 +54,15 @@ urlpatterns = [
login_required(CollectionRuleBulkDisableView.as_view()),
name="rules-disable",
),
path(
"rules/subreddits/create/",
login_required(SubRedditCreateView.as_view()),
name="subreddit-create",
),
path(
"rules/subreddits/<int:pk>/",
login_required(SubRedditUpdateView.as_view()),
name="subreddit-update",
),
path("rules/import/", login_required(OPMLImportView.as_view()), name="import"),
]

View file

@ -1,5 +1,7 @@
from datetime import datetime
from django.db.models.fields import CharField, TextField
from django.template.defaultfilters import truncatechars
from django.utils import timezone
import pytz
@ -10,6 +12,9 @@ from requests.exceptions import RequestException
from newsreader.news.collection.response_handler import ResponseHandler
DEFAULT_HEADERS = {"User-Agent": "linux:rss.fudiggity.nl:v0.2"}
def build_publication_date(dt, tz):
try:
naive_datetime = datetime(*dt[:6])
@ -20,12 +25,46 @@ def build_publication_date(dt, tz):
return published_parsed.astimezone(pytz.utc)
def fetch(url, headers=None):
    """GET ``url`` and feed the outcome through a ResponseHandler.

    Fix: ``headers={}`` was a mutable default argument. The function only
    reads from it, so no corruption occurred yet, but the shared-default
    pitfall is removed by defaulting to ``None``.

    :param url: the URL to fetch.
    :param headers: optional extra HTTP headers, merged over
        ``DEFAULT_HEADERS`` (caller-supplied values win).
    :returns: the ``requests.Response`` object.
    """
    headers = {**DEFAULT_HEADERS, **(headers or {})}
    with ResponseHandler() as response_handler:
        try:
            response = requests.get(url, headers=headers)
            response_handler.handle_response(response)
        except RequestException as exception:
            # NOTE(review): if the request itself raised, ``response`` is
            # unbound below — presumably the handler re-raises; confirm.
            response_handler.map_exception(exception)
    return response
def post(url, data=None, auth=None, headers=None):
    """POST ``data`` to ``url`` and feed the outcome through a ResponseHandler.

    Fix: ``headers={}`` was a mutable default argument; replaced with the
    ``None`` sentinel idiom.

    :param url: the URL to post to.
    :param data: optional request body passed to ``requests.post``.
    :param auth: optional auth object/tuple passed to ``requests.post``.
    :param headers: optional extra HTTP headers, merged over
        ``DEFAULT_HEADERS`` (caller-supplied values win).
    :returns: the ``requests.Response`` object.
    """
    headers = {**DEFAULT_HEADERS, **(headers or {})}
    with ResponseHandler() as response_handler:
        try:
            response = requests.post(url, data=data, auth=auth, headers=headers)
            response_handler.handle_response(response)
        except RequestException as exception:
            response_handler.map_exception(exception)
    return response
def truncate_text(cls, field_name, value):
    """Truncate ``value`` to the ``max_length`` of a model's char field.

    :param cls: a Django model class; inspected via ``cls._meta``.
    :param field_name: name of the field whose ``max_length`` applies.
    :param value: the text to (possibly) truncate.
    :returns: ``value`` unchanged, or truncated with an ellipsis when it
        exceeds the field's ``max_length``.
    """
    field = cls._meta.get_field(field_name)
    max_length = field.max_length

    # Fix: ``bool(issubclass(type(field), X))`` is the roundabout form of
    # ``isinstance(field, X)``; behavior is identical.
    is_charfield = isinstance(field, CharField)
    is_textfield = isinstance(field, TextField)

    # Nothing to do for empty values or fields without a length limit.
    if not value or not max_length:
        return value
    # NOTE(review): kept exactly as in the original — this truncates only
    # CharFields that are not TextFields. If TextFields were also meant to
    # be truncated, the intended condition may have been
    # ``not (is_charfield or is_textfield)``; confirm before changing.
    elif not is_charfield or is_textfield:
        return value

    if len(value) > max_length:
        return truncatechars(value, max_length)
    return value

View file

@ -0,0 +1,13 @@
from newsreader.news.collection.views.reddit import (
SubRedditCreateView,
SubRedditUpdateView,
)
from newsreader.news.collection.views.rules import (
CollectionRuleBulkDeleteView,
CollectionRuleBulkDisableView,
CollectionRuleBulkEnableView,
CollectionRuleCreateView,
CollectionRuleListView,
CollectionRuleUpdateView,
OPMLImportView,
)

View file

@ -0,0 +1,36 @@
from django.urls import reverse_lazy
import pytz
from newsreader.news.collection.forms import CollectionRuleForm
from newsreader.news.collection.models import CollectionRule
from newsreader.news.core.models import Category
class CollectionRuleViewMixin:
    """Restricts collection-rule views to rules owned by the requesting user."""

    queryset = CollectionRule.objects.order_by("name")

    def get_queryset(self):
        # Only the current user's rules; ordering comes from ``queryset``.
        return self.queryset.filter(user=self.request.user)
class CollectionRuleDetailMixin:
    """Shared configuration and context for rule create/update views."""

    success_url = reverse_lazy("news:collection:rules")
    form_class = CollectionRuleForm

    def get_context_data(self, **kwargs):
        """Add the user's categories and all timezone names to the context."""
        context_data = super().get_context_data(**kwargs)
        categories = Category.objects.filter(user=self.request.user).order_by("name")
        context_data["categories"] = categories
        # Fix: ``[timezone for timezone in pytz.all_timezones]`` was an
        # identity comprehension that also shadowed the name ``timezone``;
        # ``list(...)`` does the same copy directly.
        context_data["timezones"] = list(pytz.all_timezones)
        return context_data

    def get_form_kwargs(self):
        """Pass the requesting user to the form so it can scope its choices."""
        kwargs = super().get_form_kwargs()
        kwargs["user"] = self.request.user
        return kwargs

View file

@ -0,0 +1,26 @@
from django.views.generic.edit import CreateView, UpdateView
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.forms import SubRedditRuleForm
from newsreader.news.collection.views.base import (
CollectionRuleDetailMixin,
CollectionRuleViewMixin,
)
class SubRedditCreateView(
    CollectionRuleViewMixin, CollectionRuleDetailMixin, CreateView
):
    # Creates a subreddit collection rule for the current user using the
    # dedicated subreddit form and template.
    form_class = SubRedditRuleForm
    template_name = "news/collection/views/subreddit-create.html"
class SubRedditUpdateView(
    CollectionRuleViewMixin, CollectionRuleDetailMixin, UpdateView
):
    """Edit view that only matches subreddit-typed collection rules."""

    form_class = SubRedditRuleForm
    template_name = "news/collection/views/subreddit-update.html"

    def get_queryset(self):
        # Narrow the user's rules to subreddits; any other type yields 404.
        return super().get_queryset().filter(type=RuleTypeChoices.subreddit)

View file

@ -1,51 +1,20 @@
from django.contrib import messages
from django.shortcuts import redirect
from django.urls import reverse, reverse_lazy
from django.utils.translation import gettext_lazy as _
from django.urls import reverse
from django.utils.translation import gettext as _
from django.views.generic.edit import CreateView, FormView, UpdateView
from django.views.generic.list import ListView
import pytz
from newsreader.news.collection.forms import (
CollectionRuleBulkForm,
CollectionRuleForm,
OPMLImportForm,
)
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.forms import CollectionRuleBulkForm, OPMLImportForm
from newsreader.news.collection.models import CollectionRule
from newsreader.news.core.models import Category
from newsreader.news.collection.views.base import (
CollectionRuleDetailMixin,
CollectionRuleViewMixin,
)
from newsreader.utils.opml import parse_opml
class CollectionRuleViewMixin:
    """Limits rule querysets to the requesting user's rules."""

    queryset = CollectionRule.objects.order_by("name")

    def get_queryset(self):
        # Fix: the class-level queryset is already ordered by name, so the
        # duplicate ``.order_by("name")`` on the filtered queryset was a
        # redundant no-op and has been dropped.
        return self.queryset.filter(user=self.request.user)
class CollectionRuleDetailMixin:
    """Shared configuration and context for rule create/update views."""

    success_url = reverse_lazy("news:collection:rules")
    form_class = CollectionRuleForm

    def get_context_data(self, **kwargs):
        """Add the user's categories and all timezone names to the context."""
        context_data = super().get_context_data(**kwargs)
        # Fix: the local was misleadingly named ``rules`` even though it
        # holds Category objects; renamed to ``categories``. The timezone
        # identity comprehension is replaced by a direct ``list(...)`` copy.
        categories = Category.objects.filter(user=self.request.user).order_by("name")
        context_data["categories"] = categories
        context_data["timezones"] = list(pytz.all_timezones)
        return context_data

    def get_form_kwargs(self):
        """Pass the requesting user to the form so it can scope its choices."""
        kwargs = super().get_form_kwargs()
        kwargs["user"] = self.request.user
        return kwargs
class CollectionRuleListView(CollectionRuleViewMixin, ListView):
paginate_by = 50
template_name = "news/collection/views/rules.html"
@ -58,6 +27,10 @@ class CollectionRuleUpdateView(
template_name = "news/collection/views/rule-update.html"
context_object_name = "rule"
    def get_queryset(self):
        # Restrict this view to plain feed rules; subreddit-typed rules are
        # edited through their own dedicated view and 404 here.
        queryset = super().get_queryset()
        return queryset.filter(type=RuleTypeChoices.feed)
class CollectionRuleCreateView(
CollectionRuleViewMixin, CollectionRuleDetailMixin, CreateView
@ -121,7 +94,6 @@ class CollectionRuleBulkDeleteView(CollectionRuleBulkView):
class OPMLImportView(FormView):
form_class = OPMLImportForm
success_url = reverse_lazy("news:collection:rules")
template_name = "news/collection/views/import.html"
def form_valid(self, form):
@ -145,3 +117,6 @@ class OPMLImportView(FormView):
messages.success(self.request, message)
return super().form_valid(form)
def get_success_url(self):
return reverse("news:collection:rules")

View file

@ -0,0 +1,17 @@
# Generated by Django 3.0.7 on 2020-07-06 21:12
from django.db import migrations, models
class Migration(migrations.Migration):
    # Auto-generated migration: alters Post.body to a blank-able TextField
    # with a one-off empty-string default (preserve_default=False means the
    # default applies only while migrating existing rows).

    dependencies = [("core", "0006_auto_20200524_1218")]

    operations = [
        migrations.AlterField(
            model_name="post",
            name="body",
            field=models.TextField(blank=True, default=""),
            preserve_default=False,
        )
    ]

View file

@ -8,4 +8,3 @@
background-color: $white;
}

View file

@ -43,4 +43,13 @@
background-color: lighten($button-blue, 5%);
}
}
&--reddit {
color: $white !important;
background-color: lighten($reddit-orange, 5%);
&:hover {
background-color: $reddit-orange;
}
}
}

View file

@ -1,11 +1,11 @@
#settings--page {
.settings-form__fieldset:last-child {
& span {
display: flex;
flex-direction: row;
& >:first-child {
margin: 0 5px;
.form {
&__section {
&--last {
& .fieldset {
gap: 15px;
justify-content: flex-start;
}
}
}
}

View file

@ -40,3 +40,5 @@ $white: rgba(255, 255, 255, 1);
$black: rgba(0, 0, 0, 1);
$blue: darken($azureish-white, +50%);
$dark: rgba(0, 0, 0, 0.4);
$reddit-orange: rgba(255, 69, 0, 1);