0.2.3 #99
13 changed files with 65 additions and 86 deletions
|
|
@ -1,8 +1,5 @@
|
|||
import json
|
||||
|
||||
from typing import Iterable
|
||||
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.contrib.auth.models import AbstractUser
|
||||
from django.contrib.auth.models import UserManager as DjangoUserManager
|
||||
from django.db import models
|
||||
|
|
@ -12,7 +9,7 @@ from django_celery_beat.models import IntervalSchedule, PeriodicTask
|
|||
|
||||
|
||||
class UserManager(DjangoUserManager):
|
||||
def _create_user(self, email, password, **extra_fields) -> get_user_model:
|
||||
def _create_user(self, email, password, **extra_fields):
|
||||
"""
|
||||
Create and save a user with the given email and password.
|
||||
"""
|
||||
|
|
@ -24,14 +21,12 @@ class UserManager(DjangoUserManager):
|
|||
user.save(using=self._db)
|
||||
return user
|
||||
|
||||
def create_user(self, email: str, password=None, **extra_fields) -> get_user_model:
|
||||
def create_user(self, email, password=None, **extra_fields):
|
||||
extra_fields.setdefault("is_staff", False)
|
||||
extra_fields.setdefault("is_superuser", False)
|
||||
return self._create_user(email, password, **extra_fields)
|
||||
|
||||
def create_superuser(
|
||||
self, email: str, password: str, **extra_fields
|
||||
) -> get_user_model:
|
||||
def create_superuser(self, email, password, **extra_fields):
|
||||
extra_fields.setdefault("is_staff", True)
|
||||
extra_fields.setdefault("is_superuser", True)
|
||||
|
||||
|
|
@ -62,7 +57,7 @@ class User(AbstractUser):
|
|||
USERNAME_FIELD = "email"
|
||||
REQUIRED_FIELDS = []
|
||||
|
||||
def save(self, *args, **kwargs) -> None:
|
||||
def save(self, *args, **kwargs):
|
||||
super().save(*args, **kwargs)
|
||||
|
||||
if not self.task:
|
||||
|
|
@ -80,6 +75,6 @@ class User(AbstractUser):
|
|||
|
||||
self.save()
|
||||
|
||||
def delete(self, *args, **kwargs) -> Iterable:
|
||||
def delete(self, *args, **kwargs):
|
||||
self.task.delete()
|
||||
return super().delete(*args, **kwargs)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
from typing import ContextManager, Dict, Optional, Tuple
|
||||
|
||||
from django.db.models.query import QuerySet
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
|
@ -10,13 +8,13 @@ from newsreader.news.collection.utils import fetch
|
|||
|
||||
|
||||
class Stream:
|
||||
def __init__(self, rule: CollectionRule) -> None:
|
||||
def __init__(self, rule):
|
||||
self.rule = rule
|
||||
|
||||
def read(self) -> Tuple:
|
||||
def read(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def parse(self, payload: bytes) -> Dict:
|
||||
def parse(self, payload):
|
||||
raise NotImplementedError
|
||||
|
||||
class Meta:
|
||||
|
|
@ -26,16 +24,16 @@ class Stream:
|
|||
class Client:
|
||||
stream = Stream
|
||||
|
||||
def __init__(self, rules: Optional[CollectionRule] = None) -> None:
|
||||
def __init__(self, rules=None):
|
||||
self.rules = rules if rules else CollectionRule.objects.all()
|
||||
|
||||
def __enter__(self) -> ContextManager:
|
||||
def __enter__(self):
|
||||
for rule in self.rules:
|
||||
stream = self.stream(rule)
|
||||
|
||||
yield stream.read()
|
||||
|
||||
def __exit__(self, *args, **kwargs) -> None:
|
||||
def __exit__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
class Meta:
|
||||
|
|
@ -45,20 +43,20 @@ class Client:
|
|||
class Builder:
|
||||
instances = []
|
||||
|
||||
def __init__(self, stream: Tuple) -> None:
|
||||
def __init__(self, stream):
|
||||
self.stream = stream
|
||||
|
||||
def __enter__(self) -> ContextManager:
|
||||
def __enter__(self):
|
||||
self.create_posts(self.stream)
|
||||
return self
|
||||
|
||||
def __exit__(self, *args, **kwargs) -> None:
|
||||
def __exit__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def create_posts(self, stream: Tuple) -> None:
|
||||
def create_posts(self, stream):
|
||||
pass
|
||||
|
||||
def save(self) -> None:
|
||||
def save(self):
|
||||
pass
|
||||
|
||||
class Meta:
|
||||
|
|
@ -69,13 +67,11 @@ class Collector:
|
|||
client = None
|
||||
builder = None
|
||||
|
||||
def __init__(
|
||||
self, client: Optional[Client] = None, builder: Optional[Builder] = None
|
||||
) -> None:
|
||||
def __init__(self, client=None, builder=None):
|
||||
self.client = client if client else self.client
|
||||
self.builder = builder if builder else self.builder
|
||||
|
||||
def collect(self, rules: Optional[QuerySet] = None) -> None:
|
||||
def collect(self, rules=None):
|
||||
with self.client(rules=rules) as client:
|
||||
for data, stream in client:
|
||||
with self.builder((data, stream)) as builder:
|
||||
|
|
@ -86,15 +82,15 @@ class Collector:
|
|||
|
||||
|
||||
class WebsiteStream(Stream):
|
||||
def __init__(self, url: str) -> None:
|
||||
def __init__(self, url):
|
||||
self.url = url
|
||||
|
||||
def read(self) -> Tuple:
|
||||
def read(self):
|
||||
response = fetch(self.url)
|
||||
|
||||
return (self.parse(response.content), self)
|
||||
|
||||
def parse(self, payload: bytes) -> BeautifulSoup:
|
||||
def parse(self, payload):
|
||||
try:
|
||||
return BeautifulSoup(payload, "lxml")
|
||||
except TypeError:
|
||||
|
|
@ -102,10 +98,10 @@ class WebsiteStream(Stream):
|
|||
|
||||
|
||||
class URLBuilder(Builder):
|
||||
def __enter__(self) -> ContextManager:
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def build(self) -> Tuple:
|
||||
def build(self):
|
||||
data, stream = self.stream
|
||||
rule = stream.rule
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
from django.db.models.query import QuerySet
|
||||
|
||||
from rest_framework import status
|
||||
from rest_framework.generics import (
|
||||
GenericAPIView,
|
||||
|
|
@ -24,7 +22,7 @@ class ListRuleView(ListCreateAPIView):
|
|||
serializer_class = RuleSerializer
|
||||
pagination_class = ResultSetPagination
|
||||
|
||||
def get_queryset(self) -> QuerySet:
|
||||
def get_queryset(self):
|
||||
user = self.request.user
|
||||
return self.queryset.filter(user=user).order_by("-created")
|
||||
|
||||
|
|
@ -41,7 +39,7 @@ class NestedRuleView(ListAPIView):
|
|||
pagination_class = LargeResultSetPagination
|
||||
filter_backends = [ReadFilter]
|
||||
|
||||
def get_queryset(self) -> QuerySet:
|
||||
def get_queryset(self):
|
||||
lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field
|
||||
|
||||
# Default permission is IsOwner, therefore there shouldn't have to be
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import ContextManager, List, Optional
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from newsreader.news.collection.base import (
|
||||
|
|
@ -18,7 +17,7 @@ LINK_RELS = ["icon", "shortcut icon", "apple-touch-icon", "apple-touch-icon-prec
|
|||
|
||||
|
||||
class FaviconBuilder(Builder):
|
||||
def build(self) -> None:
|
||||
def build(self):
|
||||
rule, soup = self.stream
|
||||
|
||||
url = self.parse(soup, rule.website_url)
|
||||
|
|
@ -27,7 +26,7 @@ class FaviconBuilder(Builder):
|
|||
rule.favicon = url
|
||||
rule.save()
|
||||
|
||||
def parse(self, soup, website_url) -> Optional[str]:
|
||||
def parse(self, soup, website_url):
|
||||
if not soup.head:
|
||||
return
|
||||
|
||||
|
|
@ -48,7 +47,7 @@ class FaviconBuilder(Builder):
|
|||
|
||||
return url
|
||||
|
||||
def parse_links(self, links: List) -> Optional[str]:
|
||||
def parse_links(self, links):
|
||||
favicons = set()
|
||||
icons = set()
|
||||
|
||||
|
|
@ -73,10 +72,10 @@ class FaviconBuilder(Builder):
|
|||
class FaviconClient(Client):
|
||||
stream = WebsiteStream
|
||||
|
||||
def __init__(self, streams: List) -> None:
|
||||
def __init__(self, streams):
|
||||
self.streams = streams
|
||||
|
||||
def __enter__(self) -> ContextManager:
|
||||
def __enter__(self):
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
futures = {
|
||||
executor.submit(stream.read): rule for rule, stream in self.streams
|
||||
|
|
@ -97,7 +96,7 @@ class FaviconCollector(Collector):
|
|||
feed_client, favicon_client = (FeedClient, FaviconClient)
|
||||
url_builder, favicon_builder = (URLBuilder, FaviconBuilder)
|
||||
|
||||
def collect(self, rules: Optional[List] = None) -> None:
|
||||
def collect(self, rules=None):
|
||||
streams = []
|
||||
|
||||
with self.feed_client(rules=rules) as client:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import ContextManager, Dict, Generator, List, Optional, Tuple
|
||||
|
||||
from django.db.models.fields import CharField, TextField
|
||||
from django.template.defaultfilters import truncatechars
|
||||
|
|
@ -27,7 +26,7 @@ from newsreader.news.core.models import Post
|
|||
class FeedBuilder(Builder):
|
||||
instances = []
|
||||
|
||||
def __enter__(self) -> ContextManager:
|
||||
def __enter__(self):
|
||||
_, stream = self.stream
|
||||
self.instances = []
|
||||
self.existing_posts = {
|
||||
|
|
@ -36,7 +35,7 @@ class FeedBuilder(Builder):
|
|||
|
||||
return super().__enter__()
|
||||
|
||||
def create_posts(self, stream: Tuple) -> None:
|
||||
def create_posts(self, stream):
|
||||
data, stream = stream
|
||||
entries = []
|
||||
|
||||
|
|
@ -51,7 +50,7 @@ class FeedBuilder(Builder):
|
|||
|
||||
self.instances = [post for post in posts]
|
||||
|
||||
def build(self, entries: List, rule: CollectionRule) -> Generator[Post, None, None]:
|
||||
def build(self, entries, rule):
|
||||
field_mapping = {
|
||||
"id": "remote_identifier",
|
||||
"title": "title",
|
||||
|
|
@ -90,7 +89,7 @@ class FeedBuilder(Builder):
|
|||
|
||||
yield Post(**data)
|
||||
|
||||
def sanitize_fragment(self, fragment: str) -> Optional[str]:
|
||||
def sanitize_fragment(self, fragment):
|
||||
if not fragment:
|
||||
return ""
|
||||
|
||||
|
|
@ -117,23 +116,23 @@ class FeedBuilder(Builder):
|
|||
|
||||
return value
|
||||
|
||||
def get_content(self, items: List) -> str:
|
||||
def get_content(self, items):
|
||||
content = "\n ".join([item.get("value") for item in items])
|
||||
return self.sanitize_fragment(content)
|
||||
|
||||
def save(self) -> None:
|
||||
def save(self):
|
||||
for post in self.instances:
|
||||
post.save()
|
||||
|
||||
|
||||
class FeedStream(Stream):
|
||||
def read(self) -> Tuple:
|
||||
def read(self):
|
||||
url = self.rule.url
|
||||
response = fetch(url)
|
||||
|
||||
return (self.parse(response.content), self)
|
||||
|
||||
def parse(self, payload: bytes) -> Dict:
|
||||
def parse(self, payload):
|
||||
try:
|
||||
return parse(payload)
|
||||
except TypeError as e:
|
||||
|
|
@ -143,7 +142,7 @@ class FeedStream(Stream):
|
|||
class FeedClient(Client):
|
||||
stream = FeedStream
|
||||
|
||||
def __enter__(self) -> ContextManager:
|
||||
def __enter__(self):
|
||||
streams = [self.stream(rule) for rule in self.rules]
|
||||
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
|
|
@ -175,19 +174,19 @@ class FeedCollector(Collector):
|
|||
|
||||
|
||||
class FeedDuplicateHandler:
|
||||
def __init__(self, rule: CollectionRule) -> None:
|
||||
def __init__(self, rule):
|
||||
self.queryset = rule.posts.all()
|
||||
|
||||
def __enter__(self) -> ContextManager:
|
||||
def __enter__(self):
|
||||
self.existing_identifiers = self.queryset.filter(
|
||||
remote_identifier__isnull=False
|
||||
).values_list("remote_identifier", flat=True)
|
||||
return self
|
||||
|
||||
def __exit__(self, *args, **kwargs) -> None:
|
||||
def __exit__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def check(self, instances: List) -> Generator[Post, None, None]:
|
||||
def check(self, instances):
|
||||
for instance in instances:
|
||||
if instance.remote_identifier in self.existing_identifiers:
|
||||
existing_post = self.handle_duplicate(instance)
|
||||
|
|
@ -200,7 +199,7 @@ class FeedDuplicateHandler:
|
|||
|
||||
yield instance
|
||||
|
||||
def in_database(self, post: Post) -> Optional[bool]:
|
||||
def in_database(self, post):
|
||||
values = {
|
||||
"url": post.url,
|
||||
"title": post.title,
|
||||
|
|
@ -212,7 +211,7 @@ class FeedDuplicateHandler:
|
|||
if self.is_duplicate(existing_post, values):
|
||||
return True
|
||||
|
||||
def is_duplicate(self, existing_post: Post, values: Dict) -> bool:
|
||||
def is_duplicate(self, existing_post, values):
|
||||
for key, value in values.items():
|
||||
existing_value = getattr(existing_post, key, None)
|
||||
if existing_value != value:
|
||||
|
|
@ -220,7 +219,7 @@ class FeedDuplicateHandler:
|
|||
|
||||
return True
|
||||
|
||||
def handle_duplicate(self, instance: Post) -> Optional[Post]:
|
||||
def handle_duplicate(self, instance):
|
||||
try:
|
||||
existing_instance = self.queryset.get(
|
||||
remote_identifier=instance.remote_identifier
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ class CollectionRuleForm(forms.ModelForm):
|
|||
choices=((timezone, timezone) for timezone in pytz.all_timezones),
|
||||
)
|
||||
|
||||
def __init__(self, *args, **kwargs) -> None:
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.user = kwargs.pop("user")
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
|
|
@ -21,7 +21,7 @@ class CollectionRuleForm(forms.ModelForm):
|
|||
if self.user:
|
||||
self.fields["category"].queryset = Category.objects.filter(user=self.user)
|
||||
|
||||
def save(self, commit=True) -> CollectionRule:
|
||||
def save(self, commit=True):
|
||||
instance = super().save(commit=False)
|
||||
instance.user = self.user
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
from typing import ContextManager
|
||||
|
||||
from requests.exceptions import ConnectionError as RequestConnectionError
|
||||
|
||||
from newsreader.news.collection.exceptions import (
|
||||
|
|
@ -22,10 +20,10 @@ class ResponseHandler:
|
|||
|
||||
exception_mapping = {RequestConnectionError: StreamConnectionError}
|
||||
|
||||
def __enter__(self) -> ContextManager:
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def handle_response(self, response) -> None:
|
||||
def handle_response(self, response):
|
||||
status_code = response.status_code
|
||||
|
||||
if status_code in self.status_code_mapping:
|
||||
|
|
@ -40,5 +38,5 @@ class ResponseHandler:
|
|||
message = getattr(exception, "message", str(exception))
|
||||
raise stream_exception(message=message) from exception
|
||||
|
||||
def __exit__(self, *args, **kwargs) -> None:
|
||||
def __exit__(self, *args, **kwargs):
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -1,6 +1,4 @@
|
|||
from datetime import datetime, tzinfo
|
||||
from time import struct_time
|
||||
from typing import Tuple
|
||||
from datetime import datetime
|
||||
|
||||
from django.utils import timezone
|
||||
|
||||
|
|
@ -12,7 +10,7 @@ from requests.models import Response
|
|||
from newsreader.news.collection.response_handler import ResponseHandler
|
||||
|
||||
|
||||
def build_publication_date(dt: struct_time, tz: tzinfo) -> Tuple:
|
||||
def build_publication_date(dt, tz):
|
||||
try:
|
||||
naive_datetime = datetime(*dt[:6])
|
||||
published_parsed = timezone.make_aware(naive_datetime, timezone=tz)
|
||||
|
|
@ -21,7 +19,7 @@ def build_publication_date(dt: struct_time, tz: tzinfo) -> Tuple:
|
|||
return published_parsed, True
|
||||
|
||||
|
||||
def fetch(url: str) -> Response:
|
||||
def fetch(url):
|
||||
with ResponseHandler() as response_handler:
|
||||
try:
|
||||
response = requests.get(url)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
from typing import Dict, Iterable
|
||||
|
||||
from django.contrib import messages
|
||||
from django.urls import reverse_lazy
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
|
|
@ -17,7 +15,7 @@ from newsreader.utils.opml import parse_opml
|
|||
class CollectionRuleViewMixin:
|
||||
queryset = CollectionRule.objects.order_by("name")
|
||||
|
||||
def get_queryset(self) -> Iterable:
|
||||
def get_queryset(self):
|
||||
user = self.request.user
|
||||
return self.queryset.filter(user=user)
|
||||
|
||||
|
|
@ -26,7 +24,7 @@ class CollectionRuleDetailMixin:
|
|||
success_url = reverse_lazy("rules")
|
||||
form_class = CollectionRuleForm
|
||||
|
||||
def get_context_data(self, **kwargs) -> Dict:
|
||||
def get_context_data(self, **kwargs):
|
||||
context_data = super().get_context_data(**kwargs)
|
||||
|
||||
rules = Category.objects.filter(user=self.request.user).order_by("name")
|
||||
|
|
@ -37,7 +35,7 @@ class CollectionRuleDetailMixin:
|
|||
|
||||
return context_data
|
||||
|
||||
def get_form_kwargs(self) -> Dict:
|
||||
def get_form_kwargs(self):
|
||||
kwargs = super().get_form_kwargs()
|
||||
kwargs["user"] = self.request.user
|
||||
return kwargs
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
from django.db.models import Q
|
||||
from django.db.models.query import QuerySet
|
||||
|
||||
from rest_framework import status
|
||||
from rest_framework.generics import (
|
||||
|
|
@ -63,7 +62,7 @@ class NestedRuleCategoryView(ListAPIView):
|
|||
queryset = Category.objects.prefetch_related("rules").all()
|
||||
serializer_class = RuleSerializer
|
||||
|
||||
def get_queryset(self) -> QuerySet:
|
||||
def get_queryset(self):
|
||||
lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field
|
||||
|
||||
# Default permission is IsOwner, therefore there shouldn't have to be
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ class CategoryForm(forms.ModelForm):
|
|||
|
||||
self.initial["user"] = self.user
|
||||
|
||||
def save(self, commit=True) -> Category:
|
||||
def save(self, commit=True):
|
||||
instance = super().save(commit=False)
|
||||
|
||||
if commit:
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
from newsreader.news.collection.serializers import RuleSerializer
|
||||
from rest_framework import serializers
|
||||
|
||||
from newsreader.news.posts.models import Category, Post
|
||||
|
|
@ -8,7 +9,7 @@ class CategorySerializer(serializers.ModelSerializer):
|
|||
|
||||
def get_rules(self, instance):
|
||||
rules = instance.rules.order_by("-modified", "-created")
|
||||
serializer = CollectionRuleSerializer(rules, many=True)
|
||||
serializer = RuleSerializer(rules, many=True)
|
||||
return serializer.data
|
||||
|
||||
class Meta:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
from typing import Dict, Iterable
|
||||
|
||||
from django.urls import reverse_lazy
|
||||
from django.views.generic.base import TemplateView
|
||||
from django.views.generic.edit import CreateView, UpdateView
|
||||
|
|
@ -14,7 +12,7 @@ class NewsView(TemplateView):
|
|||
template_name = "core/homepage.html"
|
||||
|
||||
# TODO serialize objects to show filled main page
|
||||
def get_context_data(self, **kwargs) -> Dict:
|
||||
def get_context_data(self, **kwargs):
|
||||
context = super().get_context_data(**kwargs)
|
||||
user = self.request.user
|
||||
|
||||
|
|
@ -35,7 +33,7 @@ class NewsView(TemplateView):
|
|||
class CategoryViewMixin:
|
||||
queryset = Category.objects.prefetch_related("rules").order_by("name")
|
||||
|
||||
def get_queryset(self) -> Iterable:
|
||||
def get_queryset(self):
|
||||
user = self.request.user
|
||||
return self.queryset.filter(user=user)
|
||||
|
||||
|
|
@ -44,7 +42,7 @@ class CategoryDetailMixin:
|
|||
success_url = reverse_lazy("categories")
|
||||
form_class = CategoryForm
|
||||
|
||||
def get_context_data(self, **kwargs) -> Dict:
|
||||
def get_context_data(self, **kwargs):
|
||||
context_data = super().get_context_data(**kwargs)
|
||||
|
||||
rules = CollectionRule.objects.filter(user=self.request.user).order_by("name")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue