This is clickbait
+This is clickbait
+diff --git a/.gitignore b/.gitignore index 8cab59f..8d9e86a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ *.pyc __pycache__/ local_settings.py +local.py db.sqlite3 media diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..e453b8d --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,7 @@ +[settings] +include_trailing_comma = true +line_length = 80 +multi_line_output = 3 +skip = env/ +forced_separate=django, newsreader +lines_between_types=1 diff --git a/requirements/base.txt b/requirements/base.txt new file mode 100644 index 0000000..0f82f61 --- /dev/null +++ b/requirements/base.txt @@ -0,0 +1,12 @@ +certifi==2019.3.9 +chardet==3.0.4 +Django==2.2 +feedparser==5.2.1 +idna==2.8 +pkg-resources==0.0.0 +pytz==2018.9 +requests==2.21.0 +sqlparse==0.3.0 +urllib3==1.24.1 +psycopg2-binary==2.8.1 +Pillow==6.0.0 diff --git a/requirements/dev.txt b/requirements/dev.txt new file mode 100644 index 0000000..0a685e3 --- /dev/null +++ b/requirements/dev.txt @@ -0,0 +1,4 @@ +-r base.txt + +factory-boy==2.12.0 +freezegun==0.3.12 diff --git a/src/manage.py b/src/manage.py new file mode 100755 index 0000000..a30fa10 --- /dev/null +++ b/src/manage.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'newsreader.conf.base') + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == '__main__': + main() diff --git a/src/newsreader/__init__.py b/src/newsreader/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/newsreader/conf/__init__.py b/src/newsreader/conf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/newsreader/conf/base.py b/src/newsreader/conf/base.py new file mode 100644 index 0000000..4b9f9df --- /dev/null +++ b/src/newsreader/conf/base.py @@ -0,0 +1,111 @@ +""" +Django settings for newsreader project. + +Generated by "django-admin startproject" using Django 2.2. + +For more information on this file, see +https://docs.djangoproject.com/en/2.2/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/2.2/ref/settings/ +""" + +import os + +# Build paths inside the project like this: os.path.join(BASE_DIR, ...) +BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/2.2/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = "^!7a2jq5j!exc-55vf$anx9^6ff6=u_ub5=5p1(1x47fix)syh" + +# SECURITY WARNING: don"t run with debug turned on in production! +DEBUG = False + +ALLOWED_HOSTS = ["127.0.0.1"] + +# Application definition +INSTALLED_APPS = [ + "django.contrib.admin", + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", + # app modules + "newsreader.news.collection", + "newsreader.news.posts", +] + +MIDDLEWARE = [ + "django.middleware.security.SecurityMiddleware", + "django.contrib.sessions.middleware.SessionMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", +] + +ROOT_URLCONF = "newsreader.urls" + +TEMPLATES = [ + { + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", + ], + }, + }, +] + +WSGI_APPLICATION = "newsreader.wsgi.application" + +# Database +# https://docs.djangoproject.com/en/2.2/ref/settings/#databases +DATABASES = { + "default": { + "ENGINE": "django.db.backends.postgresql_psycopg2", + "NAME": "newsreader", + "USER": "newsreader", + } +} + +# Password validation +# https://docs.djangoproject.com/en/2.2/ref/settings/#auth-password-validators +AUTH_PASSWORD_VALIDATORS = [ + { + "NAME": + "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", + }, +] + +# Internationalization +# https://docs.djangoproject.com/en/2.2/topics/i18n/ +LANGUAGE_CODE = "en-us" + +TIME_ZONE = "UTC" +USE_I18N = True +USE_L10N = True +USE_TZ = True + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/2.2/howto/static-files/ +STATIC_URL = "/static/" diff --git a/src/newsreader/conf/dev.py b/src/newsreader/conf/dev.py new file mode 100644 index 0000000..8b5de69 --- /dev/null +++ b/src/newsreader/conf/dev.py @@ -0,0 +1,12 @@ +from .base import * + +# Development settings + +DEBUG = True + +EMAIL_BACKEND = "django.core.mail.backends.console.EmailBackend" + +try: + from .local import * +except ImportError: + pass diff --git a/src/newsreader/core/__init__.py b/src/newsreader/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/newsreader/core/admin.py b/src/newsreader/core/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/src/newsreader/core/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. diff --git a/src/newsreader/core/apps.py b/src/newsreader/core/apps.py new file mode 100644 index 0000000..26f78a8 --- /dev/null +++ b/src/newsreader/core/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + + +class CoreConfig(AppConfig): + name = 'core' diff --git a/src/newsreader/core/migrations/__init__.py b/src/newsreader/core/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/newsreader/core/models.py b/src/newsreader/core/models.py new file mode 100644 index 0000000..4bd2e28 --- /dev/null +++ b/src/newsreader/core/models.py @@ -0,0 +1,13 @@ +from django.db import models + + +class TimeStampedModel(models.Model): + """ + An abstract base class model that provides self- + updating ``created`` and ``modified`` fields. + """ + created = models.DateTimeField(auto_now_add=True) + modified = models.DateTimeField(auto_now=True) + + class Meta: + abstract = True diff --git a/src/newsreader/core/tests.py b/src/newsreader/core/tests.py new file mode 100644 index 0000000..7ce503c --- /dev/null +++ b/src/newsreader/core/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/src/newsreader/core/views.py b/src/newsreader/core/views.py new file mode 100644 index 0000000..91ea44a --- /dev/null +++ b/src/newsreader/core/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. diff --git a/src/newsreader/news/__init__.py b/src/newsreader/news/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/newsreader/news/collection/__init__.py b/src/newsreader/news/collection/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/newsreader/news/collection/admin.py b/src/newsreader/news/collection/admin.py new file mode 100644 index 0000000..972b020 --- /dev/null +++ b/src/newsreader/news/collection/admin.py @@ -0,0 +1,23 @@ +from django.contrib import admin + +from newsreader.news.collection.models import CollectionRule + + +class CollectionRuleAdmin(admin.ModelAdmin): + fields = ( + "url", + "name", + "timezone", + "category", + ) + + list_display = ( + "name", + "category", + "url", + "last_suceeded", + "succeeded", + ) + + +admin.site.register(CollectionRule, CollectionRuleAdmin) diff --git a/src/newsreader/news/collection/apps.py b/src/newsreader/news/collection/apps.py new file mode 100644 index 0000000..1454371 --- /dev/null +++ b/src/newsreader/news/collection/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + + +class CollectionConfig(AppConfig): + name = 'collection' diff --git a/src/newsreader/news/collection/base.py b/src/newsreader/news/collection/base.py new file mode 100644 index 0000000..58cd9c4 --- /dev/null +++ b/src/newsreader/news/collection/base.py @@ -0,0 +1,81 @@ +import requests + +from django.utils import timezone + +from newsreader.news.collection.models import CollectionRule + + +class Builder: + instances = [] + + def __init__(self, stream): + self.stream = stream + + def __enter__(self): + self.create_posts(self.stream) + return self + + def __exit__(self, *args, **kwargs): + pass + + def create_posts(self, stream): + pass + + def save(self): + pass + + class Meta: + abstract = True + + +class Collector: + client = None + builder = None + + def __init__(self, client=None, builder=None): + self.client = client if client else self.client + self.builder = builder if builder else self.builder + + def collect(self, rules=None): + with self.client(rules=rules) as client: + for data, stream in client: + with self.builder((data, stream)) as builder: + builder.save() + + class Meta: + abstract = True + + +class Stream: + def __init__(self, rule): + self.rule = rule + + def read(self): + url = self.rule.url + response = requests.get(url) + return (self.parse(response.content), self) + + def parse(self, payload): + raise NotImplementedError + + class Meta: + abstract = True + + +class Client: + stream = Stream + + def __init__(self, rules=None): + self.rules = rules if rules else CollectionRule.objects.all() + + def __enter__(self): + for rule in self.rules: + stream = self.stream(rule) + + yield stream.read() + + def __exit__(self, *args, **kwargs): + pass + + class Meta: + abstract = True diff --git a/src/newsreader/news/collection/exceptions.py b/src/newsreader/news/collection/exceptions.py new file mode 100644 index 0000000..8e12da1 --- /dev/null +++ b/src/newsreader/news/collection/exceptions.py @@ -0,0 +1,28 @@ +class StreamException(Exception): + message = "Stream exception" + + def __init__(self, message=None): + self.message = message if message else self.message + + def __str__(self): + return self.message + + +class StreamNotFoundException(StreamException): + message = "Stream not found" + + +class StreamDeniedException(StreamException): + message = "Stream does not have sufficient permissions" + + +class StreamTimeOutException(StreamException): + message = "Stream timed out" + + +class StreamForbiddenException(StreamException): + message = "Stream forbidden" + + +class StreamParseException(StreamException): + message = "Stream could not be parsed" diff --git a/src/newsreader/news/collection/feed.py b/src/newsreader/news/collection/feed.py new file mode 100644 index 0000000..fe4e2cf --- /dev/null +++ b/src/newsreader/news/collection/feed.py @@ -0,0 +1,211 @@ +from concurrent.futures import ThreadPoolExecutor, as_completed + +import bleach +import pytz +import requests + +from feedparser import parse + +from django.utils import timezone + +from newsreader.news.collection.base import Builder, Client, Collector, Stream +from newsreader.news.collection.exceptions import ( + StreamDeniedException, + StreamException, + StreamNotFoundException, + StreamParseException, + StreamTimeOutException, +) +from newsreader.news.collection.response_handler import ResponseHandler +from newsreader.news.collection.utils import build_publication_date +from newsreader.news.posts.models import Post + + +class FeedBuilder(Builder): + instances = [] + + def __enter__(self): + _, stream = self.stream + self.instances = [] + self.existing_posts = { + post.remote_identifier: post + for post in Post.objects.filter(rule=stream.rule) + } + + return super().__enter__() + + def create_posts(self, stream): + data, stream = stream + entries = [] + + with FeedDuplicateHandler(stream.rule) as duplicate_handler: + try: + entries = data["entries"] + except KeyError: + pass + + instances = self.build(entries, stream.rule) + posts = duplicate_handler.check(instances) + + self.instances = [post for post in posts] + + def build(self, entries, rule): + field_mapping = { + "id": "remote_identifier", + "title": "title", + "summary": "body", + "link": "url", + "published_parsed": "publication_date", + "author": "author" + } + + tz = pytz.timezone(rule.timezone) + + for entry in entries: + data = { + "rule_id": rule.pk, + "category": rule.category + } + + for field, value in field_mapping.items(): + if field in entry: + if field == "published_parsed": + created, aware_datetime = build_publication_date( + entry[field], tz + ) + data[value] = aware_datetime if created else None + elif field == "summary": + summary = self.sanitize_summary(entry[field]) + data[value] = summary + else: + data[value] = entry[field] + + yield Post(**data) + + def sanitize_summary(self, summary): + attrs = {"a": ["href", "rel"], "img": ["alt", "src"],} + tags = ["a", "img", "p"] + + return bleach.clean(summary, tags=tags, attributes=attrs) if summary else None + + def save(self): + for post in self.instances: + post.save() + + +class FeedStream(Stream): + def read(self): + url = self.rule.url + response = requests.get(url) + + with ResponseHandler(response) as response_handler: + response_handler.handle_response() + + return (self.parse(response.content), self) + + def parse(self, payload): + try: + return parse(payload) + except TypeError as e: + raise StreamParseException("Could not parse feed") from e + + +class FeedClient(Client): + stream = FeedStream + + def __enter__(self): + streams = [self.stream(rule) for rule in self.rules] + + with ThreadPoolExecutor(max_workers=10) as executor: + futures = { + executor.submit(stream.read): stream + for stream in streams + } + + for future in as_completed(futures): + stream = futures[future] + + try: + response_data = future.result() + + stream.rule.error = None + stream.rule.succeeded = True + stream.rule.last_suceeded = timezone.now() + + yield response_data + except StreamException as e: + stream.rule.error = e.message + stream.rule.succeeded = False + + yield ({"entries": []}, stream) + finally: + stream.rule.save() + + +class FeedCollector(Collector): + builder = FeedBuilder + client = FeedClient + + +class FeedDuplicateHandler: + def __init__(self, rule): + self.queryset = rule.post_set.all() + + def __enter__(self): + self.existing_identifiers = self.queryset.filter(remote_identifier__isnull=False).values_list( + "remote_identifier", flat=True + ) + return self + + def __exit__(self, *args, **kwargs): + pass + + def check(self, instances): + for instance in instances: + if instance.remote_identifier in self.existing_identifiers: + existing_post = self.handle_duplicate(instance) + + if existing_post: + yield existing_post + continue + elif not instance.remote_identifier and self.in_database(instance): + continue + + yield instance + + def in_database(self, entry): + values = { + "url": entry.url, + "title": entry.title, + "body": entry.body, + "publication_date": entry.publication_date + } + + for existing_entry in self.queryset.order_by("-publication_date")[:50]: + if self.is_duplicate(existing_entry, values): + return True + + def is_duplicate(self, existing_entry, values): + for key, value in values.items(): + existing_value = getattr(existing_entry, key, object()) + if existing_value != value: + return False + + return True + + def handle_duplicate(self, instance): + try: + existing_instance = self.queryset.get( + remote_identifier=instance.remote_identifier, + ) + except ObjectDoesNotExist: + return + + for field in instance._meta.get_fields(): + getattr(existing_instance, field.name, object()) + new_value = getattr(instance, field.name, object()) + + if new_value and field.name != "id": + setattr(existing_instance, field.name, new_value) + + return existing_instance diff --git a/src/newsreader/news/collection/management/commands/collect.py b/src/newsreader/news/collection/management/commands/collect.py new file mode 100644 index 0000000..c72301f --- /dev/null +++ b/src/newsreader/news/collection/management/commands/collect.py @@ -0,0 +1,14 @@ +from django.core.management.base import BaseCommand, CommandError + +from newsreader.news.collection.feed import FeedCollector +from newsreader.news.collection.models import CollectionRule + + +class Command(BaseCommand): + help = 'Collects Atom/RSS feeds' + + def handle(self, *args, **options): + CollectionRule.objects.all() + + collector = FeedCollector() + collector.collect() diff --git a/src/newsreader/news/collection/migrations/0001_initial.py b/src/newsreader/news/collection/migrations/0001_initial.py new file mode 100644 index 0000000..354a97f --- /dev/null +++ b/src/newsreader/news/collection/migrations/0001_initial.py @@ -0,0 +1,31 @@ +# Generated by Django 2.2 on 2019-04-10 20:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name='CollectionRule', + fields=[ + ( + 'id', + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name='ID' + ) + ), + ('name', models.CharField(max_length=100)), + ('url', models.URLField()), + ('last_suceeded', models.DateTimeField()), + ('succeeded', models.BooleanField(default=False)), + ], + ), + ] diff --git a/src/newsreader/news/collection/migrations/0002_auto_20190410_2028.py b/src/newsreader/news/collection/migrations/0002_auto_20190410_2028.py new file mode 100644 index 0000000..9c0807e --- /dev/null +++ b/src/newsreader/news/collection/migrations/0002_auto_20190410_2028.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2 on 2019-04-10 20:28 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('collection', '0001_initial'), + ] + + operations = [ + migrations.AlterField( + model_name='collectionrule', + name='last_suceeded', + field=models.DateTimeField(blank=True, null=True), + ), + ] diff --git a/src/newsreader/news/collection/migrations/0003_collectionrule_category.py b/src/newsreader/news/collection/migrations/0003_collectionrule_category.py new file mode 100644 index 0000000..4c3f267 --- /dev/null +++ b/src/newsreader/news/collection/migrations/0003_collectionrule_category.py @@ -0,0 +1,28 @@ +# Generated by Django 2.2 on 2019-05-20 20:06 + +import django.db.models.deletion + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('posts', '0002_auto_20190520_2206'), + ('collection', '0002_auto_20190410_2028'), + ] + + operations = [ + migrations.AddField( + model_name='collectionrule', + name='category', + field=models.ForeignKey( + blank=True, + help_text='Posts from this rule will be tagged with this category', + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to='posts.Category', + verbose_name='Category' + ), + ), + ] diff --git a/src/newsreader/news/collection/migrations/0004_collectionrule_timezone.py b/src/newsreader/news/collection/migrations/0004_collectionrule_timezone.py new file mode 100644 index 0000000..e5943dc --- /dev/null +++ b/src/newsreader/news/collection/migrations/0004_collectionrule_timezone.py @@ -0,0 +1,517 @@ +# Generated by Django 2.2 on 2019-05-20 20:41 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('collection', '0003_collectionrule_category'), + ] + + operations = [ + migrations.AddField( + model_name='collectionrule', + name='timezone', + field=models.CharField( + choices=[ + ('Africa/Abidjan', 'Africa/Abidjan'), + ('Africa/Accra', 'Africa/Accra'), + ('Africa/Addis_Ababa', 'Africa/Addis_Ababa'), + ('Africa/Algiers', 'Africa/Algiers'), + ('Africa/Asmara', + 'Africa/Asmara'), ('Africa/Asmera', 'Africa/Asmera'), + ('Africa/Bamako', + 'Africa/Bamako'), ('Africa/Bangui', 'Africa/Bangui'), + ('Africa/Banjul', 'Africa/Banjul'), + ('Africa/Bissau', 'Africa/Bissau'), + ('Africa/Blantyre', 'Africa/Blantyre'), + ('Africa/Brazzaville', 'Africa/Brazzaville'), + ('Africa/Bujumbura', 'Africa/Bujumbura'), + ('Africa/Cairo', 'Africa/Cairo'), + ('Africa/Casablanca', 'Africa/Casablanca'), + ('Africa/Ceuta', + 'Africa/Ceuta'), ('Africa/Conakry', 'Africa/Conakry'), + ('Africa/Dakar', 'Africa/Dakar'), + ('Africa/Dar_es_Salaam', 'Africa/Dar_es_Salaam'), + ('Africa/Djibouti', 'Africa/Djibouti'), + ('Africa/Douala', 'Africa/Douala'), + ('Africa/El_Aaiun', 'Africa/El_Aaiun'), + ('Africa/Freetown', 'Africa/Freetown'), + ('Africa/Gaborone', 'Africa/Gaborone'), + ('Africa/Harare', 'Africa/Harare'), + ('Africa/Johannesburg', 'Africa/Johannesburg'), + ('Africa/Juba', 'Africa/Juba'), ('Africa/Kampala', 'Africa/Kampala'), + ('Africa/Khartoum', 'Africa/Khartoum'), + ('Africa/Kigali', 'Africa/Kigali'), + ('Africa/Kinshasa', 'Africa/Kinshasa'), + ('Africa/Lagos', 'Africa/Lagos'), + ('Africa/Libreville', 'Africa/Libreville'), + ('Africa/Lome', 'Africa/Lome'), ('Africa/Luanda', 'Africa/Luanda'), + ('Africa/Lubumbashi', 'Africa/Lubumbashi'), + ('Africa/Lusaka', + 'Africa/Lusaka'), ('Africa/Malabo', 'Africa/Malabo'), + ('Africa/Maputo', 'Africa/Maputo'), + ('Africa/Maseru', 'Africa/Maseru'), + ('Africa/Mbabane', 'Africa/Mbabane'), + ('Africa/Mogadishu', 'Africa/Mogadishu'), + ('Africa/Monrovia', 'Africa/Monrovia'), + ('Africa/Nairobi', 'Africa/Nairobi'), + ('Africa/Ndjamena', 'Africa/Ndjamena'), + ('Africa/Niamey', 'Africa/Niamey'), + ('Africa/Nouakchott', 'Africa/Nouakchott'), + ('Africa/Ouagadougou', 'Africa/Ouagadougou'), + ('Africa/Porto-Novo', 'Africa/Porto-Novo'), + ('Africa/Sao_Tome', 'Africa/Sao_Tome'), + ('Africa/Timbuktu', 'Africa/Timbuktu'), + ('Africa/Tripoli', 'Africa/Tripoli'), + ('Africa/Tunis', 'Africa/Tunis'), + ('Africa/Windhoek', 'Africa/Windhoek'), + ('America/Adak', 'America/Adak'), + ('America/Anchorage', 'America/Anchorage'), + ('America/Anguilla', 'America/Anguilla'), + ('America/Antigua', 'America/Antigua'), + ('America/Araguaina', 'America/Araguaina'), + ('America/Argentina/Buenos_Aires', 'America/Argentina/Buenos_Aires'), + ('America/Argentina/Catamarca', 'America/Argentina/Catamarca'), + ( + 'America/Argentina/ComodRivadavia', + 'America/Argentina/ComodRivadavia' + ), ('America/Argentina/Cordoba', 'America/Argentina/Cordoba'), + ('America/Argentina/Jujuy', 'America/Argentina/Jujuy'), + ('America/Argentina/La_Rioja', 'America/Argentina/La_Rioja'), + ('America/Argentina/Mendoza', 'America/Argentina/Mendoza'), + ('America/Argentina/Rio_Gallegos', 'America/Argentina/Rio_Gallegos'), + ('America/Argentina/Salta', 'America/Argentina/Salta'), + ('America/Argentina/San_Juan', 'America/Argentina/San_Juan'), + ('America/Argentina/San_Luis', 'America/Argentina/San_Luis'), + ('America/Argentina/Tucuman', 'America/Argentina/Tucuman'), + ('America/Argentina/Ushuaia', 'America/Argentina/Ushuaia'), + ('America/Aruba', 'America/Aruba'), + ('America/Asuncion', 'America/Asuncion'), + ('America/Atikokan', 'America/Atikokan'), + ('America/Atka', 'America/Atka'), ('America/Bahia', 'America/Bahia'), + ('America/Bahia_Banderas', 'America/Bahia_Banderas'), + ('America/Barbados', 'America/Barbados'), + ('America/Belem', 'America/Belem'), + ('America/Belize', 'America/Belize'), + ('America/Blanc-Sablon', 'America/Blanc-Sablon'), + ('America/Boa_Vista', 'America/Boa_Vista'), + ('America/Bogota', 'America/Bogota'), + ('America/Boise', 'America/Boise'), + ('America/Buenos_Aires', 'America/Buenos_Aires'), + ('America/Cambridge_Bay', 'America/Cambridge_Bay'), + ('America/Campo_Grande', 'America/Campo_Grande'), + ('America/Cancun', 'America/Cancun'), + ('America/Caracas', 'America/Caracas'), + ('America/Catamarca', 'America/Catamarca'), + ('America/Cayenne', 'America/Cayenne'), + ('America/Cayman', 'America/Cayman'), + ('America/Chicago', 'America/Chicago'), + ('America/Chihuahua', 'America/Chihuahua'), + ('America/Coral_Harbour', 'America/Coral_Harbour'), + ('America/Cordoba', 'America/Cordoba'), + ('America/Costa_Rica', 'America/Costa_Rica'), + ('America/Creston', 'America/Creston'), + ('America/Cuiaba', 'America/Cuiaba'), + ('America/Curacao', 'America/Curacao'), + ('America/Danmarkshavn', 'America/Danmarkshavn'), + ('America/Dawson', 'America/Dawson'), + ('America/Dawson_Creek', 'America/Dawson_Creek'), + ('America/Denver', 'America/Denver'), + ('America/Detroit', 'America/Detroit'), + ('America/Dominica', 'America/Dominica'), + ('America/Edmonton', 'America/Edmonton'), + ('America/Eirunepe', 'America/Eirunepe'), + ('America/El_Salvador', 'America/El_Salvador'), + ('America/Ensenada', 'America/Ensenada'), + ('America/Fort_Nelson', 'America/Fort_Nelson'), + ('America/Fort_Wayne', 'America/Fort_Wayne'), + ('America/Fortaleza', 'America/Fortaleza'), + ('America/Glace_Bay', 'America/Glace_Bay'), + ('America/Godthab', 'America/Godthab'), + ('America/Goose_Bay', 'America/Goose_Bay'), + ('America/Grand_Turk', 'America/Grand_Turk'), + ('America/Grenada', 'America/Grenada'), + ('America/Guadeloupe', 'America/Guadeloupe'), + ('America/Guatemala', 'America/Guatemala'), + ('America/Guayaquil', 'America/Guayaquil'), + ('America/Guyana', 'America/Guyana'), + ('America/Halifax', 'America/Halifax'), + ('America/Havana', 'America/Havana'), + ('America/Hermosillo', 'America/Hermosillo'), + ('America/Indiana/Indianapolis', 'America/Indiana/Indianapolis'), + ('America/Indiana/Knox', 'America/Indiana/Knox'), + ('America/Indiana/Marengo', 'America/Indiana/Marengo'), + ('America/Indiana/Petersburg', 'America/Indiana/Petersburg'), + ('America/Indiana/Tell_City', 'America/Indiana/Tell_City'), + ('America/Indiana/Vevay', 'America/Indiana/Vevay'), + ('America/Indiana/Vincennes', 'America/Indiana/Vincennes'), + ('America/Indiana/Winamac', 'America/Indiana/Winamac'), + ('America/Indianapolis', 'America/Indianapolis'), + ('America/Inuvik', 'America/Inuvik'), + ('America/Iqaluit', 'America/Iqaluit'), + ('America/Jamaica', 'America/Jamaica'), + ('America/Jujuy', 'America/Jujuy'), + ('America/Juneau', 'America/Juneau'), + ('America/Kentucky/Louisville', 'America/Kentucky/Louisville'), + ('America/Kentucky/Monticello', 'America/Kentucky/Monticello'), + ('America/Knox_IN', 'America/Knox_IN'), + ('America/Kralendijk', 'America/Kralendijk'), + ('America/La_Paz', + 'America/La_Paz'), ('America/Lima', 'America/Lima'), + ('America/Los_Angeles', 'America/Los_Angeles'), + ('America/Louisville', 'America/Louisville'), + ('America/Lower_Princes', 'America/Lower_Princes'), + ('America/Maceio', 'America/Maceio'), + ('America/Managua', 'America/Managua'), + ('America/Manaus', 'America/Manaus'), + ('America/Marigot', 'America/Marigot'), + ('America/Martinique', 'America/Martinique'), + ('America/Matamoros', 'America/Matamoros'), + ('America/Mazatlan', 'America/Mazatlan'), + ('America/Mendoza', 'America/Mendoza'), + ('America/Menominee', 'America/Menominee'), + ('America/Merida', 'America/Merida'), + ('America/Metlakatla', 'America/Metlakatla'), + ('America/Mexico_City', 'America/Mexico_City'), + ('America/Miquelon', 'America/Miquelon'), + ('America/Moncton', 'America/Moncton'), + ('America/Monterrey', 'America/Monterrey'), + ('America/Montevideo', 'America/Montevideo'), + ('America/Montreal', 'America/Montreal'), + ('America/Montserrat', 'America/Montserrat'), + ('America/Nassau', 'America/Nassau'), + ('America/New_York', 'America/New_York'), + ('America/Nipigon', 'America/Nipigon'), + ('America/Nome', 'America/Nome'), + ('America/Noronha', 'America/Noronha'), + ('America/North_Dakota/Beulah', 'America/North_Dakota/Beulah'), + ('America/North_Dakota/Center', 'America/North_Dakota/Center'), + ('America/North_Dakota/New_Salem', 'America/North_Dakota/New_Salem'), + ('America/Ojinaga', 'America/Ojinaga'), + ('America/Panama', 'America/Panama'), + ('America/Pangnirtung', 'America/Pangnirtung'), + ('America/Paramaribo', 'America/Paramaribo'), + ('America/Phoenix', 'America/Phoenix'), + ('America/Port-au-Prince', 'America/Port-au-Prince'), + ('America/Port_of_Spain', 'America/Port_of_Spain'), + ('America/Porto_Acre', 'America/Porto_Acre'), + ('America/Porto_Velho', 'America/Porto_Velho'), + ('America/Puerto_Rico', 'America/Puerto_Rico'), + ('America/Punta_Arenas', 'America/Punta_Arenas'), + ('America/Rainy_River', 'America/Rainy_River'), + ('America/Rankin_Inlet', 'America/Rankin_Inlet'), + ('America/Recife', 'America/Recife'), + ('America/Regina', 'America/Regina'), + ('America/Resolute', 'America/Resolute'), + ('America/Rio_Branco', 'America/Rio_Branco'), + ('America/Rosario', 'America/Rosario'), + ('America/Santa_Isabel', 'America/Santa_Isabel'), + ('America/Santarem', 'America/Santarem'), + ('America/Santiago', 'America/Santiago'), + ('America/Santo_Domingo', 'America/Santo_Domingo'), + ('America/Sao_Paulo', 'America/Sao_Paulo'), + ('America/Scoresbysund', 'America/Scoresbysund'), + ('America/Shiprock', 'America/Shiprock'), + ('America/Sitka', 'America/Sitka'), + ('America/St_Barthelemy', 'America/St_Barthelemy'), + ('America/St_Johns', 'America/St_Johns'), + ('America/St_Kitts', 'America/St_Kitts'), + ('America/St_Lucia', 'America/St_Lucia'), + ('America/St_Thomas', 'America/St_Thomas'), + ('America/St_Vincent', 'America/St_Vincent'), + ('America/Swift_Current', 'America/Swift_Current'), + ('America/Tegucigalpa', 'America/Tegucigalpa'), + ('America/Thule', 'America/Thule'), + ('America/Thunder_Bay', 'America/Thunder_Bay'), + ('America/Tijuana', 'America/Tijuana'), + ('America/Toronto', 'America/Toronto'), + ('America/Tortola', 'America/Tortola'), + ('America/Vancouver', 'America/Vancouver'), + ('America/Virgin', 'America/Virgin'), + ('America/Whitehorse', 'America/Whitehorse'), + ('America/Winnipeg', 'America/Winnipeg'), + ('America/Yakutat', 'America/Yakutat'), + ('America/Yellowknife', 'America/Yellowknife'), + ('Antarctica/Casey', 'Antarctica/Casey'), + ('Antarctica/Davis', 'Antarctica/Davis'), + ('Antarctica/DumontDUrville', 'Antarctica/DumontDUrville'), + ('Antarctica/Macquarie', 'Antarctica/Macquarie'), + ('Antarctica/Mawson', 'Antarctica/Mawson'), + ('Antarctica/McMurdo', 'Antarctica/McMurdo'), + ('Antarctica/Palmer', 'Antarctica/Palmer'), + ('Antarctica/Rothera', 'Antarctica/Rothera'), + ('Antarctica/South_Pole', 'Antarctica/South_Pole'), + ('Antarctica/Syowa', 'Antarctica/Syowa'), + ('Antarctica/Troll', 'Antarctica/Troll'), + ('Antarctica/Vostok', 'Antarctica/Vostok'), + ('Arctic/Longyearbyen', 'Arctic/Longyearbyen'), + ('Asia/Aden', 'Asia/Aden'), ('Asia/Almaty', 'Asia/Almaty'), + ('Asia/Amman', 'Asia/Amman'), ('Asia/Anadyr', 'Asia/Anadyr'), + ('Asia/Aqtau', 'Asia/Aqtau'), ('Asia/Aqtobe', 'Asia/Aqtobe'), + ('Asia/Ashgabat', 'Asia/Ashgabat'), + ('Asia/Ashkhabad', 'Asia/Ashkhabad'), ('Asia/Atyrau', 'Asia/Atyrau'), + ('Asia/Baghdad', 'Asia/Baghdad'), ('Asia/Bahrain', 'Asia/Bahrain'), + ('Asia/Baku', 'Asia/Baku'), ('Asia/Bangkok', 'Asia/Bangkok'), + ('Asia/Barnaul', 'Asia/Barnaul'), ('Asia/Beirut', 'Asia/Beirut'), + ('Asia/Bishkek', 'Asia/Bishkek'), ('Asia/Brunei', 'Asia/Brunei'), + ('Asia/Calcutta', 'Asia/Calcutta'), ('Asia/Chita', 'Asia/Chita'), + ('Asia/Choibalsan', 'Asia/Choibalsan'), + ('Asia/Chongqing', 'Asia/Chongqing'), + ('Asia/Chungking', + 'Asia/Chungking'), ('Asia/Colombo', 'Asia/Colombo'), + ('Asia/Dacca', 'Asia/Dacca'), ('Asia/Damascus', 'Asia/Damascus'), + ('Asia/Dhaka', 'Asia/Dhaka'), ('Asia/Dili', 'Asia/Dili'), + ('Asia/Dubai', 'Asia/Dubai'), ('Asia/Dushanbe', 'Asia/Dushanbe'), + ('Asia/Famagusta', 'Asia/Famagusta'), ('Asia/Gaza', 'Asia/Gaza'), + ('Asia/Harbin', 'Asia/Harbin'), ('Asia/Hebron', 'Asia/Hebron'), + ('Asia/Ho_Chi_Minh', 'Asia/Ho_Chi_Minh'), + ('Asia/Hong_Kong', 'Asia/Hong_Kong'), ('Asia/Hovd', 'Asia/Hovd'), + ('Asia/Irkutsk', 'Asia/Irkutsk'), ('Asia/Istanbul', 'Asia/Istanbul'), + ('Asia/Jakarta', 'Asia/Jakarta'), ('Asia/Jayapura', 'Asia/Jayapura'), + ('Asia/Jerusalem', 'Asia/Jerusalem'), ('Asia/Kabul', 'Asia/Kabul'), + ('Asia/Kamchatka', 'Asia/Kamchatka'), + ('Asia/Karachi', 'Asia/Karachi'), ('Asia/Kashgar', 'Asia/Kashgar'), + ('Asia/Kathmandu', 'Asia/Kathmandu'), + ('Asia/Katmandu', + 'Asia/Katmandu'), ('Asia/Khandyga', 'Asia/Khandyga'), + ('Asia/Kolkata', 'Asia/Kolkata'), + ('Asia/Krasnoyarsk', 'Asia/Krasnoyarsk'), + ('Asia/Kuala_Lumpur', 'Asia/Kuala_Lumpur'), + ('Asia/Kuching', 'Asia/Kuching'), ('Asia/Kuwait', 'Asia/Kuwait'), + ('Asia/Macao', 'Asia/Macao'), ('Asia/Macau', 'Asia/Macau'), + ('Asia/Magadan', 'Asia/Magadan'), ('Asia/Makassar', 'Asia/Makassar'), + ('Asia/Manila', 'Asia/Manila'), ('Asia/Muscat', 'Asia/Muscat'), + ('Asia/Nicosia', 'Asia/Nicosia'), + ('Asia/Novokuznetsk', 'Asia/Novokuznetsk'), + ('Asia/Novosibirsk', 'Asia/Novosibirsk'), ('Asia/Omsk', 'Asia/Omsk'), + ('Asia/Oral', 'Asia/Oral'), ('Asia/Phnom_Penh', 'Asia/Phnom_Penh'), + ('Asia/Pontianak', 'Asia/Pontianak'), + ('Asia/Pyongyang', 'Asia/Pyongyang'), ('Asia/Qatar', 'Asia/Qatar'), + ('Asia/Qostanay', 'Asia/Qostanay'), + ('Asia/Qyzylorda', + 'Asia/Qyzylorda'), ('Asia/Rangoon', 'Asia/Rangoon'), + ('Asia/Riyadh', 'Asia/Riyadh'), ('Asia/Saigon', 'Asia/Saigon'), + ('Asia/Sakhalin', 'Asia/Sakhalin'), + ('Asia/Samarkand', 'Asia/Samarkand'), ('Asia/Seoul', 'Asia/Seoul'), + ('Asia/Shanghai', 'Asia/Shanghai'), + ('Asia/Singapore', 'Asia/Singapore'), + ('Asia/Srednekolymsk', 'Asia/Srednekolymsk'), + ('Asia/Taipei', 'Asia/Taipei'), ('Asia/Tashkent', 'Asia/Tashkent'), + ('Asia/Tbilisi', 'Asia/Tbilisi'), ('Asia/Tehran', 'Asia/Tehran'), + ('Asia/Tel_Aviv', 'Asia/Tel_Aviv'), ('Asia/Thimbu', 'Asia/Thimbu'), + ('Asia/Thimphu', 'Asia/Thimphu'), ('Asia/Tokyo', 'Asia/Tokyo'), + ('Asia/Tomsk', 'Asia/Tomsk'), + ('Asia/Ujung_Pandang', 'Asia/Ujung_Pandang'), + ('Asia/Ulaanbaatar', 'Asia/Ulaanbaatar'), + ('Asia/Ulan_Bator', + 'Asia/Ulan_Bator'), ('Asia/Urumqi', 'Asia/Urumqi'), + ('Asia/Ust-Nera', 'Asia/Ust-Nera'), + ('Asia/Vientiane', 'Asia/Vientiane'), + ('Asia/Vladivostok', 'Asia/Vladivostok'), + ('Asia/Yakutsk', 'Asia/Yakutsk'), ('Asia/Yangon', 'Asia/Yangon'), + ('Asia/Yekaterinburg', 'Asia/Yekaterinburg'), + ('Asia/Yerevan', 'Asia/Yerevan'), + ('Atlantic/Azores', 'Atlantic/Azores'), + ('Atlantic/Bermuda', 'Atlantic/Bermuda'), + ('Atlantic/Canary', 'Atlantic/Canary'), + ('Atlantic/Cape_Verde', 'Atlantic/Cape_Verde'), + ('Atlantic/Faeroe', 'Atlantic/Faeroe'), + ('Atlantic/Faroe', 'Atlantic/Faroe'), + ('Atlantic/Jan_Mayen', 'Atlantic/Jan_Mayen'), + ('Atlantic/Madeira', 'Atlantic/Madeira'), + ('Atlantic/Reykjavik', 'Atlantic/Reykjavik'), + ('Atlantic/South_Georgia', 'Atlantic/South_Georgia'), + ('Atlantic/St_Helena', 'Atlantic/St_Helena'), + ('Atlantic/Stanley', 'Atlantic/Stanley'), + ('Australia/ACT', 'Australia/ACT'), + ('Australia/Adelaide', 'Australia/Adelaide'), + ('Australia/Brisbane', 'Australia/Brisbane'), + ('Australia/Broken_Hill', 'Australia/Broken_Hill'), + ('Australia/Canberra', 'Australia/Canberra'), + ('Australia/Currie', 'Australia/Currie'), + ('Australia/Darwin', 'Australia/Darwin'), + ('Australia/Eucla', 'Australia/Eucla'), + ('Australia/Hobart', 'Australia/Hobart'), + ('Australia/LHI', 'Australia/LHI'), + ('Australia/Lindeman', 'Australia/Lindeman'), + ('Australia/Lord_Howe', 'Australia/Lord_Howe'), + ('Australia/Melbourne', 'Australia/Melbourne'), + ('Australia/NSW', 'Australia/NSW'), + ('Australia/North', 'Australia/North'), + ('Australia/Perth', 'Australia/Perth'), + ('Australia/Queensland', 'Australia/Queensland'), + ('Australia/South', 'Australia/South'), + ('Australia/Sydney', 'Australia/Sydney'), + ('Australia/Tasmania', 'Australia/Tasmania'), + ('Australia/Victoria', 'Australia/Victoria'), + ('Australia/West', 'Australia/West'), + ('Australia/Yancowinna', 'Australia/Yancowinna'), + ('Brazil/Acre', 'Brazil/Acre'), + ('Brazil/DeNoronha', 'Brazil/DeNoronha'), + ('Brazil/East', 'Brazil/East'), ('Brazil/West', 'Brazil/West'), + ('CET', 'CET'), ('CST6CDT', 'CST6CDT'), + ('Canada/Atlantic', 'Canada/Atlantic'), + ('Canada/Central', 'Canada/Central'), + ('Canada/Eastern', 'Canada/Eastern'), + ('Canada/Mountain', 'Canada/Mountain'), + ('Canada/Newfoundland', 'Canada/Newfoundland'), + ('Canada/Pacific', 'Canada/Pacific'), + ('Canada/Saskatchewan', 'Canada/Saskatchewan'), + ('Canada/Yukon', 'Canada/Yukon'), + ('Chile/Continental', 'Chile/Continental'), + ('Chile/EasterIsland', 'Chile/EasterIsland'), ('Cuba', 'Cuba'), + ('EET', 'EET'), ('EST', 'EST'), ('EST5EDT', 'EST5EDT'), + ('Egypt', 'Egypt'), ('Eire', 'Eire'), ('Etc/GMT', 'Etc/GMT'), + ('Etc/GMT+0', 'Etc/GMT+0'), ('Etc/GMT+1', 'Etc/GMT+1'), + ('Etc/GMT+10', 'Etc/GMT+10'), ('Etc/GMT+11', 'Etc/GMT+11'), + ('Etc/GMT+12', 'Etc/GMT+12'), ('Etc/GMT+2', 'Etc/GMT+2'), + ('Etc/GMT+3', 'Etc/GMT+3'), ('Etc/GMT+4', 'Etc/GMT+4'), + ('Etc/GMT+5', 'Etc/GMT+5'), ('Etc/GMT+6', 'Etc/GMT+6'), + ('Etc/GMT+7', 'Etc/GMT+7'), ('Etc/GMT+8', 'Etc/GMT+8'), + ('Etc/GMT+9', 'Etc/GMT+9'), ('Etc/GMT-0', 'Etc/GMT-0'), + ('Etc/GMT-1', 'Etc/GMT-1'), ('Etc/GMT-10', 'Etc/GMT-10'), + ('Etc/GMT-11', 'Etc/GMT-11'), ('Etc/GMT-12', 'Etc/GMT-12'), + ('Etc/GMT-13', 'Etc/GMT-13'), ('Etc/GMT-14', 'Etc/GMT-14'), + ('Etc/GMT-2', 'Etc/GMT-2'), ('Etc/GMT-3', 'Etc/GMT-3'), + ('Etc/GMT-4', 'Etc/GMT-4'), ('Etc/GMT-5', 'Etc/GMT-5'), + ('Etc/GMT-6', 'Etc/GMT-6'), ('Etc/GMT-7', 'Etc/GMT-7'), + ('Etc/GMT-8', 'Etc/GMT-8'), ('Etc/GMT-9', 'Etc/GMT-9'), + ('Etc/GMT0', 'Etc/GMT0'), ('Etc/Greenwich', 'Etc/Greenwich'), + ('Etc/UCT', 'Etc/UCT'), ('Etc/UTC', 'Etc/UTC'), + ('Etc/Universal', 'Etc/Universal'), ('Etc/Zulu', 'Etc/Zulu'), + ('Europe/Amsterdam', 'Europe/Amsterdam'), + ('Europe/Andorra', 'Europe/Andorra'), + ('Europe/Astrakhan', 'Europe/Astrakhan'), + ('Europe/Athens', 'Europe/Athens'), + ('Europe/Belfast', 'Europe/Belfast'), + ('Europe/Belgrade', 'Europe/Belgrade'), + ('Europe/Berlin', 'Europe/Berlin'), + ('Europe/Bratislava', 'Europe/Bratislava'), + ('Europe/Brussels', 'Europe/Brussels'), + ('Europe/Bucharest', 'Europe/Bucharest'), + ('Europe/Budapest', 'Europe/Budapest'), + ('Europe/Busingen', 'Europe/Busingen'), + ('Europe/Chisinau', 'Europe/Chisinau'), + ('Europe/Copenhagen', 'Europe/Copenhagen'), + ('Europe/Dublin', 'Europe/Dublin'), + ('Europe/Gibraltar', 'Europe/Gibraltar'), + ('Europe/Guernsey', 'Europe/Guernsey'), + ('Europe/Helsinki', 'Europe/Helsinki'), + ('Europe/Isle_of_Man', 'Europe/Isle_of_Man'), + ('Europe/Istanbul', 'Europe/Istanbul'), + ('Europe/Jersey', 'Europe/Jersey'), + ('Europe/Kaliningrad', 'Europe/Kaliningrad'), + ('Europe/Kiev', 'Europe/Kiev'), ('Europe/Kirov', 'Europe/Kirov'), + ('Europe/Lisbon', 'Europe/Lisbon'), + ('Europe/Ljubljana', 'Europe/Ljubljana'), + ('Europe/London', 'Europe/London'), + ('Europe/Luxembourg', 'Europe/Luxembourg'), + ('Europe/Madrid', 'Europe/Madrid'), ('Europe/Malta', 'Europe/Malta'), + ('Europe/Mariehamn', 'Europe/Mariehamn'), + ('Europe/Minsk', 'Europe/Minsk'), ('Europe/Monaco', 'Europe/Monaco'), + ('Europe/Moscow', 'Europe/Moscow'), + ('Europe/Nicosia', 'Europe/Nicosia'), ('Europe/Oslo', 'Europe/Oslo'), + ('Europe/Paris', 'Europe/Paris'), + ('Europe/Podgorica', 'Europe/Podgorica'), + ('Europe/Prague', 'Europe/Prague'), ('Europe/Riga', 'Europe/Riga'), + ('Europe/Rome', 'Europe/Rome'), ('Europe/Samara', 'Europe/Samara'), + ('Europe/San_Marino', 'Europe/San_Marino'), + ('Europe/Sarajevo', 'Europe/Sarajevo'), + ('Europe/Saratov', 'Europe/Saratov'), + ('Europe/Simferopol', 'Europe/Simferopol'), + ('Europe/Skopje', 'Europe/Skopje'), ('Europe/Sofia', 'Europe/Sofia'), + ('Europe/Stockholm', 'Europe/Stockholm'), + ('Europe/Tallinn', 'Europe/Tallinn'), + ('Europe/Tirane', 'Europe/Tirane'), + ('Europe/Tiraspol', 'Europe/Tiraspol'), + ('Europe/Ulyanovsk', 'Europe/Ulyanovsk'), + ('Europe/Uzhgorod', 'Europe/Uzhgorod'), + ('Europe/Vaduz', + 'Europe/Vaduz'), ('Europe/Vatican', 'Europe/Vatican'), + ('Europe/Vienna', 'Europe/Vienna'), + ('Europe/Vilnius', 'Europe/Vilnius'), + ('Europe/Volgograd', 'Europe/Volgograd'), + ('Europe/Warsaw', + 'Europe/Warsaw'), ('Europe/Zagreb', 'Europe/Zagreb'), + ('Europe/Zaporozhye', 'Europe/Zaporozhye'), + ('Europe/Zurich', 'Europe/Zurich'), ('GB', 'GB'), + ('GB-Eire', 'GB-Eire'), ('GMT', 'GMT'), ('GMT+0', 'GMT+0'), + ('GMT-0', 'GMT-0'), ('GMT0', 'GMT0'), ('Greenwich', 'Greenwich'), + ('HST', 'HST'), ('Hongkong', 'Hongkong'), ('Iceland', 'Iceland'), + ('Indian/Antananarivo', 'Indian/Antananarivo'), + ('Indian/Chagos', 'Indian/Chagos'), + ('Indian/Christmas', 'Indian/Christmas'), + ('Indian/Cocos', 'Indian/Cocos'), ('Indian/Comoro', 'Indian/Comoro'), + ('Indian/Kerguelen', 'Indian/Kerguelen'), + ('Indian/Mahe', + 'Indian/Mahe'), ('Indian/Maldives', 'Indian/Maldives'), + ('Indian/Mauritius', 'Indian/Mauritius'), + ('Indian/Mayotte', 'Indian/Mayotte'), + ('Indian/Reunion', 'Indian/Reunion'), ('Iran', 'Iran'), + ('Israel', 'Israel'), ('Jamaica', 'Jamaica'), ('Japan', 'Japan'), + ('Kwajalein', 'Kwajalein'), ('Libya', 'Libya'), ('MET', 'MET'), + ('MST', 'MST'), ('MST7MDT', 'MST7MDT'), + ('Mexico/BajaNorte', 'Mexico/BajaNorte'), + ('Mexico/BajaSur', 'Mexico/BajaSur'), + ('Mexico/General', 'Mexico/General'), ('NZ', 'NZ'), + ('NZ-CHAT', 'NZ-CHAT'), ('Navajo', 'Navajo'), ('PRC', 'PRC'), + ('PST8PDT', 'PST8PDT'), ('Pacific/Apia', 'Pacific/Apia'), + ('Pacific/Auckland', 'Pacific/Auckland'), + ('Pacific/Bougainville', 'Pacific/Bougainville'), + ('Pacific/Chatham', 'Pacific/Chatham'), + ('Pacific/Chuuk', 'Pacific/Chuuk'), + ('Pacific/Easter', 'Pacific/Easter'), + ('Pacific/Efate', 'Pacific/Efate'), + ('Pacific/Enderbury', 'Pacific/Enderbury'), + ('Pacific/Fakaofo', 'Pacific/Fakaofo'), + ('Pacific/Fiji', 'Pacific/Fiji'), + ('Pacific/Funafuti', 'Pacific/Funafuti'), + ('Pacific/Galapagos', 'Pacific/Galapagos'), + ('Pacific/Gambier', 'Pacific/Gambier'), + ('Pacific/Guadalcanal', 'Pacific/Guadalcanal'), + ('Pacific/Guam', 'Pacific/Guam'), + ('Pacific/Honolulu', 'Pacific/Honolulu'), + ('Pacific/Johnston', 'Pacific/Johnston'), + ('Pacific/Kiritimati', 'Pacific/Kiritimati'), + ('Pacific/Kosrae', 'Pacific/Kosrae'), + ('Pacific/Kwajalein', 'Pacific/Kwajalein'), + ('Pacific/Majuro', 'Pacific/Majuro'), + ('Pacific/Marquesas', 'Pacific/Marquesas'), + ('Pacific/Midway', 'Pacific/Midway'), + ('Pacific/Nauru', 'Pacific/Nauru'), ('Pacific/Niue', 'Pacific/Niue'), + ('Pacific/Norfolk', 'Pacific/Norfolk'), + ('Pacific/Noumea', 'Pacific/Noumea'), + ('Pacific/Pago_Pago', 'Pacific/Pago_Pago'), + ('Pacific/Palau', 'Pacific/Palau'), + ('Pacific/Pitcairn', 'Pacific/Pitcairn'), + ('Pacific/Pohnpei', 'Pacific/Pohnpei'), + ('Pacific/Ponape', 'Pacific/Ponape'), + ('Pacific/Port_Moresby', 'Pacific/Port_Moresby'), + ('Pacific/Rarotonga', 'Pacific/Rarotonga'), + ('Pacific/Saipan', 'Pacific/Saipan'), + ('Pacific/Samoa', 'Pacific/Samoa'), + ('Pacific/Tahiti', 'Pacific/Tahiti'), + ('Pacific/Tarawa', 'Pacific/Tarawa'), + ('Pacific/Tongatapu', 'Pacific/Tongatapu'), + ('Pacific/Truk', 'Pacific/Truk'), ('Pacific/Wake', 'Pacific/Wake'), + ('Pacific/Wallis', 'Pacific/Wallis'), ('Pacific/Yap', 'Pacific/Yap'), + ('Poland', 'Poland'), ('Portugal', 'Portugal'), ('ROC', 'ROC'), + ('ROK', 'ROK'), ('Singapore', 'Singapore'), ('Turkey', 'Turkey'), + ('UCT', 'UCT'), ('US/Alaska', 'US/Alaska'), + ('US/Aleutian', 'US/Aleutian'), ('US/Arizona', 'US/Arizona'), + ('US/Central', 'US/Central'), ('US/East-Indiana', 'US/East-Indiana'), + ('US/Eastern', 'US/Eastern'), ('US/Hawaii', 'US/Hawaii'), + ('US/Indiana-Starke', 'US/Indiana-Starke'), + ('US/Michigan', 'US/Michigan'), ('US/Mountain', 'US/Mountain'), + ('US/Pacific', 'US/Pacific'), ('US/Samoa', + 'US/Samoa'), ('UTC', 'UTC'), + ('Universal', 'Universal'), ('W-SU', 'W-SU'), ('WET', 'WET'), + ('Zulu', 'Zulu') + ], + default='UTC', + max_length=100 + ), + ), + ] diff --git a/src/newsreader/news/collection/migrations/0005_auto_20190521_1941.py b/src/newsreader/news/collection/migrations/0005_auto_20190521_1941.py new file mode 100644 index 0000000..e9ab3d4 --- /dev/null +++ b/src/newsreader/news/collection/migrations/0005_auto_20190521_1941.py @@ -0,0 +1,24 @@ +# Generated by Django 2.2 on 2019-05-21 19:41 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('collection', '0004_collectionrule_timezone'), + ] + + operations = [ + migrations.AddField( + model_name='collectionrule', + name='favicon', + field=models.ImageField(blank=True, null=True, upload_to=''), + ), + migrations.AddField( + model_name='collectionrule', + name='source', + field=models.CharField(default='source', max_length=100), + preserve_default=False, + ), + ] diff --git a/src/newsreader/news/collection/migrations/0006_collectionrule_error.py b/src/newsreader/news/collection/migrations/0006_collectionrule_error.py new file mode 100644 index 0000000..78843b1 --- /dev/null +++ b/src/newsreader/news/collection/migrations/0006_collectionrule_error.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2 on 2019-06-08 14:13 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('collection', '0005_auto_20190521_1941'), + ] + + operations = [ + migrations.AddField( + model_name='collectionrule', + name='error', + field=models.CharField(blank=True, max_length=255, null=True), + ), + ] diff --git a/src/newsreader/news/collection/migrations/__init__.py b/src/newsreader/news/collection/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/newsreader/news/collection/models.py b/src/newsreader/news/collection/models.py new file mode 100644 index 0000000..ffb4131 --- /dev/null +++ b/src/newsreader/news/collection/models.py @@ -0,0 +1,35 @@ +import pytz + +from django.db import models +from django.utils.translation import gettext as _ + + +class CollectionRule(models.Model): + name = models.CharField(max_length=100) + source = models.CharField(max_length=100) + + url = models.URLField() + favicon = models.ImageField(blank=True, null=True) + + timezone = models.CharField( + choices=((timezone, timezone) for timezone in pytz.all_timezones), + max_length=100, + default="UTC", + ) + + category = models.ForeignKey( + "posts.Category", + blank=True, + null=True, + verbose_name=_("Category"), + help_text=_("Posts from this rule will be tagged with this category"), + on_delete=models.SET_NULL + ) + + last_suceeded = models.DateTimeField(blank=True, null=True) + succeeded = models.BooleanField(default=False) + + error = models.CharField(max_length=255, blank=True, null=True) + + def __str__(self): + return self.name diff --git a/src/newsreader/news/collection/response_handler.py b/src/newsreader/news/collection/response_handler.py new file mode 100644 index 0000000..dc33190 --- /dev/null +++ b/src/newsreader/news/collection/response_handler.py @@ -0,0 +1,30 @@ +from newsreader.news.collection.exceptions import ( + StreamDeniedException, + StreamForbiddenException, + StreamNotFoundException, + StreamTimeOutException, +) + + +class ResponseHandler: + message_mapping = { + 404: StreamNotFoundException, + 401: StreamDeniedException, + 403: StreamForbiddenException, + 408: StreamTimeOutException, + } + + def __init__(self, response): + self.response = response + + def __enter__(self): + return self + + def handle_response(self): + status_code = self.response.status_code + + if status_code in self.message_mapping: + raise self.message_mapping[status_code] + + def __exit__(self, *args, **kwargs): + self.response = None diff --git a/src/newsreader/news/collection/tests/__init__.py b/src/newsreader/news/collection/tests/__init__.py new file mode 100644 index 0000000..fb6723f --- /dev/null +++ b/src/newsreader/news/collection/tests/__init__.py @@ -0,0 +1 @@ +from .feed import * diff --git a/src/newsreader/news/collection/tests/factories.py b/src/newsreader/news/collection/tests/factories.py new file mode 100644 index 0000000..6b42292 --- /dev/null +++ b/src/newsreader/news/collection/tests/factories.py @@ -0,0 +1,12 @@ +import factory + +from newsreader.news.collection.models import CollectionRule + + +class CollectionRuleFactory(factory.django.DjangoModelFactory): + class Meta: + model = CollectionRule + + name = factory.Sequence(lambda n: "CollectionRule-{}".format(n)) + source = factory.Faker("name") + url = factory.Faker("url") diff --git a/src/newsreader/news/collection/tests/feed/__init__.py b/src/newsreader/news/collection/tests/feed/__init__.py new file mode 100644 index 0000000..50cea54 --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/__init__.py @@ -0,0 +1,5 @@ +from .builder import * +from .client import * +from .collector import * +from .duplicate_handler import * +from .stream import * diff --git a/src/newsreader/news/collection/tests/feed/builder/__init__.py b/src/newsreader/news/collection/tests/feed/builder/__init__.py new file mode 100644 index 0000000..8baa6e5 --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/builder/__init__.py @@ -0,0 +1 @@ +from .tests import * diff --git a/src/newsreader/news/collection/tests/feed/builder/mock_html.py b/src/newsreader/news/collection/tests/feed/builder/mock_html.py new file mode 100644 index 0000000..788495f --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/builder/mock_html.py @@ -0,0 +1,10 @@ +html_summary = ''' + +
+This is clickbait
+" in post.body) diff --git a/src/newsreader/news/collection/tests/feed/client/__init__.py b/src/newsreader/news/collection/tests/feed/client/__init__.py new file mode 100644 index 0000000..8baa6e5 --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/client/__init__.py @@ -0,0 +1 @@ +from .tests import * diff --git a/src/newsreader/news/collection/tests/feed/client/mocks.py b/src/newsreader/news/collection/tests/feed/client/mocks.py new file mode 100644 index 0000000..5853eb7 --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/client/mocks.py @@ -0,0 +1,61 @@ +from time import struct_time + +simple_mock = { + 'bozo': 0, + 'encoding': 'utf-8', + 'entries': [{ + 'guidislink': False, + 'href': '', + 'id': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'link': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'media_thumbnail': [{ + 'height': '1152', + 'url': 'http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg', + 'width': '2048' + }], + 'published': 'Mon, 20 May 2019 16:07:37 GMT', + 'published_parsed': struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), + 'summary': 'Foreign Minister Mohammad Javad Zarif says the US ' + 'president should try showing Iranians some respect.', + 'summary_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/html', + 'value': 'Foreign Minister Mohammad Javad ' + 'Zarif says the US president should ' + 'try showing Iranians some ' + 'respect.' + }, + 'title': "Trump's 'genocidal taunts' will not end Iran - Zarif", + 'title_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/plain', + 'value': "Trump's 'genocidal taunts' will not " + 'end Iran - Zarif' + } + }], + 'feed': { + 'image': { + 'href': 'https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif', + 'link': 'https://www.bbc.co.uk/news/', + 'title': 'BBC News - Home', + 'language': 'en-gb', + 'link': 'https://www.bbc.co.uk/news/' + }, + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'title': 'BBC News - Home', + }, + 'href': 'http://feeds.bbci.co.uk/news/rss.xml', + 'status': 200, + 'version': 'rss20' +} diff --git a/src/newsreader/news/collection/tests/feed/client/tests.py b/src/newsreader/news/collection/tests/feed/client/tests.py new file mode 100644 index 0000000..0bb4cdd --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/client/tests.py @@ -0,0 +1,90 @@ +from unittest.mock import MagicMock, patch + +from django.test import TestCase +from django.utils import timezone + +from newsreader.news.collection.exceptions import ( + StreamDeniedException, + StreamException, + StreamFieldException, + StreamNotFoundException, + StreamTimeOutException, +) +from newsreader.news.collection.feed import FeedClient +from newsreader.news.collection.tests.factories import CollectionRuleFactory +from newsreader.news.collection.tests.feed.client.mocks import simple_mock + + +class FeedClientTestCase(TestCase): + def setUp(self): + self.patched_read = patch( + 'newsreader.news.collection.feed.FeedStream.read' + ) + self.mocked_read = self.patched_read.start() + + def tearDown(self): + patch.stopall() + + def test_client_retrieves_single_rules(self): + rule = CollectionRuleFactory.create() + mock_stream = MagicMock(rule=rule) + self.mocked_read.return_value = (simple_mock, mock_stream) + + with FeedClient([rule]) as client: + for data, stream in client: + self.assertEquals(data, simple_mock) + self.assertEquals(stream, mock_stream) + + self.mocked_read.assert_called_once_with() + + def test_client_catches_stream_exception(self): + rule = CollectionRuleFactory.create() + mock_stream = MagicMock(rule=rule) + self.mocked_read.side_effect = StreamException("Stream exception") + + with FeedClient([rule]) as client: + for data, stream in client: + self.assertEquals(data, {"entries": []}) + self.assertEquals(stream.rule.error, "Stream exception") + self.assertEquals(stream.rule.succeeded, False) + + self.mocked_read.assert_called_once_with() + + def test_client_catches_stream_not_found_exception(self): + rule = CollectionRuleFactory.create() + mock_stream = MagicMock(rule=rule) + self.mocked_read.side_effect = StreamNotFoundException("Stream not found") + + with FeedClient([rule]) as client: + for data, stream in client: + self.assertEquals(data, {"entries": []}) + self.assertEquals(stream.rule.error, "Stream not found") + self.assertEquals(stream.rule.succeeded, False) + + self.mocked_read.assert_called_once_with() + + def test_client_catches_stream_denied_exception(self): + rule = CollectionRuleFactory.create() + mock_stream = MagicMock(rule=rule) + self.mocked_read.side_effect = StreamDeniedException("Stream denied") + + with FeedClient([rule]) as client: + for data, stream in client: + self.assertEquals(data, {"entries": []}) + self.assertEquals(stream.rule.error, "Stream denied") + self.assertEquals(stream.rule.succeeded, False) + + self.mocked_read.assert_called_once_with() + + def test_client_catches_stream_timed_out(self): + rule = CollectionRuleFactory.create() + mock_stream = MagicMock(rule=rule) + self.mocked_read.side_effect = StreamTimeOutException("Stream timed out") + + with FeedClient([rule]) as client: + for data, stream in client: + self.assertEquals(data, {"entries": []}) + self.assertEquals(stream.rule.error, "Stream timed out") + self.assertEquals(stream.rule.succeeded, False) + + self.mocked_read.assert_called_once_with() diff --git a/src/newsreader/news/collection/tests/feed/collector/__init__.py b/src/newsreader/news/collection/tests/feed/collector/__init__.py new file mode 100644 index 0000000..8baa6e5 --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/collector/__init__.py @@ -0,0 +1 @@ +from .tests import * diff --git a/src/newsreader/news/collection/tests/feed/collector/mocks.py b/src/newsreader/news/collection/tests/feed/collector/mocks.py new file mode 100644 index 0000000..930e977 --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/collector/mocks.py @@ -0,0 +1,430 @@ +from time import struct_time + +multiple_mock = { + 'bozo': 0, + 'encoding': 'utf-8', + 'entries': [ + { + 'guidislink': False, + 'href': '', + 'id': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'link': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'media_thumbnail': [{ + 'height': '1152', + 'url': 'http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg', + 'width': '2048' + }], + 'published': 'Mon, 20 May 2019 16:07:37 GMT', + 'published_parsed': struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), + 'summary': 'Foreign Minister Mohammad Javad Zarif says the US ' + 'president should try showing Iranians some respect.', + 'summary_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/html', + 'value': 'Foreign Minister Mohammad Javad ' + 'Zarif says the US president should ' + 'try showing Iranians some ' + 'respect.' + }, + 'title': "Trump's 'genocidal taunts' will not end Iran - Zarif", + 'title_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/plain', + 'value': "Trump's 'genocidal taunts' will not " + 'end Iran - Zarif' + } + }, + { + 'guidislink': False, + 'href': '', + 'id': 'https://www.bbc.co.uk/news/technology-48334739', + 'link': 'https://www.bbc.co.uk/news/technology-48334739', + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/technology-48334739', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'media_thumbnail': [{ + 'height': '432', + 'url': 'http://c.files.bbci.co.uk/4789/production/_107031381_mediaitem107028670.jpg', + 'width': '768' + }], + 'published': 'Mon, 20 May 2019 12:19:19 GMT', + 'published_parsed': struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0,)), + 'summary': "Google's move to end business ties with Huawei will " + 'affect current devices and future purchases.', + 'summary_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/html', + 'value': "Google's move to end business ties " + 'with Huawei will affect current ' + 'devices and future purchases.' + }, + 'title': "Huawei's Android loss: How it affects you", + 'title_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/plain', + 'value': "Huawei's Android loss: How it " + 'affects you' + } + }, + { + 'guidislink': False, + 'href': '', + 'id': 'https://www.bbc.co.uk/news/uk-england-birmingham-48339080', + 'link': 'https://www.bbc.co.uk/news/uk-england-birmingham-48339080', + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/uk-england-birmingham-48339080', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'media_thumbnail': [{ + 'height': '549', + 'url': 'http://c.files.bbci.co.uk/11D67/production/_107036037_lgbtheadjpg.jpg', + 'width': '976' + }], + 'published': 'Mon, 20 May 2019 16:32:38 GMT', + 'published_parsed': struct_time((2019, 5, 20, 16, 32, 38, 0, 140, 0)), + 'summary': 'Police are investigating the messages while an MP ' + 'calls for a protest exclusion zone "to protect ' + 'children".', + 'summary_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/html', + 'value': 'Police are investigating the ' + 'messages while an MP calls for a ' + 'protest exclusion zone "to protect ' + 'children".' + }, + 'title': 'Birmingham head teacher threatened over LGBT lessons', + 'title_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/plain', + 'value': 'Birmingham head teacher threatened ' + 'over LGBT lessons' + } + }, + ], + 'feed': { + 'image': { + 'href': 'https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif', + 'link': 'https://www.bbc.co.uk/news/', + 'title': 'BBC News - Home', + 'language': 'en-gb', + 'link': 'https://www.bbc.co.uk/news/' + }, + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'title': 'BBC News - Home', + }, + 'href': 'http://feeds.bbci.co.uk/news/rss.xml', + 'status': 200, + 'version': 'rss20' +} + +empty_mock = { + 'bozo': 0, + 'encoding': 'utf-8', + 'entries': [], + 'feed': { + 'image': { + 'href': 'https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif', + 'link': 'https://www.bbc.co.uk/news/', + 'title': 'BBC News - Home', + 'language': 'en-gb', + 'link': 'https://www.bbc.co.uk/news/' + }, + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'title': 'BBC News - Home', + }, + 'href': 'http://feeds.bbci.co.uk/news/rss.xml', + 'status': 200, + 'version': 'rss20' +} + +duplicate_mock = { + 'bozo': 0, + 'encoding': 'utf-8', + 'entries': [ + { + 'guidislink': False, + 'href': '', + 'link': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'media_thumbnail': [{ + 'height': '1152', + 'url': 'http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg', + 'width': '2048' + }], + 'published': 'Mon, 20 May 2019 16:07:37 GMT', + 'published_parsed': struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), + 'summary': 'Foreign Minister Mohammad Javad Zarif says the US ' + 'president should try showing Iranians some respect.', + 'summary_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/html', + 'value': 'Foreign Minister Mohammad Javad ' + 'Zarif says the US president should ' + 'try showing Iranians some ' + 'respect.' + }, + 'title': "Trump's 'genocidal taunts' will not end Iran - Zarif", + 'title_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/plain', + 'value': "Trump's 'genocidal taunts' will not " + 'end Iran - Zarif' + } + }, + { + 'guidislink': False, + 'href': '', + 'link': 'https://www.bbc.co.uk/news/technology-48334739', + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/technology-48334739', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'media_thumbnail': [{ + 'height': '432', + 'url': 'http://c.files.bbci.co.uk/4789/production/_107031381_mediaitem107028670.jpg', + 'width': '768' + }], + 'published': 'Mon, 20 May 2019 12:19:19 GMT', + 'published_parsed': struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0,)), + 'summary': "Google's move to end business ties with Huawei will " + 'affect current devices and future purchases.', + 'summary_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/html', + 'value': "Google's move to end business ties " + 'with Huawei will affect current ' + 'devices and future purchases.' + }, + 'title': "Huawei's Android loss: How it affects you", + 'title_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/plain', + 'value': "Huawei's Android loss: How it " + 'affects you' + } + }, + { + 'guidislink': False, + 'href': '', + 'link': 'https://www.bbc.co.uk/news/uk-england-birmingham-48339080', + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/uk-england-birmingham-48339080', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'media_thumbnail': [{ + 'height': '549', + 'url': 'http://c.files.bbci.co.uk/11D67/production/_107036037_lgbtheadjpg.jpg', + 'width': '976' + }], + 'published': 'Mon, 20 May 2019 16:32:38 GMT', + 'published_parsed': struct_time((2019, 5, 20, 16, 32, 38, 0, 140, 0)), + 'summary': 'Police are investigating the messages while an MP ' + 'calls for a protest exclusion zone "to protect ' + 'children".', + 'summary_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/html', + 'value': 'Police are investigating the ' + 'messages while an MP calls for a ' + 'protest exclusion zone "to protect ' + 'children".' + }, + 'title': 'Birmingham head teacher threatened over LGBT lessons', + 'title_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/plain', + 'value': 'Birmingham head teacher threatened ' + 'over LGBT lessons' + } + }, + ], + 'feed': { + 'image': { + 'href': 'https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif', + 'link': 'https://www.bbc.co.uk/news/', + 'title': 'BBC News - Home', + 'language': 'en-gb', + 'link': 'https://www.bbc.co.uk/news/' + }, + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'title': 'BBC News - Home', + }, + 'href': 'http://feeds.bbci.co.uk/news/rss.xml', + 'status': 200, + 'version': 'rss20' +} + +multiple_update_mock = { + 'bozo': 0, + 'encoding': 'utf-8', + 'entries': [ + { + 'guidislink': False, + 'href': '', + 'id': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'link': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'media_thumbnail': [{ + 'height': '1152', + 'url': 'http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg', + 'width': '2048' + }], + 'published': 'Mon, 20 May 2019 16:07:37 GMT', + 'published_parsed': struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), + 'summary': 'Foreign Minister Mohammad Javad Zarif says the US ' + 'president should try showing Iranians some respect.', + 'summary_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/html', + 'value': 'Foreign Minister Mohammad Javad ' + 'Zarif says the US president should ' + 'try showing Iranians some ' + 'respect.' + }, + 'title': "Trump's 'genocidal taunts' will not end Iran - Zarif", + 'title_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/plain', + 'value': "Trump's 'genocidal taunts' will not " + 'end Iran - Zarif' + } + }, + { + 'guidislink': False, + 'href': '', + 'id': 'https://www.bbc.co.uk/news/technology-48334739', + 'link': 'https://www.bbc.co.uk/news/technology-48334739', + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/technology-48334739', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'media_thumbnail': [{ + 'height': '432', + 'url': 'http://c.files.bbci.co.uk/4789/production/_107031381_mediaitem107028670.jpg', + 'width': '768' + }], + 'published': 'Mon, 20 May 2019 12:19:19 GMT', + 'published_parsed': struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0,)), + 'summary': "Google's move to end business ties with Huawei will " + 'affect current devices and future purchases.', + 'summary_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/html', + 'value': "Google's move to end business ties " + 'with Huawei will affect current ' + 'devices and future purchases.' + }, + 'title': "Huawei's Android loss: How it affects you", + 'title_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/plain', + 'value': "Huawei's Android loss: How it " + 'affects you' + } + }, + { + 'guidislink': False, + 'href': '', + 'id': 'https://www.bbc.co.uk/news/uk-england-birmingham-48339080', + 'link': 'https://www.bbc.co.uk/news/uk-england-birmingham-48339080', + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/uk-england-birmingham-48339080', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'media_thumbnail': [{ + 'height': '549', + 'url': 'http://c.files.bbci.co.uk/11D67/production/_107036037_lgbtheadjpg.jpg', + 'width': '976' + }], + 'published': 'Mon, 20 May 2019 16:32:38 GMT', + 'published_parsed': struct_time((2019, 5, 20, 16, 32, 38, 0, 140, 0)), + 'summary': 'Police are investigating the messages while an MP ' + 'calls for a protest exclusion zone "to protect ' + 'children".', + 'summary_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/html', + 'value': 'Police are investigating the ' + 'messages while an MP calls for a ' + 'protest exclusion zone "to protect ' + 'children".' + }, + 'title': 'Birmingham head teacher threatened over LGBT lessons', + 'title_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/plain', + 'value': 'Birmingham head teacher threatened ' + 'over LGBT lessons' + } + }, + ], + 'feed': { + 'image': { + 'href': 'https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif', + 'link': 'https://www.bbc.co.uk/news/', + 'title': 'BBC News - Home', + 'language': 'en-gb', + 'link': 'https://www.bbc.co.uk/news/' + }, + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'title': 'BBC News - Home', + }, + 'href': 'http://feeds.bbci.co.uk/news/rss.xml', + 'status': 200, + 'version': 'rss20' +} diff --git a/src/newsreader/news/collection/tests/feed/collector/tests.py b/src/newsreader/news/collection/tests/feed/collector/tests.py new file mode 100644 index 0000000..db95ccb --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/collector/tests.py @@ -0,0 +1,251 @@ +from datetime import date, datetime, time +from time import struct_time +from unittest.mock import MagicMock, patch + +import pytz + +from freezegun import freeze_time + +from django.test import TestCase +from django.utils import timezone + +from newsreader.news.collection.feed import FeedCollector +from newsreader.news.collection.tests.factories import CollectionRuleFactory +from newsreader.news.collection.tests.feed.collector.mocks import ( + duplicate_mock, + empty_mock, + multiple_mock, + multiple_update_mock, +) +from newsreader.news.collection.utils import build_publication_date +from newsreader.news.posts.models import Post +from newsreader.news.posts.tests.factories import PostFactory + + +class FeedCollectorTestCase(TestCase): + def setUp(self): + self.patched_get = patch( + 'newsreader.news.collection.feed.requests.get' + ) + self.mocked_get = self.patched_get.start() + + self.patched_parse = patch( + 'newsreader.news.collection.feed.FeedStream.parse' + ) + self.mocked_parse = self.patched_parse.start() + + def tearDown(self): + patch.stopall() + + @freeze_time("2019-10-30 12:30:00") + def test_simple_batch(self): + self.mocked_parse.return_value = multiple_mock + rule = CollectionRuleFactory() + + collector = FeedCollector() + collector.collect() + + rule.refresh_from_db() + + self.assertEquals(Post.objects.count(), 3) + self.assertEquals(rule.succeeded, True) + self.assertEquals(rule.last_suceeded, timezone.now()) + self.assertEquals(rule.error, None) + + @freeze_time("2019-10-30 12:30:00") + def test_emtpy_batch(self): + self.mocked_get.return_value = MagicMock(status_code=200) + self.mocked_parse.return_value = empty_mock + rule = CollectionRuleFactory() + + collector = FeedCollector() + collector.collect() + + rule.refresh_from_db() + + self.assertEquals(Post.objects.count(), 0) + self.assertEquals(rule.succeeded, True) + self.assertEquals(rule.error, None) + self.assertEquals(rule.last_suceeded, timezone.now()) + + def test_not_found(self): + self.mocked_get.return_value = MagicMock(status_code=404) + rule = CollectionRuleFactory() + + collector = FeedCollector() + collector.collect() + + rule.refresh_from_db() + + self.assertEquals(Post.objects.count(), 0) + self.assertEquals(rule.succeeded, False) + self.assertEquals(rule.error, "Stream not found") + + def test_denied(self): + self.mocked_get.return_value = MagicMock(status_code=404) + last_suceeded = timezone.make_aware( + datetime.combine(date=date(2019, 10, 30), time=time(12, 30)) + ) + rule = CollectionRuleFactory(last_suceeded=last_suceeded) + + collector = FeedCollector() + collector.collect() + + rule.refresh_from_db() + + self.assertEquals(Post.objects.count(), 0) + self.assertEquals(rule.succeeded, False) + self.assertEquals(rule.error, "Stream not found") + self.assertEquals(rule.last_suceeded, last_suceeded) + + def test_forbidden(self): + self.mocked_get.return_value = MagicMock(status_code=403) + last_suceeded = timezone.make_aware( + datetime.combine(date=date(2019, 10, 30), time=time(12, 30)) + ) + rule = CollectionRuleFactory(last_suceeded=last_suceeded) + + collector = FeedCollector() + collector.collect() + + rule.refresh_from_db() + + self.assertEquals(Post.objects.count(), 0) + self.assertEquals(rule.succeeded, False) + self.assertEquals(rule.error, "Stream forbidden") + self.assertEquals(rule.last_suceeded, last_suceeded) + + def test_timed_out(self): + self.mocked_get.return_value = MagicMock(status_code=408) + last_suceeded = timezone.make_aware( + datetime.combine(date=date(2019, 10, 30), time=time(12, 30)) + ) + rule = CollectionRuleFactory(last_suceeded=last_suceeded) + + collector = FeedCollector() + collector.collect() + + rule.refresh_from_db() + + self.assertEquals(Post.objects.count(), 0) + self.assertEquals(rule.succeeded, False) + self.assertEquals(rule.error, "Stream timed out") + self.assertEquals(rule.last_suceeded, last_suceeded) + + @freeze_time("2019-10-30 12:30:00") + def test_duplicates(self): + self.mocked_parse.return_value = duplicate_mock + rule = CollectionRuleFactory() + + _, aware_datetime = build_publication_date( + struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), + pytz.utc + ) + + first_post = PostFactory( + url="https://www.bbc.co.uk/news/world-us-canada-48338168", + title="Trump's 'genocidal taunts' will not end Iran - Zarif", + body="Foreign Minister Mohammad Javad Zarif says the US " + "president should try showing Iranians some respect.", + publication_date=aware_datetime, + rule=rule + ) + + _, aware_datetime = build_publication_date( + struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0,)), + pytz.utc + ) + + second_post = PostFactory( + url="https://www.bbc.co.uk/news/technology-48334739", + title="Huawei's Android loss: How it affects you", + body="Google's move to end business ties with Huawei will " + "affect current devices and future purchases.", + publication_date=aware_datetime, + rule=rule + ) + + _, aware_datetime = build_publication_date( + struct_time((2019, 5, 20, 16, 32, 38, 0, 140, 0)), + pytz.utc + ) + + third_post = PostFactory( + url="https://www.bbc.co.uk/news/uk-england-birmingham-48339080", + title="Birmingham head teacher threatened over LGBT lessons", + body="Police are investigating the messages while an MP " + "calls for a protest exclusion zone \"to protect " + "children\".", + publication_date=aware_datetime, + rule=rule + ) + + collector = FeedCollector() + collector.collect(rules=[rule]) + + rule.refresh_from_db() + + self.assertEquals(Post.objects.count(), 3) + self.assertEquals(rule.succeeded, True) + self.assertEquals(rule.last_suceeded, timezone.now()) + self.assertEquals(rule.error, None) + + @freeze_time("2019-02-22 12:30:00") + def test_items_with_identifiers_get_updated(self): + self.mocked_parse.return_value = multiple_update_mock + rule = CollectionRuleFactory() + + first_post = PostFactory( + remote_identifier="https://www.bbc.co.uk/news/world-us-canada-48338168", + url="https://www.bbc.co.uk/", + title="Trump", + body="Foreign Minister Mohammad Javad Zarif", + publication_date=timezone.now(), + rule=rule + ) + + second_post = PostFactory( + remote_identifier="https://www.bbc.co.uk/news/technology-48334739", + url="https://www.bbc.co.uk/", + title="Huawei's Android loss: How it affects you", + body="Google's move to end business ties with Huawei will", + publication_date=timezone.now(), + rule=rule + ) + + third_post = PostFactory( + remote_identifier="https://www.bbc.co.uk/news/uk-england-birmingham-48339080", + url="https://www.bbc.co.uk/news/uk-england-birmingham-48339080", + title="Birmingham head teacher threatened over LGBT lessons", + body="Police are investigating the messages while an MP", + publication_date=timezone.now(), + rule=rule + ) + + collector = FeedCollector() + collector.collect(rules=[rule]) + + rule.refresh_from_db() + first_post.refresh_from_db() + second_post.refresh_from_db() + third_post.refresh_from_db() + + self.assertEquals(Post.objects.count(), 3) + self.assertEquals(rule.succeeded, True) + self.assertEquals(rule.last_suceeded, timezone.now()) + self.assertEquals(rule.error, None) + + self.assertEquals( + first_post.title, + "Trump's 'genocidal taunts' will not end Iran - Zarif" + ) + + self.assertEquals( + second_post.title, + "Huawei's Android loss: How it affects you" + ) + + self.assertEquals( + third_post.title, + 'Birmingham head teacher threatened over LGBT lessons' + ) diff --git a/src/newsreader/news/collection/tests/feed/duplicate_handler/__init__.py b/src/newsreader/news/collection/tests/feed/duplicate_handler/__init__.py new file mode 100644 index 0000000..8baa6e5 --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/duplicate_handler/__init__.py @@ -0,0 +1 @@ +from .tests import * diff --git a/src/newsreader/news/collection/tests/feed/duplicate_handler/tests.py b/src/newsreader/news/collection/tests/feed/duplicate_handler/tests.py new file mode 100644 index 0000000..a8600af --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/duplicate_handler/tests.py @@ -0,0 +1,63 @@ +from django.test import TestCase +from django.utils import timezone + +from newsreader.news.collection.feed import FeedDuplicateHandler +from newsreader.news.collection.tests.factories import CollectionRuleFactory +from newsreader.news.posts.models import Post +from newsreader.news.posts.tests.factories import PostFactory + + +class FeedDuplicateHandlerTestCase(TestCase): + def setUp(self): + pass + + def test_duplicate_entries_with_remote_identifiers(self): + rule = CollectionRuleFactory() + existing_post = PostFactory.create( + remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7", rule=rule + ) + new_post = PostFactory.build( + remote_identifier="28f79ae4-8f9a-11e9-b143-00163ef6bee7", + title="title got updated", + rule=rule + ) + + with FeedDuplicateHandler(rule) as duplicate_handler: + posts_gen = duplicate_handler.check([new_post]) + posts = list(posts_gen) + + post = posts[0] + + self.assertEquals(len(posts), 1) + self.assertEquals(post.publication_date, new_post.publication_date) + self.assertTrue(post.publication_date != existing_post.publication_date) + self.assertTrue(post.title != existing_post.title) + + def test_duplicate_entries_in_recent_database(self): + PostFactory.create_batch(size=20) + + publication_date = timezone.now() + + rule = CollectionRuleFactory() + existing_post = PostFactory.create( + url="https://www.bbc.co.uk/news/uk-england-birmingham-48339080", + title="Birmingham head teacher threatened over LGBT lessons", + body="Google's move to end business ties with Huawei will affect current devices", + publication_date=publication_date, + remote_identifier=None, + rule=rule + ) + new_post = PostFactory.build( + url="https://www.bbc.co.uk/news/uk-england-birmingham-48339080", + title="Birmingham head teacher threatened over LGBT lessons", + body="Google's move to end business ties with Huawei will affect current devices", + publication_date=publication_date, + remote_identifier=None, + rule=rule + ) + + with FeedDuplicateHandler(rule) as duplicate_handler: + posts_gen = duplicate_handler.check([new_post]) + posts = list(posts_gen) + + self.assertEquals(len(posts), 0) diff --git a/src/newsreader/news/collection/tests/feed/stream/__init__.py b/src/newsreader/news/collection/tests/feed/stream/__init__.py new file mode 100644 index 0000000..8baa6e5 --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/stream/__init__.py @@ -0,0 +1 @@ +from .tests import * diff --git a/src/newsreader/news/collection/tests/feed/stream/mocks.py b/src/newsreader/news/collection/tests/feed/stream/mocks.py new file mode 100644 index 0000000..5853eb7 --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/stream/mocks.py @@ -0,0 +1,61 @@ +from time import struct_time + +simple_mock = { + 'bozo': 0, + 'encoding': 'utf-8', + 'entries': [{ + 'guidislink': False, + 'href': '', + 'id': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'link': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/world-us-canada-48338168', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'media_thumbnail': [{ + 'height': '1152', + 'url': 'http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg', + 'width': '2048' + }], + 'published': 'Mon, 20 May 2019 16:07:37 GMT', + 'published_parsed': struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), + 'summary': 'Foreign Minister Mohammad Javad Zarif says the US ' + 'president should try showing Iranians some respect.', + 'summary_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/html', + 'value': 'Foreign Minister Mohammad Javad ' + 'Zarif says the US president should ' + 'try showing Iranians some ' + 'respect.' + }, + 'title': "Trump's 'genocidal taunts' will not end Iran - Zarif", + 'title_detail': { + 'base': 'http://feeds.bbci.co.uk/news/rss.xml', + 'language': None, + 'type': 'text/plain', + 'value': "Trump's 'genocidal taunts' will not " + 'end Iran - Zarif' + } + }], + 'feed': { + 'image': { + 'href': 'https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif', + 'link': 'https://www.bbc.co.uk/news/', + 'title': 'BBC News - Home', + 'language': 'en-gb', + 'link': 'https://www.bbc.co.uk/news/' + }, + 'links': [{ + 'href': 'https://www.bbc.co.uk/news/', + 'rel': 'alternate', + 'type': 'text/html' + }], + 'title': 'BBC News - Home', + }, + 'href': 'http://feeds.bbci.co.uk/news/rss.xml', + 'status': 200, + 'version': 'rss20' +} diff --git a/src/newsreader/news/collection/tests/feed/stream/tests.py b/src/newsreader/news/collection/tests/feed/stream/tests.py new file mode 100644 index 0000000..6e15194 --- /dev/null +++ b/src/newsreader/news/collection/tests/feed/stream/tests.py @@ -0,0 +1,109 @@ +from unittest.mock import MagicMock, patch + +from django.test import TestCase +from django.utils import timezone + +from newsreader.news.collection.exceptions import ( + StreamDeniedException, + StreamException, + StreamForbiddenException, + StreamNotFoundException, + StreamParseException, + StreamTimeOutException, +) +from newsreader.news.collection.feed import FeedStream +from newsreader.news.collection.tests.factories import CollectionRuleFactory +from newsreader.news.collection.tests.feed.stream.mocks import simple_mock + + +class FeedStreamTestCase(TestCase): + def setUp(self): + self.patched_get = patch( + 'newsreader.news.collection.feed.requests.get' + ) + self.mocked_get = self.patched_get.start() + + self.patched_parse = patch( + 'newsreader.news.collection.feed.FeedStream.parse' + ) + self.mocked_parse = self.patched_parse.start() + + def tearDown(self): + patch.stopall() + + def test_simple_stream(self): + self.mocked_parse.return_value = simple_mock + + rule = CollectionRuleFactory() + stream = FeedStream(rule) + return_value = stream.read() + + self.mocked_get.assert_called_once_with(rule.url) + self.assertEquals(return_value, (simple_mock, stream)) + + def test_stream_raises_exception(self): + self.mocked_parse.side_effect = StreamException + + rule = CollectionRuleFactory() + stream = FeedStream(rule) + + with self.assertRaises(StreamException): + stream.read() + + self.mocked_get.assert_called_once_with(rule.url) + + def test_stream_raises_denied_exception(self): + self.mocked_get.return_value = MagicMock(status_code=401) + + rule = CollectionRuleFactory() + stream = FeedStream(rule) + + with self.assertRaises(StreamDeniedException): + stream.read() + + self.mocked_get.assert_called_once_with(rule.url) + + def test_stream_raises_not_found_exception(self): + self.mocked_get.return_value = MagicMock(status_code=404) + + rule = CollectionRuleFactory() + stream = FeedStream(rule) + + with self.assertRaises(StreamNotFoundException): + stream.read() + + self.mocked_get.assert_called_once_with(rule.url) + + def test_stream_raises_time_out_exception(self): + self.mocked_get.return_value = MagicMock(status_code=408) + + rule = CollectionRuleFactory() + stream = FeedStream(rule) + + with self.assertRaises(StreamTimeOutException): + stream.read() + + self.mocked_get.assert_called_once_with(rule.url) + + def test_stream_raises_forbidden_exception(self): + self.mocked_get.return_value = MagicMock(status_code=403) + + rule = CollectionRuleFactory() + stream = FeedStream(rule) + + with self.assertRaises(StreamForbiddenException): + stream.read() + + self.mocked_get.assert_called_once_with(rule.url) + + @patch("newsreader.news.collection.feed.parse") + def test_stream_raises_parse_exception(self, mocked_parse): + self.mocked_get.return_value = MagicMock(status_code=200) + mocked_parse.side_effect = TypeError + self.patched_parse.stop() + + rule = CollectionRuleFactory() + stream = FeedStream(rule) + + with self.assertRaises(StreamParseException): + stream.read() diff --git a/src/newsreader/news/collection/utils.py b/src/newsreader/news/collection/utils.py new file mode 100644 index 0000000..6a80ed7 --- /dev/null +++ b/src/newsreader/news/collection/utils.py @@ -0,0 +1,13 @@ +from datetime import datetime +from time import mktime + +from django.utils import timezone + + +def build_publication_date(dt, tz): + try: + naive_datetime = datetime.fromtimestamp(mktime(dt)) + published_parsed = timezone.make_aware(naive_datetime, timezone=tz) + except TypeError: + return False, None + return True, published_parsed diff --git a/src/newsreader/news/collection/views.py b/src/newsreader/news/collection/views.py new file mode 100644 index 0000000..91ea44a --- /dev/null +++ b/src/newsreader/news/collection/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. diff --git a/src/newsreader/news/posts/__init__.py b/src/newsreader/news/posts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/newsreader/news/posts/admin.py b/src/newsreader/news/posts/admin.py new file mode 100644 index 0000000..2ba7c81 --- /dev/null +++ b/src/newsreader/news/posts/admin.py @@ -0,0 +1,39 @@ +from django.contrib import admin + +from newsreader.news.posts.models import Category, Post + + +class PostAdmin(admin.ModelAdmin): + list_display = ( + "publication_date", + "author", + "rule", + "title", + ) + list_display_links = ("title", ) + list_filter = ("rule", ) + + ordering = ("-publication_date", "title") + + fields = ( + "title", + "body", + "author", + "publication_date", + "url", + "remote_identifier", + "category", + ) + + search_fields = ["title"] + + def rule(self, obj): + return obj.rule + + +class CategoryAdmin(admin.ModelAdmin): + pass + + +admin.site.register(Post, PostAdmin) +admin.site.register(Category, CategoryAdmin) diff --git a/src/newsreader/news/posts/apps.py b/src/newsreader/news/posts/apps.py new file mode 100644 index 0000000..2c2b982 --- /dev/null +++ b/src/newsreader/news/posts/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + + +class PostsConfig(AppConfig): + name = 'posts' diff --git a/src/newsreader/news/posts/migrations/0001_initial.py b/src/newsreader/news/posts/migrations/0001_initial.py new file mode 100644 index 0000000..36666b3 --- /dev/null +++ b/src/newsreader/news/posts/migrations/0001_initial.py @@ -0,0 +1,78 @@ +# Generated by Django 2.2 on 2019-04-10 20:10 + +import django.db.models.deletion + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('collection', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='Category', + fields=[ + ( + 'id', + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name='ID' + ) + ), + ('created', models.DateTimeField(auto_now_add=True)), + ('modified', models.DateTimeField(auto_now=True)), + ('name', models.CharField(max_length=50)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='Post', + fields=[ + ( + 'id', + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name='ID' + ) + ), + ('created', models.DateTimeField(auto_now_add=True)), + ('modified', models.DateTimeField(auto_now=True)), + ('title', models.CharField(max_length=200)), + ('body', models.TextField()), + ('source', models.CharField(max_length=200)), + ('publication_date', models.DateTimeField()), + ('url', models.URLField()), + ('remote_identifier', models.CharField(max_length=500)), + ( + 'category', + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.PROTECT, + to='posts.Category' + ) + ), + ( + 'rule', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to='collection.CollectionRule' + ) + ), + ], + options={ + 'abstract': False, + }, + ), + ] diff --git a/src/newsreader/news/posts/migrations/0002_auto_20190520_2206.py b/src/newsreader/news/posts/migrations/0002_auto_20190520_2206.py new file mode 100644 index 0000000..cb86d51 --- /dev/null +++ b/src/newsreader/news/posts/migrations/0002_auto_20190520_2206.py @@ -0,0 +1,20 @@ +# Generated by Django 2.2 on 2019-05-20 20:06 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('posts', '0001_initial'), + ] + + operations = [ + migrations.AlterModelOptions( + name='category', + options={ + 'verbose_name': 'Category', + 'verbose_name_plural': 'Categories' + }, + ), + ] diff --git a/src/newsreader/news/posts/migrations/0003_auto_20190520_2031.py b/src/newsreader/news/posts/migrations/0003_auto_20190520_2031.py new file mode 100644 index 0000000..a790477 --- /dev/null +++ b/src/newsreader/news/posts/migrations/0003_auto_20190520_2031.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2 on 2019-05-20 20:31 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('posts', '0002_auto_20190520_2206'), + ] + + operations = [ + migrations.AlterField( + model_name='category', + name='name', + field=models.CharField(max_length=50, unique=True), + ), + ] diff --git a/src/newsreader/news/posts/migrations/0004_auto_20190521_1941.py b/src/newsreader/news/posts/migrations/0004_auto_20190521_1941.py new file mode 100644 index 0000000..a14c636 --- /dev/null +++ b/src/newsreader/news/posts/migrations/0004_auto_20190521_1941.py @@ -0,0 +1,22 @@ +# Generated by Django 2.2 on 2019-05-21 19:41 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('posts', '0003_auto_20190520_2031'), + ] + + operations = [ + migrations.RemoveField( + model_name='post', + name='source', + ), + migrations.AddField( + model_name='post', + name='author', + field=models.CharField(blank=True, max_length=100, null=True), + ), + ] diff --git a/src/newsreader/news/posts/migrations/0005_auto_20190608_1054.py b/src/newsreader/news/posts/migrations/0005_auto_20190608_1054.py new file mode 100644 index 0000000..96c9d8c --- /dev/null +++ b/src/newsreader/news/posts/migrations/0005_auto_20190608_1054.py @@ -0,0 +1,23 @@ +# Generated by Django 2.2 on 2019-06-08 10:54 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('posts', '0004_auto_20190521_1941'), + ] + + operations = [ + migrations.AlterField( + model_name='post', + name='body', + field=models.TextField(blank=True), + ), + migrations.AlterField( + model_name='post', + name='remote_identifier', + field=models.CharField(blank=True, max_length=500, null=True), + ), + ] diff --git a/src/newsreader/news/posts/migrations/0006_auto_20190608_1520.py b/src/newsreader/news/posts/migrations/0006_auto_20190608_1520.py new file mode 100644 index 0000000..8215ea9 --- /dev/null +++ b/src/newsreader/news/posts/migrations/0006_auto_20190608_1520.py @@ -0,0 +1,33 @@ +# Generated by Django 2.2 on 2019-06-08 15:20 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('posts', '0005_auto_20190608_1054'), + ] + + operations = [ + migrations.AlterField( + model_name='post', + name='body', + field=models.TextField(blank=True, null=True), + ), + migrations.AlterField( + model_name='post', + name='publication_date', + field=models.DateTimeField(blank=True, null=True), + ), + migrations.AlterField( + model_name='post', + name='title', + field=models.CharField(blank=True, max_length=200, null=True), + ), + migrations.AlterField( + model_name='post', + name='url', + field=models.URLField(blank=True, null=True), + ), + ] diff --git a/src/newsreader/news/posts/migrations/__init__.py b/src/newsreader/news/posts/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/newsreader/news/posts/models.py b/src/newsreader/news/posts/models.py new file mode 100644 index 0000000..0528187 --- /dev/null +++ b/src/newsreader/news/posts/models.py @@ -0,0 +1,34 @@ +from django.db import models +from django.utils.translation import gettext as _ + +from newsreader.core.models import TimeStampedModel +from newsreader.news.collection.models import CollectionRule + + +class Post(TimeStampedModel): + title = models.CharField(max_length=200, blank=True, null=True) + body = models.TextField(blank=True, null=True) + author = models.CharField(max_length=100, blank=True, null=True) + publication_date = models.DateTimeField(blank=True, null=True) + url = models.URLField(blank=True, null=True) + + rule = models.ForeignKey(CollectionRule, on_delete=models.CASCADE) + remote_identifier = models.CharField(max_length=500, blank=True, null=True) + + category = models.ForeignKey( + 'Category', blank=True, null=True, on_delete=models.PROTECT + ) + + def __str__(self): + return "Post-{}".format(self.pk) + + +class Category(TimeStampedModel): + name = models.CharField(max_length=50, unique=True) + + class Meta: + verbose_name = _("Category") + verbose_name_plural = _("Categories") + + def __str__(self): + return self.name diff --git a/src/newsreader/news/posts/tests/__init__.py b/src/newsreader/news/posts/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/newsreader/news/posts/tests/factories.py b/src/newsreader/news/posts/tests/factories.py new file mode 100644 index 0000000..d059335 --- /dev/null +++ b/src/newsreader/news/posts/tests/factories.py @@ -0,0 +1,28 @@ +import factory +import pytz + +from newsreader.news.collection.tests.factories import CollectionRuleFactory +from newsreader.news.posts.models import Category, Post + + +class CategoryFactory(factory.django.DjangoModelFactory): + class Meta: + model = Category + + name = factory.Sequence(lambda n: "Category-{}".format(n)) + + +class PostFactory(factory.django.DjangoModelFactory): + class Meta: + model = Post + + title = factory.Faker("sentence") + body = factory.Faker("paragraph") + author = factory.Faker("name") + publication_date = factory.Faker('date_time_this_year', tzinfo=pytz.utc) + url = factory.Faker('url') + remote_identifier = factory.Faker("url") + + rule = factory.SubFactory(CollectionRuleFactory) + + category = factory.SubFactory(CategoryFactory) diff --git a/src/newsreader/news/posts/views.py b/src/newsreader/news/posts/views.py new file mode 100644 index 0000000..91ea44a --- /dev/null +++ b/src/newsreader/news/posts/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. diff --git a/src/newsreader/urls.py b/src/newsreader/urls.py new file mode 100644 index 0000000..fdd5749 --- /dev/null +++ b/src/newsreader/urls.py @@ -0,0 +1,6 @@ +from django.contrib import admin +from django.urls import include, path + +urlpatterns = [ + path("admin/", admin.site.urls), +] diff --git a/src/newsreader/utils/formatter.sh b/src/newsreader/utils/formatter.sh new file mode 100644 index 0000000..de70b4a --- /dev/null +++ b/src/newsreader/utils/formatter.sh @@ -0,0 +1,10 @@ +#!/bin/bash +FILES=$(git diff --cached --name-only --diff-filter=ACM "*.py" | sed 's| |\\ |g') + +if [ ! -z "$FILES" ]; then + # Format all selected files + echo "$FILES" | xargs ./env/bin/isort + + # Add back the modified/prettified files to staging + echo "$FILES" | xargs git add +fi diff --git a/src/newsreader/utils/pre-commit b/src/newsreader/utils/pre-commit new file mode 100644 index 0000000..d1e29b9 --- /dev/null +++ b/src/newsreader/utils/pre-commit @@ -0,0 +1,13 @@ +#!/bin/bash + +# Check if the directory is the root directory +if [ ! -d ".git/" ]; then + echo "Please commit from within the root directory" + exit 1 +fi + +# Run every file inside the pre-commit.d directory +for file in .git/hooks/pre-commit.d/* +do + . $file +done diff --git a/src/newsreader/wsgi.py b/src/newsreader/wsgi.py new file mode 100644 index 0000000..4c5ea26 --- /dev/null +++ b/src/newsreader/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for newsreader project. + +It exposes the WSGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/2.2/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'newsreader.settings') + +application = get_wsgi_application()