0.2.3 #99

Merged
sonny merged 112 commits from development into master 2020-05-23 16:58:42 +02:00
5 changed files with 200 additions and 7 deletions
Showing only changes of commit a95db91726 - Show all commits

View file

@ -1,6 +1,8 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import ContextManager, Dict, Generator, List, Optional, Tuple
from django.db.models.fields import CharField, TextField
from django.template.defaultfilters import truncatechars
from django.utils import timezone
import bleach
@ -63,16 +65,18 @@ class FeedBuilder(Builder):
for entry in entries:
data = {"rule_id": rule.pk}
for field, value in field_mapping.items():
for field, model_field in field_mapping.items():
if field in entry:
value = self.truncate_text(model_field, entry[field])
if field == "published_parsed":
created, aware_datetime = build_publication_date(entry[field], tz)
data[value] = aware_datetime if created else None
created, aware_datetime = build_publication_date(value, tz)
data[model_field] = aware_datetime if created else None
elif field == "summary":
summary = self.sanitize_summary(entry[field])
data[value] = summary
summary = self.sanitize_summary(value)
data[model_field] = summary
else:
data[value] = entry[field]
data[model_field] = value
yield Post(**data)
@ -82,6 +86,22 @@ class FeedBuilder(Builder):
return bleach.clean(summary, tags=tags, attributes=attrs) if summary else None
def truncate_text(self, field_name, value):
    """Shorten ``value`` so it fits the ``Post`` model field ``field_name``.

    The field is looked up on ``Post`` and its ``max_length`` is used as the
    limit. ``value`` is returned unchanged when:

    * it is falsy (``None``, empty string, ...),
    * the field declares no ``max_length``,
    * the field is neither a ``CharField`` nor a ``TextField``, or
    * it already fits within ``max_length``.

    Otherwise the truncation is logged and the result of Django's
    ``truncatechars(value, max_length)`` is returned.
    """
    # Function-scope import keeps this block self-contained; diagnostics go
    # through ``logging`` rather than ``print`` so they can be filtered and
    # routed by the project's logging configuration.
    import logging

    field = Post._meta.get_field(field_name)
    max_length = field.max_length

    if not value or not max_length:
        return value

    # Only text-like columns are truncated; other field types (for example
    # dates) are passed through untouched.
    if not isinstance(field, (CharField, TextField)):
        return value

    if len(value) <= max_length:
        return value

    logging.getLogger(__name__).warning("Truncated %s", field_name)
    return truncatechars(value, max_length)
def save(self) -> None:
    """Call ``save()`` on every item of ``self.instances``, in iteration order."""
    for instance in self.instances:
        instance.save()

View file

@ -890,3 +890,132 @@ mock_with_html = {
"status": 200,
"version": "rss20",
}
# Mock parsed-feed payload with a single entry whose "author" string is longer
# than 40 characters, used to exercise author truncation.
mock_with_long_author = {
    "bozo": 0,
    "encoding": "utf-8",
    "entries": [
        {
            "author": "A. Author but this author name is way to long for an actual surname.",
            "guidislink": False,
            "href": "",
            "id": "https://www.bbc.co.uk/news/world-us-canada-48338168",
            "link": "https://www.bbc.co.uk/news/world-us-canada-48338168",
            "links": [
                {
                    "href": "https://www.bbc.co.uk/news/world-us-canada-48338168",
                    "rel": "alternate",
                    "type": "text/html",
                }
            ],
            "media_thumbnail": [
                {
                    "height": "1152",
                    "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg",
                    "width": "2048",
                }
            ],
            "published": "Mon, 20 May 2019 16:07:37 GMT",
            "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
            "summary": "Foreign Minister Mohammad Javad Zarif says the US "
            "president should try showing Iranians some respect.",
            "summary_detail": {
                "base": "http://feeds.bbci.co.uk/news/rss.xml",
                "language": None,
                "type": "text/html",
                "value": "Foreign Minister Mohammad Javad "
                "Zarif says the US president should "
                "try showing Iranians some "
                "respect.",
            },
            "title": "Trump's 'genocidal taunts' will not end Iran - Zarif",
            "title_detail": {
                "base": "http://feeds.bbci.co.uk/news/rss.xml",
                "language": None,
                "type": "text/plain",
                "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif",
            },
        }
    ],
    "feed": {
        # The "link" key used to be listed twice with the same value; a dict
        # literal keeps only one entry per key, so the duplicate was dropped.
        "image": {
            "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
            "link": "https://www.bbc.co.uk/news/",
            "title": "BBC News - Home",
            "language": "en-gb",
        },
        "links": [{"href": "https://www.bbc.co.uk/news/", "rel": "alternate", "type": "text/html"}],
        "title": "BBC News - Home",
    },
    "href": "http://feeds.bbci.co.uk/news/rss.xml",
    "status": 200,
    "version": "rss20",
}
# Mock parsed-feed payload with a single entry whose "title" string is longer
# than 200 characters, used to exercise title truncation.
mock_with_long_title = {
    "bozo": 0,
    "encoding": "utf-8",
    "entries": [
        {
            "author": "A. Author",
            "guidislink": False,
            "href": "",
            "id": "https://www.bbc.co.uk/news/world-us-canada-48338168",
            "link": "https://www.bbc.co.uk/news/world-us-canada-48338168",
            "links": [
                {
                    "href": "https://www.bbc.co.uk/news/world-us-canada-48338168",
                    "rel": "alternate",
                    "type": "text/html",
                }
            ],
            "media_thumbnail": [
                {
                    "height": "1152",
                    "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg",
                    "width": "2048",
                }
            ],
            "published": "Mon, 20 May 2019 16:07:37 GMT",
            "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
            "summary": "Foreign Minister Mohammad Javad Zarif says the US "
            "president should try showing Iranians some respect.",
            "summary_detail": {
                "base": "http://feeds.bbci.co.uk/news/rss.xml",
                "language": None,
                "type": "text/html",
                "value": "Foreign Minister Mohammad Javad "
                "Zarif says the US president should "
                "try showing Iranians some "
                "respect.",
            },
            # Six back-to-back copies of the headline (implicit string
            # concatenation) to push the title past the 200-character limit.
            "title": "Trump's 'genocidal taunts' will not end Iran - Zarif"
            "Trump's 'genocidal taunts' will not end Iran - Zarif"
            "Trump's 'genocidal taunts' will not end Iran - Zarif"
            "Trump's 'genocidal taunts' will not end Iran - Zarif"
            "Trump's 'genocidal taunts' will not end Iran - Zarif"
            "Trump's 'genocidal taunts' will not end Iran - Zarif",
            "title_detail": {
                "base": "http://feeds.bbci.co.uk/news/rss.xml",
                "language": None,
                "type": "text/plain",
                "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif",
            },
        }
    ],
    "feed": {
        # The "link" key used to be listed twice with the same value; a dict
        # literal keeps only one entry per key, so the duplicate was dropped.
        "image": {
            "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
            "link": "https://www.bbc.co.uk/news/",
            "title": "BBC News - Home",
            "language": "en-gb",
        },
        "links": [{"href": "https://www.bbc.co.uk/news/", "rel": "alternate", "type": "text/html"}],
        "title": "BBC News - Home",
    },
    "href": "http://feeds.bbci.co.uk/news/rss.xml",
    "status": 200,
    "version": "rss20",
}

View file

@ -265,3 +265,31 @@ class FeedBuilderTestCase(TestCase):
self.assertTrue("<strong>" not in post.body)
self.assertTrue('<a href="https://www.bbc.com">' in post.body)
self.assertTrue("<p>" in post.body)
def test_long_author_text_is_truncated(self):
    """A feed entry with an over-long author is saved with a 40-character author."""
    rule = CollectionRuleFactory()
    mock_stream = MagicMock(rule=rule)

    # Use the class directly as the context manager instead of rebinding a
    # local name from the class to the instance.
    with FeedBuilder((mock_with_long_author, mock_stream)) as builder:
        builder.save()

    post = Post.objects.get()

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(Post.objects.count(), 1)
    self.assertEqual(len(post.author), 40)
def test_long_title_text_is_truncated(self):
    """A feed entry with an over-long title is saved with a 200-character title."""
    rule = CollectionRuleFactory()
    mock_stream = MagicMock(rule=rule)

    # Use the class directly as the context manager instead of rebinding a
    # local name from the class to the instance.
    with FeedBuilder((mock_with_long_title, mock_stream)) as builder:
        builder.save()

    post = Post.objects.get()

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(Post.objects.count(), 1)
    self.assertEqual(len(post.title), 200)

View file

@ -0,0 +1,16 @@
# Generated by Django 2.2 on 2019-07-10 18:22
from django.db import migrations, models
class Migration(migrations.Migration):
    """Alter ``post.author`` to a blank-able, nullable CharField with max_length 40."""

    # Must run after the "core" app's 0002_category_user migration.
    dependencies = [
        ("core", "0002_category_user"),
    ]

    operations = [
        migrations.AlterField(
            model_name="post",
            name="author",
            field=models.CharField(max_length=40, blank=True, null=True),
        ),
    ]

View file

@ -8,7 +8,7 @@ from newsreader.news.collection.models import CollectionRule
class Post(TimeStampedModel):
title = models.CharField(max_length=200, blank=True, null=True)
body = models.TextField(blank=True, null=True)
author = models.CharField(max_length=200, blank=True, null=True)
author = models.CharField(max_length=40, blank=True, null=True)
publication_date = models.DateTimeField(blank=True, null=True)
url = models.URLField(max_length=1024, blank=True, null=True)