0.2.3 #99

Merged
sonny merged 112 commits from development into master 2020-05-23 16:58:42 +02:00
5 changed files with 200 additions and 7 deletions
Showing only changes of commit a95db91726 - Show all commits

View file

@ -1,6 +1,8 @@
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import ContextManager, Dict, Generator, List, Optional, Tuple from typing import ContextManager, Dict, Generator, List, Optional, Tuple
from django.db.models.fields import CharField, TextField
from django.template.defaultfilters import truncatechars
from django.utils import timezone from django.utils import timezone
import bleach import bleach
@ -63,16 +65,18 @@ class FeedBuilder(Builder):
for entry in entries: for entry in entries:
data = {"rule_id": rule.pk} data = {"rule_id": rule.pk}
for field, value in field_mapping.items(): for field, model_field in field_mapping.items():
if field in entry: if field in entry:
value = self.truncate_text(model_field, entry[field])
if field == "published_parsed": if field == "published_parsed":
created, aware_datetime = build_publication_date(entry[field], tz) created, aware_datetime = build_publication_date(value, tz)
data[value] = aware_datetime if created else None data[model_field] = aware_datetime if created else None
elif field == "summary": elif field == "summary":
summary = self.sanitize_summary(entry[field]) summary = self.sanitize_summary(value)
data[value] = summary data[model_field] = summary
else: else:
data[value] = entry[field] data[model_field] = value
yield Post(**data) yield Post(**data)
@ -82,6 +86,22 @@ class FeedBuilder(Builder):
return bleach.clean(summary, tags=tags, attributes=attrs) if summary else None return bleach.clean(summary, tags=tags, attributes=attrs) if summary else None
def truncate_text(self, field_name, value):
    """Shorten ``value`` so it fits the ``Post`` model field ``field_name``.

    The value is returned untouched when it is empty, when the target field
    declares no ``max_length``, when the field is neither a ``CharField``
    nor a ``TextField``, or when the value already fits.

    :param field_name: name of the field on ``Post`` that will store the value.
    :param value: raw value taken from the feed entry.
    :return: the original value, or the result of Django's ``truncatechars``
        filter applied with the field's ``max_length``.
    """
    # Imported locally so this method carries its own dependency.
    import logging

    field = Post._meta.get_field(field_name)
    max_length = field.max_length

    # Nothing to shorten, or the field places no limit on its content.
    if not value or not max_length:
        return value

    # Only textual columns are truncated; other field types pass through.
    if not isinstance(field, (CharField, TextField)):
        return value

    if len(value) > max_length:
        # Truncation silently drops feed data, so leave a trace in the logs
        # instead of writing to stdout.
        logging.getLogger(__name__).warning("Truncated %s", field_name)
        return truncatechars(value, max_length)

    return value
def save(self) -> None: def save(self) -> None:
for post in self.instances: for post in self.instances:
post.save() post.save()

View file

@ -890,3 +890,132 @@ mock_with_html = {
"status": 200, "status": 200,
"version": "rss20", "version": "rss20",
} }
# Parsed-feed fixture with a single entry whose "author" value is far longer
# than a short name column would allow (well over 40 characters).
mock_with_long_author = {
    "bozo": 0,
    "encoding": "utf-8",
    "entries": [
        {
            "author": "A. Author but this author name is way to long for an actual surname.",
            "guidislink": False,
            "href": "",
            "id": "https://www.bbc.co.uk/news/world-us-canada-48338168",
            "link": "https://www.bbc.co.uk/news/world-us-canada-48338168",
            "links": [
                {
                    "href": "https://www.bbc.co.uk/news/world-us-canada-48338168",
                    "rel": "alternate",
                    "type": "text/html",
                }
            ],
            "media_thumbnail": [
                {
                    "height": "1152",
                    "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg",
                    "width": "2048",
                }
            ],
            "published": "Mon, 20 May 2019 16:07:37 GMT",
            "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
            "summary": (
                "Foreign Minister Mohammad Javad Zarif says the US "
                "president should try showing Iranians some respect."
            ),
            "summary_detail": {
                "base": "http://feeds.bbci.co.uk/news/rss.xml",
                "language": None,
                "type": "text/html",
                "value": (
                    "Foreign Minister Mohammad Javad Zarif says the US "
                    "president should try showing Iranians some respect."
                ),
            },
            "title": "Trump's 'genocidal taunts' will not end Iran - Zarif",
            "title_detail": {
                "base": "http://feeds.bbci.co.uk/news/rss.xml",
                "language": None,
                "type": "text/plain",
                "value": "Trump's 'genocidal taunts' will not end Iran - Zarif",
            },
        }
    ],
    "feed": {
        # The "link" key used to be listed twice with the same value; a dict
        # literal keeps only one entry per key, so it is spelled out once.
        "image": {
            "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
            "link": "https://www.bbc.co.uk/news/",
            "title": "BBC News - Home",
            "language": "en-gb",
        },
        "links": [
            {
                "href": "https://www.bbc.co.uk/news/",
                "rel": "alternate",
                "type": "text/html",
            }
        ],
        "title": "BBC News - Home",
    },
    "href": "http://feeds.bbci.co.uk/news/rss.xml",
    "status": 200,
    "version": "rss20",
}
# Parsed-feed fixture with a single entry whose "title" is the same headline
# repeated six times, which makes it longer than 200 characters.
mock_with_long_title = {
    "bozo": 0,
    "encoding": "utf-8",
    "entries": [
        {
            "author": "A. Author",
            "guidislink": False,
            "href": "",
            "id": "https://www.bbc.co.uk/news/world-us-canada-48338168",
            "link": "https://www.bbc.co.uk/news/world-us-canada-48338168",
            "links": [
                {
                    "href": "https://www.bbc.co.uk/news/world-us-canada-48338168",
                    "rel": "alternate",
                    "type": "text/html",
                }
            ],
            "media_thumbnail": [
                {
                    "height": "1152",
                    "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg",
                    "width": "2048",
                }
            ],
            "published": "Mon, 20 May 2019 16:07:37 GMT",
            "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
            "summary": (
                "Foreign Minister Mohammad Javad Zarif says the US "
                "president should try showing Iranians some respect."
            ),
            "summary_detail": {
                "base": "http://feeds.bbci.co.uk/news/rss.xml",
                "language": None,
                "type": "text/html",
                "value": (
                    "Foreign Minister Mohammad Javad Zarif says the US "
                    "president should try showing Iranians some respect."
                ),
            },
            # Six back-to-back copies of the headline, with no separator.
            "title": "Trump's 'genocidal taunts' will not end Iran - Zarif" * 6,
            "title_detail": {
                "base": "http://feeds.bbci.co.uk/news/rss.xml",
                "language": None,
                "type": "text/plain",
                "value": "Trump's 'genocidal taunts' will not end Iran - Zarif",
            },
        }
    ],
    "feed": {
        # The "link" key used to be listed twice with the same value; a dict
        # literal keeps only one entry per key, so it is spelled out once.
        "image": {
            "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
            "link": "https://www.bbc.co.uk/news/",
            "title": "BBC News - Home",
            "language": "en-gb",
        },
        "links": [
            {
                "href": "https://www.bbc.co.uk/news/",
                "rel": "alternate",
                "type": "text/html",
            }
        ],
        "title": "BBC News - Home",
    },
    "href": "http://feeds.bbci.co.uk/news/rss.xml",
    "status": 200,
    "version": "rss20",
}

View file

@ -265,3 +265,31 @@ class FeedBuilderTestCase(TestCase):
self.assertTrue("<strong>" not in post.body) self.assertTrue("<strong>" not in post.body)
self.assertTrue('<a href="https://www.bbc.com">' in post.body) self.assertTrue('<a href="https://www.bbc.com">' in post.body)
self.assertTrue("<p>" in post.body) self.assertTrue("<p>" in post.body)
def test_long_author_text_is_truncated(self):
    """An over-long author in the feed is stored as exactly 40 characters."""
    rule = CollectionRuleFactory()
    mock_stream = MagicMock(rule=rule)

    # Instantiate the builder class directly rather than rebinding a single
    # name from the class to the instance inside the ``with`` statement.
    with FeedBuilder((mock_with_long_author, mock_stream)) as builder:
        builder.save()

    # Check the count first: with zero or several posts, ``get()`` would
    # raise before a readable assertion failure could be reported.
    self.assertEqual(Post.objects.count(), 1)

    post = Post.objects.get()
    self.assertEqual(len(post.author), 40)
def test_long_title_text_is_truncated(self):
    """An over-long title in the feed is stored as exactly 200 characters."""
    rule = CollectionRuleFactory()
    mock_stream = MagicMock(rule=rule)

    # Instantiate the builder class directly rather than rebinding a single
    # name from the class to the instance inside the ``with`` statement.
    with FeedBuilder((mock_with_long_title, mock_stream)) as builder:
        builder.save()

    # Check the count first: with zero or several posts, ``get()`` would
    # raise before a readable assertion failure could be reported.
    self.assertEqual(Post.objects.count(), 1)

    post = Post.objects.get()
    self.assertEqual(len(post.title), 200)

View file

@ -0,0 +1,16 @@
# Generated by Django 2.2 on 2019-07-10 18:22
from django.db import migrations, models
class Migration(migrations.Migration):
    """Redefine ``post.author`` as an optional ``CharField`` of at most 40 characters."""

    # NOTE(review): if this shrinks an existing column, rows holding longer
    # author values may need cleaning up before it is applied; confirm
    # against the target database.
    dependencies = [
        ("core", "0002_category_user"),
    ]

    operations = [
        migrations.AlterField(
            model_name="post",
            name="author",
            field=models.CharField(max_length=40, null=True, blank=True),
        ),
    ]

View file

@ -8,7 +8,7 @@ from newsreader.news.collection.models import CollectionRule
class Post(TimeStampedModel): class Post(TimeStampedModel):
title = models.CharField(max_length=200, blank=True, null=True) title = models.CharField(max_length=200, blank=True, null=True)
body = models.TextField(blank=True, null=True) body = models.TextField(blank=True, null=True)
author = models.CharField(max_length=200, blank=True, null=True) author = models.CharField(max_length=40, blank=True, null=True)
publication_date = models.DateTimeField(blank=True, null=True) publication_date = models.DateTimeField(blank=True, null=True)
url = models.URLField(max_length=1024, blank=True, null=True) url = models.URLField(max_length=1024, blank=True, null=True)