0.2.3 #99
5 changed files with 200 additions and 7 deletions
|
|
@ -1,6 +1,8 @@
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from typing import ContextManager, Dict, Generator, List, Optional, Tuple
|
from typing import ContextManager, Dict, Generator, List, Optional, Tuple
|
||||||
|
|
||||||
|
from django.db.models.fields import CharField, TextField
|
||||||
|
from django.template.defaultfilters import truncatechars
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
import bleach
|
import bleach
|
||||||
|
|
@ -63,16 +65,18 @@ class FeedBuilder(Builder):
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
data = {"rule_id": rule.pk}
|
data = {"rule_id": rule.pk}
|
||||||
|
|
||||||
for field, value in field_mapping.items():
|
for field, model_field in field_mapping.items():
|
||||||
if field in entry:
|
if field in entry:
|
||||||
|
value = self.truncate_text(model_field, entry[field])
|
||||||
|
|
||||||
if field == "published_parsed":
|
if field == "published_parsed":
|
||||||
created, aware_datetime = build_publication_date(entry[field], tz)
|
created, aware_datetime = build_publication_date(value, tz)
|
||||||
data[value] = aware_datetime if created else None
|
data[model_field] = aware_datetime if created else None
|
||||||
elif field == "summary":
|
elif field == "summary":
|
||||||
summary = self.sanitize_summary(entry[field])
|
summary = self.sanitize_summary(value)
|
||||||
data[value] = summary
|
data[model_field] = summary
|
||||||
else:
|
else:
|
||||||
data[value] = entry[field]
|
data[model_field] = value
|
||||||
|
|
||||||
yield Post(**data)
|
yield Post(**data)
|
||||||
|
|
||||||
|
|
@ -82,6 +86,22 @@ class FeedBuilder(Builder):
|
||||||
|
|
||||||
return bleach.clean(summary, tags=tags, attributes=attrs) if summary else None
|
return bleach.clean(summary, tags=tags, attributes=attrs) if summary else None
|
||||||
|
|
||||||
|
def truncate_text(self, field_name, value):
    """Shorten ``value`` so it fits the ``max_length`` of a ``Post`` field.

    Args:
        field_name: Name of the ``Post`` model field the value is destined for.
        value: Raw value taken from the feed entry.

    Returns:
        ``value`` unchanged when it is empty, is not a string, the target
        field is not a ``CharField``/``TextField``, the field declares no
        ``max_length``, or the value already fits. Otherwise the result of
        ``truncatechars(value, max_length)``.
    """
    # Function-scope import keeps this method self-contained.
    import logging

    # Resolve the field first so an unknown field name still raises, exactly
    # as it did before any of the early returns below.
    field = Post._meta.get_field(field_name)

    # Only text columns are truncated; checking the type before touching
    # ``max_length`` avoids relying on that attribute for other field kinds.
    if not isinstance(field, (CharField, TextField)):
        return value

    # Non-string values (parsed dates, numbers, ...) are passed through as-is
    # instead of being measured with ``len()``.
    if not value or not isinstance(value, str):
        return value

    max_length = field.max_length
    if not max_length or len(value) <= max_length:
        return value

    # Report through logging rather than print so library code does not
    # write to stdout.
    logging.getLogger(__name__).warning("Truncated %s", field_name)
    return truncatechars(value, max_length)
|
||||||
|
|
||||||
def save(self) -> None:
|
def save(self) -> None:
|
||||||
for post in self.instances:
|
for post in self.instances:
|
||||||
post.save()
|
post.save()
|
||||||
|
|
|
||||||
|
|
@ -890,3 +890,132 @@ mock_with_html = {
|
||||||
"status": 200,
|
"status": 200,
|
||||||
"version": "rss20",
|
"version": "rss20",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Parsed-feed fixture with a single entry whose "author" string is longer
# than 40 characters.
mock_with_long_author = {
    "bozo": 0,
    "encoding": "utf-8",
    "entries": [
        {
            "author": "A. Author but this author name is way to long for an actual surname.",
            "guidislink": False,
            "href": "",
            "id": "https://www.bbc.co.uk/news/world-us-canada-48338168",
            "link": "https://www.bbc.co.uk/news/world-us-canada-48338168",
            "links": [
                {
                    "href": "https://www.bbc.co.uk/news/world-us-canada-48338168",
                    "rel": "alternate",
                    "type": "text/html",
                },
            ],
            "media_thumbnail": [
                {
                    "height": "1152",
                    "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg",
                    "width": "2048",
                },
            ],
            "published": "Mon, 20 May 2019 16:07:37 GMT",
            "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
            "summary": (
                "Foreign Minister Mohammad Javad Zarif says the US president "
                "should try showing Iranians some respect."
            ),
            "summary_detail": {
                "base": "http://feeds.bbci.co.uk/news/rss.xml",
                "language": None,
                "type": "text/html",
                "value": (
                    "Foreign Minister Mohammad Javad Zarif says the US president "
                    "should try showing Iranians some respect."
                ),
            },
            "title": "Trump's 'genocidal taunts' will not end Iran - Zarif",
            "title_detail": {
                "base": "http://feeds.bbci.co.uk/news/rss.xml",
                "language": None,
                "type": "text/plain",
                "value": "Trump's 'genocidal taunts' will not end Iran - Zarif",
            },
        },
    ],
    "feed": {
        # NOTE(review): "language" sits inside "image" here; it presumably
        # belongs on "feed" itself - confirm against a real parser result.
        "image": {
            "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
            "link": "https://www.bbc.co.uk/news/",
            "title": "BBC News - Home",
            "language": "en-gb",
        },
        "links": [{"href": "https://www.bbc.co.uk/news/", "rel": "alternate", "type": "text/html"}],
        "title": "BBC News - Home",
    },
    "href": "http://feeds.bbci.co.uk/news/rss.xml",
    "status": 200,
    "version": "rss20",
}
|
||||||
|
|
||||||
|
# Parsed-feed fixture with a single entry whose "title" is the same headline
# repeated six times, making it longer than 200 characters.
mock_with_long_title = {
    "bozo": 0,
    "encoding": "utf-8",
    "entries": [
        {
            "author": "A. Author",
            "guidislink": False,
            "href": "",
            "id": "https://www.bbc.co.uk/news/world-us-canada-48338168",
            "link": "https://www.bbc.co.uk/news/world-us-canada-48338168",
            "links": [
                {
                    "href": "https://www.bbc.co.uk/news/world-us-canada-48338168",
                    "rel": "alternate",
                    "type": "text/html",
                },
            ],
            "media_thumbnail": [
                {
                    "height": "1152",
                    "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg",
                    "width": "2048",
                },
            ],
            "published": "Mon, 20 May 2019 16:07:37 GMT",
            "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
            "summary": (
                "Foreign Minister Mohammad Javad Zarif says the US president "
                "should try showing Iranians some respect."
            ),
            "summary_detail": {
                "base": "http://feeds.bbci.co.uk/news/rss.xml",
                "language": None,
                "type": "text/html",
                "value": (
                    "Foreign Minister Mohammad Javad Zarif says the US president "
                    "should try showing Iranians some respect."
                ),
            },
            "title": "Trump's 'genocidal taunts' will not end Iran - Zarif" * 6,
            "title_detail": {
                "base": "http://feeds.bbci.co.uk/news/rss.xml",
                "language": None,
                "type": "text/plain",
                "value": "Trump's 'genocidal taunts' will not end Iran - Zarif",
            },
        },
    ],
    "feed": {
        # NOTE(review): "language" sits inside "image" here; it presumably
        # belongs on "feed" itself - confirm against a real parser result.
        "image": {
            "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
            "link": "https://www.bbc.co.uk/news/",
            "title": "BBC News - Home",
            "language": "en-gb",
        },
        "links": [{"href": "https://www.bbc.co.uk/news/", "rel": "alternate", "type": "text/html"}],
        "title": "BBC News - Home",
    },
    "href": "http://feeds.bbci.co.uk/news/rss.xml",
    "status": 200,
    "version": "rss20",
}
|
||||||
|
|
|
||||||
|
|
@ -265,3 +265,31 @@ class FeedBuilderTestCase(TestCase):
|
||||||
self.assertTrue("<strong>" not in post.body)
|
self.assertTrue("<strong>" not in post.body)
|
||||||
self.assertTrue('<a href="https://www.bbc.com">' in post.body)
|
self.assertTrue('<a href="https://www.bbc.com">' in post.body)
|
||||||
self.assertTrue("<p>" in post.body)
|
self.assertTrue("<p>" in post.body)
|
||||||
|
|
||||||
|
def test_long_author_text_is_truncated(self):
    """An over-long feed author is stored with a length of exactly 40."""
    rule = CollectionRuleFactory()
    mock_stream = MagicMock(rule=rule)

    # Bind the context-manager result to its own name instead of rebinding
    # a variable that previously held the FeedBuilder class.
    with FeedBuilder((mock_with_long_author, mock_stream)) as builder:
        builder.save()

    # Check the row count first: a wrong count then fails this assertion
    # rather than raising from ``Post.objects.get()``.
    self.assertEqual(Post.objects.count(), 1)

    post = Post.objects.get()

    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias.
    self.assertEqual(len(post.author), 40)
|
||||||
|
|
||||||
|
def test_long_title_text_is_truncated(self):
    """An over-long feed title is stored with a length of exactly 200."""
    rule = CollectionRuleFactory()
    mock_stream = MagicMock(rule=rule)

    # Bind the context-manager result to its own name instead of rebinding
    # a variable that previously held the FeedBuilder class.
    with FeedBuilder((mock_with_long_title, mock_stream)) as builder:
        builder.save()

    # Check the row count first: a wrong count then fails this assertion
    # rather than raising from ``Post.objects.get()``.
    self.assertEqual(Post.objects.count(), 1)

    post = Post.objects.get()

    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias.
    self.assertEqual(len(post.title), 200)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
# Generated by Django 2.2 on 2019-07-10 18:22
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Redefine ``post.author`` as an optional CharField of at most 40 characters."""

    # NOTE(review): rows whose author is already longer than 40 characters
    # may make this ALTER fail or lose data, depending on the database
    # backend - confirm existing data before applying.

    dependencies = [
        ("core", "0002_category_user"),
    ]

    operations = [
        migrations.AlterField(
            model_name="post",
            name="author",
            field=models.CharField(max_length=40, null=True, blank=True),
        ),
    ]
|
||||||
|
|
@ -8,7 +8,7 @@ from newsreader.news.collection.models import CollectionRule
|
||||||
class Post(TimeStampedModel):
|
class Post(TimeStampedModel):
|
||||||
title = models.CharField(max_length=200, blank=True, null=True)
|
title = models.CharField(max_length=200, blank=True, null=True)
|
||||||
body = models.TextField(blank=True, null=True)
|
body = models.TextField(blank=True, null=True)
|
||||||
author = models.CharField(max_length=200, blank=True, null=True)
|
author = models.CharField(max_length=40, blank=True, null=True)
|
||||||
publication_date = models.DateTimeField(blank=True, null=True)
|
publication_date = models.DateTimeField(blank=True, null=True)
|
||||||
url = models.URLField(max_length=1024, blank=True, null=True)
|
url = models.URLField(max_length=1024, blank=True, null=True)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue