diff --git a/src/newsreader/news/collection/feed.py b/src/newsreader/news/collection/feed.py
index a7ab2fc..c7e9b6d 100644
--- a/src/newsreader/news/collection/feed.py
+++ b/src/newsreader/news/collection/feed.py
@@ -1,6 +1,9 @@
+import logging
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import ContextManager, Dict, Generator, List, Optional, Tuple
 
+from django.db.models.fields import CharField, TextField
+from django.template.defaultfilters import truncatechars
 from django.utils import timezone
 
 import bleach
@@ -63,16 +66,18 @@ class FeedBuilder(Builder):
         for entry in entries:
             data = {"rule_id": rule.pk}
 
-            for field, value in field_mapping.items():
+            for field, model_field in field_mapping.items():
                 if field in entry:
+                    value = self.truncate_text(model_field, entry[field])
+
                     if field == "published_parsed":
-                        created, aware_datetime = build_publication_date(entry[field], tz)
-                        data[value] = aware_datetime if created else None
+                        created, aware_datetime = build_publication_date(value, tz)
+                        data[model_field] = aware_datetime if created else None
                     elif field == "summary":
-                        summary = self.sanitize_summary(entry[field])
-                        data[value] = summary
+                        summary = self.sanitize_summary(value)
+                        data[model_field] = summary
                     else:
-                        data[value] = entry[field]
+                        data[model_field] = value
 
             yield Post(**data)
 
@@ -82,6 +87,35 @@ class FeedBuilder(Builder):
 
         return bleach.clean(summary, tags=tags, attributes=attrs) if summary else None
 
+    def truncate_text(self, field_name, value):
+        """Shorten ``value`` to fit the ``Post`` field named ``field_name``.
+
+        Falsy values, non-string values, and fields that are not a
+        ``CharField``/``TextField`` with a ``max_length`` are returned
+        unchanged. Longer strings are cut with ``truncatechars``.
+        """
+        field = Post._meta.get_field(field_name)
+        max_length = field.max_length
+
+        if not value or not max_length:
+            return value
+
+        # Only text can be measured and cut by character count; any other
+        # value type is handed back untouched.
+        if not isinstance(value, str):
+            return value
+
+        if not isinstance(field, (CharField, TextField)):
+            return value
+
+        if len(value) > max_length:
+            logging.getLogger(__name__).warning(
+                "Truncated %s to %d characters", field_name, max_length
+            )
+            return truncatechars(value, max_length)
+
+        return value
+
     def save(self) -> None:
         for post in self.instances:
             post.save()
diff --git a/src/newsreader/news/collection/tests/feed/builder/mocks.py
b/src/newsreader/news/collection/tests/feed/builder/mocks.py
index a486626..9003f86 100644
--- a/src/newsreader/news/collection/tests/feed/builder/mocks.py
+++ b/src/newsreader/news/collection/tests/feed/builder/mocks.py
@@ -890,3 +890,132 @@ mock_with_html = {
     "status": 200,
     "version": "rss20",
 }
+
+mock_with_long_author = {
+    "bozo": 0,
+    "encoding": "utf-8",
+    "entries": [
+        {
+            "author": "A. Author but this author name is way to long for an actual surname.",
+            "guidislink": False,
+            "href": "",
+            "id": "https://www.bbc.co.uk/news/world-us-canada-48338168",
+            "link": "https://www.bbc.co.uk/news/world-us-canada-48338168",
+            "links": [
+                {
+                    "href": "https://www.bbc.co.uk/news/world-us-canada-48338168",
+                    "rel": "alternate",
+                    "type": "text/html",
+                }
+            ],
+            "media_thumbnail": [
+                {
+                    "height": "1152",
+                    "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg",
+                    "width": "2048",
+                }
+            ],
+            "published": "Mon, 20 May 2019 16:07:37 GMT",
+            "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
+            "summary": "Foreign Minister Mohammad Javad Zarif says the US "
+            "president should try showing Iranians some respect.",
+            "summary_detail": {
+                "base": "http://feeds.bbci.co.uk/news/rss.xml",
+                "language": None,
+                "type": "text/html",
+                "value": "Foreign Minister Mohammad Javad "
+                "Zarif says the US president should "
+                "try showing Iranians some "
+                "respect.",
+            },
+            "title": "Trump's 'genocidal taunts' will not end Iran - Zarif",
+            "title_detail": {
+                "base": "http://feeds.bbci.co.uk/news/rss.xml",
+                "language": None,
+                "type": "text/plain",
+                "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif",
+            },
+        }
+    ],
+    "feed": {
+        "image": {
+            "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
+            "link": "https://www.bbc.co.uk/news/",
+            "title": "BBC News - Home",
+            "language": "en-gb",
+            "link": "https://www.bbc.co.uk/news/",
+        },
+        "links": [{"href": "https://www.bbc.co.uk/news/", "rel": "alternate", "type": "text/html"}],
+        "title": "BBC News - Home",
+    },
+    "href": "http://feeds.bbci.co.uk/news/rss.xml",
+    "status": 200,
+    "version": "rss20",
+}
+
+mock_with_long_title = {
+    "bozo": 0,
+    "encoding": "utf-8",
+    "entries": [
+        {
+            "author": "A. Author",
+            "guidislink": False,
+            "href": "",
+            "id": "https://www.bbc.co.uk/news/world-us-canada-48338168",
+            "link": "https://www.bbc.co.uk/news/world-us-canada-48338168",
+            "links": [
+                {
+                    "href": "https://www.bbc.co.uk/news/world-us-canada-48338168",
+                    "rel": "alternate",
+                    "type": "text/html",
+                }
+            ],
+            "media_thumbnail": [
+                {
+                    "height": "1152",
+                    "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg",
+                    "width": "2048",
+                }
+            ],
+            "published": "Mon, 20 May 2019 16:07:37 GMT",
+            "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)),
+            "summary": "Foreign Minister Mohammad Javad Zarif says the US "
+            "president should try showing Iranians some respect.",
+            "summary_detail": {
+                "base": "http://feeds.bbci.co.uk/news/rss.xml",
+                "language": None,
+                "type": "text/html",
+                "value": "Foreign Minister Mohammad Javad "
+                "Zarif says the US president should "
+                "try showing Iranians some "
+                "respect.",
+            },
+            "title": "Trump's 'genocidal taunts' will not end Iran - Zarif"
+            "Trump's 'genocidal taunts' will not end Iran - Zarif"
+            "Trump's 'genocidal taunts' will not end Iran - Zarif"
+            "Trump's 'genocidal taunts' will not end Iran - Zarif"
+            "Trump's 'genocidal taunts' will not end Iran - Zarif"
+            "Trump's 'genocidal taunts' will not end Iran - Zarif",
+            "title_detail": {
+                "base": "http://feeds.bbci.co.uk/news/rss.xml",
+                "language": None,
+                "type": "text/plain",
+                "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif",
+            },
+        }
+    ],
+    "feed": {
+        "image": {
+            "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif",
+            "link": "https://www.bbc.co.uk/news/",
+            "title": "BBC News - Home",
+            "language": "en-gb",
+            "link": "https://www.bbc.co.uk/news/",
+        },
+        "links": [{"href": "https://www.bbc.co.uk/news/", "rel": "alternate", "type": "text/html"}],
+        "title": "BBC News - Home",
+    },
+    "href": "http://feeds.bbci.co.uk/news/rss.xml",
+    "status": 200,
+    "version": "rss20",
+}
diff --git a/src/newsreader/news/collection/tests/feed/builder/tests.py b/src/newsreader/news/collection/tests/feed/builder/tests.py
index 94e84ea..33aae7f 100644
--- a/src/newsreader/news/collection/tests/feed/builder/tests.py
+++ b/src/newsreader/news/collection/tests/feed/builder/tests.py
@@ -265,3 +265,33 @@ class FeedBuilderTestCase(TestCase):
         self.assertTrue("" not in post.body)
         self.assertTrue('' in post.body)
         self.assertTrue("

" in post.body)
+
+    def test_long_author_text_is_truncated(self):
+        """An author longer than the field's max_length is stored cut to 40 chars."""
+        builder = FeedBuilder
+        rule = CollectionRuleFactory()
+        mock_stream = MagicMock(rule=rule)
+
+        with builder((mock_with_long_author, mock_stream)) as builder:
+            builder.save()
+
+        self.assertEqual(Post.objects.count(), 1)
+
+        post = Post.objects.get()
+
+        self.assertEqual(len(post.author), 40)
+
+    def test_long_title_text_is_truncated(self):
+        """A title longer than the field's max_length is stored cut to 200 chars."""
+        builder = FeedBuilder
+        rule = CollectionRuleFactory()
+        mock_stream = MagicMock(rule=rule)
+
+        with builder((mock_with_long_title, mock_stream)) as builder:
+            builder.save()
+
+        self.assertEqual(Post.objects.count(), 1)
+
+        post = Post.objects.get()
+
+        self.assertEqual(len(post.title), 200)
diff --git a/src/newsreader/news/core/migrations/0003_auto_20190710_2022.py b/src/newsreader/news/core/migrations/0003_auto_20190710_2022.py
new file mode 100644
index 0000000..3c7fe84
--- /dev/null
+++ b/src/newsreader/news/core/migrations/0003_auto_20190710_2022.py
@@ -0,0 +1,16 @@
+# Generated by Django 2.2 on 2019-07-10 18:22
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [("core", "0002_category_user")]
+
+    operations = [
+        migrations.AlterField(
+            model_name="post",
+            name="author",
+            field=models.CharField(blank=True, max_length=40, null=True),
+        )
+    ]
diff --git a/src/newsreader/news/core/models.py b/src/newsreader/news/core/models.py
index 704e752..c7d2638 100644
--- a/src/newsreader/news/core/models.py
+++ b/src/newsreader/news/core/models.py
@@ -8,7 +8,7 @@ from newsreader.news.collection.models import CollectionRule
 class Post(TimeStampedModel):
     title = models.CharField(max_length=200, blank=True, null=True)
     body = models.TextField(blank=True, null=True)
-    author = models.CharField(max_length=200, blank=True, null=True)
+    author = models.CharField(max_length=40, blank=True, null=True)
     publication_date = models.DateTimeField(blank=True, null=True)
     url = models.URLField(max_length=1024, blank=True, null=True)
 