From c13f968234749935e200d3b2f93cc18df4265c13 Mon Sep 17 00:00:00 2001 From: sonny Date: Mon, 3 Feb 2020 20:42:31 +0100 Subject: [PATCH] Resolve "Whitelist more HTML elements" --- src/newsreader/news/collection/constants.py | 28 + src/newsreader/news/collection/feed.py | 48 +- .../tests/feed/builder/mock_html.py | 8 +- .../collection/tests/feed/builder/mocks.py | 930 ++---------------- .../collection/tests/feed/builder/tests.py | 55 +- 5 files changed, 211 insertions(+), 858 deletions(-) create mode 100644 src/newsreader/news/collection/constants.py diff --git a/src/newsreader/news/collection/constants.py b/src/newsreader/news/collection/constants.py new file mode 100644 index 0000000..eade898 --- /dev/null +++ b/src/newsreader/news/collection/constants.py @@ -0,0 +1,28 @@ +from bleach.sanitizer import ALLOWED_ATTRIBUTES as BLEACH_ATTRIBUTES +from bleach.sanitizer import ALLOWED_TAGS as BLEACH_TAGS + + +WHITELISTED_TAGS = ( + *BLEACH_TAGS, + "h1", + "h2", + "h3", + "article", + "p", + "img", + "figure", + "small", + "picture", + "b", + "video", + "source", + "div", + "body", +) + +WHITELISTED_ATTRIBUTES = { + **BLEACH_ATTRIBUTES, + "a": ["href", "rel"], + "img": ["alt", "src"], + "source": ["srcset", "media", "src", "type"], +} diff --git a/src/newsreader/news/collection/feed.py b/src/newsreader/news/collection/feed.py index 09acac0..5d80256 100644 --- a/src/newsreader/news/collection/feed.py +++ b/src/newsreader/news/collection/feed.py @@ -11,6 +11,7 @@ import pytz from feedparser import parse from newsreader.news.collection.base import Builder, Client, Collector, Stream +from newsreader.news.collection.constants import WHITELISTED_ATTRIBUTES, WHITELISTED_TAGS from newsreader.news.collection.exceptions import ( StreamDeniedException, StreamException, @@ -66,25 +67,40 @@ class FeedBuilder(Builder): data = {"rule_id": rule.pk} for field, model_field in field_mapping.items(): - if field in entry: - value = self.truncate_text(model_field, entry[field]) + if not field in entry: + continue - if field == "published_parsed": - aware_datetime, created = build_publication_date(value, tz) - data[model_field] = aware_datetime if created else None - elif field == "summary": - summary = self.sanitize_summary(value) - data[model_field] = summary - else: - data[model_field] = value + value = self.truncate_text(model_field, entry[field]) + + if field == "published_parsed": + aware_datetime, created = build_publication_date(value, tz) + data[model_field] = aware_datetime if created else None + elif field == "summary": + summary = self.sanitize_fragment(value) + data[model_field] = summary + else: + data[model_field] = value + + if "content" in entry: + content = self.get_content(entry["content"]) + body = data.get("body", "") + + if not body or len(body) < len(content): + data["body"] = content yield Post(**data) - def sanitize_summary(self, summary: str) -> Optional[str]: - attrs = {"a": ["href", "rel"], "img": ["alt", "src"]} - tags = ["a", "img", "p"] + def sanitize_fragment(self, fragment: str) -> Optional[str]: + if not fragment: + return "" - return bleach.clean(summary, tags=tags, attributes=attrs) if summary else None + return bleach.clean( + fragment, + tags=WHITELISTED_TAGS, + attributes=WHITELISTED_ATTRIBUTES, + strip=True, + strip_comments=True, + ) def truncate_text(self, field_name, value): field = Post._meta.get_field(field_name) @@ -101,6 +117,10 @@ class FeedBuilder(Builder): return value + def get_content(self, items: List) -> str: + content = "\n ".join([item.get("value") for item in items]) + return self.sanitize_fragment(content) + def save(self) -> None: for post in self.instances: post.save() diff --git a/src/newsreader/news/collection/tests/feed/builder/mock_html.py b/src/newsreader/news/collection/tests/feed/builder/mock_html.py index 44d46f7..0b814a4 100644 --- a/src/newsreader/news/collection/tests/feed/builder/mock_html.py +++ b/src/newsreader/news/collection/tests/feed/builder/mock_html.py @@ -2,9 +2,13 @@ html_summary = """
-

This is clickbait

-

This is clickbait

+

This is clickbait

+

This is clickbait

+
+ + + """ diff --git a/src/newsreader/news/collection/tests/feed/builder/mocks.py b/src/newsreader/news/collection/tests/feed/builder/mocks.py index 945347b..83f7d0b 100644 --- a/src/newsreader/news/collection/tests/feed/builder/mocks.py +++ b/src/newsreader/news/collection/tests/feed/builder/mocks.py @@ -4,1084 +4,340 @@ from .mock_html import html_summary simple_mock = { - "bozo": 0, - "encoding": "utf-8", "entries": [ { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/world-us-canada-48338168", "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "links": [ - { - "href": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "1152", - "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg", - "width": "2048", - } - ], "published": "Mon, 20 May 2019 16:07:37 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), "summary": "Foreign Minister Mohammad Javad Zarif says the US " "president should try showing Iranians some respect.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Foreign Minister Mohammad Javad " - "Zarif says the US president should " - "try showing Iranians some " - "respect.", - }, "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif", - }, } - ], - "feed": { - "image": { - "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif", - "link": "https://www.bbc.co.uk/news/", - "title": "BBC News - Home", - "language": "en-gb", - "link": "https://www.bbc.co.uk/news/", - }, - "links": [ - { - "href": "https://www.bbc.co.uk/news/", - "rel": "alternate", - "type": "text/html", - } - ], - "title": "BBC News - Home", - }, - "href": "http://feeds.bbci.co.uk/news/rss.xml", - "status": 200, - "version": "rss20", + ] } multiple_mock = { - "bozo": 0, - "encoding": "utf-8", "entries": [ { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/world-us-canada-48338168", "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "links": [ - { - "href": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "1152", - "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg", - "width": "2048", - } - ], "published": "Mon, 20 May 2019 16:07:37 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), "summary": "Foreign Minister Mohammad Javad Zarif says the US " "president should try showing Iranians some respect.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Foreign Minister Mohammad Javad " - "Zarif says the US president should " - "try showing Iranians some " - "respect.", - }, "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif", - }, }, { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/technology-48334739", "link": "https://www.bbc.co.uk/news/technology-48334739", - "links": [ - { - "href": "https://www.bbc.co.uk/news/technology-48334739", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "432", - "url": "http://c.files.bbci.co.uk/4789/production/_107031381_mediaitem107028670.jpg", - "width": "768", - } - ], "published": "Mon, 20 May 2019 12:19:19 GMT", "published_parsed": struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0)), "summary": "Google's move to end business ties with Huawei will " "affect current devices and future purchases.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Google's move to end business ties " - "with Huawei will affect current " - "devices and future purchases.", - }, "title": "Huawei's Android loss: How it affects you", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Huawei's Android loss: How it " "affects you", - }, }, { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/uk-england-birmingham-48339080", "link": "https://www.bbc.co.uk/news/uk-england-birmingham-48339080", - "links": [ - { - "href": "https://www.bbc.co.uk/news/uk-england-birmingham-48339080", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "549", - "url": "http://c.files.bbci.co.uk/11D67/production/_107036037_lgbtheadjpg.jpg", - "width": "976", - } - ], "published": "Mon, 20 May 2019 16:32:38 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 32, 38, 0, 140, 0)), "summary": "Police are investigating the messages while an MP " 'calls for a protest exclusion zone "to protect ' 'children".', - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Police are investigating the " - "messages while an MP calls for a " - 'protest exclusion zone "to protect ' - 'children".', - }, "title": "Birmingham head teacher threatened over LGBT lessons", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Birmingham head teacher threatened " "over LGBT lessons", - }, }, - ], - "feed": { - "image": { - "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif", - "link": "https://www.bbc.co.uk/news/", - "title": "BBC News - Home", - "language": "en-gb", - "link": "https://www.bbc.co.uk/news/", - }, - "links": [ - { - "href": "https://www.bbc.co.uk/news/", - "rel": "alternate", - "type": "text/html", - } - ], - "title": "BBC News - Home", - }, - "href": "http://feeds.bbci.co.uk/news/rss.xml", - "status": 200, - "version": "rss20", + ] } mock_without_identifier = { - "bozo": 0, - "encoding": "utf-8", "entries": [ { "author": "A. Author", - "guidislink": False, - "href": "", "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "links": [ - { - "href": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "1152", - "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg", - "width": "2048", - } - ], "published": "Mon, 20 May 2019 16:07:37 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), "summary": "Foreign Minister Mohammad Javad Zarif says the US " "president should try showing Iranians some respect.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Foreign Minister Mohammad Javad " - "Zarif says the US president should " - "try showing Iranians some " - "respect.", - }, "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif", - }, }, { "author": "A. Author", - "guidislink": False, - "href": "", "id": None, "link": "https://www.bbc.co.uk/news/technology-48334739", - "links": [ - { - "href": "https://www.bbc.co.uk/news/technology-48334739", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "432", - "url": "http://c.files.bbci.co.uk/4789/production/_107031381_mediaitem107028670.jpg", - "width": "768", - } - ], "published": "Mon, 20 May 2019 12:19:19 GMT", "published_parsed": struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0)), "summary": "Google's move to end business ties with Huawei will " "affect current devices and future purchases.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Google's move to end business ties " - "with Huawei will affect current " - "devices and future purchases.", - }, "title": "Huawei's Android loss: How it affects you", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Huawei's Android loss: How it " "affects you", - }, }, - ], - "feed": { - "image": { - "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif", - "link": "https://www.bbc.co.uk/news/", - "title": "BBC News - Home", - "language": "en-gb", - "link": "https://www.bbc.co.uk/news/", - }, - "links": [ - { - "href": "https://www.bbc.co.uk/news/", - "rel": "alternate", - "type": "text/html", - } - ], - "title": "BBC News - Home", - }, - "href": "http://feeds.bbci.co.uk/news/rss.xml", - "status": 200, - "version": "rss20", + ] } mock_without_publish_date = { - "bozo": 0, - "encoding": "utf-8", "entries": [ { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/world-us-canada-48338168", "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "links": [ - { - "href": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "1152", - "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg", - "width": "2048", - } - ], "published": None, "published_parsed": None, "summary": "Foreign Minister Mohammad Javad Zarif says the US " "president should try showing Iranians some respect.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Foreign Minister Mohammad Javad " - "Zarif says the US president should " - "try showing Iranians some " - "respect.", - }, "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif", - }, }, { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/technology-48334739", "link": "https://www.bbc.co.uk/news/technology-48334739", - "links": [ - { - "href": "https://www.bbc.co.uk/news/technology-48334739", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "432", - "url": "http://c.files.bbci.co.uk/4789/production/_107031381_mediaitem107028670.jpg", - "width": "768", - } - ], "summary": "Google's move to end business ties with Huawei will " "affect current devices and future purchases.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Google's move to end business ties " - "with Huawei will affect current " - "devices and future purchases.", - }, "title": "Huawei's Android loss: How it affects you", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Huawei's Android loss: How it " "affects you", - }, }, - ], - "feed": { - "image": { - "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif", - "link": "https://www.bbc.co.uk/news/", - "title": "BBC News - Home", - "language": "en-gb", - "link": "https://www.bbc.co.uk/news/", - }, - "links": [ - { - "href": "https://www.bbc.co.uk/news/", - "rel": "alternate", - "type": "text/html", - } - ], - "title": "BBC News - Home", - }, - "href": "http://feeds.bbci.co.uk/news/rss.xml", - "status": 200, - "version": "rss20", + ] } mock_without_url = { - "bozo": 0, - "encoding": "utf-8", "entries": [ { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "media_thumbnail": [ - { - "height": "1152", - "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg", - "width": "2048", - } - ], "published": "Mon, 20 May 2019 16:07:37 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), "published": None, "published_parsed": None, "summary": "Foreign Minister Mohammad Javad Zarif says the US " "president should try showing Iranians some respect.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Foreign Minister Mohammad Javad " - "Zarif says the US president should " - "try showing Iranians some " - "respect.", - }, "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif", - }, }, { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/technology-48334739", "link": None, - "links": [], - "media_thumbnail": [ - { - "height": "432", - "url": "http://c.files.bbci.co.uk/4789/production/_107031381_mediaitem107028670.jpg", - "width": "768", - } - ], "published": "Mon, 20 May 2019 16:07:37 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), "summary": "Google's move to end business ties with Huawei will " "affect current devices and future purchases.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Google's move to end business ties " - "with Huawei will affect current " - "devices and future purchases.", - }, "title": "Huawei's Android loss: How it affects you", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Huawei's Android loss: How it " "affects you", - }, }, - ], - "feed": { - "image": { - "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif", - "link": "https://www.bbc.co.uk/news/", - "title": "BBC News - Home", - "language": "en-gb", - "link": "https://www.bbc.co.uk/news/", - }, - "links": [ - { - "href": "https://www.bbc.co.uk/news/", - "rel": "alternate", - "type": "text/html", - } - ], - "title": "BBC News - Home", - }, - "href": "http://feeds.bbci.co.uk/news/rss.xml", - "status": 200, - "version": "rss20", + ] } mock_without_body = { - "bozo": 0, - "encoding": "utf-8", "entries": [ { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/world-us-canada-48338168", "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "links": [ - { - "href": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "1152", - "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg", - "width": "2048", - } - ], "published": "Mon, 20 May 2019 16:07:37 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif", - }, }, { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/uk-england-birmingham-48339080", "link": "https://www.bbc.co.uk/news/uk-england-birmingham-48339080", - "links": [ - { - "href": "https://www.bbc.co.uk/news/uk-england-birmingham-48339080", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "549", - "url": "http://c.files.bbci.co.uk/11D67/production/_107036037_lgbtheadjpg.jpg", - "width": "976", - } - ], "published": "Mon, 20 May 2019 16:32:38 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 32, 38, 0, 140, 0)), "summary": None, - "summary_detail": {}, "title": "Birmingham head teacher threatened over LGBT lessons", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Birmingham head teacher threatened " "over LGBT lessons", - }, }, - ], - "feed": { - "image": { - "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif", - "link": "https://www.bbc.co.uk/news/", - "title": "BBC News - Home", - "language": "en-gb", - "link": "https://www.bbc.co.uk/news/", - }, - "links": [ - { - "href": "https://www.bbc.co.uk/news/", - "rel": "alternate", - "type": "text/html", - } - ], - "title": "BBC News - Home", - }, - "href": "http://feeds.bbci.co.uk/news/rss.xml", - "status": 200, - "version": "rss20", + ] } mock_without_author = { - "bozo": 0, - "encoding": "utf-8", "entries": [ { - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/world-us-canada-48338168", "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "links": [ - { - "href": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "1152", - "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg", - "width": "2048", - } - ], "published": "Mon, 20 May 2019 16:07:37 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), "summary": "Foreign Minister Mohammad Javad Zarif says the US " "president should try showing Iranians some respect.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Foreign Minister Mohammad Javad " - "Zarif says the US president should " - "try showing Iranians some " - "respect.", - }, "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif", - }, }, { "author": None, - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/technology-48334739", "link": "https://www.bbc.co.uk/news/technology-48334739", - "links": [ - { - "href": "https://www.bbc.co.uk/news/technology-48334739", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "432", - "url": "http://c.files.bbci.co.uk/4789/production/_107031381_mediaitem107028670.jpg", - "width": "768", - } - ], "published": "Mon, 20 May 2019 12:19:19 GMT", "published_parsed": struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0)), "summary": "Google's move to end business ties with Huawei will " "affect current devices and future purchases.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Google's move to end business ties " - "with Huawei will affect current " - "devices and future purchases.", - }, "title": "Huawei's Android loss: How it affects you", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Huawei's Android loss: How it " "affects you", - }, }, - ], - "feed": { - "image": { - "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif", - "link": "https://www.bbc.co.uk/news/", - "title": "BBC News - Home", - "language": "en-gb", - "link": "https://www.bbc.co.uk/news/", - }, - "links": [ - { - "href": "https://www.bbc.co.uk/news/", - "rel": "alternate", - "type": "text/html", - } - ], - "title": "BBC News - Home", - }, - "href": "http://feeds.bbci.co.uk/news/rss.xml", - "status": 200, - "version": "rss20", + ] } mock_without_entries = {"entries": []} mock_with_update_entries = { - "bozo": 0, - "encoding": "utf-8", "entries": [ { "author": "A. Author", - "guidislink": False, - "href": "", "id": "28f79ae4-8f9a-11e9-b143-00163ef6bee7", "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "links": [ - { - "href": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "1152", - "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg", - "width": "2048", - } - ], "published": "Mon, 20 May 2019 16:07:37 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), "summary": "Foreign Minister Mohammad Javad Zarif says the US " "president should try showing Iranians some respect.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Foreign Minister Mohammad Javad " - "Zarif says the US president should " - "try showing Iranians some " - "respect.", - }, "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif", - }, }, { "author": "A. Author", - "guidislink": False, - "href": "", "id": "a5479c66-8fae-11e9-8422-00163ef6bee7", "link": "https://www.bbc.co.uk/news/technology-48334739", - "links": [ - { - "href": "https://www.bbc.co.uk/news/technology-48334739", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "432", - "url": "http://c.files.bbci.co.uk/4789/production/_107031381_mediaitem107028670.jpg", - "width": "768", - } - ], "published": "Mon, 20 May 2019 12:19:19 GMT", "published_parsed": struct_time((2019, 5, 20, 12, 19, 19, 0, 140, 0)), "summary": "Google's move to end business ties with Huawei will " "affect current devices and future purchases.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Google's move to end business ties " - "with Huawei will affect current " - "devices and future purchases.", - }, "title": "Huawei's Android loss: How it affects you", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Huawei's Android loss: How it " "affects you", - }, }, { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/uk-england-birmingham-48339080", "link": "https://www.bbc.co.uk/news/uk-england-birmingham-48339080", - "links": [ - { - "href": "https://www.bbc.co.uk/news/uk-england-birmingham-48339080", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "549", - "url": "http://c.files.bbci.co.uk/11D67/production/_107036037_lgbtheadjpg.jpg", - "width": "976", - } - ], "published": "Mon, 20 May 2019 16:32:38 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 32, 38, 0, 140, 0)), "summary": "Police are investigating the messages while an MP " 'calls for a protest exclusion zone "to protect ' 'children".', - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Police are investigating the " - "messages while an MP calls for a " - 'protest exclusion zone "to protect ' - 'children".', - }, "title": "Birmingham head teacher threatened over LGBT lessons", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Birmingham head teacher threatened " "over LGBT lessons", - }, }, - ], - "feed": { - "image": { - "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif", - "link": "https://www.bbc.co.uk/news/", - "title": "BBC News - Home", - "language": "en-gb", - "link": "https://www.bbc.co.uk/news/", - }, - "links": [ - { - "href": "https://www.bbc.co.uk/news/", - "rel": "alternate", - "type": "text/html", - } - ], - "title": "BBC News - Home", - }, - "href": "http://feeds.bbci.co.uk/news/rss.xml", - "status": 200, - "version": "rss20", + ] } mock_with_html = { - "bozo": 0, - "encoding": "utf-8", "entries": [ { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/world-us-canada-48338168", "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "links": [ - { - "href": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "1152", - "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg", - "width": "2048", - } - ], "published": "Mon, 20 May 2019 16:07:37 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), "summary": html_summary, - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Foreign Minister Mohammad Javad " - "Zarif says the US president should " - "try showing Iranians some " - "respect.", - }, "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif", - }, } - ], - "feed": { - "image": { - "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif", - "link": "https://www.bbc.co.uk/news/", - "title": "BBC News - Home", - "language": "en-gb", - "link": "https://www.bbc.co.uk/news/", - }, - "links": [ - { - "href": "https://www.bbc.co.uk/news/", - "rel": "alternate", - "type": "text/html", - } - ], - "title": "BBC News - Home", - }, - "href": "http://feeds.bbci.co.uk/news/rss.xml", - "status": 200, - "version": "rss20", + ] } mock_with_long_author = { - "bozo": 0, - "encoding": "utf-8", "entries": [ { "author": "A. Author but this author name is way to long for an actual surname.", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/world-us-canada-48338168", "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "links": [ - { - "href": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "1152", - "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg", - "width": "2048", - } - ], "published": "Mon, 20 May 2019 16:07:37 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), "summary": "Foreign Minister Mohammad Javad Zarif says the US " "president should try showing Iranians some respect.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Foreign Minister Mohammad Javad " - "Zarif says the US president should " - "try showing Iranians some " - "respect.", - }, "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif", - }, } - ], - "feed": { - "image": { - "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif", - "link": "https://www.bbc.co.uk/news/", - "title": "BBC News - Home", - "language": "en-gb", - "link": "https://www.bbc.co.uk/news/", - }, - "links": [ - { - "href": "https://www.bbc.co.uk/news/", - "rel": "alternate", - "type": "text/html", - } - ], - "title": "BBC News - Home", - }, - "href": "http://feeds.bbci.co.uk/news/rss.xml", - "status": 200, - "version": "rss20", + ] } mock_with_long_title = { - "bozo": 0, - "encoding": "utf-8", "entries": [ { "author": "A. Author", - "guidislink": False, - "href": "", "id": "https://www.bbc.co.uk/news/world-us-canada-48338168", "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "links": [ - { - "href": "https://www.bbc.co.uk/news/world-us-canada-48338168", - "rel": "alternate", - "type": "text/html", - } - ], - "media_thumbnail": [ - { - "height": "1152", - "url": "http://c.files.bbci.co.uk/7605/production/_107031203_mediaitem107031202.jpg", - "width": "2048", - } - ], "published": "Mon, 20 May 2019 16:07:37 GMT", "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), "summary": "Foreign Minister Mohammad Javad Zarif says the US " "president should try showing Iranians some respect.", - "summary_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/html", - "value": "Foreign Minister Mohammad Javad " - "Zarif says the US president should " - "try showing Iranians some " - "respect.", - }, "title": "Trump's 'genocidal taunts' will not end Iran - Zarif" "Trump's 'genocidal taunts' will not end Iran - Zarif" "Trump's 'genocidal taunts' will not end Iran - Zarif" "Trump's 'genocidal taunts' will not end Iran - Zarif" "Trump's 'genocidal taunts' will not end Iran - Zarif" "Trump's 'genocidal taunts' will not end Iran - Zarif", - "title_detail": { - "base": "http://feeds.bbci.co.uk/news/rss.xml", - "language": None, - "type": "text/plain", - "value": "Trump's 'genocidal taunts' will not " "end Iran - Zarif", - }, } - ], - "feed": { - "image": { - "href": "https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif", - "link": "https://www.bbc.co.uk/news/", - "title": "BBC News - Home", - "language": "en-gb", - "link": "https://www.bbc.co.uk/news/", - }, - "links": [ - { - "href": "https://www.bbc.co.uk/news/", - "rel": "alternate", - "type": "text/html", - } - ], - "title": "BBC News - Home", - }, - "href": "http://feeds.bbci.co.uk/news/rss.xml", - "status": 200, - "version": "rss20", + ] +} + +mock_with_longer_content_detail = { + "entries": [ + { + "author": "A. Author", + "id": "https://www.bbc.co.uk/news/world-us-canada-48338168", + "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", + "published": "Mon, 20 May 2019 16:07:37 GMT", + "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), + "summary": "Foreign Minister Mohammad Javad Zarif says the US " + "president should try showing Iranians some respect.", + "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", + "content": [ + { + "base": "", + "language": None, + "type": "text/html", + "value": '
\n' + '

Enlarge / Ajit Pai, chairman ' + "of the Federal Communications Commission, " + "during an interview in New York, on " + "Tuesday, Nov. 5, 2019. (credit: Getty ' + "Images | Bloomberg)

" + "
", + } + ], + } + ] +} + +mock_with_shorter_content_detail = { + "entries": [ + { + "author": "A. Author", + "id": "https://www.bbc.co.uk/news/world-us-canada-48338168", + "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", + "published": "Mon, 20 May 2019 16:07:37 GMT", + "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), + "summary": "Foreign Minister Mohammad Javad Zarif says the US " + "president should try showing Iranians some respect.", + "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", + "content": [ + { + "base": "", + "language": None, + "type": "text/html", + "value": '
', + } + ], + } + ] +} + +mock_with_multiple_content_detail = { + "entries": [ + { + "author": "A. Author", + "id": "https://www.bbc.co.uk/news/world-us-canada-48338168", + "link": "https://www.bbc.co.uk/news/world-us-canada-48338168", + "published": "Mon, 20 May 2019 16:07:37 GMT", + "published_parsed": struct_time((2019, 5, 20, 16, 7, 37, 0, 140, 0)), + "summary": "Foreign Min", + "title": "Trump's 'genocidal taunts' will not end Iran - Zarif", + "content": [ + {"base": "", "language": None, "type": "text/html", "value": "Yippie"}, + {"base": "", "language": None, "type": "text/html", "value": "Ya"}, + {"base": "", "language": None, "type": "text/html", "value": "Yee"}, + ], + } + ] } diff --git a/src/newsreader/news/collection/tests/feed/builder/tests.py b/src/newsreader/news/collection/tests/feed/builder/tests.py index 2f09591..519a047 100644 --- a/src/newsreader/news/collection/tests/feed/builder/tests.py +++ b/src/newsreader/news/collection/tests/feed/builder/tests.py @@ -282,14 +282,16 @@ class FeedBuilderTestCase(TestCase): self.assertEquals(Post.objects.count(), 1) - self.assertTrue("" not in post.body) - self.assertTrue("" not in post.body) - self.assertTrue("
" not in post.body) - self.assertTrue("

" not in post.body) - self.assertTrue("" not in post.body) + self.assertTrue("
" in post.body) + self.assertTrue("

" in post.body) + self.assertTrue("" in post.body) self.assertTrue('' in post.body) self.assertTrue("

" in post.body) + self.assertTrue("" not in post.body) + self.assertTrue("