0.3.3

- Update static configuration
- Builder refactor
- Fix for images stretching too far
2020-10-17 13:19:49 +02:00 · 2020-10-17 13:19:49 +02:00 · 9e5e05c056
commit 9e5e05c056
parent b6921a20e7
15 changed files with 568 additions and 341 deletions
--- a/src/newsreader/fixtures/default-fixture.json
+++ b/src/newsreader/fixtures/default-fixture.json
@ -3427,7 +3427,6 @@
        "is_active": true,
        "date_joined": "2019-07-18T18:52:36.080Z",
        "email": "sonny@bakker.nl",
-        "task": 10,
        "reddit_refresh_token": null,
        "reddit_access_token": null,
        "groups": [],
--- a/src/newsreader/news/collection/exceptions/init.py
+++ b/src/newsreader/news/collection/exceptions/init.py
@ -0,0 +1,16 @@
+from newsreader.news.collection.exceptions.builder import (
+    BuilderDuplicateException,
+    BuilderException,
+    BuilderMissingDataException,
+    BuilderParseException,
+)
+from newsreader.news.collection.exceptions.stream import (
+    StreamConnectionException,
+    StreamDeniedException,
+    StreamException,
+    StreamForbiddenException,
+    StreamNotFoundException,
+    StreamParseException,
+    StreamTimeOutException,
+    StreamTooManyException,
+)
--- a/src/newsreader/news/collection/exceptions/builder.py
+++ b/src/newsreader/news/collection/exceptions/builder.py
@ -0,0 +1,21 @@
+class BuilderException(Exception):
+    message = "Builder exception"
+
+    def __init__(self, payload=None, message=None):
+        self.payload = payload
+        self.message = message if message else self.message
+
+    def __str__(self):
+        return self.message
+
+
+class BuilderMissingDataException(BuilderException):
+    message = "Payload contains missing data"
+
+
+class BuilderDuplicateException(BuilderException):
+    message = "Payload contains duplicate entry"
+
+
+class BuilderParseException(BuilderException):
+    message = "Failed to parse payload"
--- a/src/newsreader/news/collection/exceptions/stream.py
+++ b/src/newsreader/news/collection/exceptions/stream.py
--- a/src/newsreader/news/collection/feed.py
+++ b/src/newsreader/news/collection/feed.py
@ -39,6 +39,18 @@ class FeedBuilder(PostBuilder):
    rule__type = RuleTypeChoices.feed

    def build(self):
+        instances = []
+
+        with FeedDuplicateHandler(self.stream.rule) as duplicate_handler:
+            entries = self.payload.get("entries", [])
+
+            for entry in entries:
+                post = self.build_post(entry)
+                instances.append(post)
+
+            self.instances = duplicate_handler.check(instances)
+
+    def build_post(self, entry):
        field_mapping = {
            "id": "remote_identifier",
            "title": "title",
@ -48,41 +60,37 @@ class FeedBuilder(PostBuilder):
            "author": "author",
        }
        tz = pytz.timezone(self.stream.rule.timezone)
-        instances = []
+        data = {"rule_id": self.stream.rule.pk}

-        with FeedDuplicateHandler(self.stream.rule) as duplicate_handler:
-            entries = self.payload.get("entries", [])
+        for field, model_field in field_mapping.items():
+            if not field in entry:
+                continue

-            for entry in entries:
-                data = {"rule_id": self.stream.rule.pk}
+            value = truncate_text(Post, model_field, entry[field])

-                for field, model_field in field_mapping.items():
-                    if not field in entry:
-                        continue
+            if field == "published_parsed":
+                data[model_field] = build_publication_date(value, tz)
+            elif field == "summary":
+                data[model_field] = self.sanitize_fragment(value)
+            else:
+                data[model_field] = value

-                    value = truncate_text(Post, model_field, entry[field])
+        content_details = self.get_content_details(entry)

-                    if field == "published_parsed":
-                        data[model_field] = build_publication_date(value, tz)
-                    elif field == "summary":
-                        data[model_field] = self.sanitize_fragment(value)
-                    else:
-                        data[model_field] = value
+        # use content details key if it contains more information
+        if not "body" in data or len(data["body"]) < len(content_details):
+            data["body"] = content_details

-                if "content" in entry:
-                    content = self.get_content(entry["content"])
-                    body = data.get("body", "")
+        return Post(**data)

-                    if not body or len(body) < len(content):
-                        data["body"] = content
+    def get_content_details(self, entry):
+        content_items = entry.get("content")

-                instances.append(Post(**data))
+        if not content_items:
+            return ""

-            self.instances = duplicate_handler.check(instances)
-
-    def get_content(self, items):
-        content = "\n ".join([item.get("value") for item in items])
-        return self.sanitize_fragment(content)
+        content_details = "\n ".join([item.get("value") for item in content_items])
+        return self.sanitize_fragment(content_details)


 class FeedStream(PostStream):
--- a/src/newsreader/news/collection/reddit.py
+++ b/src/newsreader/news/collection/reddit.py
@ -28,6 +28,10 @@ from newsreader.news.collection.constants import (
    WHITELISTED_TAGS,
 )
 from newsreader.news.collection.exceptions import (
+    BuilderDuplicateException,
+    BuilderException,
+    BuilderMissingDataException,
+    BuilderParseException,
    StreamDeniedException,
    StreamException,
    StreamParseException,
@ -122,99 +126,136 @@ class RedditBuilder(PostBuilder):
        if not "data" in self.payload or not "children" in self.payload["data"]:
            return

-        posts = self.payload["data"]["children"]
-        rule = self.stream.rule
-
-        for post in posts:
-            if not "data" in post or post["kind"] != REDDIT_POST:
-                continue
-
-            data = post["data"]
-
-            remote_identifier = data["id"]
-            title = truncate_text(Post, "title", data["title"])
-            author = truncate_text(Post, "author", data["author"])
-            post_url_fragment = data["permalink"]
-            direct_url = data["url"]
-            is_text_post = data["is_self"]
-
-            if remote_identifier in results:
-                continue
-
-            if is_text_post:
-                uncleaned_body = data["selftext_html"]
-                unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
-                body = self.sanitize_fragment(unescaped_body) if unescaped_body else ""
-            elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
-                body = format_html(
-                    "<div><img alt='{title}' src='{url}' loading='lazy' /></div>",
-                    url=direct_url,
-                    title=title,
-                )
-            elif data["is_video"]:
-                video_info = data["secure_media"]["reddit_video"]
-
-                body = format_html(
-                    "<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
-                    url=video_info["fallback_url"],
-                )
-            elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
-                extension = next(
-                    extension.replace(".", "")
-                    for extension in REDDIT_VIDEO_EXTENSIONS
-                    if direct_url.endswith(extension)
-                )
-
-                if extension == "gifv":
-                    body = format_html(
-                        "<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
-                        url=direct_url.replace(extension, "mp4"),
-                    )
-                else:
-                    body = format_html(
-                        "<div><video controls muted><source src='{url}' type='video/{extension}' /></video></div>",
-                        url=direct_url,
-                        extension=extension,
-                    )
-            else:
-                body = format_html(
-                    "<div><a target='_blank' rel='noopener noreferrer' alt='{title}' href='{url}' class='link'>Direct url</a></div>",
-                    url=direct_url,
-                    title=title,
-                )
+        entries = self.payload["data"]["children"]

+        for entry in entries:
            try:
-                parsed_date = datetime.fromtimestamp(post["data"]["created_utc"])
-                created_date = pytz.utc.localize(parsed_date)
-            except (OverflowError, OSError):
-                logging.warning(
-                    f"Failed parsing timestamp from {REDDIT_URL}{post_url_fragment}"
-                )
-                created_date = timezone.now()
-
-            post_data = {
-                "remote_identifier": remote_identifier,
-                "title": title,
-                "body": body,
-                "author": author,
-                "url": f"{REDDIT_URL}{post_url_fragment}",
-                "publication_date": created_date,
-                "rule": rule,
-            }
-
-            if remote_identifier in self.existing_posts:
-                existing_post = self.existing_posts[remote_identifier]
-
-                for key, value in post_data.items():
-                    setattr(existing_post, key, value)
-
-                results[existing_post.remote_identifier] = existing_post
+                post = self.build_post(entry)
+            except BuilderException:
+                logger.exception("Failed building post")
                continue

-            results[remote_identifier] = Post(**post_data)
+            identifier = post.remote_identifier
+            results[identifier] = post

        self.instances = results.values()

+    def build_post(self, entry):
+        rule = self.stream.rule
+        entry_data = entry.get("data", {})
+        remote_identifier = entry_data.get("id", "")
+        kind = entry.get("kind")
+
+        if remote_identifier in self.existing_posts:
+            raise BuilderDuplicateException(payload=entry)
+        elif kind != REDDIT_POST:
+            raise BuilderParseException(
+                message=f"Payload is not an reddit post, its of kind {kind}",
+                payload=entry,
+            )
+        elif not entry_data:
+            raise BuilderMissingDataException(
+                message=f"Post {remote_identifier} did not contain any data",
+                payload=entry,
+            )
+
+        try:
+            title = entry_data["title"]
+            author = entry_data["author"]
+            post_url_fragment = entry_data["permalink"]
+            direct_url = entry_data["url"]
+            is_text = entry_data["is_self"]
+            is_video = entry_data["is_video"]
+        except KeyError as e:
+            raise BuilderMissingDataException(payload=entry) from e
+
+        title = truncate_text(Post, "title", title)
+        author = truncate_text(Post, "author", author)
+
+        if is_text:
+            body = self.get_text_post(entry_data)
+        elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
+            body = self.get_image_post(title, direct_url)
+        elif is_video:
+            body = self.get_native_video_post(entry_data)
+        elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
+            body = self.get_video_post(direct_url)
+        else:
+            body = self.get_url_post(title, direct_url)
+
+        try:
+            parsed_date = datetime.fromtimestamp(entry_data["created_utc"])
+            created_date = pytz.utc.localize(parsed_date)
+        except (OverflowError, OSError) as e:
+            raise BuilderParseException(payload=entry) from e
+        except KeyError as e:
+            raise BuilderMissingDataException(payload=entry) from e
+
+        post_entry = {
+            "remote_identifier": remote_identifier,
+            "title": title,
+            "body": body,
+            "author": author,
+            "url": f"{REDDIT_URL}{post_url_fragment}",
+            "publication_date": created_date,
+            "rule": rule,
+        }
+
+        return Post(**post_entry)
+
+    def get_text_post(self, entry):
+        try:
+            uncleaned_body = entry["selftext_html"]
+        except KeyError as e:
+            raise BuilderMissingDataException(payload=entry) from e
+
+        unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
+        return self.sanitize_fragment(unescaped_body) if unescaped_body else ""
+
+    def get_image_post(self, title, url):
+        return format_html(
+            "<div><img alt='{title}' src='{url}' loading='lazy' /></div>",
+            url=url,
+            title=title,
+        )
+
+    def get_native_video_post(self, entry):
+        try:
+            video_info = entry["secure_media"]["reddit_video"]
+        except KeyError as e:
+            raise BuilderMissingDataException(payload=entry) from e
+
+        return format_html(
+            "<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
+            url=video_info["fallback_url"],
+        )
+
+    def get_video_post(self, url):
+        extension = next(
+            extension.replace(".", "")
+            for extension in REDDIT_VIDEO_EXTENSIONS
+            if url.endswith(extension)
+        )
+
+        if extension == "gifv":
+            return format_html(
+                "<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
+                url=url.replace(extension, "mp4"),
+            )
+
+        return format_html(
+            "<div><video controls muted><source src='{url}' type='video/{extension}' /></video></div>",
+            url=url,
+            extension=extension,
+        )
+
+    def get_url_post(self, title, url):
+        return format_html(
+            "<div><a target='_blank' rel='noopener noreferrer' alt='{title}' href='{url}' class='link'>Direct url</a></div>",
+            url=url,
+            title=title,
+        )
+

 class RedditStream(PostStream):
    rule_type = RuleTypeChoices.subreddit
--- a/src/newsreader/news/collection/templates/news/collection/views/rules.html
+++ b/src/newsreader/news/collection/templates/news/collection/views/rules.html
@ -6,19 +6,21 @@
    <form class="form rules-form">
      {% csrf_token %}

+      <section class="section form__section form__section--actions">
+        <div class="form__actions">
+          <a class="link button button--confirm" href="{% url "news:collection:feed-create" %}">{% trans "Add a feed" %}</a>
+          <a class="link button button--confirm" href="{% url "news:collection:import" %}">{% trans "Import feeds" %}</a>
+          <a class="link button button--reddit" href="{% url "news:collection:subreddit-create" %}">{% trans "Add a subreddit" %}</a>
+          <a class="link button button--twitter" href="{% url "news:collection:twitter-timeline-create" %}">{% trans "Add a Twitter profile" %}</a>
+        </div>
+      </section>
+
      <section class="section form__section form__section--actions">
        <fieldset class="fieldset form__fieldset">
          <input type="submit" class="button button--primary" formaction="{% url "news:collection:rules-enable" %}" formmethod="post" value="{% trans "Enable" %}" />
          <input type="submit" class="button button--primary" formaction="{% url "news:collection:rules-disable" %}" formmethod="post" value="{% trans "Disable" %}" />
          <input type="submit" class="button button--error" formaction="{% url "news:collection:rules-delete" %}" formmethod="post" value="{% trans "Delete" %}"/>
        </fieldset>
-
-        <div class="form__actions">
-          <a class="link button button--confirm" href="{% url "news:collection:feed-create" %}">{% trans "Add a feed" %}</a>
-          <a class="link button button--reddit" href="{% url "news:collection:subreddit-create" %}">{% trans "Add a subreddit" %}</a>
-          <a class="link button button--twitter" href="{% url "news:collection:twitter-timeline-create" %}">{% trans "Add a Twitter profile" %}</a>
-          <a class="link button button--confirm" href="{% url "news:collection:import" %}">{% trans "Import rules" %}</a>
-        </div>
      </section>

      <section class="section form__section">
--- a/src/newsreader/news/collection/tests/feed/builder/tests.py
+++ b/src/newsreader/news/collection/tests/feed/builder/tests.py
@ -1,4 +1,4 @@
-from datetime import date, datetime, time
+from datetime import datetime
 from unittest.mock import Mock

 from django.test import TestCase
@ -21,277 +21,233 @@ class FeedBuilderTestCase(TestCase):
    def setUp(self):
        self.maxDiff = None

-    def test_basic_entry(self):
-        builder = FeedBuilder
-        rule = FeedFactory()
-        mock_stream = Mock(rule=rule)
-
-        with builder(simple_mock, mock_stream) as builder:
-            builder.build()
-            builder.save()
-
-        post = Post.objects.get()
-
-        publication_date = datetime.combine(
-            date(2019, 5, 20), time(hour=16, minute=7, second=37)
-        )
-        aware_date = pytz.utc.localize(publication_date)
-
-        self.assertEquals(post.publication_date, aware_date)
-        self.assertEquals(Post.objects.count(), 1)
-
-        self.assertEquals(
-            post.remote_identifier,
-            "https://www.bbc.co.uk/news/world-us-canada-48338168",
-        )
-
-        self.assertEquals(
-            post.url, "https://www.bbc.co.uk/news/world-us-canada-48338168"
-        )
-
-        self.assertEquals(
-            post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif"
-        )
-
    def test_multiple_entries(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(multiple_mock, mock_stream) as builder:
+        with FeedBuilder(multiple_mock, mock_stream) as builder:
            builder.build()
            builder.save()

        posts = Post.objects.order_by("-publication_date")
-        self.assertEquals(Post.objects.count(), 3)
+        self.assertEqual(Post.objects.count(), 3)

        post = posts[0]

-        publication_date = datetime.combine(
-            date(2019, 5, 20), time(hour=16, minute=32, second=38)
+        publication_date = datetime(
+            2019, 5, 20, hour=16, minute=32, second=38, tzinfo=pytz.utc
        )
-        aware_date = pytz.utc.localize(publication_date)

-        self.assertEquals(
+        self.assertEqual(
            post.publication_date.strftime("%Y-%m-%d %H:%M:%S"),
-            aware_date.strftime("%Y-%m-%d %H:%M:%S"),
+            publication_date.strftime("%Y-%m-%d %H:%M:%S"),
        )

-        self.assertEquals(
+        self.assertEqual(
            post.remote_identifier,
            "https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
        )

-        self.assertEquals(
+        self.assertEqual(
            post.url, "https://www.bbc.co.uk/news/uk-england-birmingham-48339080"
        )

-        self.assertEquals(
+        self.assertEqual(
            post.title, "Birmingham head teacher threatened over LGBT lessons"
        )

        post = posts[1]

-        publication_date = datetime.combine(
-            date(2019, 5, 20), time(hour=16, minute=7, second=37)
+        publication_date = datetime(
+            2019, 5, 20, hour=16, minute=7, second=37, tzinfo=pytz.utc
        )
-        aware_date = pytz.utc.localize(publication_date)

-        self.assertEquals(
+        self.assertEqual(
            post.publication_date.strftime("%Y-%m-%d %H:%M:%S"),
-            aware_date.strftime("%Y-%m-%d %H:%M:%S"),
+            publication_date.strftime("%Y-%m-%d %H:%M:%S"),
        )

-        self.assertEquals(
+        self.assertEqual(
            post.remote_identifier,
            "https://www.bbc.co.uk/news/world-us-canada-48338168",
        )

-        self.assertEquals(
+        self.assertEqual(
            post.url, "https://www.bbc.co.uk/news/world-us-canada-48338168"
        )

-        self.assertEquals(
+        self.assertEqual(
            post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif"
        )

    def test_entries_without_remote_identifier(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_without_identifier, mock_stream) as builder:
+        with FeedBuilder(mock_without_identifier, mock_stream) as builder:
            builder.build()
            builder.save()

        posts = Post.objects.order_by("-publication_date")
-        self.assertEquals(Post.objects.count(), 2)
+        self.assertEqual(Post.objects.count(), 2)

        post = posts[0]

-        publication_date = datetime.combine(
-            date(2019, 5, 20), time(hour=16, minute=7, second=37)
+        publication_date = datetime(
+            2019, 5, 20, hour=16, minute=7, second=37, tzinfo=pytz.utc
        )
-        aware_date = pytz.utc.localize(publication_date)

-        self.assertEquals(post.publication_date, aware_date)
-        self.assertEquals(post.remote_identifier, None)
-        self.assertEquals(
+        self.assertEqual(post.publication_date, publication_date)
+        self.assertEqual(post.remote_identifier, None)
+        self.assertEqual(
            post.url, "https://www.bbc.co.uk/news/world-us-canada-48338168"
        )
-        self.assertEquals(
+        self.assertEqual(
            post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif"
        )

        post = posts[1]

-        publication_date = datetime.combine(
-            date(2019, 5, 20), time(hour=12, minute=19, second=19)
+        publication_date = datetime(
+            2019, 5, 20, hour=12, minute=19, second=19, tzinfo=pytz.utc
        )
-        aware_date = pytz.utc.localize(publication_date)

-        self.assertEquals(post.publication_date, aware_date)
-        self.assertEquals(post.remote_identifier, None)
-        self.assertEquals(post.url, "https://www.bbc.co.uk/news/technology-48334739")
-        self.assertEquals(post.title, "Huawei's Android loss: How it affects you")
+        self.assertEqual(post.publication_date, publication_date)
+        self.assertEqual(post.remote_identifier, None)
+        self.assertEqual(post.url, "https://www.bbc.co.uk/news/technology-48334739")
+        self.assertEqual(post.title, "Huawei's Android loss: How it affects you")

    def test_entry_without_publication_date(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_without_publish_date, mock_stream) as builder:
+        with FeedBuilder(mock_without_publish_date, mock_stream) as builder:
            builder.build()
            builder.save()

        posts = Post.objects.order_by("-publication_date")
-        self.assertEquals(Post.objects.count(), 2)
+        self.assertEqual(Post.objects.count(), 2)

        post = posts[0]

-        self.assertEquals(
+        self.assertEqual(
            post.publication_date.strftime("%Y-%m-%d %H:%M"), "2019-10-30 12:30"
        )
-        self.assertEquals(post.created, timezone.now())
-        self.assertEquals(
+        self.assertEqual(post.created, timezone.now())
+        self.assertEqual(
            post.remote_identifier,
            "https://www.bbc.co.uk/news/world-us-canada-48338168",
        )

        post = posts[1]

-        self.assertEquals(
+        self.assertEqual(
            post.publication_date.strftime("%Y-%m-%d %H:%M"), "2019-10-30 12:30"
        )
-        self.assertEquals(post.created, timezone.now())
-        self.assertEquals(
+        self.assertEqual(post.created, timezone.now())
+        self.assertEqual(
            post.remote_identifier, "https://www.bbc.co.uk/news/technology-48334739"
        )

    def test_entry_without_url(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_without_url, mock_stream) as builder:
+        with FeedBuilder(mock_without_url, mock_stream) as builder:
            builder.build()
            builder.save()

        posts = Post.objects.order_by("-publication_date")
-        self.assertEquals(Post.objects.count(), 2)
+        self.assertEqual(Post.objects.count(), 2)

        post = posts[0]

-        self.assertEquals(post.created, timezone.now())
-        self.assertEquals(
+        self.assertEqual(post.created, timezone.now())
+        self.assertEqual(
            post.remote_identifier,
            "https://www.bbc.co.uk/news/world-us-canada-48338168",
        )

        post = posts[1]

-        self.assertEquals(post.created, timezone.now())
-        self.assertEquals(
+        self.assertEqual(post.created, timezone.now())
+        self.assertEqual(
            post.remote_identifier, "https://www.bbc.co.uk/news/technology-48334739"
        )

    def test_entry_without_body(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_without_body, mock_stream) as builder:
+        with FeedBuilder(mock_without_body, mock_stream) as builder:
            builder.build()
            builder.save()

        posts = Post.objects.order_by("-publication_date")

-        self.assertEquals(Post.objects.count(), 2)
+        self.assertEqual(Post.objects.count(), 2)

        post = posts[0]

-        self.assertEquals(
+        self.assertEqual(
            post.created.strftime("%Y-%m-%d %H:%M:%S"), "2019-10-30 12:30:00"
        )
-        self.assertEquals(
+        self.assertEqual(
            post.remote_identifier,
            "https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
        )
-        self.assertEquals(post.body, "")
+        self.assertEqual(post.body, "")

        post = posts[1]

-        self.assertEquals(
+        self.assertEqual(
            post.created.strftime("%Y-%m-%d %H:%M:%S"), "2019-10-30 12:30:00"
        )
-        self.assertEquals(
+        self.assertEqual(
            post.remote_identifier,
            "https://www.bbc.co.uk/news/world-us-canada-48338168",
        )
-        self.assertEquals(post.body, "")
+        self.assertEqual(post.body, "")

    def test_entry_without_author(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_without_author, mock_stream) as builder:
+        with FeedBuilder(mock_without_author, mock_stream) as builder:
            builder.build()
            builder.save()

        posts = Post.objects.order_by("-publication_date")
-        self.assertEquals(Post.objects.count(), 2)
+        self.assertEqual(Post.objects.count(), 2)

        post = posts[0]

-        self.assertEquals(post.created, timezone.now())
-        self.assertEquals(
+        self.assertEqual(post.created, timezone.now())
+        self.assertEqual(
            post.remote_identifier,
            "https://www.bbc.co.uk/news/world-us-canada-48338168",
        )
-        self.assertEquals(post.author, None)
+        self.assertEqual(post.author, None)

        post = posts[1]

-        self.assertEquals(post.created, timezone.now())
-        self.assertEquals(
+        self.assertEqual(post.created, timezone.now())
+        self.assertEqual(
            post.remote_identifier, "https://www.bbc.co.uk/news/technology-48334739"
        )
-        self.assertEquals(post.author, None)
+        self.assertEqual(post.author, None)

    def test_empty_entries(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_without_entries, mock_stream) as builder:
+        with FeedBuilder(mock_without_entries, mock_stream) as builder:
            builder.build()
            builder.save()

-        self.assertEquals(Post.objects.count(), 0)
+        self.assertEqual(Post.objects.count(), 0)

    def test_update_entries(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

@ -303,36 +259,35 @@ class FeedBuilderTestCase(TestCase):
            remote_identifier="a5479c66-8fae-11e9-8422-00163ef6bee7", rule=rule
        )

-        with builder(mock_with_update_entries, mock_stream) as builder:
+        with FeedBuilder(mock_with_update_entries, mock_stream) as builder:
            builder.build()
            builder.save()

-        self.assertEquals(Post.objects.count(), 3)
+        self.assertEqual(Post.objects.count(), 3)

        existing_first_post.refresh_from_db()
        existing_second_post.refresh_from_db()

-        self.assertEquals(
+        self.assertEqual(
            existing_first_post.title,
            "Trump's 'genocidal taunts' will not end Iran - Zarif",
        )

-        self.assertEquals(
+        self.assertEqual(
            existing_second_post.title, "Huawei's Android loss: How it affects you"
        )

    def test_html_sanitizing(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_with_html, mock_stream) as builder:
+        with FeedBuilder(mock_with_html, mock_stream) as builder:
            builder.build()
            builder.save()

        post = Post.objects.get()

-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)

        self.assertTrue("<article>" in post.body)
        self.assertTrue("<h1>" in post.body)
@ -345,64 +300,60 @@ class FeedBuilderTestCase(TestCase):
        self.assertTrue("<iframe>" not in post.body)

    def test_long_author_text_is_truncated(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_with_long_author, mock_stream) as builder:
+        with FeedBuilder(mock_with_long_author, mock_stream) as builder:
            builder.build()
            builder.save()

        post = Post.objects.get()

-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)

-        self.assertEquals(len(post.author), 40)
+        self.assertEqual(len(post.author), 40)

    def test_long_title_text_is_truncated(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_with_long_title, mock_stream) as builder:
+        with FeedBuilder(mock_with_long_title, mock_stream) as builder:
            builder.build()
            builder.save()

        post = Post.objects.get()

-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)

-        self.assertEquals(len(post.title), 200)
+        self.assertEqual(len(post.title), 200)
        self.assertTrue(post.title.endswith("…"))

    def test_long_title_exotic_title(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_with_long_exotic_title, mock_stream) as builder:
+        with FeedBuilder(mock_with_long_exotic_title, mock_stream) as builder:
            builder.build()
            builder.save()

        post = Post.objects.get()

-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)

-        self.assertEquals(len(post.title), 200)
+        self.assertEqual(len(post.title), 200)
        self.assertTrue(post.title.endswith("…"))

    def test_content_detail_is_prioritized_if_longer(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_with_longer_content_detail, mock_stream) as builder:
+        with FeedBuilder(mock_with_longer_content_detail, mock_stream) as builder:
            builder.build()
            builder.save()

        post = Post.objects.get()

-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)

        self.assertFalse(
            "Foreign Minister Mohammad Javad Zarif says the US" in post.body
@ -410,33 +361,31 @@ class FeedBuilderTestCase(TestCase):
        self.assertTrue("Federal Communications Commission" in post.body)

    def test_content_detail_is_not_prioritized_if_shorter(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_with_shorter_content_detail, mock_stream) as builder:
+        with FeedBuilder(mock_with_shorter_content_detail, mock_stream) as builder:
            builder.build()
            builder.save()

        post = Post.objects.get()

-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)

        self.assertTrue(
            "Foreign Minister Mohammad Javad Zarif says the US" in post.body
        )

    def test_content_detail_is_concatinated(self):
-        builder = FeedBuilder
        rule = FeedFactory()
        mock_stream = Mock(rule=rule)

-        with builder(mock_with_multiple_content_detail, mock_stream) as builder:
+        with FeedBuilder(mock_with_multiple_content_detail, mock_stream) as builder:
            builder.build()
            builder.save()

        post = Post.objects.get()

-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)

-        self.assertEquals(post.body, "Yippie\n Ya\n Yee")
+        self.assertEqual(post.body, "Yippie\n Ya\n Yee")
--- a/src/newsreader/news/collection/tests/reddit/builder/tests.py
+++ b/src/newsreader/news/collection/tests/reddit/builder/tests.py
@ -86,52 +86,6 @@ class RedditBuilderTestCase(TestCase):

        self.assertEquals(Post.objects.count(), 0)

-    def test_update_posts(self):
-        subreddit = SubredditFactory()
-        existing_post = RedditPostFactory(
-            remote_identifier="hm0qct",
-            author="Old author",
-            title="Old title",
-            body="Old body",
-            url="https://bbc.com/",
-            rule=subreddit,
-        )
-
-        builder = RedditBuilder
-        mock_stream = Mock(rule=subreddit)
-
-        with builder(simple_mock, mock_stream) as builder:
-            builder.build()
-            builder.save()
-
-        posts = {post.remote_identifier: post for post in Post.objects.all()}
-
-        self.assertCountEqual(
-            ("hm0qct", "hna75r", "hngs71", "hngsj8", "hnd7cy"), posts.keys()
-        )
-
-        existing_post.refresh_from_db()
-
-        self.assertEquals(existing_post.remote_identifier, "hm0qct")
-        self.assertEquals(existing_post.author, "AutoModerator")
-        self.assertEquals(
-            existing_post.title,
-            "Linux Experiences/Rants or Education/Certifications thread - July 06, 2020",
-        )
-        self.assertIn(
-            "This megathread is also to hear opinions from anyone just starting out "
-            "with Linux or those that have used Linux (GNU or otherwise) for a long time.",
-            existing_post.body,
-        )
-        self.assertEquals(
-            existing_post.publication_date,
-            pytz.utc.localize(datetime(2020, 7, 6, 6, 11, 22)),
-        )
-        self.assertEquals(
-            existing_post.url,
-            "https://www.reddit.com/r/linux/comments/hm0qct/linux_experiencesrants_or_educationcertifications/",
-        )
-
    def test_html_sanitizing(self):
        builder = RedditBuilder

@ -225,17 +179,6 @@ class RedditBuilderTestCase(TestCase):
            ("hm0qct", "hna75r", "hngs71", "hngsj8", "hnd7cy"), posts.keys()
        )

-        duplicate_post.refresh_from_db()
-
-        self.assertEquals(
-            duplicate_post.publication_date,
-            pytz.utc.localize(datetime(2020, 7, 6, 6, 11, 22)),
-        )
-        self.assertEquals(
-            duplicate_post.title,
-            "Linux Experiences/Rants or Education/Certifications thread - July 06, 2020",
-        )
-
    def test_image_post(self):
        builder = RedditBuilder

--- a/src/newsreader/news/collection/tests/twitter/builder/mocks.py
+++ b/src/newsreader/news/collection/tests/twitter/builder/mocks.py
@ -2185,3 +2185,202 @@ unsanitized_mock = [
        },
    }
 ]
+
+broken_mock = [
+    {
+        "contributors": None,
+        "coordinates": None,
+        "created_at": "Fri Aug 07 00:17:05 +0000 2020",
+        "display_text_range": [11, 59],
+        "entities": {
+            "hashtags": [],
+            "symbols": [],
+            "urls": [
+                {
+                    "display_url": "youtu.be/rDy7tPf6CT8",
+                    "expanded_url": "https://youtu.be/rDy7tPf6CT8",
+                    "indices": [36, 59],
+                    "url": "https://t.co/trAcIxBMlX",
+                }
+            ],
+            "user_mentions": [
+                {
+                    "id": 975844884606275587,
+                    "id_str": "975844884606275587",
+                    "indices": [0, 10],
+                    "name": "ArieNeo",
+                    "screen_name": "ArieNeoSC",
+                }
+            ],
+        },
+        "favorite_count": 19,
+        "favorited": False,
+        # NOTE: the "full_text" key is deliberately missing from this tweet
+        "geo": None,
+        "id": 1291528756373286914,
+        "id_str": "1291528756373286914",
+        "in_reply_to_screen_name": "ArieNeoSC",
+        "in_reply_to_status_id": 1291507356313038850,
+        "in_reply_to_status_id_str": "1291507356313038850",
+        "in_reply_to_user_id": 975844884606275587,
+        "in_reply_to_user_id_str": "975844884606275587",
+        "is_quote_status": False,
+        "lang": "en",
+        "place": None,
+        "possibly_sensitive": False,
+        "retweet_count": 5,
+        "retweeted": False,
+        "source": '<a href="https://mobile.twitter.com" rel="nofollow">Twitter Web App</a>',
+        "truncated": False,
+        "user": {
+            "contributors_enabled": False,
+            "created_at": "Wed Sep 05 00:58:11 +0000 2012",
+            "default_profile": False,
+            "default_profile_image": False,
+            "description": "The official Twitter profile for #StarCitizen and Roberts Space Industries.",
+            "entities": {
+                "description": {"urls": []},
+                "url": {
+                    "urls": [
+                        {
+                            "display_url": "robertsspaceindustries.com",
+                            "expanded_url": "http://www.robertsspaceindustries.com",
+                            "indices": [0, 23],
+                            "url": "https://t.co/iqO6apof3y",
+                        }
+                    ]
+                },
+            },
+            "favourites_count": 4588,
+            "follow_request_sent": None,
+            "followers_count": 106169,
+            "following": None,
+            "friends_count": 201,
+            "geo_enabled": False,
+            "has_extended_profile": False,
+            "id": 803542770,
+            "id_str": "803542770",
+            "is_translation_enabled": False,
+            "is_translator": False,
+            "lang": None,
+            "listed_count": 890,
+            "location": "Roberts Space Industries",
+            "name": "Star Citizen",
+            "notifications": None,
+            "profile_background_color": "131516",
+            "profile_background_image_url": "http://abs.twimg.com/images/themes/theme14/bg.gif",
+            "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme14/bg.gif",
+            "profile_background_tile": False,
+            "profile_banner_url": "https://pbs.twimg.com/profile_banners/803542770/1596651186",
+            "profile_image_url": "http://pbs.twimg.com/profile_images/963109950103814144/ysnj_Asy_normal.jpg",
+            "profile_image_url_https": "https://pbs.twimg.com/profile_images/963109950103814144/ysnj_Asy_normal.jpg",
+            "profile_link_color": "0A5485",
+            "profile_sidebar_border_color": "FFFFFF",
+            "profile_sidebar_fill_color": "EFEFEF",
+            "profile_text_color": "333333",
+            "profile_use_background_image": True,
+            "protected": False,
+            "screen_name": "RobertsSpaceInd",
+            "statuses_count": 6210,
+            "time_zone": None,
+            "translator_type": "none",
+            "url": "https://t.co/iqO6apof3y",
+            "utc_offset": None,
+            "verified": True,
+        },
+    },
+    {
+        "contributors": None,
+        "coordinates": None,
+        "created_at": "Wed Jul 29 19:01:47 +0000 2020",
+        "display_text_range": [10, 98],
+        "entities": {
+            "hashtags": [],
+            "symbols": [],
+            "urls": [],
+            "user_mentions": [
+                {
+                    "id": 435221600,
+                    "id_str": "435221600",
+                    "indices": [0, 9],
+                    "name": "Christopher Blough",
+                    "screen_name": "RelicCcb",
+                }
+            ],
+        },
+        "favorite_count": 1,
+        "favorited": False,
+        "full_text": "@RelicCcb Hi Christoper, we have checked the status of your investigation and it is still ongoing.",
+        "geo": None,
+        "id": 1288550304095416320,
+        "id_str": "1288550304095416320",
+        "in_reply_to_screen_name": "RelicCcb",
+        "in_reply_to_status_id": 1288475147951898625,
+        "in_reply_to_status_id_str": "1288475147951898625",
+        "in_reply_to_user_id": 435221600,
+        "in_reply_to_user_id_str": "435221600",
+        "is_quote_status": False,
+        "lang": "en",
+        "place": None,
+        "retweet_count": 0,
+        "retweeted": False,
+        "source": '<a href="https://mobile.twitter.com" rel="nofollow">Twitter Web App</a>',
+        "truncated": False,
+        "user": {
+            "contributors_enabled": False,
+            "created_at": "Wed Sep 05 00:58:11 +0000 2012",
+            "default_profile": False,
+            "default_profile_image": False,
+            "description": "The official Twitter profile for #StarCitizen and Roberts Space Industries.",
+            "entities": {
+                "description": {"urls": []},
+                "url": {
+                    "urls": [
+                        {
+                            "display_url": "robertsspaceindustries.com",
+                            "expanded_url": "http://www.robertsspaceindustries.com",
+                            "indices": [0, 23],
+                            "url": "https://t.co/iqO6apof3y",
+                        }
+                    ]
+                },
+            },
+            "favourites_count": 4588,
+            "follow_request_sent": None,
+            "followers_count": 106169,
+            "following": None,
+            "friends_count": 201,
+            "geo_enabled": False,
+            "has_extended_profile": False,
+            "id": 803542770,
+            "id_str": "803542770",
+            "is_translation_enabled": False,
+            "is_translator": False,
+            "lang": None,
+            "listed_count": 890,
+            "location": "Roberts Space Industries",
+            "name": "Star Citizen",
+            "notifications": None,
+            "profile_background_color": "131516",
+            "profile_background_image_url": "http://abs.twimg.com/images/themes/theme14/bg.gif",
+            "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme14/bg.gif",
+            "profile_background_tile": False,
+            "profile_banner_url": "https://pbs.twimg.com/profile_banners/803542770/1596651186",
+            "profile_image_url": "http://pbs.twimg.com/profile_images/963109950103814144/ysnj_Asy_normal.jpg",
+            "profile_image_url_https": "https://pbs.twimg.com/profile_images/963109950103814144/ysnj_Asy_normal.jpg",
+            "profile_link_color": "0A5485",
+            "profile_sidebar_border_color": "FFFFFF",
+            "profile_sidebar_fill_color": "EFEFEF",
+            "profile_text_color": "333333",
+            "profile_use_background_image": True,
+            "protected": False,
+            "screen_name": "RobertsSpaceInd",
+            "statuses_count": 6210,
+            "time_zone": None,
+            "translator_type": "none",
+            "url": "https://t.co/iqO6apof3y",
+            "utc_offset": None,
+            "verified": True,
+        },
+    },
+]
--- a/src/newsreader/news/collection/tests/twitter/builder/tests.py
+++ b/src/newsreader/news/collection/tests/twitter/builder/tests.py
@ -10,6 +10,7 @@ from ftfy import fix_text

 from newsreader.news.collection.tests.factories import TwitterTimelineFactory
 from newsreader.news.collection.tests.twitter.builder.mocks import (
+    broken_mock,
    gif_mock,
    image_mock,
    quoted_mock,
@ -410,3 +411,21 @@ class TwitterBuilderTestCase(TestCase):
            builder.save()

        self.assertEquals(Post.objects.count(), 2)
+
+    def test_bad_post(self):
+        """
+        Tests that the builder skips posts that are missing required data
+        """
+        builder = TwitterBuilder
+
+        profile = TwitterTimelineFactory(screen_name="RobertsSpaceInd")
+        mock_stream = Mock(rule=profile)
+
+        with builder(broken_mock, mock_stream) as builder:
+            builder.build()
+            builder.save()
+
+        self.assertCountEqual(
+            Post.objects.values_list("remote_identifier", flat=True),
+            ["1288550304095416320"],
+        )
--- a/src/newsreader/news/collection/twitter.py
+++ b/src/newsreader/news/collection/twitter.py
@ -22,6 +22,10 @@ from newsreader.news.collection.base import (
 )
 from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices
 from newsreader.news.collection.exceptions import (
+    BuilderDuplicateException,
+    BuilderException,
+    BuilderMissingDataException,
+    BuilderParseException,
    StreamDeniedException,
    StreamException,
    StreamNotFoundException,
@ -48,43 +52,69 @@ class TwitterBuilder(PostBuilder):

    def build(self):
        results = {}
-        rule = self.stream.rule

        for post in self.payload:
-            remote_identifier = post["id_str"]
-
-            if remote_identifier in self.existing_posts:
+            try:
+                post = self.build_post(post)
+            except BuilderException:
+                logger.exception("Failed building post")
                continue

-            url = f"{TWITTER_URL}/{rule.screen_name}/status/{remote_identifier}"
-            body = urlize(post["full_text"], nofollow=True)
+            identifier = post.remote_identifier
+            results[identifier] = post
+
+        self.instances = results.values()
+
+    def build_post(self, data):
+        remote_identifier = data.get("id_str", "")
+        rule = self.stream.rule
+
+        if remote_identifier in self.existing_posts:
+            raise BuilderDuplicateException(payload=data)
+
+        try:
+            body = urlize(data["full_text"], nofollow=True)
            title = truncate_text(
-                Post, "title", self.sanitize_fragment(post["full_text"])
+                Post, "title", self.sanitize_fragment(data["full_text"])
            )

            publication_date = pytz.utc.localize(
-                datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
+                datetime.strptime(data["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
            )
+        except KeyError as e:
+            raise BuilderMissingDataException(payload=data) from e
+        except (OverflowError, OSError) as e:
+            raise BuilderParseException(payload=data) from e

-            if "extended_entities" in post:
-                try:
-                    media_entities = self.get_media_entities(post)
-                    body += media_entities
-                except KeyError:
-                    logger.exception(f"Failed parsing media_entities for {url}")
+        url = f"{TWITTER_URL}/{rule.screen_name}/status/{remote_identifier}"

-            if "retweeted_status" in post:
-                original_post = post["retweeted_status"]
+        if "extended_entities" in data:
+            try:
+                media_entities = self.get_media_entities(data)
+                body += media_entities
+            except KeyError as e:
+                raise BuilderMissingDataException(
+                    message="Failed parsing data for media entities", payload=data
+                ) from e
+
+        try:
+            if "retweeted_status" in data:
+                original_post = data["retweeted_status"]
                original_tweet = urlize(original_post["full_text"], nofollow=True)
                body = f"{body} <br><div>Original tweet: {original_tweet}</div>"
-            if "quoted_status" in post:
-                original_post = post["quoted_status"]
+            if "quoted_status" in data:
+                original_post = data["quoted_status"]
                original_tweet = urlize(original_post["full_text"], nofollow=True)
                body = f"{body} <br><div>Quoted tweet: {original_tweet}</div>"
+        except KeyError as e:
+            raise BuilderMissingDataException(
+                message="Failed parsing data for original tweet", payload=data
+            ) from e

-            body = self.sanitize_fragment(body)
+        body = self.sanitize_fragment(body)

-            data = {
+        return Post(
+            **{
                "remote_identifier": remote_identifier,
                "title": fix_text(title),
                "body": fix_text(body),
@ -93,13 +123,10 @@ class TwitterBuilder(PostBuilder):
                "url": url,
                "rule": rule,
            }
+        )

-            results[remote_identifier] = Post(**data)
-
-        self.instances = results.values()
-
-    def get_media_entities(self, post):
-        media_entities = post["extended_entities"]["media"]
+    def get_media_entities(self, data):
+        media_entities = data["extended_entities"]["media"]
        formatted_entities = ""

        for media_entity in media_entities:
--- a/src/newsreader/scss/components/post/_post.scss
+++ b/src/newsreader/scss/components/post/_post.scss
@ -70,6 +70,8 @@

    & img, video {
      padding: 10px 0;
+
+      width: max-content;
      max-width: 100%;
    }

--- a/src/newsreader/templates/base.html
+++ b/src/newsreader/templates/base.html
@ -16,7 +16,7 @@
        {% if request.user.is_authenticated %}
          <li class="nav__item"><a href="{% url 'index' %}">Home</a></li>
          <li class="nav__item"><a href="{% url 'news:core:categories' %}">Categories</a></li>
-          <li class="nav__item"><a href="{% url 'news:collection:rules' %}">Feeds</a></li>
+          <li class="nav__item"><a href="{% url 'news:collection:rules' %}">Sources</a></li>
          <li class="nav__item"><a href="{% url 'accounts:settings:home' %}">Settings</a></li>
          {% if request.user.is_superuser %}
            <li class="nav__item"><a href="{% url 'admin:index' %}">Admin</a></li>