From 9e5e05c056dff8ac39bea39d2e38ea795a44ce32 Mon Sep 17 00:00:00 2001
From: Sonny Bakker <sonny871@hotmail.com>
Date: Sat, 17 Oct 2020 13:19:49 +0200
Subject: [PATCH] 0.3.3

- Update static configuration
- Builder refactor
- Fix for images stretching too far
---
 src/newsreader/fixtures/default-fixture.json  |   1 -
 .../news/collection/exceptions/__init__.py    |  16 ++
 .../news/collection/exceptions/builder.py     |  21 ++
 .../{exceptions.py => exceptions/stream.py}   |   0
 src/newsreader/news/collection/feed.py        |  60 ++---
 src/newsreader/news/collection/reddit.py      | 213 ++++++++++-------
 .../news/collection/views/rules.html          |  16 +-
 .../collection/tests/feed/builder/tests.py    | 221 +++++++-----------
 .../collection/tests/reddit/builder/tests.py  |  57 -----
 .../collection/tests/twitter/builder/mocks.py | 199 ++++++++++++++++
 .../collection/tests/twitter/builder/tests.py |  19 ++
 src/newsreader/news/collection/twitter.py     |  79 ++++---
 .../scss/components/post/_post.scss           |   2 +
 src/newsreader/templates/base.html            |   2 +-
 webpack.common.babel.js                       |   3 +-
 15 files changed, 568 insertions(+), 341 deletions(-)
 create mode 100644 src/newsreader/news/collection/exceptions/__init__.py
 create mode 100644 src/newsreader/news/collection/exceptions/builder.py
 rename src/newsreader/news/collection/{exceptions.py => exceptions/stream.py} (100%)

diff --git a/src/newsreader/fixtures/default-fixture.json b/src/newsreader/fixtures/default-fixture.json
index 1794742..880db4c 100644
--- a/src/newsreader/fixtures/default-fixture.json
+++ b/src/newsreader/fixtures/default-fixture.json
@@ -3427,7 +3427,6 @@
         "is_active": true,
         "date_joined": "2019-07-18T18:52:36.080Z",
         "email": "sonny@bakker.nl",
-        "task": 10,
         "reddit_refresh_token": null,
         "reddit_access_token": null,
         "groups": [],
diff --git a/src/newsreader/news/collection/exceptions/__init__.py b/src/newsreader/news/collection/exceptions/__init__.py
new file mode 100644
index 0000000..35ce72d
--- /dev/null
+++ b/src/newsreader/news/collection/exceptions/__init__.py
@@ -0,0 +1,16 @@
+from newsreader.news.collection.exceptions.builder import (
+    BuilderDuplicateException,
+    BuilderException,
+    BuilderMissingDataException,
+    BuilderParseException,
+)
+from newsreader.news.collection.exceptions.stream import (
+    StreamConnectionException,
+    StreamDeniedException,
+    StreamException,
+    StreamForbiddenException,
+    StreamNotFoundException,
+    StreamParseException,
+    StreamTimeOutException,
+    StreamTooManyException,
+)
diff --git a/src/newsreader/news/collection/exceptions/builder.py b/src/newsreader/news/collection/exceptions/builder.py
new file mode 100644
index 0000000..6fb2d60
--- /dev/null
+++ b/src/newsreader/news/collection/exceptions/builder.py
@@ -0,0 +1,36 @@
+class BuilderException(Exception):
+    """Base class for errors raised while building posts from a payload.
+
+    :param payload: the raw entry that could not be built, kept for debugging
+    :param message: optional override for the class-level default message
+    """
+
+    message = "Builder exception"
+
+    def __init__(self, payload=None, message=None):
+        self.payload = payload
+        self.message = message if message else self.message
+
+        # Populate Exception.args so repr() and tools inspecting args show the message
+        super().__init__(self.message)
+
+    def __str__(self):
+        return self.message
+
+
+class BuilderMissingDataException(BuilderException):
+    """Raised when a payload lacks data required to build a post."""
+
+    message = "Payload contains missing data"
+
+
+class BuilderDuplicateException(BuilderException):
+    """Raised when a payload refers to an entry that already exists."""
+
+    message = "Payload contains duplicate entry"
+
+
+class BuilderParseException(BuilderException):
+    """Raised when a payload value cannot be parsed."""
+
+    message = "Failed to parse payload"
diff --git a/src/newsreader/news/collection/exceptions.py b/src/newsreader/news/collection/exceptions/stream.py
similarity index 100%
rename from src/newsreader/news/collection/exceptions.py
rename to src/newsreader/news/collection/exceptions/stream.py
diff --git a/src/newsreader/news/collection/feed.py b/src/newsreader/news/collection/feed.py
index ae6cd42..379f18e 100644
--- a/src/newsreader/news/collection/feed.py
+++ b/src/newsreader/news/collection/feed.py
@@ -39,6 +39,18 @@ class FeedBuilder(PostBuilder):
     rule__type = RuleTypeChoices.feed
 
     def build(self):
+        instances = []
+
+        with FeedDuplicateHandler(self.stream.rule) as duplicate_handler:
+            entries = self.payload.get("entries", [])
+
+            for entry in entries:
+                post = self.build_post(entry)
+                instances.append(post)
+
+            self.instances = duplicate_handler.check(instances)
+
+    def build_post(self, entry):
         field_mapping = {
             "id": "remote_identifier",
             "title": "title",
@@ -48,41 +60,46 @@ class FeedBuilder(PostBuilder):
             "author": "author",
         }
         tz = pytz.timezone(self.stream.rule.timezone)
-        instances = []
+        data = {"rule_id": self.stream.rule.pk}
 
-        with FeedDuplicateHandler(self.stream.rule) as duplicate_handler:
-            entries = self.payload.get("entries", [])
+        for field, model_field in field_mapping.items():
+            # only map the fields that are present in the entry
+            if field not in entry:
+                continue
 
-            for entry in entries:
-                data = {"rule_id": self.stream.rule.pk}
+            value = truncate_text(Post, model_field, entry[field])
 
-                for field, model_field in field_mapping.items():
-                    if not field in entry:
-                        continue
+            if field == "published_parsed":
+                data[model_field] = build_publication_date(value, tz)
+            elif field == "summary":
+                data[model_field] = self.sanitize_fragment(value)
+            else:
+                data[model_field] = value
 
-                    value = truncate_text(Post, model_field, entry[field])
+        content_details = self.get_content_details(entry)
+        body = data.get("body")
 
-                    if field == "published_parsed":
-                        data[model_field] = build_publication_date(value, tz)
-                    elif field == "summary":
-                        data[model_field] = self.sanitize_fragment(value)
-                    else:
-                        data[model_field] = value
+        # use content details key if it contains more information, a missing
+        # or empty body is always replaced
+        if not body or len(body) < len(content_details):
+            data["body"] = content_details
 
-                if "content" in entry:
-                    content = self.get_content(entry["content"])
-                    body = data.get("body", "")
+        return Post(**data)
 
-                    if not body or len(body) < len(content):
-                        data["body"] = content
+    def get_content_details(self, entry):
+        """Return the sanitized, joined values of the entry's content items.
+
+        An empty string is returned when the entry has no content items.
+        """
+        content_items = entry.get("content")
 
-                instances.append(Post(**data))
+        if not content_items:
+            return ""
 
-            self.instances = duplicate_handler.check(instances)
-
-    def get_content(self, items):
-        content = "\n ".join([item.get("value") for item in items])
-        return self.sanitize_fragment(content)
+        # skip items without a value, str.join raises TypeError on None
+        values = [item["value"] for item in content_items if item.get("value")]
+        content_details = "\n ".join(values)
+        return self.sanitize_fragment(content_details)
 
 
 class FeedStream(PostStream):
diff --git a/src/newsreader/news/collection/reddit.py b/src/newsreader/news/collection/reddit.py
index daeb85f..1fbffe2 100644
--- a/src/newsreader/news/collection/reddit.py
+++ b/src/newsreader/news/collection/reddit.py
@@ -28,6 +28,10 @@ from newsreader.news.collection.constants import (
     WHITELISTED_TAGS,
 )
 from newsreader.news.collection.exceptions import (
+    BuilderDuplicateException,
+    BuilderException,
+    BuilderMissingDataException,
+    BuilderParseException,
     StreamDeniedException,
     StreamException,
     StreamParseException,
@@ -122,99 +126,155 @@ class RedditBuilder(PostBuilder):
         if not "data" in self.payload or not "children" in self.payload["data"]:
             return
 
-        posts = self.payload["data"]["children"]
-        rule = self.stream.rule
-
-        for post in posts:
-            if not "data" in post or post["kind"] != REDDIT_POST:
-                continue
-
-            data = post["data"]
-
-            remote_identifier = data["id"]
-            title = truncate_text(Post, "title", data["title"])
-            author = truncate_text(Post, "author", data["author"])
-            post_url_fragment = data["permalink"]
-            direct_url = data["url"]
-            is_text_post = data["is_self"]
-
-            if remote_identifier in results:
-                continue
-
-            if is_text_post:
-                uncleaned_body = data["selftext_html"]
-                unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
-                body = self.sanitize_fragment(unescaped_body) if unescaped_body else ""
-            elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
-                body = format_html(
-                    "<div><img alt='{title}' src='{url}' loading='lazy' /></div>",
-                    url=direct_url,
-                    title=title,
-                )
-            elif data["is_video"]:
-                video_info = data["secure_media"]["reddit_video"]
-
-                body = format_html(
-                    "<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
-                    url=video_info["fallback_url"],
-                )
-            elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
-                extension = next(
-                    extension.replace(".", "")
-                    for extension in REDDIT_VIDEO_EXTENSIONS
-                    if direct_url.endswith(extension)
-                )
-
-                if extension == "gifv":
-                    body = format_html(
-                        "<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
-                        url=direct_url.replace(extension, "mp4"),
-                    )
-                else:
-                    body = format_html(
-                        "<div><video controls muted><source src='{url}' type='video/{extension}' /></video></div>",
-                        url=direct_url,
-                        extension=extension,
-                    )
-            else:
-                body = format_html(
-                    "<div><a target='_blank' rel='noopener noreferrer' alt='{title}' href='{url}' class='link'>Direct url</a></div>",
-                    url=direct_url,
-                    title=title,
-                )
+        entries = self.payload["data"]["children"]
 
+        for entry in entries:
             try:
-                parsed_date = datetime.fromtimestamp(post["data"]["created_utc"])
-                created_date = pytz.utc.localize(parsed_date)
-            except (OverflowError, OSError):
-                logging.warning(
-                    f"Failed parsing timestamp from {REDDIT_URL}{post_url_fragment}"
-                )
-                created_date = timezone.now()
-
-            post_data = {
-                "remote_identifier": remote_identifier,
-                "title": title,
-                "body": body,
-                "author": author,
-                "url": f"{REDDIT_URL}{post_url_fragment}",
-                "publication_date": created_date,
-                "rule": rule,
-            }
-
-            if remote_identifier in self.existing_posts:
-                existing_post = self.existing_posts[remote_identifier]
-
-                for key, value in post_data.items():
-                    setattr(existing_post, key, value)
-
-                results[existing_post.remote_identifier] = existing_post
+                post = self.build_post(entry)
+            except BuilderException:
+                logger.exception("Failed building post")
                 continue
 
-            results[remote_identifier] = Post(**post_data)
+            identifier = post.remote_identifier
+            results[identifier] = post
 
         self.instances = results.values()
 
+    def build_post(self, entry):
+        """Build an unsaved ``Post`` from a single reddit listing entry.
+
+        :param entry: listing child, a dict with ``kind`` and ``data`` keys
+        :raises BuilderDuplicateException: the post id is in ``existing_posts``
+        :raises BuilderParseException: the entry is not a reddit post or its
+            timestamp cannot be converted
+        :raises BuilderMissingDataException: a required key is absent
+        """
+        rule = self.stream.rule
+        entry_data = entry.get("data", {})
+        remote_identifier = entry_data.get("id", "")
+        kind = entry.get("kind")
+
+        if remote_identifier in self.existing_posts:
+            raise BuilderDuplicateException(payload=entry)
+        elif kind != REDDIT_POST:
+            raise BuilderParseException(
+                message=f"Payload is not an reddit post, its of kind {kind}",
+                payload=entry,
+            )
+        elif not entry_data:
+            raise BuilderMissingDataException(
+                message=f"Post {remote_identifier} did not contain any data",
+                payload=entry,
+            )
+
+        try:
+            title = entry_data["title"]
+            author = entry_data["author"]
+            post_url_fragment = entry_data["permalink"]
+            direct_url = entry_data["url"]
+            is_text = entry_data["is_self"]
+            is_video = entry_data["is_video"]
+        except KeyError as e:
+            raise BuilderMissingDataException(payload=entry) from e
+
+        title = truncate_text(Post, "title", title)
+        author = truncate_text(Post, "author", author)
+
+        if is_text:
+            body = self.get_text_post(entry_data)
+        elif direct_url.endswith(REDDIT_IMAGE_EXTENSIONS):
+            body = self.get_image_post(title, direct_url)
+        elif is_video:
+            body = self.get_native_video_post(entry_data)
+        elif direct_url.endswith(REDDIT_VIDEO_EXTENSIONS):
+            body = self.get_video_post(direct_url)
+        else:
+            body = self.get_url_post(title, direct_url)
+
+        try:
+            # created_utc is an epoch timestamp in UTC; passing tz avoids
+            # fromtimestamp() interpreting it in the process' local timezone
+            created_date = datetime.fromtimestamp(
+                entry_data["created_utc"], tz=pytz.utc
+            )
+        except (OverflowError, OSError, ValueError, TypeError) as e:
+            raise BuilderParseException(payload=entry) from e
+        except KeyError as e:
+            raise BuilderMissingDataException(payload=entry) from e
+
+        post_entry = {
+            "remote_identifier": remote_identifier,
+            "title": title,
+            "body": body,
+            "author": author,
+            "url": f"{REDDIT_URL}{post_url_fragment}",
+            "publication_date": created_date,
+            "rule": rule,
+        }
+
+        return Post(**post_entry)
+
+    def get_text_post(self, entry):
+        """Return the unescaped, sanitized HTML body of a self (text) post."""
+        try:
+            uncleaned_body = entry["selftext_html"]
+        except KeyError as e:
+            raise BuilderMissingDataException(payload=entry) from e
+
+        unescaped_body = unescape(uncleaned_body) if uncleaned_body else ""
+        return self.sanitize_fragment(unescaped_body) if unescaped_body else ""
+
+    def get_image_post(self, title, url):
+        """Return markup embedding the image at ``url``."""
+        return format_html(
+            "<div><img alt='{title}' src='{url}' loading='lazy' /></div>",
+            url=url,
+            title=title,
+        )
+
+    def get_native_video_post(self, entry):
+        """Return markup for a reddit hosted video, using its fallback url."""
+        try:
+            video_url = entry["secure_media"]["reddit_video"]["fallback_url"]
+        except (KeyError, TypeError) as e:
+            # TypeError: secure_media (or reddit_video) is present but null
+            raise BuilderMissingDataException(payload=entry) from e
+
+        return format_html(
+            "<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
+            url=video_url,
+        )
+
+    def get_video_post(self, url):
+        """Return markup for a direct video link, mapping gifv links to mp4."""
+        extension = next(
+            extension.replace(".", "")
+            for extension in REDDIT_VIDEO_EXTENSIONS
+            if url.endswith(extension)
+        )
+
+        if extension == "gifv":
+            # only swap the trailing extension; str.replace would also rewrite
+            # any other "gifv" occurrence in the url (host or path)
+            return format_html(
+                "<div><video controls muted><source src='{url}' type='video/mp4' /></video></div>",
+                url=f"{url[: -len(extension)]}mp4",
+            )
+
+        return format_html(
+            "<div><video controls muted><source src='{url}' type='video/{extension}' /></video></div>",
+            url=url,
+            extension=extension,
+        )
+
+    def get_url_post(self, title, url):
+        """Return markup linking to ``url`` for posts without embeddable media."""
+        return format_html(
+            "<div><a target='_blank' rel='noopener noreferrer' alt='{title}' href='{url}' class='link'>Direct url</a></div>",
+            url=url,
+            title=title,
+        )
+
 
 class RedditStream(PostStream):
     rule_type = RuleTypeChoices.subreddit
diff --git a/src/newsreader/news/collection/templates/news/collection/views/rules.html b/src/newsreader/news/collection/templates/news/collection/views/rules.html
index 678716e..ef05352 100644
--- a/src/newsreader/news/collection/templates/news/collection/views/rules.html
+++ b/src/newsreader/news/collection/templates/news/collection/views/rules.html
@@ -6,19 +6,21 @@
     <form class="form rules-form">
       {% csrf_token %}
 
+      <section class="section form__section form__section--actions">
+        <div class="form__actions">
+          <a class="link button button--confirm" href="{% url "news:collection:feed-create" %}">{% trans "Add a feed" %}</a>
+          <a class="link button button--confirm" href="{% url "news:collection:import" %}">{% trans "Import feeds" %}</a>
+          <a class="link button button--reddit" href="{% url "news:collection:subreddit-create" %}">{% trans "Add a subreddit" %}</a>
+          <a class="link button button--twitter" href="{% url "news:collection:twitter-timeline-create" %}">{% trans "Add a Twitter profile" %}</a>
+        </div>
+      </section>
+
       <section class="section form__section form__section--actions">
         <fieldset class="fieldset form__fieldset">
           <input type="submit" class="button button--primary" formaction="{% url "news:collection:rules-enable" %}" formmethod="post" value="{% trans "Enable" %}" />
           <input type="submit" class="button button--primary" formaction="{% url "news:collection:rules-disable" %}" formmethod="post" value="{% trans "Disable" %}" />
           <input type="submit" class="button button--error" formaction="{% url "news:collection:rules-delete" %}" formmethod="post" value="{% trans "Delete" %}"/>
         </fieldset>
-
-        <div class="form__actions">
-          <a class="link button button--confirm" href="{% url "news:collection:feed-create" %}">{% trans "Add a feed" %}</a>
-          <a class="link button button--reddit" href="{% url "news:collection:subreddit-create" %}">{% trans "Add a subreddit" %}</a>
-          <a class="link button button--twitter" href="{% url "news:collection:twitter-timeline-create" %}">{% trans "Add a Twitter profile" %}</a>
-          <a class="link button button--confirm" href="{% url "news:collection:import" %}">{% trans "Import rules" %}</a>
-        </div>
       </section>
 
       <section class="section form__section">
diff --git a/src/newsreader/news/collection/tests/feed/builder/tests.py b/src/newsreader/news/collection/tests/feed/builder/tests.py
index 571a7cd..7f4edf0 100644
--- a/src/newsreader/news/collection/tests/feed/builder/tests.py
+++ b/src/newsreader/news/collection/tests/feed/builder/tests.py
@@ -1,4 +1,4 @@
-from datetime import date, datetime, time
+from datetime import datetime
 from unittest.mock import Mock
 
 from django.test import TestCase
@@ -21,277 +21,233 @@ class FeedBuilderTestCase(TestCase):
     def setUp(self):
         self.maxDiff = None
 
-    def test_basic_entry(self):
-        builder = FeedBuilder
-        rule = FeedFactory()
-        mock_stream = Mock(rule=rule)
-
-        with builder(simple_mock, mock_stream) as builder:
-            builder.build()
-            builder.save()
-
-        post = Post.objects.get()
-
-        publication_date = datetime.combine(
-            date(2019, 5, 20), time(hour=16, minute=7, second=37)
-        )
-        aware_date = pytz.utc.localize(publication_date)
-
-        self.assertEquals(post.publication_date, aware_date)
-        self.assertEquals(Post.objects.count(), 1)
-
-        self.assertEquals(
-            post.remote_identifier,
-            "https://www.bbc.co.uk/news/world-us-canada-48338168",
-        )
-
-        self.assertEquals(
-            post.url, "https://www.bbc.co.uk/news/world-us-canada-48338168"
-        )
-
-        self.assertEquals(
-            post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif"
-        )
-
     def test_multiple_entries(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(multiple_mock, mock_stream) as builder:
+        with FeedBuilder(multiple_mock, mock_stream) as builder:
             builder.build()
             builder.save()
 
         posts = Post.objects.order_by("-publication_date")
-        self.assertEquals(Post.objects.count(), 3)
+        self.assertEqual(Post.objects.count(), 3)
 
         post = posts[0]
 
-        publication_date = datetime.combine(
-            date(2019, 5, 20), time(hour=16, minute=32, second=38)
+        publication_date = datetime(
+            2019, 5, 20, hour=16, minute=32, second=38, tzinfo=pytz.utc
         )
-        aware_date = pytz.utc.localize(publication_date)
 
-        self.assertEquals(
+        self.assertEqual(
             post.publication_date.strftime("%Y-%m-%d %H:%M:%S"),
-            aware_date.strftime("%Y-%m-%d %H:%M:%S"),
+            publication_date.strftime("%Y-%m-%d %H:%M:%S"),
         )
 
-        self.assertEquals(
+        self.assertEqual(
             post.remote_identifier,
             "https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
         )
 
-        self.assertEquals(
+        self.assertEqual(
             post.url, "https://www.bbc.co.uk/news/uk-england-birmingham-48339080"
         )
 
-        self.assertEquals(
+        self.assertEqual(
             post.title, "Birmingham head teacher threatened over LGBT lessons"
         )
 
         post = posts[1]
 
-        publication_date = datetime.combine(
-            date(2019, 5, 20), time(hour=16, minute=7, second=37)
+        publication_date = datetime(
+            2019, 5, 20, hour=16, minute=7, second=37, tzinfo=pytz.utc
         )
-        aware_date = pytz.utc.localize(publication_date)
 
-        self.assertEquals(
+        self.assertEqual(
             post.publication_date.strftime("%Y-%m-%d %H:%M:%S"),
-            aware_date.strftime("%Y-%m-%d %H:%M:%S"),
+            publication_date.strftime("%Y-%m-%d %H:%M:%S"),
         )
 
-        self.assertEquals(
+        self.assertEqual(
             post.remote_identifier,
             "https://www.bbc.co.uk/news/world-us-canada-48338168",
         )
 
-        self.assertEquals(
+        self.assertEqual(
             post.url, "https://www.bbc.co.uk/news/world-us-canada-48338168"
         )
 
-        self.assertEquals(
+        self.assertEqual(
             post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif"
         )
 
     def test_entries_without_remote_identifier(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_without_identifier, mock_stream) as builder:
+        with FeedBuilder(mock_without_identifier, mock_stream) as builder:
             builder.build()
             builder.save()
 
         posts = Post.objects.order_by("-publication_date")
-        self.assertEquals(Post.objects.count(), 2)
+        self.assertEqual(Post.objects.count(), 2)
 
         post = posts[0]
 
-        publication_date = datetime.combine(
-            date(2019, 5, 20), time(hour=16, minute=7, second=37)
+        publication_date = datetime(
+            2019, 5, 20, hour=16, minute=7, second=37, tzinfo=pytz.utc
         )
-        aware_date = pytz.utc.localize(publication_date)
 
-        self.assertEquals(post.publication_date, aware_date)
-        self.assertEquals(post.remote_identifier, None)
-        self.assertEquals(
+        self.assertEqual(post.publication_date, publication_date)
+        self.assertEqual(post.remote_identifier, None)
+        self.assertEqual(
             post.url, "https://www.bbc.co.uk/news/world-us-canada-48338168"
         )
-        self.assertEquals(
+        self.assertEqual(
             post.title, "Trump's 'genocidal taunts' will not end Iran - Zarif"
         )
 
         post = posts[1]
 
-        publication_date = datetime.combine(
-            date(2019, 5, 20), time(hour=12, minute=19, second=19)
+        publication_date = datetime(
+            2019, 5, 20, hour=12, minute=19, second=19, tzinfo=pytz.utc
         )
-        aware_date = pytz.utc.localize(publication_date)
 
-        self.assertEquals(post.publication_date, aware_date)
-        self.assertEquals(post.remote_identifier, None)
-        self.assertEquals(post.url, "https://www.bbc.co.uk/news/technology-48334739")
-        self.assertEquals(post.title, "Huawei's Android loss: How it affects you")
+        self.assertEqual(post.publication_date, publication_date)
+        self.assertEqual(post.remote_identifier, None)
+        self.assertEqual(post.url, "https://www.bbc.co.uk/news/technology-48334739")
+        self.assertEqual(post.title, "Huawei's Android loss: How it affects you")
 
     def test_entry_without_publication_date(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_without_publish_date, mock_stream) as builder:
+        with FeedBuilder(mock_without_publish_date, mock_stream) as builder:
             builder.build()
             builder.save()
 
         posts = Post.objects.order_by("-publication_date")
-        self.assertEquals(Post.objects.count(), 2)
+        self.assertEqual(Post.objects.count(), 2)
 
         post = posts[0]
 
-        self.assertEquals(
+        self.assertEqual(
             post.publication_date.strftime("%Y-%m-%d %H:%M"), "2019-10-30 12:30"
         )
-        self.assertEquals(post.created, timezone.now())
-        self.assertEquals(
+        self.assertEqual(post.created, timezone.now())
+        self.assertEqual(
             post.remote_identifier,
             "https://www.bbc.co.uk/news/world-us-canada-48338168",
         )
 
         post = posts[1]
 
-        self.assertEquals(
+        self.assertEqual(
             post.publication_date.strftime("%Y-%m-%d %H:%M"), "2019-10-30 12:30"
         )
-        self.assertEquals(post.created, timezone.now())
-        self.assertEquals(
+        self.assertEqual(post.created, timezone.now())
+        self.assertEqual(
             post.remote_identifier, "https://www.bbc.co.uk/news/technology-48334739"
         )
 
     def test_entry_without_url(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_without_url, mock_stream) as builder:
+        with FeedBuilder(mock_without_url, mock_stream) as builder:
             builder.build()
             builder.save()
 
         posts = Post.objects.order_by("-publication_date")
-        self.assertEquals(Post.objects.count(), 2)
+        self.assertEqual(Post.objects.count(), 2)
 
         post = posts[0]
 
-        self.assertEquals(post.created, timezone.now())
-        self.assertEquals(
+        self.assertEqual(post.created, timezone.now())
+        self.assertEqual(
             post.remote_identifier,
             "https://www.bbc.co.uk/news/world-us-canada-48338168",
         )
 
         post = posts[1]
 
-        self.assertEquals(post.created, timezone.now())
-        self.assertEquals(
+        self.assertEqual(post.created, timezone.now())
+        self.assertEqual(
             post.remote_identifier, "https://www.bbc.co.uk/news/technology-48334739"
         )
 
     def test_entry_without_body(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_without_body, mock_stream) as builder:
+        with FeedBuilder(mock_without_body, mock_stream) as builder:
             builder.build()
             builder.save()
 
         posts = Post.objects.order_by("-publication_date")
 
-        self.assertEquals(Post.objects.count(), 2)
+        self.assertEqual(Post.objects.count(), 2)
 
         post = posts[0]
 
-        self.assertEquals(
+        self.assertEqual(
             post.created.strftime("%Y-%m-%d %H:%M:%S"), "2019-10-30 12:30:00"
         )
-        self.assertEquals(
+        self.assertEqual(
             post.remote_identifier,
             "https://www.bbc.co.uk/news/uk-england-birmingham-48339080",
         )
-        self.assertEquals(post.body, "")
+        self.assertEqual(post.body, "")
 
         post = posts[1]
 
-        self.assertEquals(
+        self.assertEqual(
             post.created.strftime("%Y-%m-%d %H:%M:%S"), "2019-10-30 12:30:00"
         )
-        self.assertEquals(
+        self.assertEqual(
             post.remote_identifier,
             "https://www.bbc.co.uk/news/world-us-canada-48338168",
         )
-        self.assertEquals(post.body, "")
+        self.assertEqual(post.body, "")
 
     def test_entry_without_author(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_without_author, mock_stream) as builder:
+        with FeedBuilder(mock_without_author, mock_stream) as builder:
             builder.build()
             builder.save()
 
         posts = Post.objects.order_by("-publication_date")
-        self.assertEquals(Post.objects.count(), 2)
+        self.assertEqual(Post.objects.count(), 2)
 
         post = posts[0]
 
-        self.assertEquals(post.created, timezone.now())
-        self.assertEquals(
+        self.assertEqual(post.created, timezone.now())
+        self.assertEqual(
             post.remote_identifier,
             "https://www.bbc.co.uk/news/world-us-canada-48338168",
         )
-        self.assertEquals(post.author, None)
+        self.assertEqual(post.author, None)
 
         post = posts[1]
 
-        self.assertEquals(post.created, timezone.now())
-        self.assertEquals(
+        self.assertEqual(post.created, timezone.now())
+        self.assertEqual(
             post.remote_identifier, "https://www.bbc.co.uk/news/technology-48334739"
         )
-        self.assertEquals(post.author, None)
+        self.assertEqual(post.author, None)
 
     def test_empty_entries(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_without_entries, mock_stream) as builder:
+        with FeedBuilder(mock_without_entries, mock_stream) as builder:
             builder.build()
             builder.save()
 
-        self.assertEquals(Post.objects.count(), 0)
+        self.assertEqual(Post.objects.count(), 0)
 
     def test_update_entries(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
@@ -303,36 +259,35 @@ class FeedBuilderTestCase(TestCase):
             remote_identifier="a5479c66-8fae-11e9-8422-00163ef6bee7", rule=rule
         )
 
-        with builder(mock_with_update_entries, mock_stream) as builder:
+        with FeedBuilder(mock_with_update_entries, mock_stream) as builder:
             builder.build()
             builder.save()
 
-        self.assertEquals(Post.objects.count(), 3)
+        self.assertEqual(Post.objects.count(), 3)
 
         existing_first_post.refresh_from_db()
         existing_second_post.refresh_from_db()
 
-        self.assertEquals(
+        self.assertEqual(
             existing_first_post.title,
             "Trump's 'genocidal taunts' will not end Iran - Zarif",
         )
 
-        self.assertEquals(
+        self.assertEqual(
             existing_second_post.title, "Huawei's Android loss: How it affects you"
         )
 
     def test_html_sanitizing(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_with_html, mock_stream) as builder:
+        with FeedBuilder(mock_with_html, mock_stream) as builder:
             builder.build()
             builder.save()
 
         post = Post.objects.get()
 
-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)
 
         self.assertTrue("<article>" in post.body)
         self.assertTrue("<h1>" in post.body)
@@ -345,64 +300,60 @@ class FeedBuilderTestCase(TestCase):
         self.assertTrue("<iframe>" not in post.body)
 
     def test_long_author_text_is_truncated(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_with_long_author, mock_stream) as builder:
+        with FeedBuilder(mock_with_long_author, mock_stream) as builder:
             builder.build()
             builder.save()
 
         post = Post.objects.get()
 
-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)
 
-        self.assertEquals(len(post.author), 40)
+        self.assertEqual(len(post.author), 40)
 
     def test_long_title_text_is_truncated(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_with_long_title, mock_stream) as builder:
+        with FeedBuilder(mock_with_long_title, mock_stream) as builder:
             builder.build()
             builder.save()
 
         post = Post.objects.get()
 
-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)
 
-        self.assertEquals(len(post.title), 200)
+        self.assertEqual(len(post.title), 200)
         self.assertTrue(post.title.endswith("…"))
 
     def test_long_title_exotic_title(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_with_long_exotic_title, mock_stream) as builder:
+        with FeedBuilder(mock_with_long_exotic_title, mock_stream) as builder:
             builder.build()
             builder.save()
 
         post = Post.objects.get()
 
-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)
 
-        self.assertEquals(len(post.title), 200)
+        self.assertEqual(len(post.title), 200)
         self.assertTrue(post.title.endswith("…"))
 
     def test_content_detail_is_prioritized_if_longer(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_with_longer_content_detail, mock_stream) as builder:
+        with FeedBuilder(mock_with_longer_content_detail, mock_stream) as builder:
             builder.build()
             builder.save()
 
         post = Post.objects.get()
 
-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)
 
         self.assertFalse(
             "Foreign Minister Mohammad Javad Zarif says the US" in post.body
@@ -410,33 +361,31 @@ class FeedBuilderTestCase(TestCase):
         self.assertTrue("Federal Communications Commission" in post.body)
 
     def test_content_detail_is_not_prioritized_if_shorter(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_with_shorter_content_detail, mock_stream) as builder:
+        with FeedBuilder(mock_with_shorter_content_detail, mock_stream) as builder:
             builder.build()
             builder.save()
 
         post = Post.objects.get()
 
-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)
 
         self.assertTrue(
             "Foreign Minister Mohammad Javad Zarif says the US" in post.body
         )
 
     def test_content_detail_is_concatinated(self):
-        builder = FeedBuilder
         rule = FeedFactory()
         mock_stream = Mock(rule=rule)
 
-        with builder(mock_with_multiple_content_detail, mock_stream) as builder:
+        with FeedBuilder(mock_with_multiple_content_detail, mock_stream) as builder:
             builder.build()
             builder.save()
 
         post = Post.objects.get()
 
-        self.assertEquals(Post.objects.count(), 1)
+        self.assertEqual(Post.objects.count(), 1)
 
-        self.assertEquals(post.body, "Yippie\n Ya\n Yee")
+        self.assertEqual(post.body, "Yippie\n Ya\n Yee")
diff --git a/src/newsreader/news/collection/tests/reddit/builder/tests.py b/src/newsreader/news/collection/tests/reddit/builder/tests.py
index 11cf549..e326ed6 100644
--- a/src/newsreader/news/collection/tests/reddit/builder/tests.py
+++ b/src/newsreader/news/collection/tests/reddit/builder/tests.py
@@ -86,52 +86,6 @@ class RedditBuilderTestCase(TestCase):
 
         self.assertEquals(Post.objects.count(), 0)
 
-    def test_update_posts(self):
-        subreddit = SubredditFactory()
-        existing_post = RedditPostFactory(
-            remote_identifier="hm0qct",
-            author="Old author",
-            title="Old title",
-            body="Old body",
-            url="https://bbc.com/",
-            rule=subreddit,
-        )
-
-        builder = RedditBuilder
-        mock_stream = Mock(rule=subreddit)
-
-        with builder(simple_mock, mock_stream) as builder:
-            builder.build()
-            builder.save()
-
-        posts = {post.remote_identifier: post for post in Post.objects.all()}
-
-        self.assertCountEqual(
-            ("hm0qct", "hna75r", "hngs71", "hngsj8", "hnd7cy"), posts.keys()
-        )
-
-        existing_post.refresh_from_db()
-
-        self.assertEquals(existing_post.remote_identifier, "hm0qct")
-        self.assertEquals(existing_post.author, "AutoModerator")
-        self.assertEquals(
-            existing_post.title,
-            "Linux Experiences/Rants or Education/Certifications thread - July 06, 2020",
-        )
-        self.assertIn(
-            "This megathread is also to hear opinions from anyone just starting out "
-            "with Linux or those that have used Linux (GNU or otherwise) for a long time.",
-            existing_post.body,
-        )
-        self.assertEquals(
-            existing_post.publication_date,
-            pytz.utc.localize(datetime(2020, 7, 6, 6, 11, 22)),
-        )
-        self.assertEquals(
-            existing_post.url,
-            "https://www.reddit.com/r/linux/comments/hm0qct/linux_experiencesrants_or_educationcertifications/",
-        )
-
     def test_html_sanitizing(self):
         builder = RedditBuilder
 
@@ -225,17 +179,6 @@ class RedditBuilderTestCase(TestCase):
             ("hm0qct", "hna75r", "hngs71", "hngsj8", "hnd7cy"), posts.keys()
         )
 
-        duplicate_post.refresh_from_db()
-
-        self.assertEquals(
-            duplicate_post.publication_date,
-            pytz.utc.localize(datetime(2020, 7, 6, 6, 11, 22)),
-        )
-        self.assertEquals(
-            duplicate_post.title,
-            "Linux Experiences/Rants or Education/Certifications thread - July 06, 2020",
-        )
-
     def test_image_post(self):
         builder = RedditBuilder
 
diff --git a/src/newsreader/news/collection/tests/twitter/builder/mocks.py b/src/newsreader/news/collection/tests/twitter/builder/mocks.py
index b330f2f..2be360d 100644
--- a/src/newsreader/news/collection/tests/twitter/builder/mocks.py
+++ b/src/newsreader/news/collection/tests/twitter/builder/mocks.py
@@ -2185,3 +2185,202 @@ unsanitized_mock = [
         },
     }
 ]
+
+broken_mock = [
+    {
+        "contributors": None,
+        "coordinates": None,
+        "created_at": "Fri Aug 07 00:17:05 +0000 2020",
+        "display_text_range": [11, 59],
+        "entities": {
+            "hashtags": [],
+            "symbols": [],
+            "urls": [
+                {
+                    "display_url": "youtu.be/rDy7tPf6CT8",
+                    "expanded_url": "https://youtu.be/rDy7tPf6CT8",
+                    "indices": [36, 59],
+                    "url": "https://t.co/trAcIxBMlX",
+                }
+            ],
+            "user_mentions": [
+                {
+                    "id": 975844884606275587,
+                    "id_str": "975844884606275587",
+                    "indices": [0, 10],
+                    "name": "ArieNeo",
+                    "screen_name": "ArieNeoSC",
+                }
+            ],
+        },
+        "favorite_count": 19,
+        "favorited": False,
+        # Note the missing full_text key here
+        "geo": None,
+        "id": 1291528756373286914,
+        "id_str": "1291528756373286914",
+        "in_reply_to_screen_name": "ArieNeoSC",
+        "in_reply_to_status_id": 1291507356313038850,
+        "in_reply_to_status_id_str": "1291507356313038850",
+        "in_reply_to_user_id": 975844884606275587,
+        "in_reply_to_user_id_str": "975844884606275587",
+        "is_quote_status": False,
+        "lang": "en",
+        "place": None,
+        "possibly_sensitive": False,
+        "retweet_count": 5,
+        "retweeted": False,
+        "source": '<a href="https://mobile.twitter.com" rel="nofollow">Twitter Web App</a>',
+        "truncated": False,
+        "user": {
+            "contributors_enabled": False,
+            "created_at": "Wed Sep 05 00:58:11 +0000 2012",
+            "default_profile": False,
+            "default_profile_image": False,
+            "description": "The official Twitter profile for #StarCitizen and Roberts Space Industries.",
+            "entities": {
+                "description": {"urls": []},
+                "url": {
+                    "urls": [
+                        {
+                            "display_url": "robertsspaceindustries.com",
+                            "expanded_url": "http://www.robertsspaceindustries.com",
+                            "indices": [0, 23],
+                            "url": "https://t.co/iqO6apof3y",
+                        }
+                    ]
+                },
+            },
+            "favourites_count": 4588,
+            "follow_request_sent": None,
+            "followers_count": 106169,
+            "following": None,
+            "friends_count": 201,
+            "geo_enabled": False,
+            "has_extended_profile": False,
+            "id": 803542770,
+            "id_str": "803542770",
+            "is_translation_enabled": False,
+            "is_translator": False,
+            "lang": None,
+            "listed_count": 890,
+            "location": "Roberts Space Industries",
+            "name": "Star Citizen",
+            "notifications": None,
+            "profile_background_color": "131516",
+            "profile_background_image_url": "http://abs.twimg.com/images/themes/theme14/bg.gif",
+            "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme14/bg.gif",
+            "profile_background_tile": False,
+            "profile_banner_url": "https://pbs.twimg.com/profile_banners/803542770/1596651186",
+            "profile_image_url": "http://pbs.twimg.com/profile_images/963109950103814144/ysnj_Asy_normal.jpg",
+            "profile_image_url_https": "https://pbs.twimg.com/profile_images/963109950103814144/ysnj_Asy_normal.jpg",
+            "profile_link_color": "0A5485",
+            "profile_sidebar_border_color": "FFFFFF",
+            "profile_sidebar_fill_color": "EFEFEF",
+            "profile_text_color": "333333",
+            "profile_use_background_image": True,
+            "protected": False,
+            "screen_name": "RobertsSpaceInd",
+            "statuses_count": 6210,
+            "time_zone": None,
+            "translator_type": "none",
+            "url": "https://t.co/iqO6apof3y",
+            "utc_offset": None,
+            "verified": True,
+        },
+    },
+    {
+        "contributors": None,
+        "coordinates": None,
+        "created_at": "Wed Jul 29 19:01:47 +0000 2020",
+        "display_text_range": [10, 98],
+        "entities": {
+            "hashtags": [],
+            "symbols": [],
+            "urls": [],
+            "user_mentions": [
+                {
+                    "id": 435221600,
+                    "id_str": "435221600",
+                    "indices": [0, 9],
+                    "name": "Christopher Blough",
+                    "screen_name": "RelicCcb",
+                }
+            ],
+        },
+        "favorite_count": 1,
+        "favorited": False,
+        "full_text": "@RelicCcb Hi Christoper, we have checked the status of your investigation and it is still ongoing.",
+        "geo": None,
+        "id": 1288550304095416320,
+        "id_str": "1288550304095416320",
+        "in_reply_to_screen_name": "RelicCcb",
+        "in_reply_to_status_id": 1288475147951898625,
+        "in_reply_to_status_id_str": "1288475147951898625",
+        "in_reply_to_user_id": 435221600,
+        "in_reply_to_user_id_str": "435221600",
+        "is_quote_status": False,
+        "lang": "en",
+        "place": None,
+        "retweet_count": 0,
+        "retweeted": False,
+        "source": '<a href="https://mobile.twitter.com" rel="nofollow">Twitter Web App</a>',
+        "truncated": False,
+        "user": {
+            "contributors_enabled": False,
+            "created_at": "Wed Sep 05 00:58:11 +0000 2012",
+            "default_profile": False,
+            "default_profile_image": False,
+            "description": "The official Twitter profile for #StarCitizen and Roberts Space Industries.",
+            "entities": {
+                "description": {"urls": []},
+                "url": {
+                    "urls": [
+                        {
+                            "display_url": "robertsspaceindustries.com",
+                            "expanded_url": "http://www.robertsspaceindustries.com",
+                            "indices": [0, 23],
+                            "url": "https://t.co/iqO6apof3y",
+                        }
+                    ]
+                },
+            },
+            "favourites_count": 4588,
+            "follow_request_sent": None,
+            "followers_count": 106169,
+            "following": None,
+            "friends_count": 201,
+            "geo_enabled": False,
+            "has_extended_profile": False,
+            "id": 803542770,
+            "id_str": "803542770",
+            "is_translation_enabled": False,
+            "is_translator": False,
+            "lang": None,
+            "listed_count": 890,
+            "location": "Roberts Space Industries",
+            "name": "Star Citizen",
+            "notifications": None,
+            "profile_background_color": "131516",
+            "profile_background_image_url": "http://abs.twimg.com/images/themes/theme14/bg.gif",
+            "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme14/bg.gif",
+            "profile_background_tile": False,
+            "profile_banner_url": "https://pbs.twimg.com/profile_banners/803542770/1596651186",
+            "profile_image_url": "http://pbs.twimg.com/profile_images/963109950103814144/ysnj_Asy_normal.jpg",
+            "profile_image_url_https": "https://pbs.twimg.com/profile_images/963109950103814144/ysnj_Asy_normal.jpg",
+            "profile_link_color": "0A5485",
+            "profile_sidebar_border_color": "FFFFFF",
+            "profile_sidebar_fill_color": "EFEFEF",
+            "profile_text_color": "333333",
+            "profile_use_background_image": True,
+            "protected": False,
+            "screen_name": "RobertsSpaceInd",
+            "statuses_count": 6210,
+            "time_zone": None,
+            "translator_type": "none",
+            "url": "https://t.co/iqO6apof3y",
+            "utc_offset": None,
+            "verified": True,
+        },
+    },
+]
diff --git a/src/newsreader/news/collection/tests/twitter/builder/tests.py b/src/newsreader/news/collection/tests/twitter/builder/tests.py
index 37d7ad7..2e9ecc0 100644
--- a/src/newsreader/news/collection/tests/twitter/builder/tests.py
+++ b/src/newsreader/news/collection/tests/twitter/builder/tests.py
@@ -10,6 +10,7 @@ from ftfy import fix_text
 
 from newsreader.news.collection.tests.factories import TwitterTimelineFactory
 from newsreader.news.collection.tests.twitter.builder.mocks import (
+    broken_mock,
     gif_mock,
     image_mock,
     quoted_mock,
@@ -410,3 +411,21 @@ class TwitterBuilderTestCase(TestCase):
             builder.save()
 
         self.assertEquals(Post.objects.count(), 2)
+
+    def test_bad_post(self):
+        """
+        Tests that the builder skips posts that are missing data but saves the others
+        """
+        builder = TwitterBuilder
+
+        profile = TwitterTimelineFactory(screen_name="RobertsSpaceInd")
+        mock_stream = Mock(rule=profile)
+
+        with builder(broken_mock, mock_stream) as builder:
+            builder.build()
+            builder.save()
+
+        self.assertCountEqual(
+            Post.objects.values_list("remote_identifier", flat=True),
+            ["1288550304095416320"],
+        )
diff --git a/src/newsreader/news/collection/twitter.py b/src/newsreader/news/collection/twitter.py
index dc32ecc..36047a5 100644
--- a/src/newsreader/news/collection/twitter.py
+++ b/src/newsreader/news/collection/twitter.py
@@ -22,6 +22,10 @@ from newsreader.news.collection.base import (
 )
 from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices
 from newsreader.news.collection.exceptions import (
+    BuilderDuplicateException,
+    BuilderException,
+    BuilderMissingDataException,
+    BuilderParseException,
     StreamDeniedException,
     StreamException,
     StreamNotFoundException,
@@ -48,43 +52,77 @@ class TwitterBuilder(PostBuilder):
 
     def build(self):
         results = {}
-        rule = self.stream.rule
 
         for post in self.payload:
-            remote_identifier = post["id_str"]
-
-            if remote_identifier in self.existing_posts:
+            try:
+                post = self.build_post(post)
+            except BuilderException:
+                logger.exception("Failed building post")
                 continue
 
-            url = f"{TWITTER_URL}/{rule.screen_name}/status/{remote_identifier}"
-            body = urlize(post["full_text"], nofollow=True)
+            identifier = post.remote_identifier
+            results[identifier] = post
+
+        self.instances = results.values()
+
+    def build_post(self, data):
+        """
+        Builds an unsaved Post from the data of a single tweet.
+
+        Raises BuilderDuplicateException when the id is in existing_posts,
+        BuilderMissingDataException when an expected key is absent and
+        BuilderParseException when the tweet data cannot be parsed.
+        """
+        remote_identifier = data.get("id_str", "")
+        rule = self.stream.rule
+
+        if remote_identifier in self.existing_posts:
+            raise BuilderDuplicateException(payload=data)
+
+        try:
+            body = urlize(data["full_text"], nofollow=True)
             title = truncate_text(
-                Post, "title", self.sanitize_fragment(post["full_text"])
+                Post, "title", self.sanitize_fragment(data["full_text"])
             )
 
             publication_date = pytz.utc.localize(
-                datetime.strptime(post["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
+                datetime.strptime(data["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
             )
+        except KeyError as e:
+            raise BuilderMissingDataException(payload=data) from e
+        except (ValueError, OverflowError, OSError) as e:
+            # strptime raises ValueError when created_at is malformed
+            raise BuilderParseException(payload=data) from e
 
-            if "extended_entities" in post:
-                try:
-                    media_entities = self.get_media_entities(post)
-                    body += media_entities
-                except KeyError:
-                    logger.exception(f"Failed parsing media_entities for {url}")
+        url = f"{TWITTER_URL}/{rule.screen_name}/status/{remote_identifier}"
 
-            if "retweeted_status" in post:
-                original_post = post["retweeted_status"]
+        if "extended_entities" in data:
+            try:
+                media_entities = self.get_media_entities(data)
+                body += media_entities
+            except KeyError as e:
+                raise BuilderMissingDataException(
+                    message="Failed parsing data for media entities", payload=data
+                ) from e
+
+        try:
+            if "retweeted_status" in data:
+                original_post = data["retweeted_status"]
                 original_tweet = urlize(original_post["full_text"], nofollow=True)
                 body = f"{body} <br><div>Original tweet: {original_tweet}</div>"
-            if "quoted_status" in post:
-                original_post = post["quoted_status"]
+            if "quoted_status" in data:
+                original_post = data["quoted_status"]
                 original_tweet = urlize(original_post["full_text"], nofollow=True)
                 body = f"{body} <br><div>Quoted tweet: {original_tweet}</div>"
+        except KeyError as e:
+            raise BuilderMissingDataException(
+                message="Failed parsing data for original tweet", payload=data
+            ) from e
 
-            body = self.sanitize_fragment(body)
+        body = self.sanitize_fragment(body)
 
-            data = {
+        return Post(
+            **{
                 "remote_identifier": remote_identifier,
                 "title": fix_text(title),
                 "body": fix_text(body),
@@ -93,13 +131,10 @@ class TwitterBuilder(PostBuilder):
                 "url": url,
                 "rule": rule,
             }
+        )
 
-            results[remote_identifier] = Post(**data)
-
-        self.instances = results.values()
-
-    def get_media_entities(self, post):
-        media_entities = post["extended_entities"]["media"]
+    def get_media_entities(self, data):
+        media_entities = data["extended_entities"]["media"]
         formatted_entities = ""
 
         for media_entity in media_entities:
diff --git a/src/newsreader/scss/components/post/_post.scss b/src/newsreader/scss/components/post/_post.scss
index e73dbd2..ae94f6c 100644
--- a/src/newsreader/scss/components/post/_post.scss
+++ b/src/newsreader/scss/components/post/_post.scss
@@ -70,6 +70,8 @@
 
     & img, video {
       padding: 10px 0;
+
+      width: max-content;
       max-width: 100%;
     }
 
diff --git a/src/newsreader/templates/base.html b/src/newsreader/templates/base.html
index efaf9f2..57da011 100644
--- a/src/newsreader/templates/base.html
+++ b/src/newsreader/templates/base.html
@@ -16,7 +16,7 @@
         {% if request.user.is_authenticated %}
           <li class="nav__item"><a href="{% url 'index' %}">Home</a></li>
           <li class="nav__item"><a href="{% url 'news:core:categories' %}">Categories</a></li>
-          <li class="nav__item"><a href="{% url 'news:collection:rules' %}">Feeds</a></li>
+          <li class="nav__item"><a href="{% url 'news:collection:rules' %}">Sources</a></li>
           <li class="nav__item"><a href="{% url 'accounts:settings:home' %}">Settings</a></li>
           {% if request.user.is_superuser %}
             <li class="nav__item"><a href="{% url 'admin:index' %}">Admin</a></li>
diff --git a/webpack.common.babel.js b/webpack.common.babel.js
index bbfb403..2778595 100644
--- a/webpack.common.babel.js
+++ b/webpack.common.babel.js
@@ -40,7 +40,8 @@ export default {
       allChunks: true,
     }),
     new CleanWebpackPlugin({
-      cleanOnceBeforeBuildPatterns: ['**/*', '!favicon.png'],
+      cleanOnceBeforeBuildPatterns: ['js', 'css', 'fonts'],
+      cleanAfterEveryBuildPatterns: ['!fonts/**'],
     }),
   ],
 };