Use new builder exceptions in TwitterBuilder
This commit is contained in:
parent
e89b4c04a1
commit
379f8516e3
1 changed file with 50 additions and 28 deletions
|
|
@@ -22,6 +22,10 @@ from newsreader.news.collection.base import (
|
||||||
)
|
)
|
||||||
from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices
|
from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices
|
||||||
from newsreader.news.collection.exceptions import (
|
from newsreader.news.collection.exceptions import (
|
||||||
|
BuilderDuplicateException,
|
||||||
|
BuilderException,
|
||||||
|
BuilderMissingDataException,
|
||||||
|
BuilderParseException,
|
||||||
StreamDeniedException,
|
StreamDeniedException,
|
||||||
StreamException,
|
StreamException,
|
||||||
StreamNotFoundException,
|
StreamNotFoundException,
|
||||||
|
|
@@ -50,36 +54,50 @@ class TwitterBuilder(PostBuilder):
|
||||||
results = {}
|
results = {}
|
||||||
|
|
||||||
for post in self.payload:
|
for post in self.payload:
|
||||||
remote_identifier = post.get("id_str")
|
|
||||||
|
|
||||||
if not remote_identifier or remote_identifier in self.existing_posts:
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
results[remote_identifier] = self.build_post(post)
|
post = self.build_post(post)
|
||||||
except KeyError:
|
except BuilderException:
|
||||||
logger.exception(f"Failed building post {remote_identifier}")
|
logger.exception("Failed building post")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
identifier = post.remote_identifier
|
||||||
|
results[identifier] = post
|
||||||
|
|
||||||
self.instances = results.values()
|
self.instances = results.values()
|
||||||
|
|
||||||
def build_post(self, data):
|
def build_post(self, data):
|
||||||
remote_identifier = data["id_str"]
|
remote_identifier = data.get("id_str", "")
|
||||||
|
rule = self.stream.rule
|
||||||
|
|
||||||
|
if remote_identifier in self.existing_posts:
|
||||||
|
raise BuilderDuplicateException(payload=data)
|
||||||
|
|
||||||
|
try:
|
||||||
body = urlize(data["full_text"], nofollow=True)
|
body = urlize(data["full_text"], nofollow=True)
|
||||||
title = truncate_text(Post, "title", self.sanitize_fragment(data["full_text"]))
|
title = truncate_text(
|
||||||
url = f"{TWITTER_URL}/{self.stream.rule.screen_name}/status/{remote_identifier}"
|
Post, "title", self.sanitize_fragment(data["full_text"])
|
||||||
|
)
|
||||||
|
|
||||||
publication_date = pytz.utc.localize(
|
publication_date = pytz.utc.localize(
|
||||||
datetime.strptime(data["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
|
datetime.strptime(data["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
|
||||||
)
|
)
|
||||||
|
except KeyError as e:
|
||||||
|
raise BuilderMissingDataException(payload=data) from e
|
||||||
|
except (OverflowError, OSError) as e:
|
||||||
|
raise BuilderParseException(payload=data) from e
|
||||||
|
|
||||||
|
url = f"{TWITTER_URL}/{rule.screen_name}/status/{remote_identifier}"
|
||||||
|
|
||||||
if "extended_entities" in data:
|
if "extended_entities" in data:
|
||||||
try:
|
try:
|
||||||
media_entities = self.get_media_entities(data)
|
media_entities = self.get_media_entities(data)
|
||||||
body += media_entities
|
body += media_entities
|
||||||
except KeyError:
|
except KeyError as e:
|
||||||
logger.exception(f"Failed parsing media_entities for {url}")
|
raise BuilderMissingDataException(
|
||||||
|
message="Failed parsing data for media entities", payload=data
|
||||||
|
) from e
|
||||||
|
|
||||||
|
try:
|
||||||
if "retweeted_status" in data:
|
if "retweeted_status" in data:
|
||||||
original_post = data["retweeted_status"]
|
original_post = data["retweeted_status"]
|
||||||
original_tweet = urlize(original_post["full_text"], nofollow=True)
|
original_tweet = urlize(original_post["full_text"], nofollow=True)
|
||||||
|
|
@@ -88,18 +106,22 @@ class TwitterBuilder(PostBuilder):
|
||||||
original_post = data["quoted_status"]
|
original_post = data["quoted_status"]
|
||||||
original_tweet = urlize(original_post["full_text"], nofollow=True)
|
original_tweet = urlize(original_post["full_text"], nofollow=True)
|
||||||
body = f"{body} <br><div>Quoted tweet: {original_tweet}</div>"
|
body = f"{body} <br><div>Quoted tweet: {original_tweet}</div>"
|
||||||
|
except KeyError as e:
|
||||||
|
raise BuilderMissingDataException(
|
||||||
|
message="Failed parsing data for original tweet", payload=data
|
||||||
|
) from e
|
||||||
|
|
||||||
body = self.sanitize_fragment(body)
|
body = self.sanitize_fragment(body)
|
||||||
|
|
||||||
return Post(
|
return Post(
|
||||||
**{
|
**{
|
||||||
"remote_identifier": data["id_str"],
|
"remote_identifier": remote_identifier,
|
||||||
"title": fix_text(title),
|
"title": fix_text(title),
|
||||||
"body": fix_text(body),
|
"body": fix_text(body),
|
||||||
"author": self.stream.rule.screen_name,
|
"author": rule.screen_name,
|
||||||
"publication_date": publication_date,
|
"publication_date": publication_date,
|
||||||
"url": url,
|
"url": url,
|
||||||
"rule": self.stream.rule,
|
"rule": rule,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue