Use new builder exceptions in TwitterBuilder
This commit is contained in:
parent
e89b4c04a1
commit
379f8516e3
1 changed file with 50 additions and 28 deletions
|
|
@@ -22,6 +22,10 @@ from newsreader.news.collection.base import (
|
|||
)
|
||||
from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices
|
||||
from newsreader.news.collection.exceptions import (
|
||||
BuilderDuplicateException,
|
||||
BuilderException,
|
||||
BuilderMissingDataException,
|
||||
BuilderParseException,
|
||||
StreamDeniedException,
|
||||
StreamException,
|
||||
StreamNotFoundException,
|
||||
|
|
@@ -50,56 +54,74 @@ class TwitterBuilder(PostBuilder):
|
|||
results = {}
|
||||
|
||||
for post in self.payload:
|
||||
remote_identifier = post.get("id_str")
|
||||
|
||||
if not remote_identifier or remote_identifier in self.existing_posts:
|
||||
continue
|
||||
|
||||
try:
|
||||
results[remote_identifier] = self.build_post(post)
|
||||
except KeyError:
|
||||
logger.exception(f"Failed building post {remote_identifier}")
|
||||
post = self.build_post(post)
|
||||
except BuilderException:
|
||||
logger.exception("Failed building post")
|
||||
continue
|
||||
|
||||
identifier = post.remote_identifier
|
||||
results[identifier] = post
|
||||
|
||||
self.instances = results.values()
|
||||
|
||||
def build_post(self, data):
|
||||
remote_identifier = data["id_str"]
|
||||
body = urlize(data["full_text"], nofollow=True)
|
||||
title = truncate_text(Post, "title", self.sanitize_fragment(data["full_text"]))
|
||||
url = f"{TWITTER_URL}/{self.stream.rule.screen_name}/status/{remote_identifier}"
|
||||
remote_identifier = data.get("id_str", "")
|
||||
rule = self.stream.rule
|
||||
|
||||
publication_date = pytz.utc.localize(
|
||||
datetime.strptime(data["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
|
||||
)
|
||||
if remote_identifier in self.existing_posts:
|
||||
raise BuilderDuplicateException(payload=data)
|
||||
|
||||
try:
|
||||
body = urlize(data["full_text"], nofollow=True)
|
||||
title = truncate_text(
|
||||
Post, "title", self.sanitize_fragment(data["full_text"])
|
||||
)
|
||||
|
||||
publication_date = pytz.utc.localize(
|
||||
datetime.strptime(data["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
|
||||
)
|
||||
except KeyError as e:
|
||||
raise BuilderMissingDataException(payload=data) from e
|
||||
except (OverflowError, OSError) as e:
|
||||
raise BuilderParseException(payload=data) from e
|
||||
|
||||
url = f"{TWITTER_URL}/{rule.screen_name}/status/{remote_identifier}"
|
||||
|
||||
if "extended_entities" in data:
|
||||
try:
|
||||
media_entities = self.get_media_entities(data)
|
||||
body += media_entities
|
||||
except KeyError:
|
||||
logger.exception(f"Failed parsing media_entities for {url}")
|
||||
except KeyError as e:
|
||||
raise BuilderMissingDataException(
|
||||
message="Failed parsing data for media entities", payload=data
|
||||
) from e
|
||||
|
||||
if "retweeted_status" in data:
|
||||
original_post = data["retweeted_status"]
|
||||
original_tweet = urlize(original_post["full_text"], nofollow=True)
|
||||
body = f"{body} <br><div>Original tweet: {original_tweet}</div>"
|
||||
if "quoted_status" in data:
|
||||
original_post = data["quoted_status"]
|
||||
original_tweet = urlize(original_post["full_text"], nofollow=True)
|
||||
body = f"{body} <br><div>Quoted tweet: {original_tweet}</div>"
|
||||
try:
|
||||
if "retweeted_status" in data:
|
||||
original_post = data["retweeted_status"]
|
||||
original_tweet = urlize(original_post["full_text"], nofollow=True)
|
||||
body = f"{body} <br><div>Original tweet: {original_tweet}</div>"
|
||||
if "quoted_status" in data:
|
||||
original_post = data["quoted_status"]
|
||||
original_tweet = urlize(original_post["full_text"], nofollow=True)
|
||||
body = f"{body} <br><div>Quoted tweet: {original_tweet}</div>"
|
||||
except KeyError as e:
|
||||
raise BuilderMissingDataException(
|
||||
message="Failed parsing data for original tweet", payload=data
|
||||
) from e
|
||||
|
||||
body = self.sanitize_fragment(body)
|
||||
|
||||
return Post(
|
||||
**{
|
||||
"remote_identifier": data["id_str"],
|
||||
"remote_identifier": remote_identifier,
|
||||
"title": fix_text(title),
|
||||
"body": fix_text(body),
|
||||
"author": self.stream.rule.screen_name,
|
||||
"author": rule.screen_name,
|
||||
"publication_date": publication_date,
|
||||
"url": url,
|
||||
"rule": self.stream.rule,
|
||||
"rule": rule,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue