Add initial twitter scheduler & rename last_suceeded to last_run

This commit is contained in:
Sonny Bakker 2020-09-13 22:36:31 +02:00
parent d9999752bc
commit 7fe3af0115
18 changed files with 4211 additions and 4096 deletions

View file

@ -0,0 +1,21 @@
# Generated by Django 3.0.7 on 2020-09-13 19:01
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the optional Twitter OAuth credential columns to the ``user`` model."""

    dependencies = [("accounts", "0010_auto_20200603_2230")]

    # Both columns share one definition (an optional 255-character string), so
    # the operations are generated from the column names.
    operations = [
        migrations.AddField(
            model_name="user",
            name=column,
            field=models.CharField(blank=True, max_length=255, null=True),
        )
        for column in ("twitter_oauth_token", "twitter_oauth_token_secret")
    ]

View file

@ -53,6 +53,9 @@ class User(AbstractUser):
reddit_refresh_token = models.CharField(max_length=255, blank=True, null=True)
reddit_access_token = models.CharField(max_length=255, blank=True, null=True)
twitter_oauth_token = models.CharField(max_length=255, blank=True, null=True)
twitter_oauth_token_secret = models.CharField(max_length=255, blank=True, null=True)
username = None
objects = UserManager()

View file

@ -3471,7 +3471,7 @@
"favicon": "https://news.ycombinator.com/favicon.ico",
"timezone": "UTC",
"category": 9,
"last_suceeded": "2020-07-14T11:45:30.477Z",
"last_run": "2020-07-14T11:45:30.477Z",
"succeeded": true,
"error": null,
"enabled": true,
@ -3493,7 +3493,7 @@
"favicon": "https://m.files.bbci.co.uk/modules/bbc-morph-news-waf-page-meta/2.5.2/apple-touch-icon-57x57-precomposed.png",
"timezone": "UTC",
"category": 8,
"last_suceeded": "2020-07-14T11:45:28.863Z",
"last_run": "2020-07-14T11:45:28.863Z",
"succeeded": true,
"error": null,
"enabled": true,
@ -3515,7 +3515,7 @@
"favicon": "https://cdn.arstechnica.net/favicon.ico",
"timezone": "UTC",
"category": 9,
"last_suceeded": "2020-07-14T11:45:29.810Z",
"last_run": "2020-07-14T11:45:29.810Z",
"succeeded": true,
"error": null,
"enabled": true,
@ -3537,7 +3537,7 @@
"favicon": "https://assets.guim.co.uk/images/favicons/873381bf11d58e20f551905d51575117/72x72.png",
"timezone": "UTC",
"category": 8,
"last_suceeded": "2020-07-14T11:45:30.181Z",
"last_run": "2020-07-14T11:45:30.181Z",
"succeeded": true,
"error": null,
"enabled": true,
@ -3559,7 +3559,7 @@
"favicon": null,
"timezone": "UTC",
"category": 9,
"last_suceeded": "2020-07-14T11:45:29.525Z",
"last_run": "2020-07-14T11:45:29.525Z",
"succeeded": true,
"error": null,
"enabled": true,
@ -3581,7 +3581,7 @@
"favicon": "https://cdn.vox-cdn.com/uploads/chorus_asset/file/7395367/favicon-16x16.0.png",
"timezone": "UTC",
"category": 9,
"last_suceeded": "2020-07-14T11:45:30.066Z",
"last_run": "2020-07-14T11:45:30.066Z",
"succeeded": true,
"error": null,
"enabled": true,
@ -3603,7 +3603,7 @@
"favicon": null,
"timezone": "Europe/Amsterdam",
"category": 8,
"last_suceeded": "2020-07-14T11:45:29.362Z",
"last_run": "2020-07-14T11:45:29.362Z",
"succeeded": true,
"error": null,
"enabled": true,
@ -3625,7 +3625,7 @@
"favicon": null,
"timezone": "UTC",
"category": 9,
"last_suceeded": "2020-07-21T20:14:50.492Z",
"last_run": "2020-07-21T20:14:50.492Z",
"succeeded": true,
"error": null,
"enabled": true,
@ -3647,7 +3647,7 @@
"favicon": null,
"timezone": "UTC",
"category": 8,
"last_suceeded": "2020-07-21T20:14:50.768Z",
"last_run": "2020-07-21T20:14:50.768Z",
"succeeded": true,
"error": null,
"enabled": true,
@ -3669,7 +3669,7 @@
"favicon": null,
"timezone": "UTC",
"category": 9,
"last_suceeded": "2020-07-21T20:14:50.355Z",
"last_run": "2020-07-21T20:14:50.355Z",
"succeeded": true,
"error": null,
"enabled": true,

View file

@ -47,7 +47,7 @@
"user" : 2,
"succeeded" : true,
"modified" : "2019-07-20T11:28:16.473Z",
"last_suceeded" : "2019-07-20T11:28:16.316Z",
"last_run" : "2019-07-20T11:28:16.316Z",
"name" : "Hackers News",
"website_url" : null,
"created" : "2019-07-14T13:08:10.374Z",
@ -65,7 +65,7 @@
"error" : null,
"user" : 2,
"succeeded" : true,
"last_suceeded" : "2019-07-20T11:28:15.691Z",
"last_run" : "2019-07-20T11:28:15.691Z",
"name" : "BBC",
"modified" : "2019-07-20T12:07:49.164Z",
"timezone" : "UTC",
@ -85,7 +85,7 @@
"website_url" : null,
"name" : "Ars Technica",
"succeeded" : true,
"last_suceeded" : "2019-07-20T11:28:15.986Z",
"last_run" : "2019-07-20T11:28:15.986Z",
"modified" : "2019-07-20T11:28:16.033Z",
"user" : 2
},
@ -102,7 +102,7 @@
"user" : 2,
"name" : "The Guardian",
"succeeded" : true,
"last_suceeded" : "2019-07-20T11:28:16.078Z",
"last_run" : "2019-07-20T11:28:16.078Z",
"modified" : "2019-07-20T12:07:44.292Z",
"created" : "2019-07-20T11:25:02.089Z",
"website_url" : null,
@ -119,7 +119,7 @@
"website_url" : null,
"created" : "2019-07-20T11:25:30.121Z",
"user" : 2,
"last_suceeded" : "2019-07-20T11:28:15.860Z",
"last_run" : "2019-07-20T11:28:15.860Z",
"succeeded" : true,
"modified" : "2019-07-20T12:07:28.473Z",
"name" : "Tweakers"
@ -139,7 +139,7 @@
"website_url" : null,
"timezone" : "UTC",
"user" : 2,
"last_suceeded" : "2019-07-20T11:28:16.034Z",
"last_run" : "2019-07-20T11:28:16.034Z",
"succeeded" : true,
"modified" : "2019-07-20T12:07:21.704Z",
"name" : "The Verge"

View file

@ -6,14 +6,7 @@ from newsreader.news.collection.models import CollectionRule
class CollectionRuleAdmin(admin.ModelAdmin):
fields = ("url", "name", "timezone", "category", "favicon", "user")
list_display = (
"name",
"type_display",
"category",
"url",
"last_suceeded",
"succeeded",
)
list_display = ("name", "type_display", "category", "url", "last_run", "succeeded")
list_filter = ("user",)
def save_model(self, request, obj, form, change):

View file

@ -108,6 +108,15 @@ class Collector:
abstract = True
class Scheduler:
    """
    Base class for schedulers, which select the rules to run with regard to
    rate limiting.
    """

    def get_scheduled_rules(self):
        """Return the rules selected to run; every subclass has to override this."""
        raise NotImplementedError()
class PostBuilder(Builder):
rule_type = None

View file

@ -5,7 +5,7 @@ from django.utils.translation import gettext as _
class RuleTypeChoices(TextChoices):
feed = "feed", _("Feed")
subreddit = "subreddit", _("Subreddit")
twitter = "twitter", _("Twitter")
twitter_timeline = "twitter_timeline", _("Twitter timeline")
class TwitterPostTypeChoices(TextChoices):

View file

@ -118,7 +118,6 @@ class FeedClient(PostClient):
stream.rule.error = None
stream.rule.succeeded = True
stream.rule.last_suceeded = timezone.now()
yield payload
except (StreamNotFoundException, StreamTimeOutException) as e:
@ -134,6 +133,7 @@ class FeedClient(PostClient):
continue
finally:
stream.rule.last_run = timezone.now()
stream.rule.save()

View file

@ -0,0 +1,24 @@
# Generated by Django 3.0.7 on 2020-09-13 19:01
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Redefine ``collectionrule.type`` with the feed, subreddit and
    twitter_timeline choices.
    """

    dependencies = [("collection", "0009_auto_20200807_2030")]

    operations = [
        # Positional arguments are (model_name, name, field).
        migrations.AlterField(
            "collectionrule",
            "type",
            models.CharField(
                max_length=20,
                default="feed",
                choices=[
                    ("feed", "Feed"),
                    ("subreddit", "Subreddit"),
                    ("twitter_timeline", "Twitter timeline"),
                ],
            ),
        )
    ]

View file

@ -0,0 +1,14 @@
# Generated by Django 3.0.7 on 2020-09-13 19:57
from django.db import migrations
class Migration(migrations.Migration):
    """Rename ``collectionrule.last_suceeded`` to ``last_run``."""

    dependencies = [("collection", "0010_auto_20200913_2101")]

    operations = [
        # Positional arguments are (model_name, old_name, new_name).
        migrations.RenameField("collectionrule", "last_suceeded", "last_run")
    ]

View file

@ -41,9 +41,8 @@ class CollectionRule(TimeStampedModel):
on_delete=models.SET_NULL,
)
last_suceeded = models.DateTimeField(blank=True, null=True)
last_run = models.DateTimeField(blank=True, null=True)
succeeded = models.BooleanField(default=False)
error = models.CharField(max_length=1024, blank=True, null=True)
enabled = models.BooleanField(

View file

@ -20,6 +20,7 @@ from newsreader.news.collection.base import (
PostClient,
PostCollector,
PostStream,
Scheduler,
)
from newsreader.news.collection.choices import RuleTypeChoices
from newsreader.news.collection.constants import (
@ -239,6 +240,7 @@ class RedditClient(PostClient):
futures = {executor.submit(stream.read): stream for stream in batch}
if rate_limitted:
logger.warning("Aborting requests, ratelimit hit")
break
for future in as_completed(futures):
@ -249,7 +251,6 @@ class RedditClient(PostClient):
stream.rule.error = None
stream.rule.succeeded = True
stream.rule.last_suceeded = timezone.now()
yield response_data
except StreamDeniedException as e:
@ -281,6 +282,7 @@ class RedditClient(PostClient):
continue
finally:
stream.rule.last_run = timezone.now()
stream.rule.save()
@ -289,7 +291,7 @@ class RedditCollector(PostCollector):
client = RedditClient
class RedditScheduler:
class RedditScheduler(Scheduler):
max_amount = RATE_LIMIT
max_user_amount = RATE_LIMIT / 4
@ -300,7 +302,7 @@ class RedditScheduler:
user__reddit_access_token__isnull=False,
user__reddit_refresh_token__isnull=False,
enabled=True,
).order_by("last_suceeded")[:200]
).order_by("last_run")[:200]
else:
self.subreddits = subreddits

View file

@ -30,6 +30,6 @@ class SubredditFactory(CollectionRuleFactory):
website_url = REDDIT_URL
class TwitterProfileFactory(CollectionRuleFactory):
type = RuleTypeChoices.twitter
class TwitterTimeLineFactory(CollectionRuleFactory):
    """Factory for collection rules of the ``twitter_timeline`` type."""

    # Fixed rule type for every instance built by this factory.
    type = RuleTypeChoices.twitter_timeline

    # Generated through the Faker "user_name" provider.
    screen_name = factory.Faker("user_name")

View file

@ -26,6 +26,7 @@ from newsreader.news.core.tests.factories import FeedPostFactory
from .mocks import duplicate_mock, empty_mock, multiple_mock, multiple_update_mock
@freeze_time("2019-10-30 12:30:00")
class FeedCollectorTestCase(TestCase):
def setUp(self):
self.maxDiff = None
@ -39,7 +40,6 @@ class FeedCollectorTestCase(TestCase):
def tearDown(self):
patch.stopall()
@freeze_time("2019-10-30 12:30:00")
def test_simple_batch(self):
self.mocked_parse.return_value = multiple_mock
rule = FeedFactory()
@ -51,10 +51,9 @@ class FeedCollectorTestCase(TestCase):
self.assertEquals(Post.objects.count(), 3)
self.assertEquals(rule.succeeded, True)
self.assertEquals(rule.last_suceeded, timezone.now())
self.assertEquals(rule.last_run, timezone.now())
self.assertEquals(rule.error, None)
@freeze_time("2019-10-30 12:30:00")
def test_emtpy_batch(self):
self.mocked_fetch.return_value = Mock()
self.mocked_parse.return_value = empty_mock
@ -68,7 +67,7 @@ class FeedCollectorTestCase(TestCase):
self.assertEquals(Post.objects.count(), 0)
self.assertEquals(rule.succeeded, True)
self.assertEquals(rule.error, None)
self.assertEquals(rule.last_suceeded, timezone.now())
self.assertEquals(rule.last_run, timezone.now())
def test_not_found(self):
self.mocked_fetch.side_effect = StreamNotFoundException
@ -85,10 +84,8 @@ class FeedCollectorTestCase(TestCase):
def test_denied(self):
self.mocked_fetch.side_effect = StreamDeniedException
last_suceeded = timezone.make_aware(
datetime.combine(date=date(2019, 10, 30), time=time(12, 30))
)
rule = FeedFactory(last_suceeded=last_suceeded)
old_run = timezone.make_aware(datetime(2019, 10, 30, 12, 30))
rule = FeedFactory(last_run=old_run)
collector = FeedCollector()
collector.collect()
@ -98,14 +95,12 @@ class FeedCollectorTestCase(TestCase):
self.assertEquals(Post.objects.count(), 0)
self.assertEquals(rule.succeeded, False)
self.assertEquals(rule.error, "Stream does not have sufficient permissions")
self.assertEquals(rule.last_suceeded, last_suceeded)
self.assertEquals(rule.last_run, timezone.now())
def test_forbidden(self):
self.mocked_fetch.side_effect = StreamForbiddenException
last_suceeded = timezone.make_aware(
datetime.combine(date=date(2019, 10, 30), time=time(12, 30))
)
rule = FeedFactory(last_suceeded=last_suceeded)
old_run = pytz.utc.localize(datetime(2019, 10, 30, 12, 30))
rule = FeedFactory(last_run=old_run)
collector = FeedCollector()
collector.collect()
@ -115,14 +110,14 @@ class FeedCollectorTestCase(TestCase):
self.assertEquals(Post.objects.count(), 0)
self.assertEquals(rule.succeeded, False)
self.assertEquals(rule.error, "Stream forbidden")
self.assertEquals(rule.last_suceeded, last_suceeded)
self.assertEquals(rule.last_run, timezone.now())
def test_timed_out(self):
self.mocked_fetch.side_effect = StreamTimeOutException
last_suceeded = timezone.make_aware(
last_run = timezone.make_aware(
datetime.combine(date=date(2019, 10, 30), time=time(12, 30))
)
rule = FeedFactory(last_suceeded=last_suceeded)
rule = FeedFactory(last_run=last_run)
collector = FeedCollector()
collector.collect()
@ -132,9 +127,10 @@ class FeedCollectorTestCase(TestCase):
self.assertEquals(Post.objects.count(), 0)
self.assertEquals(rule.succeeded, False)
self.assertEquals(rule.error, "Stream timed out")
self.assertEquals(rule.last_suceeded, last_suceeded)
self.assertEquals(
rule.last_run, pytz.utc.localize(datetime(2019, 10, 30, 12, 30))
)
@freeze_time("2019-10-30 12:30:00")
def test_duplicates(self):
self.mocked_parse.return_value = duplicate_mock
rule = FeedFactory()
@ -186,10 +182,9 @@ class FeedCollectorTestCase(TestCase):
self.assertEquals(Post.objects.count(), 3)
self.assertEquals(rule.succeeded, True)
self.assertEquals(rule.last_suceeded, timezone.now())
self.assertEquals(rule.last_run, timezone.now())
self.assertEquals(rule.error, None)
@freeze_time("2019-02-22 12:30:00")
def test_items_with_identifiers_get_updated(self):
self.mocked_parse.return_value = multiple_update_mock
rule = FeedFactory()
@ -231,7 +226,7 @@ class FeedCollectorTestCase(TestCase):
self.assertEquals(Post.objects.count(), 3)
self.assertEquals(rule.succeeded, True)
self.assertEquals(rule.last_suceeded, timezone.now())
self.assertEquals(rule.last_run, timezone.now())
self.assertEquals(rule.error, None)
self.assertEquals(
@ -246,9 +241,12 @@ class FeedCollectorTestCase(TestCase):
third_post.title, "Birmingham head teacher threatened over LGBT lessons"
)
@freeze_time("2019-02-22 12:30:00")
def test_disabled_rules(self):
rules = (FeedFactory(enabled=False), FeedFactory(enabled=True))
old_run = pytz.utc.localize(datetime(2019, 10, 28, 15))
rules = (
FeedFactory(enabled=False, last_run=old_run),
FeedFactory(enabled=True, last_run=old_run),
)
self.mocked_parse.return_value = multiple_mock
@ -260,8 +258,8 @@ class FeedCollectorTestCase(TestCase):
self.assertEquals(Post.objects.count(), 3)
self.assertEquals(rules[1].succeeded, True)
self.assertEquals(rules[1].last_suceeded, timezone.now())
self.assertEquals(rules[1].last_run, timezone.now())
self.assertEquals(rules[1].error, None)
self.assertEquals(rules[0].last_suceeded, None)
self.assertEquals(rules[0].last_run, old_run)
self.assertEquals(rules[0].succeeded, False)

View file

@ -74,7 +74,7 @@ class RedditCollectorTestCase(TestCase):
for subreddit in rules:
with self.subTest(subreddit=subreddit):
self.assertEquals(subreddit.succeeded, True)
self.assertEquals(subreddit.last_suceeded, timezone.now())
self.assertEquals(subreddit.last_run, timezone.now())
self.assertEquals(subreddit.error, None)
post = Post.objects.get(
@ -133,7 +133,7 @@ class RedditCollectorTestCase(TestCase):
for subreddit in rules:
with self.subTest(subreddit=subreddit):
self.assertEquals(subreddit.succeeded, True)
self.assertEquals(subreddit.last_suceeded, timezone.now())
self.assertEquals(subreddit.last_run, timezone.now())
self.assertEquals(subreddit.error, None)
def test_not_found(self):

View file

@ -25,19 +25,19 @@ class RedditSchedulerTestCase(TestCase):
CollectionRuleFactory(
user=user_1,
type=RuleTypeChoices.subreddit,
last_suceeded=timezone.now() - timedelta(days=4),
last_run=timezone.now() - timedelta(days=4),
enabled=True,
),
CollectionRuleFactory(
user=user_1,
type=RuleTypeChoices.subreddit,
last_suceeded=timezone.now() - timedelta(days=3),
last_run=timezone.now() - timedelta(days=3),
enabled=True,
),
CollectionRuleFactory(
user=user_1,
type=RuleTypeChoices.subreddit,
last_suceeded=timezone.now() - timedelta(days=2),
last_run=timezone.now() - timedelta(days=2),
enabled=True,
),
]
@ -46,19 +46,19 @@ class RedditSchedulerTestCase(TestCase):
CollectionRuleFactory(
user=user_2,
type=RuleTypeChoices.subreddit,
last_suceeded=timezone.now() - timedelta(days=4),
last_run=timezone.now() - timedelta(days=4),
enabled=True,
),
CollectionRuleFactory(
user=user_2,
type=RuleTypeChoices.subreddit,
last_suceeded=timezone.now() - timedelta(days=3),
last_run=timezone.now() - timedelta(days=3),
enabled=True,
),
CollectionRuleFactory(
user=user_2,
type=RuleTypeChoices.subreddit,
last_suceeded=timezone.now() - timedelta(days=2),
last_run=timezone.now() - timedelta(days=2),
enabled=True,
),
]
@ -87,7 +87,7 @@ class RedditSchedulerTestCase(TestCase):
CollectionRuleFactory.create_batch(
name=f"rule-{index}",
type=RuleTypeChoices.subreddit,
last_suceeded=timezone.now() - timedelta(seconds=index),
last_run=timezone.now() - timedelta(seconds=index),
enabled=True,
user=user,
size=15,
@ -121,7 +121,7 @@ class RedditSchedulerTestCase(TestCase):
CollectionRuleFactory(
name=f"rule-{index}",
type=RuleTypeChoices.subreddit,
last_suceeded=timezone.now() - timedelta(seconds=index),
last_run=timezone.now() - timedelta(seconds=index),
enabled=True,
user=user,
)

View file

@ -8,7 +8,7 @@ import pytz
from ftfy import fix_text
from newsreader.news.collection.tests.factories import TwitterProfileFactory
from newsreader.news.collection.tests.factories import TwitterTimeLineFactory
from newsreader.news.collection.tests.twitter.builder.mocks import (
gif_mock,
image_mock,
@ -31,7 +31,7 @@ class TwitterBuilderTestCase(TestCase):
def test_simple_post(self):
builder = TwitterBuilder
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
profile = TwitterTimeLineFactory(screen_name="RobertsSpaceInd")
mock_stream = Mock(rule=profile)
with builder(simple_mock, mock_stream) as builder:
@ -91,7 +91,7 @@ class TwitterBuilderTestCase(TestCase):
def test_images_in_post(self):
builder = TwitterBuilder
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
profile = TwitterTimeLineFactory(screen_name="RobertsSpaceInd")
mock_stream = Mock(rule=profile)
with builder(image_mock, mock_stream) as builder:
@ -134,7 +134,7 @@ class TwitterBuilderTestCase(TestCase):
def test_videos_in_post(self):
builder = TwitterBuilder
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
profile = TwitterTimeLineFactory(screen_name="RobertsSpaceInd")
mock_stream = Mock(rule=profile)
with builder(video_mock, mock_stream) as builder:
@ -190,7 +190,7 @@ class TwitterBuilderTestCase(TestCase):
def test_video_without_bitrate(self):
builder = TwitterBuilder
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
profile = TwitterTimeLineFactory(screen_name="RobertsSpaceInd")
mock_stream = Mock(rule=profile)
with builder(video_without_bitrate_mock, mock_stream) as builder:
@ -212,7 +212,7 @@ class TwitterBuilderTestCase(TestCase):
def test_GIFs_in_post(self):
builder = TwitterBuilder
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
profile = TwitterTimeLineFactory(screen_name="RobertsSpaceInd")
mock_stream = Mock(rule=profile)
with builder(gif_mock, mock_stream) as builder:
@ -241,7 +241,7 @@ class TwitterBuilderTestCase(TestCase):
def test_retweet_post(self):
builder = TwitterBuilder
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
profile = TwitterTimeLineFactory(screen_name="RobertsSpaceInd")
mock_stream = Mock(rule=profile)
with builder(retweet_mock, mock_stream) as builder:
@ -278,7 +278,7 @@ class TwitterBuilderTestCase(TestCase):
def test_quoted_post(self):
builder = TwitterBuilder
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
profile = TwitterTimeLineFactory(screen_name="RobertsSpaceInd")
mock_stream = Mock(rule=profile)
with builder(quoted_mock, mock_stream) as builder:
@ -312,7 +312,7 @@ class TwitterBuilderTestCase(TestCase):
def test_empty_data(self):
builder = TwitterBuilder
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
profile = TwitterTimeLineFactory(screen_name="RobertsSpaceInd")
mock_stream = Mock(rule=profile)
with builder([], mock_stream) as builder:
@ -324,7 +324,7 @@ class TwitterBuilderTestCase(TestCase):
def test_html_sanitizing(self):
builder = TwitterBuilder
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
profile = TwitterTimeLineFactory(screen_name="RobertsSpaceInd")
mock_stream = Mock(rule=profile)
with builder(unsanitized_mock, mock_stream) as builder:
@ -364,7 +364,7 @@ class TwitterBuilderTestCase(TestCase):
def test_urlize_on_urls(self):
builder = TwitterBuilder
profile = TwitterProfileFactory(screen_name="RobertsSpaceInd")
profile = TwitterTimeLineFactory(screen_name="RobertsSpaceInd")
mock_stream = Mock(rule=profile)
with builder(simple_mock, mock_stream) as builder:

View file

@ -1,6 +1,7 @@
import logging
from datetime import datetime
from json import JSONDecodeError
from django.utils.html import format_html, urlize
@ -13,9 +14,16 @@ from newsreader.news.collection.base import (
PostClient,
PostCollector,
PostStream,
Scheduler,
)
from newsreader.news.collection.choices import RuleTypeChoices, TwitterPostTypeChoices
from newsreader.news.collection.utils import truncate_text
from newsreader.news.collection.exceptions import (
StreamDeniedException,
StreamException,
StreamParseException,
StreamTooManyException,
)
from newsreader.news.collection.utils import fetch, truncate_text
from newsreader.news.core.models import Post
@ -26,7 +34,7 @@ TWITTER_API_URL = "https://api.twitter.com/1.1"
class TwitterBuilder(PostBuilder):
rule_type = RuleTypeChoices.twitter
rule_type = RuleTypeChoices.twitter_timeline
def build(self):
results = {}
@ -147,5 +155,49 @@ class TwitterCollector(PostCollector):
pass
class TwitterScheduler:
pass
# see https://developer.twitter.com/en/docs/twitter-api/v1/rate-limits
class TwitterTimeLineScheduler(Scheduler):
    """
    Selects the Twitter timeline rules of one user that can be collected
    without exceeding the rate limit currently reported by the Twitter API.
    """

    def __init__(self, user, timelines=None):
        """
        :param user: the user whose timeline rules are scheduled; its
            ``twitter_oauth_token`` and ``twitter_oauth_token_secret`` are read
            when scheduling.
        :param timelines: optional pre-selected rules. When omitted or empty,
            the user's enabled ``twitter_timeline`` rules are used, ordered by
            ``last_run`` and capped at 200.
        """
        self.user = user

        # ``None`` replaces the former mutable ``[]`` default; any falsy value
        # (including an explicit empty list) still selects the default query.
        if not timelines:
            self.timelines = user.rules.enabled(
                type=RuleTypeChoices.twitter_timeline
            ).order_by("last_run")[:200]
        else:
            self.timelines = timelines

    def get_scheduled_rules(self):
        """
        Return the timelines that may be collected right now.

        An empty list is returned when the user lacks either Twitter OAuth
        credential or when no requests are available.
        """
        user = self.user

        if not user.twitter_oauth_token or not user.twitter_oauth_token_secret:
            return []

        max_amount = self.get_current_ratelimit()

        return self.timelines[:max_amount] if max_amount else []

    def get_current_ratelimit(self):
        """
        Return the number of timeline requests that can currently be made.

        The result is always a non-negative ``int`` so it can be used directly
        as a slice bound; ``0`` is returned when the rate limit status cannot
        be retrieved, parsed or found in the response.
        """
        endpoint = (
            "application/rate_limit_status.json?resources=help,users,search,statuses"
        )

        # TODO add appropriate authentication (OAuth 1.0a) headers
        try:
            response = fetch(f"{TWITTER_API_URL}/{endpoint}")
        except StreamException:
            logger.exception(
                "Unable to retrieve current ratelimit for %s", self.user.pk
            )
            return 0

        try:
            payload = response.json()
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass; catching the base
            # class also covers decoders that raise their own subclass.
            logger.exception(
                "Unable to parse ratelimit request for %s", self.user.pk
            )
            return 0

        return self._remaining_timeline_requests(payload)

    @staticmethod
    def _remaining_timeline_requests(payload):
        """Extract the remaining ``/statuses/user_timeline`` requests from *payload*."""
        resources = payload.get("resources") if isinstance(payload, dict) else None
        statuses = resources.get("statuses") if isinstance(resources, dict) else None

        if not isinstance(statuses, dict):
            return 0

        entry = statuses.get("/statuses/user_timeline", 0)

        # Per the Twitter API v1.1 documentation every endpoint is reported as
        # an object with "limit", "remaining" and "reset"; only "remaining" is
        # usable as an amount. A bare number is still accepted as-is.
        if isinstance(entry, dict):
            entry = entry.get("remaining", 0)

        if isinstance(entry, bool) or not isinstance(entry, int):
            return 0

        # Guard against negative values, which are not valid slice bounds for
        # the default (queryset) timelines.
        return max(entry, 0)