Skip to content

Commit

Permalink
T1104 v2 filter stream (#1136)
Browse files Browse the repository at this point in the history
Upgrade to Django 3.2; support for Twitter v.2 filtered-stream; deprecation of v 1.1 endpoints and credentials. 

Co-authored-by: Dolsy Smith <[email protected]>
Co-authored-by: Adhithya Kiran <[email protected]>
  • Loading branch information
3 people authored Apr 14, 2023
1 parent 282e18e commit a16130d
Show file tree
Hide file tree
Showing 13 changed files with 184 additions and 51 deletions.
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
FROM gwul/sfm-base@sha256:0b80a3d3562cdb4d631fbb55b9bd24889312838cbd27cd33e14cc0c18405f007
MAINTAINER Social Feed Manager <[email protected]>

ARG build_version=release

# Install apache
RUN apt-get update && apt-get install -y \
apache2=2.4* \
apache2-dev=2.4*

ADD . /opt/sfm-ui/
WORKDIR /opt/sfm-ui
RUN python -m pip install -r requirements/common.txt -r requirements/release.txt
RUN python -m pip install -r requirements/common.txt -r requirements/${build_version}.txt

# Adds fixtures.
ADD docker/ui/fixtures.json /opt/sfm-setup/
Expand Down
4 changes: 3 additions & 1 deletion Dockerfile-consumer
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
FROM gwul/sfm-base@sha256:0b80a3d3562cdb4d631fbb55b9bd24889312838cbd27cd33e14cc0c18405f007
MAINTAINER Social Feed Manager <[email protected]>

ARG build_version=release

ADD . /opt/sfm-ui/
WORKDIR /opt/sfm-ui
RUN pip install -r requirements/common.txt -r requirements/release.txt
RUN pip install -r requirements/common.txt -r requirements/${build_version}.txt

ADD docker/consumer/invoke_consumer.sh /opt/sfm-setup/
RUN chmod +x /opt/sfm-setup/invoke_consumer.sh
Expand Down
4 changes: 3 additions & 1 deletion Dockerfile-runserver
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
FROM gwul/sfm-base@sha256:0b80a3d3562cdb4d631fbb55b9bd24889312838cbd27cd33e14cc0c18405f007
MAINTAINER Social Feed Manager <[email protected]>

ARG build_version=release

ADD . /opt/sfm-ui/
WORKDIR /opt/sfm-ui
RUN pip install -r requirements/common.txt -r requirements/release.txt
RUN pip install -r requirements/common.txt -r requirements/${build_version}.txt

# Adds fixtures.
ADD docker/ui/fixtures.json /opt/sfm-setup/
Expand Down
8 changes: 4 additions & 4 deletions requirements/common.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
APScheduler==3.7.0
django==2.2.24
django==3.2.16
django-allauth==0.41.0
django-braces==1.14.0
django-crispy-forms==1.9.0
django-datatables-view==1.18.0
django-filter==2.2.0
django-simple-history==2.7.3
django-filter==21.1
django-simple-history==3.2.0
djangorestframework~=3.11.2
iso8601==0.1.12
jsonfield==3.1.0
psycopg2-binary==2.8.4
pytz==2019.3
pytz==2022.1
rabbitmq-admin==0.2
SQLAlchemy==1.3.5
# Used when executing SQL in migration.
Expand Down
6 changes: 3 additions & 3 deletions sfm/message_consumer/test_sfm_ui_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def setUp(self):
harvest_type="test_type", name="test_collection",
harvest_options=json.dumps({}))
stream_collection = Collection.objects.create(collection_set=collection_set, credential=credential,
harvest_type=Collection.TWITTER_SAMPLE,
harvest_type=Collection.TWITTER_FILTER_STREAM,
name="test_stream_collection",
harvest_options=json.dumps({}), is_on=True)

Expand Down Expand Up @@ -199,7 +199,7 @@ def test_harvest_status_on_message(self):

@patch("message_consumer.sfm_ui_consumer.collection_stop")
def test_harvest_status_stream_failed_on_message(self, mock_collection_stop):
self.consumer.routing_key = "harvest.status.twitter.twitter_sample"
self.consumer.routing_key = "harvest.status.twitter2.twitter_filter_stream"
self.consumer.message = {
"id": "test:3",
"status": Harvest.FAILURE,
Expand All @@ -218,7 +218,7 @@ def test_harvest_status_stream_failed_on_message(self, mock_collection_stop):

@patch("message_consumer.sfm_ui_consumer.collection_stop")
def test_rogue_harvest(self, mock_collection_stop):
self.consumer.routing_key = "harvest.status.twitter.twitter_sample"
self.consumer.routing_key = "harvest.status.twitter2.twitter_filter_stream"
self.consumer.message = {
"id": "test:3",
"status": Harvest.RUNNING,
Expand Down
9 changes: 7 additions & 2 deletions sfm/sfm/settings/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,10 +283,15 @@
SERIALIZE_HOUR = env.get('SFM_SERIALIZE_HOUR', '3')
SERIALIZE_MINUTE = env.get('SFM_SERIALIZE_MINUTE', '0')

SFM_UI_VERSION = "2.5.0"
SFM_UI_VERSION = "3.0.0"

# If a collection is scheduled for <= PRIORITY_SCHEDULE_MINUTES,
# the routing key will have .priority appended.
PRIORITY_SCHEDULE_MINUTES = 60
# Harvest types that support priority queues.
PRIORITY_HARVEST_TYPES = ['twitter_search', 'twitter_user_timeline']
PRIORITY_HARVEST_TYPES = ['twitter_search', 'twitter_user_timeline', 'twitter_user_timeline_2', 'twitter_search_2', 'twitter_academic_search']
# For Django 3.2, recommended to set the autofield for primary-key columns explicitly
DEFAULT_AUTO_FIELD = 'django.db.models.AutoField'
# permitted Twitter collection types
# types not included will be disabled in the UI
TWITTER_COLLECTION_TYPES = env.get('TWITTER_COLLECTION_TYPES', 'twitter_search_2,twitter_user_timeline_2').split(',')
85 changes: 83 additions & 2 deletions sfm/ui/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,22 @@ def save(self, commit=True):
m.save()
return m

class CollectionTwitterFilterStreamForm(BaseCollectionForm):
    """
    Collection form for Twitter filtered-stream collections.

    The schedule field is excluded from the form, and schedule_minutes is
    cleared whenever the collection is saved.
    """

    class Meta(BaseCollectionForm.Meta):
        exclude = ('schedule_minutes',)

    def __init__(self, *args, **kwargs):
        super(CollectionTwitterFilterStreamForm, self).__init__(*args, **kwargs)

    def save(self, commit=True):
        """
        Save the collection as a Twitter filtered-stream collection.

        :param commit: accepted for signature compatibility; it is not consulted,
            the collection is always saved.
        :return: the saved collection.
        """
        collection = super(CollectionTwitterFilterStreamForm, self).save(commit=False)
        collection.schedule_minutes = None
        collection.harvest_type = Collection.TWITTER_FILTER_STREAM
        collection.save()
        return collection




class CollectionFlickrUserForm(BaseCollectionForm):
incremental = forms.BooleanField(initial=True, required=False, label=INCREMENTAL_LABEL, help_text=INCREMENTAL_HELP)
Expand Down Expand Up @@ -652,7 +668,7 @@ class SeedTwitterSearch2Form(BaseSeedForm):
query = forms.CharField(required=True, widget=forms.Textarea(attrs={'rows': 4}),
help_text="See Twitter's <a href='https://developer.twitter.com/en/docs/twitter-api/tweets/counts/integrate/build-a-query' target='_blank'>instructions for building a query</a>. "
"Example: (happy OR happiness) lang:en -is:retweet")
start_time = forms.DateTimeField(required=False, help_text="Earliest date of tweets searched. Will be converted to UTC. Start and end dates must be within the previous 7 days. A start date outside of that window will be ignore.", widget=DateTimeInput(attrs={'class': 'datepicker'}))
start_time = forms.DateTimeField(required=False, help_text="Earliest date of tweets searched. Will be converted to UTC. Start and end dates must be within the previous 7 days. A start date outside of that window will be ignored.", widget=DateTimeInput(attrs={'class': 'datepicker'}))
end_time= forms.DateTimeField(required=False, help_text="Most recent date of tweets searched. Will be converted to UTC.", widget=DateTimeInput(attrs={'class': 'datepicker'}))
limit = forms.IntegerField(required=False, validators=[MinValueValidator(1)], help_text="Maximum number of tweets to be retrieved. Will be rounded up to a multiple of 100. Limits are approximate; actual results may exceed the limit slightly.")

Expand Down Expand Up @@ -882,6 +898,71 @@ def save(self, commit=True):
return m


class SeedTwitterFilterStreamForm(BaseSeedForm):
    """
    Seed form for Twitter filtered-stream collections.

    A seed is a streaming rule plus an optional tag. Both are stored together in the
    seed's token as a JSON object with a "rule" key and, when a tag was entered,
    a "tag" key.
    """
    rule = forms.CharField(required=True, widget=forms.Textarea(attrs={'rows': 4}),
                           help_text="""Enter a streaming rule to select Tweets during your streaming harvest. See the <a href="https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/integrate/build-a-rule" target="_blank">Twitter API documentation</a> for guidance on creating rules.
""")

    tag = forms.CharField(required=False, widget=forms.Textarea(attrs={'rows': 1}),
                          help_text="""Enter a tag for your rule. Tags will appear in exported data for this collection.""")

    def __init__(self, *args, **kwargs):
        super(SeedTwitterFilterStreamForm, self).__init__(*args, **kwargs)
        self.helper.layout[0][0].extend(("rule","tag"))

        # Nothing to pre-populate unless an existing seed with a token is being edited.
        if not (self.instance and self.instance.token):
            return
        stored = json.loads(self.instance.token)
        for name in ("rule", "tag"):
            if name in stored:
                self.fields[name].initial = stored[name]

    @staticmethod
    def _serialize_token(rule, tag):
        """
        Build the JSON token for a seed, leaving out whichever of rule/tag is empty.
        """
        token = {}
        if rule:
            token["rule"] = rule
        if tag:
            token["tag"] = tag
        return json.dumps(token, ensure_ascii=False)

    def clean_rule(self):
        """Return the rule with surrounding whitespace removed."""
        return self.cleaned_data.get("rule").strip()

    def clean_tag(self):
        """Return the tag with surrounding whitespace removed."""
        return self.cleaned_data.get("tag").strip()

    def clean(self):
        """
        Require a rule and reject a seed whose token already exists in this collection.

        :raises ValidationError: if no rule was supplied, or if another seed in the
            collection has the same token (compared case-insensitively).
        """
        # Whitespace is stripped in clean_rule / clean_tag rather than here.
        rule_val = self.cleaned_data.get("rule")
        tag_val = self.cleaned_data.get("tag")

        if not rule_val:
            raise ValidationError(u"A streaming rule is required.")

        token_val = self._serialize_token(rule_val, tag_val)

        # On update, a token that differs from the stored one only by case is the same
        # seed (e.g. changing `track:Test` to `track:test`), so skip the duplicate check.
        if self.view_type == Seed.UPDATE_VIEW and token_val.lower() == self.entry.token.lower():
            return
        if Seed.objects.filter(collection=self.collection, token__iexact=token_val).exists():
            raise ValidationError(u'Seed: {} already exist.'.format(token_val))

    def save(self, commit=True):
        """
        Store the rule and tag as the seed's JSON token and save the seed.

        :param commit: accepted for signature compatibility; it is not consulted,
            the seed is always saved.
        :return: the saved seed.
        """
        seed = super(SeedTwitterFilterStreamForm, self).save(commit=False)
        seed.token = self._serialize_token(self.cleaned_data["rule"], self.cleaned_data["tag"])
        seed.save()
        return seed

class SeedFlickrUserForm(BaseSeedForm):
class Meta(BaseSeedForm.Meta):
fields = ['token', 'uid']
Expand Down Expand Up @@ -1282,7 +1363,7 @@ def __init__(self, *args, **kwargs):
onclick="window.location.href='{0}'".format(cancel_url))
)
)
if len(self.fields["seeds"].queryset) < 2:
if (len(self.fields["seeds"].queryset) < 2) or (self.collection.harvest_type == 'twitter_filter_stream'):
del self.fields["seeds"]
del self.fields["seed_choice"]
self.helper.layout[0].pop(0)
Expand Down
38 changes: 27 additions & 11 deletions sfm/ui/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ def default_uuid():
return uuid.uuid4().hex


def update_harvest_types(harvest_types):
    """
    Drop Twitter harvest types that are not enabled in settings.

    Harvest types that do not start with "twitter" always pass through. A harvest
    type starting with "twitter" is kept only when it is listed in
    settings.TWITTER_COLLECTION_TYPES.

    :param harvest_types: either a list of tuples whose first element is the harvest
        type, or a dict keyed by harvest type.
    :return: a new list or dict (matching the input) holding the permitted entries.
    """
    def is_permitted(harvest_type):
        if harvest_type.startswith('twitter'):
            return harvest_type in settings.TWITTER_COLLECTION_TYPES
        return True

    if not isinstance(harvest_types, list):
        # Anything that is not a list is treated as a dict keyed by harvest type.
        return {key: value for key, value in harvest_types.items() if is_permitted(key)}
    return [entry for entry in harvest_types if is_permitted(entry[0])]

class User(AbstractUser):
DAILY = "daily"
WEEKLY = "weekly"
Expand Down Expand Up @@ -112,8 +122,8 @@ class Credential(models.Model):
WEIBO = "weibo"
TUMBLR = "tumblr"
PLATFORM_CHOICES = [
(TWITTER, 'Twitter'),
(TWITTER2, 'Twitter version 2'),
# (TWITTER, 'Twitter'),
(TWITTER2, 'Twitter (v.2)'),
(FLICKR, 'Flickr'),
(WEIBO, 'Weibo'),
(TUMBLR, "Tumblr")
Expand Down Expand Up @@ -299,6 +309,7 @@ def natural_key(self):
class Collection(models.Model):
TWITTER_SEARCH = 'twitter_search'
TWITTER_FILTER = "twitter_filter"
TWITTER_FILTER_STREAM = "twitter_filter_stream"
TWITTER_USER_TIMELINE = 'twitter_user_timeline'
TWITTER_SAMPLE = 'twitter_sample'
TWITTER_ACADEMIC_SEARCH = 'twitter_academic_search'
Expand All @@ -318,33 +329,36 @@ class Collection(models.Model):
(60 * 24 * 7, 'Every week'),
(60 * 24 * 7 * 4, 'Every 4 weeks')
]
HARVEST_CHOICES = [
HARVEST_CHOICES = update_harvest_types([
(TWITTER_USER_TIMELINE, 'Twitter user timeline'),
(TWITTER_SEARCH, 'Twitter search'),
(TWITTER_FILTER, 'Twitter filter'),
(TWITTER_FILTER_STREAM, 'Twitter filtered stream'),
(TWITTER_SAMPLE, 'Twitter sample'),
(TWITTER_ACADEMIC_SEARCH, 'Twitter academic search'),
(TWITTER_SEARCH_2, 'Twitter search version 2'),
(TWITTER_USER_TIMELINE_2, 'Twitter user timeline version 2'),
(TWITTER_SEARCH_2, 'Twitter search (v. 2)'),
(TWITTER_USER_TIMELINE_2, 'Twitter user timeline (v. 2)'),
(TUMBLR_BLOG_POSTS, 'Tumblr blog posts'),
(FLICKR_USER, 'Flickr user'),
(WEIBO_TIMELINE, 'Weibo timeline')
]
HARVEST_DESCRIPTION = {
])
HARVEST_DESCRIPTION = update_harvest_types({
TWITTER_SEARCH: 'Recent tweets matching a query',
TWITTER_FILTER: 'Tweets in real time matching filter criteria',
TWITTER_FILTER_STREAM: 'Tweets in real time matching streaming rules',
TWITTER_USER_TIMELINE: 'Tweets from specific accounts',
TWITTER_SAMPLE: 'A subset of all tweets in real time',
TWITTER_ACADEMIC_SEARCH: 'Tweets from the full archive using Twitter Academic Research',
TWITTER_SEARCH_2: 'Recent tweets matching a query from the standard version 2 API',
TWITTER_USER_TIMELINE_2: 'Tweets from specific accounts, from the version 2 API',
TWITTER_SEARCH_2: 'Recent tweets matching a query from the v. 2 API',
TWITTER_USER_TIMELINE_2: 'Tweets from specific accounts, from the v. 2 API',
FLICKR_USER: 'Posts and photos from specific accounts',
WEIBO_TIMELINE: "Posts from a user and the user's friends",
TUMBLR_BLOG_POSTS: 'Blog posts from specific blogs'
}
})
HARVEST_FIELDS = {
TWITTER_SEARCH: {"link": None, "token": "Search query", "uid": None},
TWITTER_FILTER: {"link": None, "token": "Filter criteria", "uid": None},
TWITTER_FILTER_STREAM: {"link": None, "token": "Streaming rules", "uid": None},
TWITTER_USER_TIMELINE: {"link": "Link", "token": "Twitter accounts", "uid": "User ID"},
TWITTER_SAMPLE: None,
TWITTER_ACADEMIC_SEARCH: {"link": None, "token": "Search query", "uid": None},
Expand All @@ -356,6 +370,7 @@ class Collection(models.Model):
}
REQUIRED_SEED_COUNTS = {
TWITTER_FILTER: 1,
TWITTER_FILTER_STREAM: None, # allowed seeds depend on user's access level
TWITTER_SEARCH: 1,
TWITTER_ACADEMIC_SEARCH: 1,
TWITTER_SEARCH_2: 1,
Expand All @@ -366,6 +381,7 @@ class Collection(models.Model):
HARVEST_TYPES_TO_PLATFORM = {
TWITTER_SEARCH: Credential.TWITTER,
TWITTER_FILTER: Credential.TWITTER,
TWITTER_FILTER_STREAM: Credential.TWITTER2,
TWITTER_USER_TIMELINE: Credential.TWITTER,
TWITTER_SAMPLE: Credential.TWITTER,
TWITTER_ACADEMIC_SEARCH: Credential.TWITTER2,
Expand All @@ -376,7 +392,7 @@ class Collection(models.Model):
WEIBO_SEARCH: Credential.WEIBO,
TUMBLR_BLOG_POSTS: Credential.TUMBLR
}
STREAMING_HARVEST_TYPES = (TWITTER_SAMPLE, TWITTER_FILTER)
# The trailing comma makes this a one-element tuple. Without it the parentheses are
# only grouping, the value is a plain str, and `in` checks become substring matches.
STREAMING_HARVEST_TYPES = (TWITTER_FILTER_STREAM,)
RATE_LIMITED_HARVEST_TYPES = (TWITTER_USER_TIMELINE, TWITTER_SEARCH, TWITTER_USER_TIMELINE_2, TWITTER_SEARCH_2, TWITTER_ACADEMIC_SEARCH)
DEFAULT_VISIBILITY = 'default'
LOCAL_VISIBILITY = 'local'
Expand Down
2 changes: 1 addition & 1 deletion sfm/ui/templates/ui/collection_detail.html
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ <h4 class="modal-title" id="addNoteTurnOffModalLabel">Add note & turn off</h4>
</div>
</div>
{% else %}
<button type="submit" class={% if seed_error_message or seed_warning_message or credential_used_col or not can_toggle_on or stream_stopping or not collection.is_active %}"btn btn-secondary" disabled="disabled" {% else %} "btn btn-success" {% endif %}>
<button type="submit" class={% if seed_error_message or seed_warning_message or credential_used_col or not can_toggle_on or stream_stopping or not collection.is_active or disabled_collection_type %}"btn btn-secondary" disabled="disabled" {% else %} "btn btn-success" {% endif %}>
<span class="fas fa-power-off fa-fw" aria-hidden="true"></span> Turn on
</button><br />
{% endif %}
Expand Down
Loading

0 comments on commit a16130d

Please sign in to comment.