Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

T1104 v2 filter stream #1136

Merged
merged 20 commits into from
Apr 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
FROM gwul/sfm-base@sha256:0b80a3d3562cdb4d631fbb55b9bd24889312838cbd27cd33e14cc0c18405f007
MAINTAINER Social Feed Manager <[email protected]>

ARG build_version=release

# Install apache
RUN apt-get update && apt-get install -y \
apache2=2.4* \
apache2-dev=2.4*

ADD . /opt/sfm-ui/
WORKDIR /opt/sfm-ui
RUN python -m pip install -r requirements/common.txt -r requirements/release.txt
RUN python -m pip install -r requirements/common.txt -r requirements/${build_version}.txt

# Adds fixtures.
ADD docker/ui/fixtures.json /opt/sfm-setup/
Expand Down
4 changes: 3 additions & 1 deletion Dockerfile-consumer
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
FROM gwul/sfm-base@sha256:0b80a3d3562cdb4d631fbb55b9bd24889312838cbd27cd33e14cc0c18405f007
MAINTAINER Social Feed Manager <[email protected]>

ARG build_version=release

ADD . /opt/sfm-ui/
WORKDIR /opt/sfm-ui
RUN pip install -r requirements/common.txt -r requirements/release.txt
RUN pip install -r requirements/common.txt -r requirements/${build_version}.txt

ADD docker/consumer/invoke_consumer.sh /opt/sfm-setup/
RUN chmod +x /opt/sfm-setup/invoke_consumer.sh
Expand Down
4 changes: 3 additions & 1 deletion Dockerfile-runserver
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
FROM gwul/sfm-base@sha256:0b80a3d3562cdb4d631fbb55b9bd24889312838cbd27cd33e14cc0c18405f007
MAINTAINER Social Feed Manager <[email protected]>

ARG build_version=release

ADD . /opt/sfm-ui/
WORKDIR /opt/sfm-ui
RUN pip install -r requirements/common.txt -r requirements/release.txt
RUN pip install -r requirements/common.txt -r requirements/${build_version}.txt

# Adds fixtures.
ADD docker/ui/fixtures.json /opt/sfm-setup/
Expand Down
8 changes: 4 additions & 4 deletions requirements/common.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
APScheduler==3.7.0
django==2.2.24
django==3.2.16
django-allauth==0.41.0
django-braces==1.14.0
django-crispy-forms==1.9.0
django-datatables-view==1.18.0
django-filter==2.2.0
django-simple-history==2.7.3
django-filter==21.1
django-simple-history==3.2.0
djangorestframework~=3.11.2
iso8601==0.1.12
jsonfield==3.1.0
psycopg2-binary==2.8.4
pytz==2019.3
pytz==2022.1
rabbitmq-admin==0.2
SQLAlchemy==1.3.5
# Used when executing SQL in migration.
Expand Down
6 changes: 3 additions & 3 deletions sfm/message_consumer/test_sfm_ui_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def setUp(self):
harvest_type="test_type", name="test_collection",
harvest_options=json.dumps({}))
stream_collection = Collection.objects.create(collection_set=collection_set, credential=credential,
harvest_type=Collection.TWITTER_SAMPLE,
harvest_type=Collection.TWITTER_FILTER_STREAM,
name="test_stream_collection",
harvest_options=json.dumps({}), is_on=True)

Expand Down Expand Up @@ -199,7 +199,7 @@ def test_harvest_status_on_message(self):

@patch("message_consumer.sfm_ui_consumer.collection_stop")
def test_harvest_status_stream_failed_on_message(self, mock_collection_stop):
self.consumer.routing_key = "harvest.status.twitter.twitter_sample"
self.consumer.routing_key = "harvest.status.twitter2.twitter_filter_stream"
self.consumer.message = {
"id": "test:3",
"status": Harvest.FAILURE,
Expand All @@ -218,7 +218,7 @@ def test_harvest_status_stream_failed_on_message(self, mock_collection_stop):

@patch("message_consumer.sfm_ui_consumer.collection_stop")
def test_rogue_harvest(self, mock_collection_stop):
self.consumer.routing_key = "harvest.status.twitter.twitter_sample"
self.consumer.routing_key = "harvest.status.twitter2.twitter_filter_stream"
self.consumer.message = {
"id": "test:3",
"status": Harvest.RUNNING,
Expand Down
9 changes: 7 additions & 2 deletions sfm/sfm/settings/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,10 +283,15 @@
SERIALIZE_HOUR = env.get('SFM_SERIALIZE_HOUR', '3')
SERIALIZE_MINUTE = env.get('SFM_SERIALIZE_MINUTE', '0')

SFM_UI_VERSION = "2.5.0"
SFM_UI_VERSION = "3.0.0"

# If a collection is scheduled for <= PRIORITY_SCHEDULE_MINUTES,
# the routing key will have .priority appended.
PRIORITY_SCHEDULE_MINUTES = 60
# Harvest types that support priority queues.
PRIORITY_HARVEST_TYPES = ['twitter_search', 'twitter_user_timeline']
PRIORITY_HARVEST_TYPES = ['twitter_search', 'twitter_user_timeline', 'twitter_user_timeline_2', 'twitter_search_2', 'twitter_academic_search']
# For Django 3.2, recommended to set the autofield for primary-key columns explicitly
DEFAULT_AUTO_FIELD = 'django.db.models.AutoField'
# Permitted Twitter collection types, read as a comma-separated list from the
# TWITTER_COLLECTION_TYPES environment variable.
# Types not included will be disabled in the UI.
# Whitespace around each entry is stripped and empty entries are dropped, so a
# value such as "twitter_search_2, twitter_user_timeline_2" still matches the
# harvest type names exactly.
TWITTER_COLLECTION_TYPES = [
    harvest_type.strip()
    for harvest_type in env.get('TWITTER_COLLECTION_TYPES',
                                'twitter_search_2,twitter_user_timeline_2').split(',')
    if harvest_type.strip()
]
85 changes: 83 additions & 2 deletions sfm/ui/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,22 @@ def save(self, commit=True):
m.save()
return m

class CollectionTwitterFilterStreamForm(BaseCollectionForm):
    """Collection form for Twitter filtered-stream harvests.

    The ``schedule_minutes`` field is excluded from the form and is cleared
    when the collection is saved.
    """

    class Meta(BaseCollectionForm.Meta):
        exclude = ('schedule_minutes',)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def save(self, commit=True):
        """Save the collection as a filtered-stream harvest and return it.

        NOTE: the instance is always written to the database here; the
        ``commit`` argument is accepted but not consulted.
        """
        collection = super().save(commit=False)
        collection.harvest_type = Collection.TWITTER_FILTER_STREAM
        collection.schedule_minutes = None
        collection.save()
        return collection




class CollectionFlickrUserForm(BaseCollectionForm):
incremental = forms.BooleanField(initial=True, required=False, label=INCREMENTAL_LABEL, help_text=INCREMENTAL_HELP)
Expand Down Expand Up @@ -652,7 +668,7 @@ class SeedTwitterSearch2Form(BaseSeedForm):
query = forms.CharField(required=True, widget=forms.Textarea(attrs={'rows': 4}),
help_text="See Twitter's <a href='https://developer.twitter.com/en/docs/twitter-api/tweets/counts/integrate/build-a-query' target='_blank'>instructions for building a query</a>. "
"Example: (happy OR happiness) lang:en -is:retweet")
start_time = forms.DateTimeField(required=False, help_text="Earliest date of tweets searched. Will be converted to UTC. Start and end dates must be within the previous 7 days. A start date outside of that window will be ignore.", widget=DateTimeInput(attrs={'class': 'datepicker'}))
start_time = forms.DateTimeField(required=False, help_text="Earliest date of tweets searched. Will be converted to UTC. Start and end dates must be within the previous 7 days. A start date outside of that window will be ignored.", widget=DateTimeInput(attrs={'class': 'datepicker'}))
end_time= forms.DateTimeField(required=False, help_text="Most recent date of tweets searched. Will be converted to UTC.", widget=DateTimeInput(attrs={'class': 'datepicker'}))
limit = forms.IntegerField(required=False, validators=[MinValueValidator(1)], help_text="Maximum number of tweets to be retrieved. Will be rounded up to a multiple of 100. Limits are approximate; actual results may exceed the limit slightly.")

Expand Down Expand Up @@ -882,6 +898,71 @@ def save(self, commit=True):
return m


class SeedTwitterFilterStreamForm(BaseSeedForm):
    """Seed form for Twitter filtered-stream collections.

    The seed's ``token`` is stored as a JSON object containing the streaming
    ``rule`` and, when provided, a ``tag``.
    """

    rule = forms.CharField(
        required=True,
        widget=forms.Textarea(attrs={'rows': 4}),
        help_text=(
            'Enter a streaming rule to select Tweets during your streaming harvest. '
            'See the <a href="https://developer.twitter.com/en/docs/twitter-api/tweets/'
            'filtered-stream/integrate/build-a-rule" target="_blank">Twitter API '
            'documentation</a> for guidance on creating rules.\n'
        ),
    )

    tag = forms.CharField(
        required=False,
        widget=forms.Textarea(attrs={'rows': 1}),
        help_text="""Enter a tag for your rule. Tags will appear in exported data for this collection.""",
    )

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.helper.layout[0][0].extend(("rule", "tag"))

        # When editing an existing seed, pre-fill the fields from its JSON token.
        if self.instance and self.instance.token:
            stored = json.loads(self.instance.token)
            for key in ("rule", "tag"):
                if key in stored:
                    self.fields[key].initial = stored[key]

    def clean_rule(self):
        """Return the rule with surrounding whitespace removed."""
        return self.cleaned_data.get("rule").strip()

    def clean_tag(self):
        """Return the tag with surrounding whitespace removed."""
        return self.cleaned_data.get("tag").strip()

    def clean(self):
        """Require a rule and reject a seed whose token already exists in the collection.

        Raises:
            ValidationError: if no rule was supplied, or if another seed in
                this collection has the same token (case-insensitive).
        """
        # Stripping is done in clean_rule/clean_tag; the original author noted
        # that stripping here left a trailing space for reasons not understood.
        rule_val = self.cleaned_data.get("rule")
        tag_val = self.cleaned_data.get("tag")

        if not rule_val:
            raise ValidationError(u"A streaming rule is required.")

        token_fields = {"rule": rule_val}
        if tag_val:
            token_fields["tag"] = tag_val
        token_val = json.dumps(token_fields, ensure_ascii=False)

        # In the update view a token that differs from the stored one only by
        # case is treated as unchanged, so the user may re-case an existing seed
        # without tripping the duplicate check.
        unchanged_update = (
            self.view_type == Seed.UPDATE_VIEW
            and token_val.lower() == self.entry.token.lower()
        )
        if not unchanged_update and Seed.objects.filter(
                collection=self.collection, token__iexact=token_val).exists():
            raise ValidationError(u'Seed: {} already exist.'.format(token_val))

    def save(self, commit=True):
        """Store the rule (and tag, if any) as the seed's JSON token and save it.

        NOTE: the instance is always written to the database here; the
        ``commit`` argument is accepted but not consulted.
        """
        seed = super().save(commit=False)
        token = {
            key: self.cleaned_data[key]
            for key in ("rule", "tag")
            if self.cleaned_data[key]
        }
        seed.token = json.dumps(token, ensure_ascii=False)
        seed.save()
        return seed

class SeedFlickrUserForm(BaseSeedForm):
class Meta(BaseSeedForm.Meta):
fields = ['token', 'uid']
Expand Down Expand Up @@ -1282,7 +1363,7 @@ def __init__(self, *args, **kwargs):
onclick="window.location.href='{0}'".format(cancel_url))
)
)
if len(self.fields["seeds"].queryset) < 2:
if (len(self.fields["seeds"].queryset) < 2) or (self.collection.harvest_type == 'twitter_filter_stream'):
del self.fields["seeds"]
del self.fields["seed_choice"]
self.helper.layout[0].pop(0)
Expand Down
38 changes: 27 additions & 11 deletions sfm/ui/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ def default_uuid():
return uuid.uuid4().hex


def update_harvest_types(harvest_types):
    """Filter SFM harvest types down to those permitted by settings.

    Harvest types whose key does not start with ``'twitter'`` are always kept.
    Twitter harvest types are kept only when they are listed in
    ``settings.TWITTER_COLLECTION_TYPES``.

    :param harvest_types: either a list of tuples whose first element is the
        harvest type key, or a dict keyed by harvest type.
    :return: a new list (for list input) or a new dict (otherwise) holding
        only the permitted entries.
    """
    def is_permitted(harvest_type):
        if not harvest_type.startswith('twitter'):
            return True
        return harvest_type in settings.TWITTER_COLLECTION_TYPES

    # A list is assumed to hold tuples; anything else is treated as a dict.
    if isinstance(harvest_types, list):
        return [choice for choice in harvest_types if is_permitted(choice[0])]
    return {key: value for key, value in harvest_types.items() if is_permitted(key)}

class User(AbstractUser):
DAILY = "daily"
WEEKLY = "weekly"
Expand Down Expand Up @@ -112,8 +122,8 @@ class Credential(models.Model):
WEIBO = "weibo"
TUMBLR = "tumblr"
PLATFORM_CHOICES = [
(TWITTER, 'Twitter'),
(TWITTER2, 'Twitter version 2'),
# (TWITTER, 'Twitter'),
(TWITTER2, 'Twitter (v.2)'),
(FLICKR, 'Flickr'),
(WEIBO, 'Weibo'),
(TUMBLR, "Tumblr")
Expand Down Expand Up @@ -299,6 +309,7 @@ def natural_key(self):
class Collection(models.Model):
TWITTER_SEARCH = 'twitter_search'
TWITTER_FILTER = "twitter_filter"
TWITTER_FILTER_STREAM = "twitter_filter_stream"
TWITTER_USER_TIMELINE = 'twitter_user_timeline'
TWITTER_SAMPLE = 'twitter_sample'
TWITTER_ACADEMIC_SEARCH = 'twitter_academic_search'
Expand All @@ -318,33 +329,36 @@ class Collection(models.Model):
(60 * 24 * 7, 'Every week'),
(60 * 24 * 7 * 4, 'Every 4 weeks')
]
HARVEST_CHOICES = [
HARVEST_CHOICES = update_harvest_types([
(TWITTER_USER_TIMELINE, 'Twitter user timeline'),
(TWITTER_SEARCH, 'Twitter search'),
(TWITTER_FILTER, 'Twitter filter'),
(TWITTER_FILTER_STREAM, 'Twitter filtered stream'),
(TWITTER_SAMPLE, 'Twitter sample'),
(TWITTER_ACADEMIC_SEARCH, 'Twitter academic search'),
(TWITTER_SEARCH_2, 'Twitter search version 2'),
(TWITTER_USER_TIMELINE_2, 'Twitter user timeline version 2'),
# Label uses the same "(v. 2)" suffix as the user timeline entry below.
(TWITTER_SEARCH_2, 'Twitter search (v. 2)'),
(TWITTER_USER_TIMELINE_2, 'Twitter user timeline (v. 2)'),
(TUMBLR_BLOG_POSTS, 'Tumblr blog posts'),
(FLICKR_USER, 'Flickr user'),
(WEIBO_TIMELINE, 'Weibo timeline')
]
HARVEST_DESCRIPTION = {
])
HARVEST_DESCRIPTION = update_harvest_types({
TWITTER_SEARCH: 'Recent tweets matching a query',
TWITTER_FILTER: 'Tweets in real time matching filter criteria',
TWITTER_FILTER_STREAM: 'Tweets in real time matching streaming rules',
TWITTER_USER_TIMELINE: 'Tweets from specific accounts',
TWITTER_SAMPLE: 'A subset of all tweets in real time',
TWITTER_ACADEMIC_SEARCH: 'Tweets from the full archive using Twitter Academic Research',
TWITTER_SEARCH_2: 'Recent tweets matching a query from the standard version 2 API',
TWITTER_USER_TIMELINE_2: 'Tweets from specific accounts, from the version 2 API',
TWITTER_SEARCH_2: 'Recent tweets matching a query from the v. 2 API',
TWITTER_USER_TIMELINE_2: 'Tweets from specific accounts, from the v. 2 API',
FLICKR_USER: 'Posts and photos from specific accounts',
WEIBO_TIMELINE: "Posts from a user and the user's friends",
TUMBLR_BLOG_POSTS: 'Blog posts from specific blogs'
}
})
HARVEST_FIELDS = {
TWITTER_SEARCH: {"link": None, "token": "Search query", "uid": None},
TWITTER_FILTER: {"link": None, "token": "Filter criteria", "uid": None},
TWITTER_FILTER_STREAM: {"link": None, "token": "Streaming rules", "uid": None},
TWITTER_USER_TIMELINE: {"link": "Link", "token": "Twitter accounts", "uid": "User ID"},
TWITTER_SAMPLE: None,
TWITTER_ACADEMIC_SEARCH: {"link": None, "token": "Search query", "uid": None},
Expand All @@ -356,6 +370,7 @@ class Collection(models.Model):
}
REQUIRED_SEED_COUNTS = {
TWITTER_FILTER: 1,
TWITTER_FILTER_STREAM: None, # allowed seeds depend on user's access level
TWITTER_SEARCH: 1,
TWITTER_ACADEMIC_SEARCH: 1,
TWITTER_SEARCH_2: 1,
Expand All @@ -366,6 +381,7 @@ class Collection(models.Model):
HARVEST_TYPES_TO_PLATFORM = {
TWITTER_SEARCH: Credential.TWITTER,
TWITTER_FILTER: Credential.TWITTER,
TWITTER_FILTER_STREAM: Credential.TWITTER2,
TWITTER_USER_TIMELINE: Credential.TWITTER,
TWITTER_SAMPLE: Credential.TWITTER,
TWITTER_ACADEMIC_SEARCH: Credential.TWITTER2,
Expand All @@ -376,7 +392,7 @@ class Collection(models.Model):
WEIBO_SEARCH: Credential.WEIBO,
TUMBLR_BLOG_POSTS: Credential.TUMBLR
}
STREAMING_HARVEST_TYPES = (TWITTER_SAMPLE, TWITTER_FILTER)
# Must be a tuple: the trailing comma is required. Without it the parentheses
# are just grouping and the value is a plain string, so `x in
# STREAMING_HARVEST_TYPES` would do substring matching (e.g. 'twitter_filter'
# would match) and iterating it would yield single characters.
STREAMING_HARVEST_TYPES = (TWITTER_FILTER_STREAM,)
RATE_LIMITED_HARVEST_TYPES = (TWITTER_USER_TIMELINE, TWITTER_SEARCH, TWITTER_USER_TIMELINE_2, TWITTER_SEARCH_2, TWITTER_ACADEMIC_SEARCH)
DEFAULT_VISIBILITY = 'default'
LOCAL_VISIBILITY = 'local'
Expand Down
2 changes: 1 addition & 1 deletion sfm/ui/templates/ui/collection_detail.html
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ <h4 class="modal-title" id="addNoteTurnOffModalLabel">Add note & turn off</h4>
</div>
</div>
{% else %}
<button type="submit" class={% if seed_error_message or seed_warning_message or credential_used_col or not can_toggle_on or stream_stopping or not collection.is_active %}"btn btn-secondary" disabled="disabled" {% else %} "btn btn-success" {% endif %}>
<button type="submit" class={% if seed_error_message or seed_warning_message or credential_used_col or not can_toggle_on or stream_stopping or not collection.is_active or disabled_collection_type %}"btn btn-secondary" disabled="disabled" {% else %} "btn btn-success" {% endif %}>
<span class="fas fa-power-off fa-fw" aria-hidden="true"></span> Turn on
</button><br />
{% endif %}
Expand Down
Loading