From 34b5954fe5728501375ceacee752c68ba903f0a4 Mon Sep 17 00:00:00 2001 From: Steve Laing Date: Thu, 1 Feb 2024 16:16:41 +0000 Subject: [PATCH 1/2] Enable filtering of query string parameters Adds a configurable web_request event query string filter via ActiveSupport::ParameterFilter mechanism. --- lib/dfe/analytics.rb | 13 ++++++++++ lib/dfe/analytics/event.rb | 5 +++- lib/dfe/analytics/query_string_utilities.rb | 18 +++++++++++++ spec/dfe/analytics/event_spec.rb | 27 ++++++++++++++++++++ spec/dfe/analytics/requests_spec.rb | 28 +++++++++++++++++++++ 5 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 lib/dfe/analytics/query_string_utilities.rb diff --git a/lib/dfe/analytics.rb b/lib/dfe/analytics.rb index 43a72827..30b27fb2 100644 --- a/lib/dfe/analytics.rb +++ b/lib/dfe/analytics.rb @@ -18,6 +18,7 @@ require 'dfe/analytics/middleware/request_identity' require 'dfe/analytics/middleware/send_cached_page_request_event' require 'dfe/analytics/railtie' +require 'active_support/parameter_filter' module DfE module Analytics @@ -63,6 +64,8 @@ def self.config pseudonymise_web_request_user_id entity_table_checks_enabled rack_page_cached + filter_web_request_events + web_request_event_filter ] @config ||= Struct.new(*configurables).new @@ -86,6 +89,8 @@ def self.configure config.pseudonymise_web_request_user_id ||= false config.entity_table_checks_enabled ||= false config.rack_page_cached ||= proc { |_rack_env| false } + config.filter_web_request_events ||= false + config.web_request_event_filter ||= ActiveSupport::ParameterFilter.new(Rails.application.config.filter_parameters) end def self.initialize! @@ -244,5 +249,13 @@ def self.rack_page_cached?(rack_env) def self.entity_table_checks_enabled? config.entity_table_checks_enabled end + + def self.filter_web_request_events? + config.filter_web_request_events + end + + def self.web_request_event_filter + config.web_request_event_filter + end end end diff --git a/lib/dfe/analytics/event.rb b/lib/dfe/analytics/event.rb index d3534ba8..3b84eb51 100644 --- a/lib/dfe/analytics/event.rb +++ b/lib/dfe/analytics/event.rb @@ -1,10 +1,13 @@ # frozen_string_literal: true require 'active_support/values/time_zone' +require 'dfe/analytics/query_string_utilities' module DfE module Analytics class Event + include QueryStringUtilities + EVENT_TYPES = %w[ web_request create_entity update_entity delete_entity import_entity initialise_analytics entity_table_check ].freeze @@ -37,7 +40,7 @@ def with_request_details(rack_request) request_user_agent: ensure_utf8(rack_request.user_agent), request_method: rack_request.method, request_path: ensure_utf8(rack_request.path), - request_query: hash_to_kv_pairs(Rack::Utils.parse_query(rack_request.query_string)), + request_query: hash_to_kv_pairs(query_string_to_hash(rack_request.query_string)), request_referer: ensure_utf8(rack_request.referer), anonymised_user_agent_and_ip: anonymised_user_agent_and_ip(rack_request) ) diff --git a/lib/dfe/analytics/query_string_utilities.rb b/lib/dfe/analytics/query_string_utilities.rb new file mode 100644 index 00000000..37812319 --- /dev/null +++ b/lib/dfe/analytics/query_string_utilities.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +module DfE + module Analytics + # Utility methods for query string processing + module QueryStringUtilities + def query_string_to_hash(query_string) + hash = Rack::Utils.parse_query(query_string) + hash = filter_query_string(hash) if DfE::Analytics.filter_web_request_events? + hash + end + + def filter_query_string(hash) + DfE::Analytics.web_request_event_filter.filter(hash) + end + end + end +end diff --git a/spec/dfe/analytics/event_spec.rb b/spec/dfe/analytics/event_spec.rb index 1e57e9dd..480f4ff8 100644 --- a/spec/dfe/analytics/event_spec.rb +++ b/spec/dfe/analytics/event_spec.rb @@ -209,6 +209,33 @@ def as_json end end + describe 'with_request_details using parameter filter' do + subject do + request = fake_request( + query_string: 'some_param=123&some_other_param=456' + ) + + described_class.new + .with_request_details(request) + .as_json['request_query'] + end + + before do + allow(DfE::Analytics).to receive(:web_request_event_filter).and_return( + ActiveSupport::ParameterFilter.new([:some_param]) + ) + allow(DfE::Analytics).to receive(:filter_web_request_events?).and_return(true) + end + + it 'filters specified parameter values' do + expect(subject).to include({ 'key' => 'some_param', 'value' => ['[FILTERED]'] }) + end + + it 'does not filter unspecified parameter values' do + expect(subject).to include({ 'key' => 'some_other_param', 'value' => ['456'] }) + end + end + def fake_request(overrides = {}) attrs = { uuid: '123', diff --git a/spec/dfe/analytics/requests_spec.rb b/spec/dfe/analytics/requests_spec.rb index 2a371b9a..a4697e5f 100644 --- a/spec/dfe/analytics/requests_spec.rb +++ b/spec/dfe/analytics/requests_spec.rb @@ -156,4 +156,32 @@ def index end).to have_been_made end end + + context 'with request filtering' do + before do + allow(DfE::Analytics).to receive(:web_request_event_filter).and_return( + ActiveSupport::ParameterFilter.new([:array_param]) + ) + allow(DfE::Analytics).to receive(:filter_web_request_events?).and_return(true) + end + + it 'filters out sensitive params' do + request = stub_analytics_event_submission + DfE::Analytics::Testing.webmock! do + perform_enqueued_jobs do + get('/example/path', + params: { page: '1', per_page: '25', array_param: %w[1 2] }, + headers: { 'HTTP_USER_AGENT' => 'Test agent' }) + end + end + + expect(request.with do |req| + body = JSON.parse(req.body) + payload = body['rows'].first['json'] + expect(payload['request_query']).to include( + { 'key' => 'array_param[]', 'value' => ['[FILTERED]'] } + ) + end).to have_been_made + end + end end From 23459aae28a0528997d05a83a8264925e2a0db23 Mon Sep 17 00:00:00 2001 From: Steve Laing Date: Thu, 1 Feb 2024 16:22:49 +0000 Subject: [PATCH 2/2] Update README detailing web request event query filtering --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 0ef74e55..20f064b6 100644 --- a/README.md +++ b/README.md @@ -231,6 +231,14 @@ user identifier proc can be defined in `config/initializers/dfe_analytics.rb`: DfE::Analytics.config.user_identifier = proc { |user| user&.uid } ``` +#### Filtering PII from web request query string data + +Query strings may be filtered using the same `Rails.application.config.filter_parameters` +configuration you'd normally use to prevent PII leakage to logs and error handling services. + +To enable this option, ensure the attributes to filter are specified in `Rails.application.config.filter_parameters`. +Then enable filtering with `DfE::Analytics.config.filter_web_request_events = true`. + ### 6. Import existing data To load the current contents of your database into BigQuery, run