Skip to content

Commit 7a6fd91

Browse files
authored
feat(archival): unconditionally scrub HTTP headers and bodies (#1335)
This commit modifies ArchivalMaybeBinaryString to unconditionally scrub IP addresses and endpoints. It also renames the type to ArchivalScrubbedMaybeBinaryString to underline that it now performs scrubbing. We're using ArchivalScrubbedMaybeBinaryString to represent HTTP headers and bodies collected by OONI measurements. With this commit merged, I am now much less concerned about the potential unintended effects of aggressively using happy eyeballs, which we started introducing as part of ooni/probe#2531. In other words, the rest of the refactoring of the OONI bootstrap could proceed a bit faster.
1 parent b9fcb78 commit 7a6fd91

File tree

10 files changed

+303
-290
lines changed

10 files changed

+303
-290
lines changed

internal/experiment/hhfm/hhfm.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ func NewRequestEntryList(req *http.Request, headers map[string]string) (out []tr
278278
// specific *http.Response instance and its body.
279279
func NewHTTPResponse(resp *http.Response, data []byte) (out tracex.HTTPResponse) {
280280
out = tracex.HTTPResponse{
281-
Body: model.ArchivalMaybeBinaryString(data),
281+
Body: model.ArchivalScrubbedMaybeBinaryString(data),
282282
Code: int64(resp.StatusCode),
283283
Headers: model.ArchivalNewHTTPHeadersMap(resp.Header),
284284
HeadersList: model.ArchivalNewHTTPHeadersList(resp.Header),

internal/experiment/hhfm/hhfm_test.go

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -744,13 +744,13 @@ func TestNewRequestEntryList(t *testing.T) {
744744
wantOut: []tracex.RequestEntry{{
745745
Request: tracex.HTTPRequest{
746746
HeadersList: []model.ArchivalHTTPHeader{{
747-
model.ArchivalMaybeBinaryString("ContENt-tYPE"),
748-
model.ArchivalMaybeBinaryString("text/plain"),
747+
model.ArchivalScrubbedMaybeBinaryString("ContENt-tYPE"),
748+
model.ArchivalScrubbedMaybeBinaryString("text/plain"),
749749
}, {
750-
model.ArchivalMaybeBinaryString("User-aGENT"),
751-
model.ArchivalMaybeBinaryString("foo/1.0"),
750+
model.ArchivalScrubbedMaybeBinaryString("User-aGENT"),
751+
model.ArchivalScrubbedMaybeBinaryString("foo/1.0"),
752752
}},
753-
Headers: map[string]model.ArchivalMaybeBinaryString{
753+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
754754
"ContENt-tYPE": "text/plain",
755755
"User-aGENT": "foo/1.0",
756756
},
@@ -773,7 +773,7 @@ func TestNewRequestEntryList(t *testing.T) {
773773
wantOut: []tracex.RequestEntry{{
774774
Request: tracex.HTTPRequest{
775775
Method: "GeT",
776-
Headers: make(map[string]model.ArchivalMaybeBinaryString),
776+
Headers: make(map[string]model.ArchivalScrubbedMaybeBinaryString),
777777
HeadersList: []model.ArchivalHTTPHeader{},
778778
URL: "http://10.0.0.1/",
779779
},
@@ -811,16 +811,16 @@ func TestNewHTTPResponse(t *testing.T) {
811811
data: []byte("deadbeef"),
812812
},
813813
wantOut: tracex.HTTPResponse{
814-
Body: model.ArchivalMaybeBinaryString("deadbeef"),
814+
Body: model.ArchivalScrubbedMaybeBinaryString("deadbeef"),
815815
Code: 200,
816816
HeadersList: []model.ArchivalHTTPHeader{{
817-
model.ArchivalMaybeBinaryString("Content-Type"),
818-
model.ArchivalMaybeBinaryString("text/plain"),
817+
model.ArchivalScrubbedMaybeBinaryString("Content-Type"),
818+
model.ArchivalScrubbedMaybeBinaryString("text/plain"),
819819
}, {
820-
model.ArchivalMaybeBinaryString("User-Agent"),
821-
model.ArchivalMaybeBinaryString("foo/1.0"),
820+
model.ArchivalScrubbedMaybeBinaryString("User-Agent"),
821+
model.ArchivalScrubbedMaybeBinaryString("foo/1.0"),
822822
}},
823-
Headers: map[string]model.ArchivalMaybeBinaryString{
823+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
824824
"Content-Type": "text/plain",
825825
"User-Agent": "foo/1.0",
826826
},
@@ -831,10 +831,10 @@ func TestNewHTTPResponse(t *testing.T) {
831831
resp: &http.Response{StatusCode: 200},
832832
},
833833
wantOut: tracex.HTTPResponse{
834-
Body: model.ArchivalMaybeBinaryString(""),
834+
Body: model.ArchivalScrubbedMaybeBinaryString(""),
835835
Code: 200,
836836
HeadersList: []model.ArchivalHTTPHeader{},
837-
Headers: map[string]model.ArchivalMaybeBinaryString{},
837+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{},
838838
},
839839
}}
840840
for _, tt := range tests {

internal/experiment/riseupvpn/riseupvpn_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -773,11 +773,11 @@ func generateMockGetter(requestResponse map[string]string, responseStatus map[st
773773
Failure: failure,
774774
Request: tracex.HTTPRequest{
775775
URL: url,
776-
Body: model.ArchivalMaybeBinaryString(""),
776+
Body: model.ArchivalScrubbedMaybeBinaryString(""),
777777
BodyIsTruncated: false,
778778
},
779779
Response: tracex.HTTPResponse{
780-
Body: model.ArchivalMaybeBinaryString(
780+
Body: model.ArchivalScrubbedMaybeBinaryString(
781781
responseBody,
782782
),
783783
BodyIsTruncated: false,

internal/experiment/webconnectivity/httpanalysis_test.go

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ func TestHTTPBodyLengthChecks(t *testing.T) {
7777
tk: urlgetter.TestKeys{
7878
Requests: []tracex.RequestEntry{{
7979
Response: tracex.HTTPResponse{
80-
Body: model.ArchivalMaybeBinaryString(
80+
Body: model.ArchivalScrubbedMaybeBinaryString(
8181
randx.Letters(768),
8282
),
8383
},
@@ -96,7 +96,7 @@ func TestHTTPBodyLengthChecks(t *testing.T) {
9696
tk: urlgetter.TestKeys{
9797
Requests: []tracex.RequestEntry{{
9898
Response: tracex.HTTPResponse{
99-
Body: model.ArchivalMaybeBinaryString(
99+
Body: model.ArchivalScrubbedMaybeBinaryString(
100100
randx.Letters(768),
101101
),
102102
},
@@ -116,7 +116,7 @@ func TestHTTPBodyLengthChecks(t *testing.T) {
116116
tk: urlgetter.TestKeys{
117117
Requests: []tracex.RequestEntry{{
118118
Response: tracex.HTTPResponse{
119-
Body: model.ArchivalMaybeBinaryString(
119+
Body: model.ArchivalScrubbedMaybeBinaryString(
120120
randx.Letters(1024),
121121
),
122122
},
@@ -136,7 +136,7 @@ func TestHTTPBodyLengthChecks(t *testing.T) {
136136
tk: urlgetter.TestKeys{
137137
Requests: []tracex.RequestEntry{{
138138
Response: tracex.HTTPResponse{
139-
Body: model.ArchivalMaybeBinaryString(
139+
Body: model.ArchivalScrubbedMaybeBinaryString(
140140
randx.Letters(8),
141141
),
142142
},
@@ -156,7 +156,7 @@ func TestHTTPBodyLengthChecks(t *testing.T) {
156156
tk: urlgetter.TestKeys{
157157
Requests: []tracex.RequestEntry{{
158158
Response: tracex.HTTPResponse{
159-
Body: model.ArchivalMaybeBinaryString(
159+
Body: model.ArchivalScrubbedMaybeBinaryString(
160160
randx.Letters(16),
161161
),
162162
},
@@ -366,7 +366,7 @@ func TestHeadersMatch(t *testing.T) {
366366
tk: urlgetter.TestKeys{
367367
Requests: []tracex.RequestEntry{{
368368
Response: tracex.HTTPResponse{
369-
Headers: map[string]model.ArchivalMaybeBinaryString{
369+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
370370
"Date": "Mon Jul 13 21:10:08 CEST 2020",
371371
},
372372
Code: 200,
@@ -382,7 +382,7 @@ func TestHeadersMatch(t *testing.T) {
382382
tk: urlgetter.TestKeys{
383383
Requests: []tracex.RequestEntry{{
384384
Response: tracex.HTTPResponse{
385-
Headers: map[string]model.ArchivalMaybeBinaryString{
385+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
386386
"Date": "Mon Jul 13 21:10:08 CEST 2020",
387387
},
388388
Code: 200,
@@ -402,7 +402,7 @@ func TestHeadersMatch(t *testing.T) {
402402
tk: urlgetter.TestKeys{
403403
Requests: []tracex.RequestEntry{{
404404
Response: tracex.HTTPResponse{
405-
Headers: map[string]model.ArchivalMaybeBinaryString{
405+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
406406
"Date": "Mon Jul 13 21:10:08 CEST 2020",
407407
},
408408
Code: 200,
@@ -425,7 +425,7 @@ func TestHeadersMatch(t *testing.T) {
425425
tk: urlgetter.TestKeys{
426426
Requests: []tracex.RequestEntry{{
427427
Response: tracex.HTTPResponse{
428-
Headers: map[string]model.ArchivalMaybeBinaryString{
428+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
429429
"Date": "Mon Jul 13 21:10:08 CEST 2020",
430430
"Antani": "MASCETTI",
431431
},
@@ -450,7 +450,7 @@ func TestHeadersMatch(t *testing.T) {
450450
tk: urlgetter.TestKeys{
451451
Requests: []tracex.RequestEntry{{
452452
Response: tracex.HTTPResponse{
453-
Headers: map[string]model.ArchivalMaybeBinaryString{
453+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
454454
"Date": "Mon Jul 13 21:10:08 CEST 2020",
455455
"Antani": "MASCETTI",
456456
},
@@ -475,7 +475,7 @@ func TestHeadersMatch(t *testing.T) {
475475
tk: urlgetter.TestKeys{
476476
Requests: []tracex.RequestEntry{{
477477
Response: tracex.HTTPResponse{
478-
Headers: map[string]model.ArchivalMaybeBinaryString{
478+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
479479
"Accept-Ranges": "bytes",
480480
"Age": "404727",
481481
"Cache-Control": "max-age=604800",
@@ -522,7 +522,7 @@ func TestHeadersMatch(t *testing.T) {
522522
tk: urlgetter.TestKeys{
523523
Requests: []tracex.RequestEntry{{
524524
Response: tracex.HTTPResponse{
525-
Headers: map[string]model.ArchivalMaybeBinaryString{
525+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
526526
"Accept-Ranges": "bytes",
527527
"Age": "404727",
528528
"Cache-Control": "max-age=604800",
@@ -567,7 +567,7 @@ func TestHeadersMatch(t *testing.T) {
567567
tk: urlgetter.TestKeys{
568568
Requests: []tracex.RequestEntry{{
569569
Response: tracex.HTTPResponse{
570-
Headers: map[string]model.ArchivalMaybeBinaryString{
570+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
571571
"Accept-Ranges": "bytes",
572572
"Age": "404727",
573573
"Cache-Control": "max-age=604800",
@@ -608,7 +608,7 @@ func TestHeadersMatch(t *testing.T) {
608608
tk: urlgetter.TestKeys{
609609
Requests: []tracex.RequestEntry{{
610610
Response: tracex.HTTPResponse{
611-
Headers: map[string]model.ArchivalMaybeBinaryString{
611+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
612612
"accept-ranges": "bytes",
613613
"AGE": "404727",
614614
"cache-Control": "max-age=604800",
@@ -699,7 +699,7 @@ func TestTitleMatch(t *testing.T) {
699699
Requests: []tracex.RequestEntry{{
700700
Response: tracex.HTTPResponse{
701701
Code: 200,
702-
Body: model.ArchivalMaybeBinaryString("<HTML/>"),
702+
Body: model.ArchivalScrubbedMaybeBinaryString("<HTML/>"),
703703
},
704704
}},
705705
},
@@ -712,7 +712,7 @@ func TestTitleMatch(t *testing.T) {
712712
Requests: []tracex.RequestEntry{{
713713
Response: tracex.HTTPResponse{
714714
Code: 200,
715-
Body: model.ArchivalMaybeBinaryString("<HTML/>"),
715+
Body: model.ArchivalScrubbedMaybeBinaryString("<HTML/>"),
716716
},
717717
}},
718718
},
@@ -731,7 +731,7 @@ func TestTitleMatch(t *testing.T) {
731731
Requests: []tracex.RequestEntry{{
732732
Response: tracex.HTTPResponse{
733733
Code: 200,
734-
Body: model.ArchivalMaybeBinaryString(
734+
Body: model.ArchivalScrubbedMaybeBinaryString(
735735
"<HTML><TITLE>La community di MSN</TITLE></HTML>"),
736736
},
737737
}},
@@ -751,7 +751,7 @@ func TestTitleMatch(t *testing.T) {
751751
Requests: []tracex.RequestEntry{{
752752
Response: tracex.HTTPResponse{
753753
Code: 200,
754-
Body: model.ArchivalMaybeBinaryString(
754+
Body: model.ArchivalScrubbedMaybeBinaryString(
755755
"<HTML><TITLE>La communità di MSN</TITLE></HTML>"),
756756
},
757757
}},
@@ -771,7 +771,7 @@ func TestTitleMatch(t *testing.T) {
771771
Requests: []tracex.RequestEntry{{
772772
Response: tracex.HTTPResponse{
773773
Code: 200,
774-
Body: model.ArchivalMaybeBinaryString(
774+
Body: model.ArchivalScrubbedMaybeBinaryString(
775775
"<HTML><TITLE>" + randx.Letters(1024) + "</TITLE></HTML>"),
776776
},
777777
}},
@@ -791,7 +791,7 @@ func TestTitleMatch(t *testing.T) {
791791
Requests: []tracex.RequestEntry{{
792792
Response: tracex.HTTPResponse{
793793
Code: 200,
794-
Body: model.ArchivalMaybeBinaryString(
794+
Body: model.ArchivalScrubbedMaybeBinaryString(
795795
"<HTML><TiTLe>La commUNity di MSN</tITLE></HTML>"),
796796
},
797797
}},
@@ -811,7 +811,7 @@ func TestTitleMatch(t *testing.T) {
811811
Requests: []tracex.RequestEntry{{
812812
Response: tracex.HTTPResponse{
813813
Code: 200,
814-
Body: model.ArchivalMaybeBinaryString(
814+
Body: model.ArchivalScrubbedMaybeBinaryString(
815815
"<HTML><TiTLe>La commUNity di MSN</tITLE></HTML>"),
816816
},
817817
}},

internal/legacy/tracex/archival.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ func newRequestList(begin time.Time, events []Event) (out []RequestEntry) {
118118
entry.Response.HeadersList = model.ArchivalNewHTTPHeadersList(ev.HTTPResponseHeaders)
119119
entry.Response.Code = int64(ev.HTTPStatusCode)
120120
entry.Response.Locations = ev.HTTPResponseHeaders.Values("Location")
121-
entry.Response.Body = model.ArchivalMaybeBinaryString(ev.HTTPResponseBody)
121+
entry.Response.Body = model.ArchivalScrubbedMaybeBinaryString(ev.HTTPResponseBody)
122122
entry.Response.BodyIsTruncated = ev.HTTPResponseBodyIsTruncated
123123
entry.Failure = ev.Err.ToFailure()
124124
out = append(out, entry)

internal/legacy/tracex/archival_test.go

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -173,41 +173,41 @@ func TestNewRequestList(t *testing.T) {
173173
Failure: NewFailure(io.EOF),
174174
Request: HTTPRequest{
175175
HeadersList: []model.ArchivalHTTPHeader{{
176-
model.ArchivalMaybeBinaryString("User-Agent"),
177-
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
176+
model.ArchivalScrubbedMaybeBinaryString("User-Agent"),
177+
model.ArchivalScrubbedMaybeBinaryString("miniooni/0.1.0-dev"),
178178
}},
179-
Headers: map[string]model.ArchivalMaybeBinaryString{
179+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
180180
"User-Agent": "miniooni/0.1.0-dev",
181181
},
182182
Method: "GET",
183183
URL: "https://www.example.com/result",
184184
},
185185
Response: HTTPResponse{
186186
HeadersList: []model.ArchivalHTTPHeader{},
187-
Headers: make(map[string]model.ArchivalMaybeBinaryString),
187+
Headers: make(map[string]model.ArchivalScrubbedMaybeBinaryString),
188188
},
189189
T: 0.02,
190190
}, {
191191
Request: HTTPRequest{
192-
Body: model.ArchivalMaybeBinaryString(""),
192+
Body: model.ArchivalScrubbedMaybeBinaryString(""),
193193
HeadersList: []model.ArchivalHTTPHeader{{
194-
model.ArchivalMaybeBinaryString("User-Agent"),
195-
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
194+
model.ArchivalScrubbedMaybeBinaryString("User-Agent"),
195+
model.ArchivalScrubbedMaybeBinaryString("miniooni/0.1.0-dev"),
196196
}},
197-
Headers: map[string]model.ArchivalMaybeBinaryString{
197+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
198198
"User-Agent": "miniooni/0.1.0-dev",
199199
},
200200
Method: "POST",
201201
URL: "https://www.example.com/submit",
202202
},
203203
Response: HTTPResponse{
204-
Body: model.ArchivalMaybeBinaryString("{}"),
204+
Body: model.ArchivalScrubbedMaybeBinaryString("{}"),
205205
Code: 200,
206206
HeadersList: []model.ArchivalHTTPHeader{{
207-
model.ArchivalMaybeBinaryString("Server"),
208-
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
207+
model.ArchivalScrubbedMaybeBinaryString("Server"),
208+
model.ArchivalScrubbedMaybeBinaryString("miniooni/0.1.0-dev"),
209209
}},
210-
Headers: map[string]model.ArchivalMaybeBinaryString{
210+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
211211
"Server": "miniooni/0.1.0-dev",
212212
},
213213
Locations: nil,
@@ -237,10 +237,10 @@ func TestNewRequestList(t *testing.T) {
237237
want: []RequestEntry{{
238238
Request: HTTPRequest{
239239
HeadersList: []model.ArchivalHTTPHeader{{
240-
model.ArchivalMaybeBinaryString("User-Agent"),
241-
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
240+
model.ArchivalScrubbedMaybeBinaryString("User-Agent"),
241+
model.ArchivalScrubbedMaybeBinaryString("miniooni/0.1.0-dev"),
242242
}},
243-
Headers: map[string]model.ArchivalMaybeBinaryString{
243+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
244244
"User-Agent": "miniooni/0.1.0-dev",
245245
},
246246
Method: "GET",
@@ -249,16 +249,16 @@ func TestNewRequestList(t *testing.T) {
249249
Response: HTTPResponse{
250250
Code: 302,
251251
HeadersList: []model.ArchivalHTTPHeader{{
252-
model.ArchivalMaybeBinaryString("Location"),
253-
model.ArchivalMaybeBinaryString("https://x.example.com"),
252+
model.ArchivalScrubbedMaybeBinaryString("Location"),
253+
model.ArchivalScrubbedMaybeBinaryString("https://x.example.com"),
254254
}, {
255-
model.ArchivalMaybeBinaryString("Location"),
256-
model.ArchivalMaybeBinaryString("https://y.example.com"),
255+
model.ArchivalScrubbedMaybeBinaryString("Location"),
256+
model.ArchivalScrubbedMaybeBinaryString("https://y.example.com"),
257257
}, {
258-
model.ArchivalMaybeBinaryString("Server"),
259-
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
258+
model.ArchivalScrubbedMaybeBinaryString("Server"),
259+
model.ArchivalScrubbedMaybeBinaryString("miniooni/0.1.0-dev"),
260260
}},
261-
Headers: map[string]model.ArchivalMaybeBinaryString{
261+
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
262262
"Server": "miniooni/0.1.0-dev",
263263
"Location": "https://x.example.com",
264264
},

0 commit comments

Comments
 (0)