Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions colly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,11 @@ func newUnstartedTestServer() *httptest.Server {
w.Write([]byte("ok"))
})

// Test route that replies with status 204 and a "Content-Encoding: gzip"
// header while writing no response body.
mux.HandleFunc("/204_enc_gzip", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Encoding", "gzip")
w.WriteHeader(204)
})

mux.HandleFunc("/500", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
w.WriteHeader(500)
Expand Down Expand Up @@ -1372,6 +1377,22 @@ func TestParseHTTPErrorResponse(t *testing.T) {

}

// TestGzipEncodingNoContent is a regression test: visiting a server that
// sends a "Content-Encoding: gzip" header on a response that cannot contain
// content (the /204_enc_gzip test route) must complete without an error.
func TestGzipEncodingNoContent(t *testing.T) {
	server := newTestServer()
	defer server.Close()

	// Allow parsing 204 responses
	collector := NewCollector(ParseHTTPErrorResponse())

	target := server.URL + "/204_enc_gzip"
	err := collector.Visit(target)
	if err != nil {
		t.Errorf("visit failed: %v", err)
	}
}

func TestHTMLElement(t *testing.T) {
ctx := &Context{}
resp := &Response{
Expand Down
8 changes: 8 additions & 0 deletions http_backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,14 @@ func (h *httpBackend) Do(request *http.Request, bodySize int, checkHeadersFunc c
bodyReader = io.LimitReader(bodyReader, int64(bodySize))
}
contentEncoding := strings.ToLower(res.Header.Get("Content-Encoding"))

if strings.Contains(contentEncoding, "gzip") && !res.Uncompressed && (res.StatusCode < 200 || res.StatusCode == 204 || res.StatusCode == 304) {
// RFC 9110, section 15: 1xx, 204, and 304 responses cannot contain content.
// However, some servers may still send "Content-Encoding: gzip" in these scenarios
// so mark the response as uncompressed to avoid trying to read gzip data below.
res.Uncompressed = true
}

if !res.Uncompressed && (strings.Contains(contentEncoding, "gzip") || (contentEncoding == "" && strings.Contains(strings.ToLower(res.Header.Get("Content-Type")), "gzip")) || strings.HasSuffix(strings.ToLower(finalRequest.URL.Path), ".xml.gz")) {
bodyReader, err = gzip.NewReader(bodyReader)
if err != nil {
Expand Down