Skip to content

Commit cb3afbe

Browse files
committed
refactor(reader): misc simplifications
- There is no need for getEncoding to return a string rather than a slice of bytes, so make it return a []byte instead. - There is no reason for the function assigned to decoder.CharsetReader to be defined as a lambda (function literal) rather than as a named top-level function. One could argue either way, but a function literal that captures variables may be allocated on the heap, whereas a named function never is.
1 parent 8adcaed commit cb3afbe

File tree

1 file changed

+23
-22
lines changed

1 file changed

+23
-22
lines changed

internal/reader/xml/decoder.go

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import (
88
"encoding/xml"
99
"fmt"
1010
"io"
11-
"strings"
1211
"unicode/utf8"
1312

1413
"miniflux.app/v2/internal/reader/encoding"
@@ -23,34 +22,36 @@ func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
2322
io.Copy(buffer, data)
2423

2524
enc := getEncoding(buffer.Bytes())
26-
if enc == "" || strings.EqualFold(enc, "utf-8") {
27-
// filter invalid chars now, since decoder.CharsetReader not called for utf-8 content
25+
if enc == nil || bytes.EqualFold(enc, []byte("utf-8")) {
26+
// filter invalid chars now, since decoder.CharsetReader isn't called for utf-8 content
2827
filteredBytes := filterValidXMLChars(buffer.Bytes())
2928
decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
3029
} else {
31-
// filter invalid chars later within decoder.CharsetReader
3230
data.Seek(0, io.SeekStart)
31+
// invalid characters will be filtered later via decoder.CharsetReader
3332
decoder = xml.NewDecoder(data)
3433
}
3534

35+
decoder.CharsetReader = charsetReaderFilterInvalidUtf8
3636
decoder.Entity = xml.HTMLEntity
3737
decoder.Strict = false
38-
decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
39-
utf8Reader, err := encoding.CharsetReader(charset, input)
40-
if err != nil {
41-
return nil, err
42-
}
43-
rawData, err := io.ReadAll(utf8Reader)
44-
if err != nil {
45-
return nil, fmt.Errorf("encoding: unable to read data: %w", err)
46-
}
47-
filteredBytes := filterValidXMLChars(rawData)
48-
return bytes.NewReader(filteredBytes), nil
49-
}
5038

5139
return decoder
5240
}
5341

42+
func charsetReaderFilterInvalidUtf8(charset string, input io.Reader) (io.Reader, error) {
43+
utf8Reader, err := encoding.CharsetReader(charset, input)
44+
if err != nil {
45+
return nil, err
46+
}
47+
rawData, err := io.ReadAll(utf8Reader)
48+
if err != nil {
49+
return nil, fmt.Errorf("encoding: unable to read data: %w", err)
50+
}
51+
filteredBytes := filterValidXMLChars(rawData)
52+
return bytes.NewReader(filteredBytes), nil
53+
}
54+
5455
// filterValidXMLChars filters out invalid XML characters in place.
5556
// This function is inspired from bytes.Map
5657
func filterValidXMLChars(s []byte) []byte {
@@ -89,23 +90,23 @@ func filterValidXMLChar(r rune) rune {
8990
}
9091

9192
// This function is copied from encoding/xml's procInst and adapted to work on a []byte instead of a string.
92-
func getEncoding(b []byte) string {
93+
func getEncoding(b []byte) []byte {
9394
// This parsing is somewhat lame and not exact.
9495
// It works for all actual cases, though.
9596
idx := bytes.Index(b, []byte("encoding="))
9697
if idx == -1 {
97-
return ""
98+
return nil
9899
}
99100
v := b[idx+len("encoding="):]
100101
if len(v) == 0 {
101-
return ""
102+
return nil
102103
}
103104
if v[0] != '\'' && v[0] != '"' {
104-
return ""
105+
return nil
105106
}
106107
idx = bytes.IndexRune(v[1:], rune(v[0]))
107108
if idx == -1 {
108-
return ""
109+
return nil
109110
}
110-
return string(v[1 : idx+1])
111+
return v[1 : idx+1]
111112
}

0 commit comments

Comments
 (0)