Skip to content

Commit b8bc367

Browse files
feat(rewrite): add remove_img_blur_params rule
Adds a new content rewrite rule, `remove_img_blur_params`, that strips the query string from image URLs that request a blurred image. Some sites, such as Belgian national news sites, serve blurry placeholder images that are later replaced with high-quality versions; removing the query parameters lets Miniflux fetch the original images instead of the placeholders.
1 parent 04a360a commit b8bc367

File tree

3 files changed

+200
-3
lines changed

3 files changed

+200
-3
lines changed

internal/reader/rewrite/content_rewrite.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
9494
entry.Title = titlelize(entry.Title)
9595
case "fix_ghost_cards":
9696
entry.Content = fixGhostCards(entry.Content)
97+
case "remove_img_blur_params":
98+
entry.Content = removeImgBlurParams(entry.Content)
9799
}
98100
}
99101

@@ -130,7 +132,7 @@ func parseRules(rulesText string) (rules []rule) {
130132
rules[l].args = append(rules[l].args, text)
131133
}
132134
case scanner.EOF:
133-
return
135+
return rules
134136
}
135137
}
136138
}

internal/reader/rewrite/content_rewrite_functions.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"log/slog"
1111
"net/url"
1212
"regexp"
13+
"strconv"
1314
"strings"
1415
"unicode"
1516

@@ -547,3 +548,43 @@ func fixGhostCards(entryContent string) string {
547548
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
548549
return strings.TrimSpace(output)
549550
}
551+
552+
func removeImgBlurParams(entryContent string) string {
553+
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
554+
if err != nil {
555+
return entryContent
556+
}
557+
558+
changed := false
559+
560+
doc.Find("img[src]").Each(func(i int, img *goquery.Selection) {
561+
srcAttr, exists := img.Attr("src")
562+
if !exists {
563+
return
564+
}
565+
566+
parsedURL, err := url.Parse(srcAttr)
567+
if err != nil {
568+
return
569+
}
570+
571+
// Only strip query parameters if this is a blurry placeholder image
572+
if parsedURL.RawQuery != "" {
573+
// Check if there's a blur parameter with a non-zero value
574+
if blurValue := parsedURL.Query().Get("blur"); blurValue != "" {
575+
if blurInt, err := strconv.Atoi(blurValue); err == nil && blurInt > 0 {
576+
parsedURL.RawQuery = ""
577+
img.SetAttr("src", parsedURL.String())
578+
changed = true
579+
}
580+
}
581+
}
582+
})
583+
584+
if changed {
585+
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
586+
return output
587+
}
588+
589+
return entryContent
590+
}

internal/reader/rewrite/content_rewrite_test.go

Lines changed: 156 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,6 @@ func TestRewriteYoutubeLinkAndCustomEmbedURL(t *testing.T) {
133133
var err error
134134
parser := config.NewConfigParser()
135135
config.Opts, err = parser.ParseEnvironmentVariables()
136-
137136
if err != nil {
138137
t.Fatalf(`Parsing failure: %v`, err)
139138
}
@@ -241,7 +240,6 @@ func TestAddYoutubeVideoFromIdWithCustomEmbedURL(t *testing.T) {
241240
var err error
242241
parser := config.NewConfigParser()
243242
config.Opts, err = parser.ParseEnvironmentVariables()
244-
245243
if err != nil {
246244
t.Fatalf(`Parsing failure: %v`, err)
247245
}
@@ -797,6 +795,7 @@ func TestRewriteRemoveCustom(t *testing.T) {
797795
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
798796
}
799797
}
798+
800799
func TestRewriteRemoveQuotedSelector(t *testing.T) {
801800
controlEntry := &model.Entry{
802801
URL: "https://example.org/article",
@@ -1248,3 +1247,158 @@ func TestFixGhostCardMultipleSplit(t *testing.T) {
12481247
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
12491248
}
12501249
}
1250+
1251+
// TestStripImageQueryParams applies the remove_img_blur_params rule to an
// article mixing images with and without a "blur" query parameter. The two
// images carrying a positive blur value (blur=90, blur=50) are expected to
// lose their entire query string; every other image keeps its src as is.
func TestStripImageQueryParams(t *testing.T) {
1252+
testEntry := &model.Entry{
1253+
URL: "https://example.org/article",
1254+
Title: `News Article Title`,
1255+
Content: `
1256+
<article>
1257+
<p>Article content with images having query parameters:</p>
1258+
<img src="https://example.org/images/image1.jpg?width=200&height=113&q=80&blur=90" alt="Image with params">
1259+
<img src="https://example.org/images/image2.jpg?width=800&height=600&q=85" alt="Another image with params">
1260+
1261+
<p>More images with various query parameters:</p>
1262+
<img src="https://example.org/image123.jpg?blur=50&size=small&format=webp" alt="Complex query params">
1263+
<img src="https://example.org/image123.jpg?size=large&quality=95&cache=123" alt="Different params">
1264+
1265+
<p>Image without query parameters:</p>
1266+
<img src="https://example.org/single-image.jpg" alt="Clean image">
1267+
1268+
<p>Images with various other params:</p>
1269+
<img src="https://example.org/normal1.jpg?width=300&format=jpg" alt="Normal 1">
1270+
<img src="https://example.org/normal1.jpg?width=600&quality=high" alt="Normal 2">
1271+
</article>`,
1272+
}
1273+
1274+
// The expected content is the re-serialized markup: <img> tags are
// self-closed and ampersands in the remaining query strings appear as &amp;.
controlEntry := &model.Entry{
1275+
URL: "https://example.org/article",
1276+
Title: `News Article Title`,
1277+
Content: `<article>
1278+
<p>Article content with images having query parameters:</p>
1279+
<img src="https://example.org/images/image1.jpg" alt="Image with params"/>
1280+
<img src="https://example.org/images/image2.jpg?width=800&amp;height=600&amp;q=85" alt="Another image with params"/>
1281+
1282+
<p>More images with various query parameters:</p>
1283+
<img src="https://example.org/image123.jpg" alt="Complex query params"/>
1284+
<img src="https://example.org/image123.jpg?size=large&amp;quality=95&amp;cache=123" alt="Different params"/>
1285+
1286+
<p>Image without query parameters:</p>
1287+
<img src="https://example.org/single-image.jpg" alt="Clean image"/>
1288+
1289+
<p>Images with various other params:</p>
1290+
<img src="https://example.org/normal1.jpg?width=300&amp;format=jpg" alt="Normal 1"/>
1291+
<img src="https://example.org/normal1.jpg?width=600&amp;quality=high" alt="Normal 2"/>
1292+
</article>`,
1293+
}
1294+
ApplyContentRewriteRules(testEntry, `remove_img_blur_params`)
1295+
1296+
if !reflect.DeepEqual(testEntry, controlEntry) {
1297+
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
1298+
}
1299+
}
1300+
1301+
// TestStripImageQueryParamsNoChanges checks that content containing no <img>
// elements comes out of the remove_img_blur_params rule exactly as it went in.
func TestStripImageQueryParamsNoChanges(t *testing.T) {
1302+
testEntry := &model.Entry{
1303+
URL: "https://example.org/article",
1304+
Title: `Article Without Images`,
1305+
Content: `<p>No images here:</p>
1306+
<div>Just some text content</div>
1307+
<a href="https://example.org">A link</a>`,
1308+
}
1309+
1310+
// The control entry is identical to the input: nothing should be rewritten.
controlEntry := &model.Entry{
1311+
URL: "https://example.org/article",
1312+
Title: `Article Without Images`,
1313+
Content: `<p>No images here:</p>
1314+
<div>Just some text content</div>
1315+
<a href="https://example.org">A link</a>`,
1316+
}
1317+
ApplyContentRewriteRules(testEntry, `remove_img_blur_params`)
1318+
1319+
if !reflect.DeepEqual(testEntry, controlEntry) {
1320+
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
1321+
}
1322+
}
1323+
1324+
// TestStripImageQueryParamsEdgeCases covers parameter position and spelling:
// a "blur" parameter is expected to trigger stripping whether it comes first
// or in the middle of the query string, while an uppercase "BLUR" parameter
// and images without any blur parameter are expected to be left untouched.
func TestStripImageQueryParamsEdgeCases(t *testing.T) {
1325+
testEntry := &model.Entry{
1326+
URL: "https://example.org/article",
1327+
Title: `Edge Cases`,
1328+
Content: `
1329+
<p>Edge cases for image query parameter stripping:</p>
1330+
1331+
<!-- Various query parameters -->
1332+
<img src="https://example.org/image1.jpg?blur=80&width=300" alt="Multiple params">
1333+
1334+
<!-- Complex query parameters -->
1335+
<img src="https://example.org/image2.jpg?BLUR=60&format=webp&cache=123" alt="Complex params">
1336+
<img src="https://example.org/image3.jpg?quality=high&version=2" alt="Other params">
1337+
1338+
<!-- Query params in middle of string -->
1339+
<img src="https://example.org/image4.jpg?size=large&blur=30&format=webp&quality=90" alt="Middle params">
1340+
1341+
<!-- Image without query params -->
1342+
<img src="https://example.org/clean.jpg" alt="Clean image">
1343+
`,
1344+
}
1345+
1346+
// Expected result: image1 and image4 lose their query strings; image2 keeps
// its uppercase BLUR parameter; HTML comments are preserved in the output.
controlEntry := &model.Entry{
1347+
URL: "https://example.org/article",
1348+
Title: `Edge Cases`,
1349+
Content: `<p>Edge cases for image query parameter stripping:</p>
1350+
1351+
<!-- Various query parameters -->
1352+
<img src="https://example.org/image1.jpg" alt="Multiple params"/>
1353+
1354+
<!-- Complex query parameters -->
1355+
<img src="https://example.org/image2.jpg?BLUR=60&amp;format=webp&amp;cache=123" alt="Complex params"/>
1356+
<img src="https://example.org/image3.jpg?quality=high&amp;version=2" alt="Other params"/>
1357+
1358+
<!-- Query params in middle of string -->
1359+
<img src="https://example.org/image4.jpg" alt="Middle params"/>
1360+
1361+
<!-- Image without query params -->
1362+
<img src="https://example.org/clean.jpg" alt="Clean image"/>
1363+
`,
1364+
}
1365+
ApplyContentRewriteRules(testEntry, `remove_img_blur_params`)
1366+
1367+
if !reflect.DeepEqual(testEntry, controlEntry) {
1368+
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
1369+
}
1370+
}
1371+
1372+
// TestStripImageQueryParamsSimple pins the blur threshold: blur=0 must leave
// the URL untouched, blur=50 must strip the whole query string, and images
// with no blur parameter or no query string at all must be left as they are.
func TestStripImageQueryParamsSimple(t *testing.T) {
1373+
testEntry := &model.Entry{
1374+
URL: "https://example.org/article",
1375+
Title: `Simple Test`,
1376+
Content: `
1377+
<p>Testing query parameter stripping:</p>
1378+
1379+
<!-- Images with various query parameters -->
1380+
<img src="https://example.org/test1.jpg?blur=0&width=300" alt="With blur zero">
1381+
<img src="https://example.org/test2.jpg?blur=50&width=300&format=webp" alt="With blur fifty">
1382+
<img src="https://example.org/test3.jpg?width=800&quality=high" alt="No blur param">
1383+
<img src="https://example.org/test4.jpg" alt="No params at all">
1384+
`,
1385+
}
1386+
1387+
// Only test2.jpg (blur=50) is expected to have its query string removed.
controlEntry := &model.Entry{
1388+
URL: "https://example.org/article",
1389+
Title: `Simple Test`,
1390+
Content: `<p>Testing query parameter stripping:</p>
1391+
1392+
<!-- Images with various query parameters -->
1393+
<img src="https://example.org/test1.jpg?blur=0&amp;width=300" alt="With blur zero"/>
1394+
<img src="https://example.org/test2.jpg" alt="With blur fifty"/>
1395+
<img src="https://example.org/test3.jpg?width=800&amp;quality=high" alt="No blur param"/>
1396+
<img src="https://example.org/test4.jpg" alt="No params at all"/>
1397+
`,
1398+
}
1399+
ApplyContentRewriteRules(testEntry, `remove_img_blur_params`)
1400+
1401+
if !reflect.DeepEqual(testEntry, controlEntry) {
1402+
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
1403+
}
1404+
}

0 commit comments

Comments
 (0)