FlintWave · ErikChevalier · Jun 3, 2026 · Jun 3, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,16 @@ All notable changes to SearchMob are documented here. The format is based on
 [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and the project uses Ubuntu-style date
 versioning (`YY.MM.VV`).
 
+## [26.06.02] - 2026-06-03
+
+### Changed
+- **More relevant results.** Ranking now weighs how well each result actually matches what you typed,
+  not just how many engines returned it, so off-topic pages that one engine happened to rank highly
+  get pushed down instead of sitting near the top. Results written in a different alphabet than your
+  search (say, a stray English page for a Russian query) are demoted too. It works in any language,
+  runs entirely on your device, and still lets engine agreement and your own pin, raise, lower, and
+  block rules decide between good matches. This brings Android in line with the desktop app.
+
 ## [26.06.01] - 2026-06-03
 
 ### Added

diff --git a/app/build.gradle.kts b/app/build.gradle.kts
@@ -51,7 +51,7 @@ val hasReleaseSigning =
 // date versioning: YY.MM.VV (two-digit year, month, and per-month build), set manually each release.
 // `versionCode` is derived as (YY*10000 + MM*100 + VV) so it always increases monotonically with the
 // date (e.g. 26.05.00 -> 260500, 26.06.00 -> 260600, 27.01.00 -> 270100). Bump this on each release.
-val appVersionName = "26.06.01"
+val appVersionName = "26.06.02"
 val appVersionCode =
     appVersionName
         .split("-")[0]

diff --git a/app/src/main/java/org/searchmob/engine/aggregate/Aggregator.kt b/app/src/main/java/org/searchmob/engine/aggregate/Aggregator.kt
@@ -61,7 +61,7 @@ class Aggregator(
                         }
                     }.awaitAll()
             }.filterNotNull()
-        val results = rank(successes.flatMap { it.items })
+        val results = rank(successes.flatMap { it.items }, query.terms)
         return AggregationResult(results, consensusCorrection(query.terms, successes))
     }
 
@@ -84,7 +84,10 @@ class Aggregator(
         return byKey.values.maxByOrNull { it.second }?.first
     }
 
-    private fun rank(items: List<EngineResultItem>): List<AggregatedResult> {
+    private fun rank(
+        items: List<EngineResultItem>,
+        query: String,
+    ): List<AggregatedResult> {
         val nowMillis = System.currentTimeMillis()
         val buckets = LinkedHashMap<String, MutableBucket>()
         for (item in items) {
@@ -114,13 +117,28 @@ class Aggregator(
                 }
             }
         }
+        // Fold a lexical query-match score into the RRF score so the final order leads with relevance
+        // (does the result actually contain the query's content words, especially the title) and keeps
+        // engine consensus as a strong secondary signal. Without this, near-tied RRF scores let an
+        // irrelevant result one engine ranked highly sit among the top hits. The blend is demotion-only
+        // and language-agnostic; the existing tie-breakers keep ordering deterministic. See Relevance.
+        val terms = Relevance.contentTerms(query)
         return buckets.values
             .map { AggregatedResult(it.title, it.url, it.snippet, it.engines.toList(), it.score, it.publishedMillis) }
+            .map {
+                it to
+                    Relevance.blendedScore(
+                        it.score,
+                        Relevance.lexicalScore(it.title, it.snippet, terms),
+                        Relevance.languageAffinity(query, it.title, it.snippet),
+                    )
+            }
             .sortedWith(
-                compareByDescending<AggregatedResult> { it.score }
-                    .thenBy { UrlNormalizer.normalize(it.url) }
-                    .thenBy { it.engines.joinToString(",") },
+                compareByDescending<Pair<AggregatedResult, Double>> { it.second }
+                    .thenBy { UrlNormalizer.normalize(it.first.url) }
+                    .thenBy { it.first.engines.joinToString(",") },
             )
+            .map { it.first }
     }
 
     /** A structured date from the engine wins; else parse snippet/title. Weak (bare-year) -> null. */

diff --git a/app/src/main/java/org/searchmob/engine/aggregate/Relevance.kt b/app/src/main/java/org/searchmob/engine/aggregate/Relevance.kt
@@ -0,0 +1,216 @@
+package org.searchmob.engine.aggregate
+
+/**
+ * Lexical query-match relevance signal blended into the aggregator's RRF ranking.
+ *
+ * RRF fuses several engines' rankings, but it trusts each engine's order: with mostly single-engine
+ * results the fused scores are nearly tied (1/60 .. 1/69), so an irrelevant result one engine happened
+ * to rank highly slips into the top. Nothing in the pipeline asks "does this result actually match the
+ * query?".
+ *
+ * This object adds that missing signal: a deterministic, on-device lexical match score over the
+ * result's title and snippet (how many of the query's content words appear, weighted toward the title,
+ * with a small exact-phrase bonus). The aggregator multiplies each result's RRF score by a factor
+ * derived from this lexical score, so query-match leads the ranking and engine consensus stays a
+ * secondary signal. No corpus, model, or network: pure string work, the Kotlin twin of the desktop
+ * `engines/relevance.py`.
+ *
+ * The blend is deliberately bounded (a non-matching result keeps [BASE] of its RRF weight rather than
+ * zero) so a relevant result phrased differently from the query (e.g. "artificial intelligence" for the
+ * query term "ai") that several engines agree on is demoted, not deleted. Everything here is
+ * language-agnostic: the tokenizer is Unicode-aware, English stemming is gated to ASCII, and language
+ * affinity is script-relative, so it works in whatever language the user searches in.
+ */
+object Relevance {
+    /**
+     * Blend constants: the lexical factor is `min(1.0, BASE + GAIN * lexical)`. It is DEMOTION-ONLY:
+     * capped at 1.0, so a well-matching result keeps its full RRF weight and engine consensus still
+     * decides the order among good matches (we never promote a keyword-stuffed title over a result
+     * several engines agree on). A result with a lexical score of 0 keeps [BASE] of its RRF weight.
+     * With BASE=0.5, GAIN=1.0 any lexical score of 0.5 or more is already at full weight.
+     */
+    const val BASE = 0.5
+    const val GAIN = 1.0
+
+    // Conservative stopword set: function words and generic query modifiers that carry little subject
+    // intent, kept short so a query's subject is never stripped (an all-stopword query falls back to all
+    // tokens in `contentTerms`). NOTE(review): "a", "i", "vs." look unreachable: tokens are >= 2 alnum chars.
+    private val STOPWORDS =
+        setOf(
+            "a", "an", "the", "of", "to", "in", "on", "for", "and", "or", "is", "are", "be", "do",
+            "does", "did", "how", "what", "why", "when", "where", "who", "which", "with", "this",
+            "that", "it", "at", "by", "from", "as", "your", "my", "i", "vs", "into", "about", "best",
+            "top", "good", "vs.", "near", "me",
+        )
+
+    // Code-point ranges treated as Latin script, so accented and Vietnamese text is NOT "foreign":
+    // 0x41..0x24F ('A' through Latin Extended-B; its non-letters are harmless because only letters are
+    // classified), Latin Extended Additional, and Extended-C/D. Letters outside these are another script.
+    private val LATIN_RANGES =
+        listOf(0x41..0x24F, 0x1E00..0x1EFF, 0x2C60..0x2C7F, 0xA720..0xA7FF)
+
+    /**
+     * Splits [text] into maximal runs of letter/digit code points (any script); every other character,
+     * the underscore included, acts as a separator. Unicode-aware twin of Python's
+     * `re.compile(r"[^\W_]+", re.UNICODE)`: Java/Kotlin regex `\w` is ASCII-only by default, so this
+     * scans code points with [Character.isLetterOrDigit] instead. Space-less scripts (CJK) stay one run.
+     */
+    private fun tokens(text: String): List<String> {
+        val found = mutableListOf<String>()
+        val word = StringBuilder()
+
+        fun flush() {
+            if (word.isEmpty()) return
+            found += word.toString()
+            word.setLength(0)
+        }
+        var pos = 0
+        while (pos < text.length) {
+            val codePoint = text.codePointAt(pos)
+            if (Character.isLetterOrDigit(codePoint)) word.appendCodePoint(codePoint) else flush()
+            pos += Character.charCount(codePoint)
+        }
+        flush()
+        return found
+    }
+
+    /**
+     * Very light English suffix folding so "keyboards" matches "keyboard" and "reviews" "review".
+     *
+     * Not a real stemmer: it trims the commonest inflectional endings and is applied to both the query
+     * and the document. Rules, first match wins: words of 5+ chars turn "ies" into "y" and drop "ing" /
+     * "ers"; words of 4+ chars drop "es" / "ed" / "er", else a single trailing "s" (but not "ss").
+     * Shorter words (e.g. "ios", "css", "vs") are returned unchanged.
+     *
+     * Gated to ASCII since the suffix rules are English; a word containing any non-ASCII char passes
+     * through untouched. Per-language stemming is a future refinement for the localization pass.
+     * NOTE(review): a plural whose singular ends in "e" does not fold onto it ("games" -> "gam" but
+     * "game" -> "game"), so such pairs miss each other; confirm this matches the desktop twin.
+     */
+    private fun stem(word: String): String {
+        val isAscii = word.none { it.code > 0x7F }
+        val len = word.length
+        return when {
+            !isAscii || len < 4 -> word
+            len >= 5 && word.endsWith("ies") -> word.dropLast(3) + "y"
+            len >= 5 && (word.endsWith("ing") || word.endsWith("ers")) -> word.dropLast(3)
+            word.endsWith("es") || word.endsWith("ed") || word.endsWith("er") -> word.dropLast(2)
+            word.endsWith("s") && !word.endsWith("ss") -> word.dropLast(1)
+            else -> word
+        }
+    }
+
+    /** Maps a letter code point to a coarse script bucket ("latin", "cjk", ...); unlisted ranges are "other". */
+    private fun scriptOf(cp: Int): String =
+        when {
+            LATIN_RANGES.any { range -> cp in range } -> "latin"
+            cp in 0x0400..0x052F -> "cyrillic"
+            cp in 0x0370..0x03FF -> "greek"
+            cp in 0x0590..0x05FF -> "hebrew"
+            cp in 0x0600..0x06FF || cp in 0x0750..0x077F -> "arabic"
+            cp in 0x0900..0x097F -> "devanagari"
+            cp in 0x0E00..0x0E7F -> "thai"
+            cp in 0x3040..0x30FF || cp in 0x3400..0x4DBF -> "cjk" // kana, CJK Extension A
+            cp in 0x4E00..0x9FFF || cp in 0xAC00..0xD7AF -> "cjk" // CJK Unified, Hangul syllables
+            else -> "other"
+        }
+
+    /**
+     * Most common letter script in [text], or null when it has no letters (e.g. only digits). Counts are
+     * insertion-ordered and maxByOrNull keeps the first maximum, so a tie goes to the script seen first.
+     */
+    private fun dominantScript(text: String): String? {
+        val counts = LinkedHashMap<String, Int>()
+        var i = 0
+        while (i < text.length) {
+            val cp = text.codePointAt(i)
+            if (Character.isLetter(cp)) scriptOf(cp).let { counts[it] = (counts[it] ?: 0) + 1 }
+            i += Character.charCount(cp)
+        }
+        return counts.maxByOrNull { it.value }?.key
+    }
+
+    /**
+     * Script-affinity multiplier for a result: 1.0 when nothing is wrong, 0.4 when the result's dominant
+     * script differs from the query's.
+     *
+     * Script-relative on purpose so it works in any UI/query language, not just English: a result
+     * dominated by another script than the query (Cyrillic for a Latin query, Latin for a CJK query...)
+     * is almost never what the searcher wanted. A query or a result with no letters at all is never
+     * penalized. Telling apart languages that share a script (e.g. English vs French) needs real
+     * language detection and is left to the localization pass.
+     */
+    fun languageAffinity(
+        query: String,
+        title: String,
+        snippet: String,
+    ): Double {
+        val wanted = dominantScript(query)
+        val seen = dominantScript("$title $snippet")
+        return if (wanted != null && seen != null && seen != wanted) 0.4 else 1.0
+    }
+
+    /**
+     * Extracts the distinct content tokens of [query], in order of first appearance.
+     *
+     * Tokens are lowercased, those shorter than 2 characters are dropped, and stopwords are removed.
+     * When that leaves nothing (every remaining token is a stopword, as in "how to"), the unfiltered
+     * tokens are returned instead so matching still has something to work with. A query with no
+     * token of length >= 2 yields an empty list.
+     */
+    fun contentTerms(query: String): List<String> {
+        // distinct() preserves first-occurrence order, so the query's own word order is kept.
+        val candidates = tokens(query.lowercase()).filter { it.length >= 2 }.distinct()
+        val subject = candidates.filterNot { it in STOPWORDS }
+        return if (subject.isEmpty()) candidates else subject
+    }
+
+    /**
+     * How well [title]/[snippet] match [terms], in [0, 1]. Higher = better; empty [terms] scores 0.
+     *
+     * 0.5 x coverage (fraction of stemmed terms found in title or snippet) + 0.4 x title-only coverage
+     * + 0.1 when all terms appear as one contiguous, token-aligned phrase in the title; halved when the
+     * first term is absent. Whole-word matching keeps "ai" from hitting inside "available" or "thai".
+     */
+    fun lexicalScore(
+        title: String,
+        snippet: String,
+        terms: List<String>,
+    ): Double {
+        if (terms.isEmpty()) return 0.0
+        val titleSeq = tokens(title.lowercase()).map { stem(it) }
+        val titleStems = titleSeq.toSet()
+        val snippetStems = tokens(snippet.lowercase()).map { stem(it) }.toSet()
+        val stems = terms.map { stem(it) }
+        val n = stems.size
+        val coverage = stems.count { it in titleStems || it in snippetStems }.toDouble() / n
+        val titleCoverage = stems.count { it in titleStems }.toDouble() / n
+        // Space-pad both sides so the phrase only matches on whole-token boundaries; a bare substring
+        // test would let the phrase "ai new" hit inside the title "thai new year".
+        val paddedTitle = titleSeq.joinToString(" ", prefix = " ", postfix = " ")
+        val paddedPhrase = stems.joinToString(" ", prefix = " ", postfix = " ")
+        val phrase = if (n >= 2 && paddedTitle.contains(paddedPhrase)) 1.0 else 0.0
+        val base = 0.5 * coverage + 0.4 * titleCoverage + 0.1 * phrase
+        // The head term is usually the query's subject ("ai" in "ai news today"); a result missing it
+        // entirely is a poor match even if it covers the generic remainder, so halve its score.
+        val headPresent = stems[0] in titleStems || stems[0] in snippetStems
+        return if (headPresent) base else base * 0.5
+    }
+
+    /**
+     * Applies the lexical match and the language affinity to an RRF score (demotion-only).
+     *
+     * Returns `rrfScore * min(1.0, BASE + GAIN * lexical) * affinity`. Because the lexical factor is
+     * capped at 1.0, a well-matching result keeps its full RRF weight and engine consensus still orders
+     * the good matches, while a weak match sinks toward [BASE]. [affinity] (1.0 by default, lower for a
+     * foreign-script result) multiplies on top, demoting wrong-language hits.
+     */
+    fun blendedScore(
+        rrfScore: Double,
+        lexical: Double,
+        affinity: Double = 1.0,
+    ): Double {
+        val uncapped = BASE + GAIN * lexical
+        return rrfScore * uncapped.coerceAtMost(1.0) * affinity
+    }
+}
diff --git a/app/src/test/java/org/searchmob/engine/aggregate/RelevanceTest.kt b/app/src/test/java/org/searchmob/engine/aggregate/RelevanceTest.kt
@@ -0,0 +1,102 @@
+package org.searchmob.engine.aggregate
+
+import org.junit.Assert.assertEquals
+import org.junit.Assert.assertTrue
+import org.junit.Test
+
+/** Unit tests for [Relevance]; mirrors the desktop `tests/engines/test_relevance.py` suite. */
+class RelevanceTest {
+    // --- contentTerms -------------------------------------------------------------------------
+
+    @Test
+    fun contentTermsStripsStopwordsKeepsSubject() {
+        assertEquals(listOf("mechanical", "keyboard", "2026"), Relevance.contentTerms("best mechanical keyboard 2026"))
+    }
+
+    @Test
+    fun contentTermsDistinctAndLowercased() {
+        assertEquals(listOf("tie", "knot"), Relevance.contentTerms("Tie a TIE knot tie"))
+    }
+
+    @Test
+    fun contentTermsFallsBackWhenAllStopwords() {
+        // "how to" is all stopwords; rather than score everything zero, keep the tokens.
+        assertEquals(listOf("how", "to"), Relevance.contentTerms("how to"))
+    }
+
+    // --- lexicalScore -------------------------------------------------------------------------
+
+    @Test
+    fun fullMatchScoresHigh() {
+        val terms = Relevance.contentTerms("mechanical keyboard")
+        assertTrue(Relevance.lexicalScore("Best Mechanical Keyboard Guide", "review", terms) >= 0.9)
+    }
+
+    @Test
+    fun stemmingMatchesPlural() {
+        // "keyboards" stems to "keyboard": coverage 0.5 + title coverage 0.4 = 0.9 (one term, no phrase bonus).
+        assertTrue(Relevance.lexicalScore("The Best Keyboards", "", listOf("keyboard")) >= 0.9)
+    }
+
+    @Test
+    fun missingSubjectIsPenalized() {
+        val terms = Relevance.contentTerms("ai news today") // subject/head term is "ai"
+        val hasSubject = Relevance.lexicalScore("AI News Today", "latest ai coverage", terms)
+        val noSubject = Relevance.lexicalScore("Viral News Today", "trending news today", terms)
+        assertTrue(noSubject < hasSubject)
+        // The head penalty halves a result that never mentions the subject anywhere.
+        assertTrue(noSubject <= 0.5 * hasSubject + 0.01)
+    }
+
+    @Test
+    fun noTermsScoresZero() {
+        assertEquals(0.0, Relevance.lexicalScore("anything", "anything", emptyList()), 1e-9)
+    }
+
+    @Test
+    fun nonAsciiTermsNotMangledByEnglishStemmer() {
+        // A Cyrillic term must still match itself (the English stemmer is gated to ASCII).
+        assertTrue(Relevance.lexicalScore("Новости сегодня", "", listOf("новости")) >= 0.9)
+    }
+
+    // --- languageAffinity (script-relative, multilingual) -------------------------------------
+
+    @Test
+    fun sameScriptQueryAndResultIsKept() {
+        assertEquals(1.0, Relevance.languageAffinity("ai news", "AI News Today", "latest"), 1e-9)
+        assertEquals(1.0, Relevance.languageAffinity("новости ии", "Новости ИИ", "статья"), 1e-9)
+    }
+
+    @Test
+    fun crossScriptResultIsDemotedEitherDirection() {
+        assertEquals(0.4, Relevance.languageAffinity("ai news", "Новости искусственного интеллекта", "сегодня"), 1e-9)
+        assertEquals(0.4, Relevance.languageAffinity("新闻 人工智能", "AI News Today", "english article"), 1e-9)
+    }
+
+    @Test
+    fun letterlessQueryIsNeverPenalized() {
+        assertEquals(1.0, Relevance.languageAffinity("2026 / 1080", "Любой результат", "текст"), 1e-9)
+    }
+
+    // --- blendedScore (demotion-only) ---------------------------------------------------------
+
+    @Test
+    fun blendIsDemotionOnlyCappedAtOne() {
+        // Lexical 1.0 and 0.6 both hit the cap (BASE + GAIN * lexical >= 1.0), so the full RRF weight is kept.
+        assertEquals(1.0, Relevance.blendedScore(1.0, 1.0), 1e-9)
+        assertEquals(1.0, Relevance.blendedScore(1.0, 0.6), 1e-9)
+    }
+
+    @Test
+    fun blendSinksWeakMatchTowardBase() {
+        assertEquals(Relevance.BASE, Relevance.blendedScore(1.0, 0.0), 1e-9)
+        assertTrue(Relevance.blendedScore(1.0, 0.0) < Relevance.blendedScore(1.0, 0.3))
+        assertTrue(Relevance.blendedScore(1.0, 0.3) < 1.0)
+    }
+
+    @Test
+    fun affinityMultipliesOnTop() {
+        // A perfect lexical match in the wrong script is still demoted by the affinity factor.
+        assertEquals(0.4, Relevance.blendedScore(1.0, 1.0, affinity = 0.4), 1e-9)
+    }
+}