From 2f67a4e1d8b06d65a4153426b30a3f96263c0afd Mon Sep 17 00:00:00 2001
From: Ben
Date: Wed, 29 Oct 2025 16:02:46 -0500
Subject: [PATCH] Setting to split concatenated words

Since I started using this keyboard, a major issue I've found is how easy it is to accidentally write concatenated words by pressing a letter instead of the space bar. I'm constantly typing c, v, b, n, or m (the bottom-row keys) instead of space, leading to words like "hellobthere" or "whatbarenyouvdoingvtonight?"

My proposal is to add a setting that corrects for this specific issue by trying to split unknown words at the trouble keys and checking whether both resulting parts are dictionary words. In my own testing this works great, and my number of misspellings and retypes has gone down drastically.

An alternative fix is to enlarge the whole keyboard, thus increasing the padding around the space bar, but that affects ergonomics and how much you can read on screen. Another possibility is to enlarge only the space bar, but that also takes up screen space.

This was not an issue with major competing keyboards; I suspect they either correct using their own proprietary models or do something similar to this change. I have no intention of using those keyboards, so I believe HeliBoard should be able to solve the problem. I believe this solution is the better enhancement and provides a good user experience when enabled.
---
 .../java/helium314/keyboard/latin/Suggest.kt  |  64 ++++++++++
 .../keyboard/latin/settings/Defaults.kt       |   1 +
 .../keyboard/latin/settings/Settings.java     |   1 +
 .../latin/settings/SettingsValues.java        |   2 +
 .../settings/screens/TextCorrectionScreen.kt  |   6 +
 app/src/main/res/values/strings.xml           |   4 +
 .../helium314/keyboard/latin/SuggestTest.kt   | 118 ++++++++++++++++++
 7 files changed, 196 insertions(+)

diff --git a/app/src/main/java/helium314/keyboard/latin/Suggest.kt b/app/src/main/java/helium314/keyboard/latin/Suggest.kt
index a1b516f3d6..f6ffa28ec1 100644
--- a/app/src/main/java/helium314/keyboard/latin/Suggest.kt
+++ b/app/src/main/java/helium314/keyboard/latin/Suggest.kt
@@ -113,6 +113,11 @@ class Suggest(private val mDictionaryFacilitator: DictionaryFacilitator) {
                 getSuggestionsInfoListWithDebugInfo(capitalizedTypedWord, suggestionsContainer)
             else suggestionsContainer
 
+        // Detect and suggest corrections for concatenated words with accidental bottom-row key presses
+        if (!resultsArePredictions && typedWordString.length > 4 && Settings.getValues().mSuggestSplitConcatenatedWords) {
+            tryAddConcatenatedWordSuggestions(typedWordString, suggestionsList, firstOccurrenceOfTypedWordInSuggestions)
+        }
+
         val inputStyle = if (resultsArePredictions) {
             if (suggestionResults.mIsBeginningOfSentence) SuggestedWords.INPUT_STYLE_BEGINNING_OF_SENTENCE_PREDICTION
             else SuggestedWords.INPUT_STYLE_PREDICTION
@@ -252,6 +257,65 @@ class Suggest(private val mDictionaryFacilitator: DictionaryFacilitator) {
         return allowsToBeAutoCorrected to hasAutoCorrection
     }
 
+    /**
+     * Suggests a space-separated correction for a word that looks like two words glued together
+     * by an accidental bottom-row key press (c, v, b, n, m) in place of the space bar.
+     * Example: "hellobthere" -> suggests "hello there"
+     *
+     * Nothing is added when [firstOccurrenceOfTypedWord] is >= 0. Otherwise the first split
+     * position that leaves at least 2 characters on each side, and whose two halves both pass
+     * [DictionaryFacilitator.isValidSpellingWord], is used and the search stops there, so at
+     * most one suggestion is inserted into [suggestions].
+     *
+     * @param typedWord the word as typed by the user
+     * @param suggestions the list to insert into; modified in place
+     * @param firstOccurrenceOfTypedWord index of the typed word in the suggestions, negative if absent
+     */
+    internal fun tryAddConcatenatedWordSuggestions(
+        typedWord: String,
+        suggestions: ArrayList<SuggestedWordInfo>,
+        firstOccurrenceOfTypedWord: Int
+    ) {
+        if (firstOccurrenceOfTypedWord >= 0) return // typed word is already valid
+
+        val bottomRowChars = setOf('c', 'v', 'b', 'n', 'm')
+        val lowerTypedWord = typedWord.lowercase(mDictionaryFacilitator.mainLocale)
+        // Lowercasing can change the length of a string, so only reuse the capitalization as
+        // typed when indices into both strings are guaranteed to line up.
+        val displayWord = if (lowerTypedWord.length == typedWord.length) typedWord else lowerTypedWord
+
+        // Try splitting at each bottom-row character position
+        for (i in 2 until lowerTypedWord.length - 2) { // min 2 chars on each side
+            if (lowerTypedWord[i] !in bottomRowChars) continue
+
+            // Validate the lowercased halves (the accidental character itself is dropped)
+            val part1 = lowerTypedWord.substring(0, i)
+            val part2 = lowerTypedWord.substring(i + 1)
+            if (!mDictionaryFacilitator.isValidSpellingWord(part1) ||
+                !mDictionaryFacilitator.isValidSpellingWord(part2)) continue
+
+            // Show the halves with the capitalization the user typed ("Hellobthere" -> "Hello there")
+            val spacedSuggestion = displayWord.substring(0, i) + " " + displayWord.substring(i + 1)
+            if (suggestions.none { it.mWord == spacedSuggestion }) {
+                // Add with high score to make it a prominent suggestion
+                val suggestionInfo = SuggestedWordInfo(
+                    spacedSuggestion,
+                    "",
+                    SuggestedWordInfo.MAX_SCORE - 1, // high score but below typed word
+                    SuggestedWordInfo.KIND_CORRECTION,
+                    Dictionary.DICTIONARY_USER_TYPED,
+                    SuggestedWordInfo.NOT_AN_INDEX,
+                    SuggestedWordInfo.NOT_A_CONFIDENCE
+                )
+                // If there are already suggestions, insert at position 1 (right after typed word)
+                // Otherwise just add to the list
+                val insertPosition = if (suggestions.size > 1) 1 else suggestions.size
+                suggestions.add(insertPosition, suggestionInfo)
+            }
+            return // only add one spaced suggestion
+        }
+    }
+
     // Retrieves suggestions for the batch input
     // and calls the callback function with the suggestions.
private fun getSuggestedWordsForBatchInput( diff --git a/app/src/main/java/helium314/keyboard/latin/settings/Defaults.kt b/app/src/main/java/helium314/keyboard/latin/settings/Defaults.kt index 575fd270e4..bee456dfbf 100644 --- a/app/src/main/java/helium314/keyboard/latin/settings/Defaults.kt +++ b/app/src/main/java/helium314/keyboard/latin/settings/Defaults.kt @@ -63,6 +63,7 @@ object Defaults { const val PREF_AUTO_CORRECT_THRESHOLD = 0.185f const val PREF_AUTOCORRECT_SHORTCUTS = true const val PREF_BACKSPACE_REVERTS_AUTOCORRECT = true + const val PREF_SUGGEST_SPLIT_CONCATENATED_WORDS = false const val PREF_CENTER_SUGGESTION_TEXT_TO_ENTER = false const val PREF_SHOW_SUGGESTIONS = true const val PREF_ALWAYS_SHOW_SUGGESTIONS = false diff --git a/app/src/main/java/helium314/keyboard/latin/settings/Settings.java b/app/src/main/java/helium314/keyboard/latin/settings/Settings.java index e7fe1aaddc..03da901e8c 100644 --- a/app/src/main/java/helium314/keyboard/latin/settings/Settings.java +++ b/app/src/main/java/helium314/keyboard/latin/settings/Settings.java @@ -75,6 +75,7 @@ public final class Settings implements SharedPreferences.OnSharedPreferenceChang public static final String PREF_AUTO_CORRECT_THRESHOLD = "auto_correct_threshold"; public static final String PREF_AUTOCORRECT_SHORTCUTS = "autocorrect_shortcuts"; public static final String PREF_BACKSPACE_REVERTS_AUTOCORRECT = "backspace_reverts_autocorrect"; + public static final String PREF_SUGGEST_SPLIT_CONCATENATED_WORDS = "suggest_split_concatenated_words"; public static final String PREF_CENTER_SUGGESTION_TEXT_TO_ENTER = "center_suggestion_text_to_enter"; public static final String PREF_SHOW_SUGGESTIONS = "show_suggestions"; public static final String PREF_ALWAYS_SHOW_SUGGESTIONS = "always_show_suggestions"; diff --git a/app/src/main/java/helium314/keyboard/latin/settings/SettingsValues.java b/app/src/main/java/helium314/keyboard/latin/settings/SettingsValues.java index 052ad21891..6c3020ae4f 100644 --- 
a/app/src/main/java/helium314/keyboard/latin/settings/SettingsValues.java +++ b/app/src/main/java/helium314/keyboard/latin/settings/SettingsValues.java @@ -143,6 +143,7 @@ public class SettingsValues { public final boolean mAutoCorrectEnabled; public final float mAutoCorrectionThreshold; public final boolean mBackspaceRevertsAutocorrect; + public final boolean mSuggestSplitConcatenatedWords; public final int mScoreLimitForAutocorrect; public final boolean mAutoCorrectShortcuts; private final boolean mSuggestionsEnabledPerUserSettings; @@ -208,6 +209,7 @@ public SettingsValues(final Context context, final SharedPreferences prefs, fina : (mAutoCorrectionThreshold < 0.07 ? 800000 : 950000); // aggressive or modest mAutoCorrectShortcuts = prefs.getBoolean(Settings.PREF_AUTOCORRECT_SHORTCUTS, Defaults.PREF_AUTOCORRECT_SHORTCUTS); mBackspaceRevertsAutocorrect = prefs.getBoolean(Settings.PREF_BACKSPACE_REVERTS_AUTOCORRECT, Defaults.PREF_BACKSPACE_REVERTS_AUTOCORRECT); + mSuggestSplitConcatenatedWords = prefs.getBoolean(Settings.PREF_SUGGEST_SPLIT_CONCATENATED_WORDS, Defaults.PREF_SUGGEST_SPLIT_CONCATENATED_WORDS); mBigramPredictionEnabled = prefs.getBoolean(Settings.PREF_BIGRAM_PREDICTIONS, Defaults.PREF_BIGRAM_PREDICTIONS); mSuggestPunctuation = prefs.getBoolean(Settings.PREF_SUGGEST_PUNCTUATION, Defaults.PREF_SUGGEST_PUNCTUATION); mSuggestClipboardContent = prefs.getBoolean(Settings.PREF_SUGGEST_CLIPBOARD_CONTENT, Defaults.PREF_SUGGEST_CLIPBOARD_CONTENT); diff --git a/app/src/main/java/helium314/keyboard/settings/screens/TextCorrectionScreen.kt b/app/src/main/java/helium314/keyboard/settings/screens/TextCorrectionScreen.kt index 58b4dc48de..f11d08e964 100644 --- a/app/src/main/java/helium314/keyboard/settings/screens/TextCorrectionScreen.kt +++ b/app/src/main/java/helium314/keyboard/settings/screens/TextCorrectionScreen.kt @@ -64,6 +64,7 @@ fun TextCorrectionScreen( if (autocorrectEnabled) Settings.PREF_AUTOCORRECT_SHORTCUTS else null, if (autocorrectEnabled) 
Settings.PREF_AUTO_CORRECT_THRESHOLD else null, if (autocorrectEnabled) Settings.PREF_BACKSPACE_REVERTS_AUTOCORRECT else null, + if (autocorrectEnabled) Settings.PREF_SUGGEST_SPLIT_CONCATENATED_WORDS else null, Settings.PREF_AUTO_CAP, R.string.settings_category_space, Settings.PREF_KEY_USE_DOUBLE_SPACE_PERIOD, @@ -134,6 +135,11 @@ fun createCorrectionSettings(context: Context) = listOf( Setting(context, Settings.PREF_BACKSPACE_REVERTS_AUTOCORRECT, R.string.backspace_reverts_autocorrect) { SwitchPreference(it, Defaults.PREF_BACKSPACE_REVERTS_AUTOCORRECT) }, + Setting(context, Settings.PREF_SUGGEST_SPLIT_CONCATENATED_WORDS, + R.string.suggest_split_concatenated_words, R.string.suggest_split_concatenated_words_summary + ) { + SwitchPreference(it, Defaults.PREF_SUGGEST_SPLIT_CONCATENATED_WORDS) + }, Setting(context, Settings.PREF_AUTO_CAP, R.string.auto_cap, R.string.auto_cap_summary ) { diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml index f19ab7da7a..3298a5463b 100644 --- a/app/src/main/res/values/strings.xml +++ b/app/src/main/res/values/strings.xml @@ -141,6 +141,10 @@ When enabled shortcuts might be expanded by autocorrect Backspace reverts autocorrect + + Suggest split words + + Suggest space-separated words when c, v, b, n, or m is accidentally typed instead of space Off diff --git a/app/src/test/java/helium314/keyboard/latin/SuggestTest.kt b/app/src/test/java/helium314/keyboard/latin/SuggestTest.kt index 009995d476..7b60ee7d14 100644 --- a/app/src/test/java/helium314/keyboard/latin/SuggestTest.kt +++ b/app/src/test/java/helium314/keyboard/latin/SuggestTest.kt @@ -275,6 +275,124 @@ class SuggestTest { assertEquals("word'", result.mWord) } + private fun createMockFacilitatorWithValidWords(vararg validWords: String): DictionaryFacilitator { + val mock = org.mockito.Mockito.mock(DictionaryFacilitator::class.java) + org.mockito.Mockito.`when`(mock.mainLocale).thenReturn(Locale.ENGLISH) + validWords.forEach { word -> + 
org.mockito.Mockito.`when`(mock.isValidSpellingWord(word)).thenReturn(true) + } + return mock + } + + /** + * Test helper for concatenated word splitting + * @param input The typed word with accidental bottom-row char instead of space (e.g., "hellobthere") + * @param validWords Words to mark as valid in the mock dictionary + * @param expectedSuggestionCount Expected number of suggestions added (0 or 1) + * @param expectedSuggestion The expected suggestion text if count > 0 (e.g., "hello there") + * @param firstOccurrence Position of typed word in existing suggestions (-1 = not found/invalid, >=0 = already valid) + */ + private fun testConcatenatedSplit(input: String, validWords: Array, + expectedSuggestionCount: Int, expectedSuggestion: String? = null, + firstOccurrence: Int = -1) { + val mockFacilitator = createMockFacilitatorWithValidWords(*validWords) + val testSuggest = Suggest(mockFacilitator) + val suggestions = ArrayList() + testSuggest.tryAddConcatenatedWordSuggestions(input, suggestions, firstOccurrence) + + assertEquals(expectedSuggestionCount, suggestions.size) + if (expectedSuggestion != null) { + assertEquals(expectedSuggestion, suggestions[0].mWord) + } + } + + @Test fun `all bottom row chars trigger split`() { + testConcatenatedSplit("hellobthere", arrayOf("hello", "there"), 1, "hello there") + testConcatenatedSplit("goodntimes", arrayOf("good", "times"), 1, "good times") + testConcatenatedSplit("lovevlife", arrayOf("love", "life"), 1, "love life") + testConcatenatedSplit("bigcdog", arrayOf("big", "dog"), 1, "big dog") + testConcatenatedSplit("somemday", arrayOf("some", "day"), 1, "some day") + } + + @Test fun `concatenated words with multiple possible splits - only first valid`() { + testConcatenatedSplit("hellomworld", arrayOf("hello", "world"), 1, "hello world") + } + + @Test fun `no split if typed word already in dictionary`() { + // "hellobthere" is already valid (e.g., custom dictionary compound word) + // firstOccurrence=0 means it's found in 
suggestions at position 0 + testConcatenatedSplit("hellobthere", arrayOf("hello", "there", "hellobthere"), 0, firstOccurrence = 0) + } + + @Test fun `no split if only one part is valid word`() { + // "hello" is valid but "there" is not (e.g., typing in mixed languages) + testConcatenatedSplit("hellobthere", arrayOf("hello"), 0) + } + + @Test fun `minimum word length boundaries`() { + // Works: 2 chars on each side (minimum) + testConcatenatedSplit("atbcat", arrayOf("at", "cat"), 1, "at cat") + testConcatenatedSplit("catbat", arrayOf("cat", "at"), 1, "cat at") + + // Fails: less than 2 chars before or after split + testConcatenatedSplit("abcat", arrayOf("a", "cat"), 0) + testConcatenatedSplit("catba", arrayOf("cat", "a"), 0) + } + + @Test fun `no split for strings of bottom row chars only`() { + testConcatenatedSplit("bvncm", arrayOf("b", "v", "n", "c", "m"), 0) + } + + @Test fun `no split for very short strings`() { + testConcatenatedSplit("ab", arrayOf("a", "b"), 0) + testConcatenatedSplit("abc", arrayOf("a", "b", "c"), 0) + testConcatenatedSplit("abcd", arrayOf("ab", "cd"), 0) + } + + @Test fun `split requires exactly 2 chars on each side minimum`() { + testConcatenatedSplit("thebcat", arrayOf("the", "cat"), 1, "the cat") + } + + @Test fun `no false positive - words containing bottom row chars are not split`() { + // "abacus" contains 'c' but should not split to "aba us" + testConcatenatedSplit("abacus", arrayOf("abacus", "aba", "us"), 0, firstOccurrence = 0) + } + + @Test fun `no false positive - abacus not split when valid`() { + testConcatenatedSplit("abacus", arrayOf("abacus"), 0, firstOccurrence = 0) + } + + @Test fun `no false positive - banish contains ban but should not split`() { + testConcatenatedSplit("banish", arrayOf("banish", "ban", "ish"), 0, firstOccurrence = 0) + } + + @Test fun `no false positive - combat contains com and bat`() { + testConcatenatedSplit("combat", arrayOf("combat", "com", "bat"), 0, firstOccurrence = 0) + } + + @Test fun `no 
false positive - mania contains bottom row chars`() { + testConcatenatedSplit("mania", arrayOf("mania", "ma", "ia"), 0, firstOccurrence = 0) + } + + @Test fun `split momscabacus to moms abacus`() { + testConcatenatedSplit("momscabacus", arrayOf("moms", "abacus"), 1, "moms abacus") + } + + @Test fun `split bannmermaids to ban mermaids`() { + testConcatenatedSplit("bannmermaids", arrayOf("ban", "mermaids"), 1, "ban mermaids") + } + + @Test fun `split beetlevmania to beetle mania`() { + testConcatenatedSplit("beetlevmania", arrayOf("beetle", "mania"), 1, "beetle mania") + } + + @Test fun `only first split for multiple concatenated words`() { + // "thebboyboughtnthembasketball" would ideally be "the boy bought the basketball" + // but algorithm only splits at first valid bottom-row char, giving "the boyboughtnthembasketball" + testConcatenatedSplit("thebboyboughtnthembasketball", + arrayOf("the", "boyboughtnthembasketball"), 1, "the boyboughtnthembasketball") + } + private fun shouldBeAutoCorrected(word: String, // typed word suggestions: List, // suggestions ordered by score, including suggestion for typed word if in dictionary firstSuggestionForEmpty: SuggestedWordInfo?, // first suggestion if typed word would be empty (null if none)