From 06c7ed4fb3c40ae717fb4ecc8e545f72c9b6135d Mon Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:07:23 +0200 Subject: [PATCH 01/14] Add French (fr-fr) language support --- .../dicio/numbers/ParserFormatterBuilder.kt | 3 + .../lang/fr/FrenchDateTimeExtractor.kt | 243 ++++++ .../dicio/numbers/lang/fr/FrenchFormatter.kt | 363 +++++++++ .../numbers/lang/fr/FrenchNumberExtractor.kt | 408 ++++++++++ .../org/dicio/numbers/lang/fr/FrenchParser.kt | 41 + .../resources/config/fr-fr/date_time.json | 176 ++++ .../src/main/resources/config/fr-fr/day.word | 1 + .../src/main/resources/config/fr-fr/days.word | 1 + .../src/main/resources/config/fr-fr/hour.word | 1 + .../main/resources/config/fr-fr/hours.word | 1 + .../main/resources/config/fr-fr/minute.word | 1 + .../main/resources/config/fr-fr/minutes.word | 1 + .../main/resources/config/fr-fr/second.word | 1 + .../main/resources/config/fr-fr/seconds.word | 1 + .../resources/config/fr-fr/tokenizer.json | 755 ++++++++++++++++++ .../numbers/lang/fr/DateTimeConfigTest.java | 10 + .../lang/fr/DateTimeExtractorUtilsTest.java | 55 ++ .../dicio/numbers/lang/fr/DateTimeTest.java | 36 + .../lang/fr/DurationExtractorUtilsTest.java | 58 ++ .../numbers/lang/fr/ExtractDateTimeTest.java | 36 + .../numbers/lang/fr/ExtractDurationTest.java | 40 + .../numbers/lang/fr/ExtractNumbersTest.java | 76 ++ .../numbers/lang/fr/NiceDurationTest.java | 46 ++ .../dicio/numbers/lang/fr/NiceNumberTest.java | 36 + .../dicio/numbers/lang/fr/NiceTimeTest.java | 83 ++ .../lang/fr/NumberExtractorUtilsTest.java | 36 + .../numbers/lang/fr/ParserParamsTest.java | 117 +++ .../numbers/lang/fr/PronounceNumberTest.java | 103 +++ .../numbers/lang/fr/TokenizerConfigTest.java | 11 + .../config/fr-fr/date_time_test.json | 42 + settings.gradle.kts | 5 + 31 files changed, 2787 insertions(+) create mode 100644 numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchDateTimeExtractor.kt create mode 100644 numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchFormatter.kt 
create mode 100644 numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt create mode 100644 numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchParser.kt create mode 100644 numbers/src/main/resources/config/fr-fr/date_time.json create mode 100644 numbers/src/main/resources/config/fr-fr/day.word create mode 100644 numbers/src/main/resources/config/fr-fr/days.word create mode 100644 numbers/src/main/resources/config/fr-fr/hour.word create mode 100644 numbers/src/main/resources/config/fr-fr/hours.word create mode 100644 numbers/src/main/resources/config/fr-fr/minute.word create mode 100644 numbers/src/main/resources/config/fr-fr/minutes.word create mode 100644 numbers/src/main/resources/config/fr-fr/second.word create mode 100644 numbers/src/main/resources/config/fr-fr/seconds.word create mode 100644 numbers/src/main/resources/config/fr-fr/tokenizer.json create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/DateTimeConfigTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/DateTimeExtractorUtilsTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/DateTimeTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/DurationExtractorUtilsTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractDateTimeTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractDurationTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/NiceDurationTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/NiceNumberTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/NiceTimeTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/NumberExtractorUtilsTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java create mode 100644 
numbers/src/test/java/org/dicio/numbers/lang/fr/PronounceNumberTest.java create mode 100644 numbers/src/test/java/org/dicio/numbers/lang/fr/TokenizerConfigTest.java create mode 100644 numbers/src/test/resources/config/fr-fr/date_time_test.json diff --git a/numbers/src/main/java/org/dicio/numbers/ParserFormatterBuilder.kt b/numbers/src/main/java/org/dicio/numbers/ParserFormatterBuilder.kt index 4f0628f7..9e1e68bd 100644 --- a/numbers/src/main/java/org/dicio/numbers/ParserFormatterBuilder.kt +++ b/numbers/src/main/java/org/dicio/numbers/ParserFormatterBuilder.kt @@ -3,6 +3,8 @@ package org.dicio.numbers import org.dicio.numbers.formatter.Formatter import org.dicio.numbers.lang.en.EnglishFormatter import org.dicio.numbers.lang.en.EnglishParser +import org.dicio.numbers.lang.fr.FrenchFormatter +import org.dicio.numbers.lang.fr.FrenchParser import org.dicio.numbers.lang.it.ItalianFormatter import org.dicio.numbers.lang.it.ItalianParser import org.dicio.numbers.parser.Parser @@ -11,6 +13,7 @@ import java.util.Locale object ParserFormatterBuilder { private val PARSER_FORMATTER_CLASSES_MAP = mapOf( "en" to ParserFormatterClasses(EnglishFormatter::class.java, EnglishParser::class.java), + "fr" to ParserFormatterClasses(FrenchFormatter::class.java, FrenchParser::class.java), "it" to ParserFormatterClasses(ItalianFormatter::class.java, ItalianParser::class.java), ) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchDateTimeExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchDateTimeExtractor.kt new file mode 100644 index 00000000..05a71a38 --- /dev/null +++ b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchDateTimeExtractor.kt @@ -0,0 +1,243 @@ +package org.dicio.numbers.lang.fr + +import org.dicio.numbers.parser.lexer.TokenStream +import org.dicio.numbers.unit.Duration +import org.dicio.numbers.util.DateTimeExtractorUtils +import org.dicio.numbers.util.DurationExtractorUtils +import org.dicio.numbers.util.NumberExtractorUtils +import 
org.dicio.numbers.util.Utils +import java.time.LocalDate +import java.time.LocalDateTime +import java.time.LocalTime +import java.time.temporal.ChronoUnit + +class FrenchDateTimeExtractor internal constructor( + private val ts: TokenStream, + private val now: LocalDateTime +) { + private val numberExtractor = FrenchNumberExtractor(ts) + private val durationExtractor = DurationExtractorUtils(ts, numberExtractor::numberNoOrdinal) + private val dateTimeExtractor = DateTimeExtractorUtils(ts, now, this::extractIntegerInRange) + + private fun extractIntegerInRange(fromInclusive: Int, toInclusive: Int): Int? { + return NumberExtractorUtils.extractOneIntegerInRange( + ts, fromInclusive, toInclusive + ) { NumberExtractorUtils.signBeforeNumber(ts) { numberExtractor.numberInteger(false) } } + } + + fun dateTime(): LocalDateTime? { + return ts.firstWhichUsesMostTokens({ dateTime(false) }, { dateTime(true) }) + } + + private fun dateTime(timeFirst: Boolean): LocalDateTime? { + var date: LocalDate? = null + var time: LocalTime? 
= null + + if (!timeFirst) { + date = relativeSpecialDay() + + if (date == null) { + val duration = Utils.firstNotNull( + this::relativeDuration, + dateTimeExtractor::relativeMonthDuration + ) + if (duration == null) { + date = date() + } else if (duration.nanos == 0L && duration.days != 0L) { + date = duration.applyAsOffsetToDateTime(now).toLocalDate() + } else if (duration.nanos != 0L && duration.days == 0L + && duration.months == 0L && duration.years == 0L + ) { + time = duration.applyAsOffsetToDateTime(now).toLocalTime() + } else { + return duration.applyAsOffsetToDateTime(now) + } + } + } + + if (time == null) { + time = ts.tryOrSkipDateTimeIgnore(date != null) { this.timeWithAmpm() } + } + + if (date == null && time != null) { + val originalPosition = ts.position + val duration = ts.tryOrSkipDateTimeIgnore(true) { this.relativeDuration() } + if (duration == null) { + date = ts.tryOrSkipDateTimeIgnore(true) { + Utils.firstNotNull(this::relativeSpecialDay, this::date) + } + } else if (duration.nanos == 0L && duration.days != 0L) { + date = duration.applyAsOffsetToDateTime(now).toLocalDate() + } else { + ts.position = originalPosition + } + } + + return if (date == null) { + time?.atDate(now.toLocalDate()) + } else { + if (time == null) date.atTime(now.toLocalTime()) else date.atTime(time) + } + } + + fun timeWithAmpm(): LocalTime? { + var time = time() + val pm: Boolean? 
+ if (time == null) { + val momentOfDay = momentOfDay() ?: return null + time = ts.tryOrSkipDateTimeIgnore(true) { this.time() } + if (time == null) { + return LocalTime.of(momentOfDay, 0) + } else { + pm = DateTimeExtractorUtils.isMomentOfDayPm(momentOfDay) + } + } else { + pm = ts.tryOrSkipDateTimeIgnore(true) { + Utils.firstNotNull( + dateTimeExtractor::ampm, + { momentOfDay()?.let(DateTimeExtractorUtils::isMomentOfDayPm) } + ) + } + } + + if (time.hour != 0 && pm != null) { + if (pm && !DateTimeExtractorUtils.isMomentOfDayPm(time.hour)) { + time = time.withHour((time.hour + 12) % DateTimeExtractorUtils.HOURS_IN_DAY) + } + } + return time + } + + fun time(): LocalTime? { + val hour = Utils.firstNotNull(this::noonMidnightLike, this::hour) ?: return null + var result = LocalTime.of(hour, 0) + + val minute = ts.tryOrSkipDateTimeIgnore(true) { // specialMinute() returns 60 - n for "moins n": the minutes count back from the stated hour, so that hour is stepped back by one (LocalTime wraps 00 -> 23) + specialMinute()?.also { result = result.minusHours(1) } ?: dateTimeExtractor.minute() + } + if (minute == null) return result + result = result.withMinute(minute) + + val second = ts.tryOrSkipDateTimeIgnore(true) { dateTimeExtractor.second() } + if (second == null) return result + return result.withSecond(second) + } + + fun date(): LocalDate? 
{ + var result = now.toLocalDate() + + val dayOfWeek = dateTimeExtractor.dayOfWeek() + val day = ts.tryOrSkipDateTimeIgnore(dayOfWeek != null) { extractIntegerInRange(1, 31) } + + val originalMonth = result.month.value + if (day == null) { + if (dayOfWeek != null) { + return result.plus( + (dayOfWeek - result.dayOfWeek.ordinal).toLong(), ChronoUnit.DAYS + ) + } + result = result.withMonth(1).withDayOfMonth(1) + } else { + result = result.withMonth(1).withDayOfMonth(day) + } + + val month = ts.tryOrSkipDateTimeIgnore(day != null) { + Utils.firstNotNull(dateTimeExtractor::monthName, { extractIntegerInRange(1, 12) }) + } + if (month == null) { + if (day != null) { + result = result.withMonth(originalMonth) + return result + } + } else { + result = result.withMonth(month) + } + + val year = ts.tryOrSkipDateTimeIgnore(month != null) { + extractIntegerInRange(0, 999999999) + } + if (year == null) { + if (month != null) return result + return null + } + + val bcad = dateTimeExtractor.bcad() + return result.withYear(year * (if (bcad == null || bcad) 1 else -1)) + } + + fun specialMinute(): Int? { + val originalPosition = ts.position + // Check for "moins le quart" = -15 minutes (relative to next hour) + if (ts[0].hasCategory("special_minute_before")) { + ts.movePositionForwardBy(ts.indexOfWithoutCategory("date_time_ignore", 1)) + val minute = extractIntegerInRange(1, 59) + if (minute != null) { + return 60 - minute + } + } + ts.position = originalPosition + return null + } + + fun noonMidnightLike(): Int? { + return noonMidnightLikeOrMomentOfDay("noon_midnight_like") + } + + fun momentOfDay(): Int? { + return noonMidnightLikeOrMomentOfDay("moment_of_day") + } + + private fun noonMidnightLikeOrMomentOfDay(category: String): Int? 
{ + val originalPosition = ts.position + + var relativeIndicator = 0 + if (ts[0].hasCategory("pre_special_hour")) { + if (ts[0].hasCategory("pre_relative_indicator")) { + relativeIndicator = if (ts[0].hasCategory("negative")) -1 else 1 + ts.movePositionForwardBy(ts.indexOfWithoutCategory("date_time_ignore", 1)) + } else { + ts.movePositionForwardBy(1) + } + } + + if (ts[0].hasCategory(category)) { + ts.movePositionForwardBy(1) + return ((ts[-1].number!!.integerValue().toInt() + + DateTimeExtractorUtils.HOURS_IN_DAY + relativeIndicator) + % DateTimeExtractorUtils.HOURS_IN_DAY) + } + + ts.position = originalPosition + return null + } + + fun hour(): Int? { + val originalPosition = ts.position + ts.movePositionForwardBy(ts.indexOfWithoutCategory("pre_hour", 0)) + + val number = extractIntegerInRange(0, DateTimeExtractorUtils.HOURS_IN_DAY) + if (number == null) { + ts.position = originalPosition + return null + } + return number % DateTimeExtractorUtils.HOURS_IN_DAY + } + + private fun relativeSpecialDay(): LocalDate? { + val days = Utils.firstNotNull( + dateTimeExtractor::relativeYesterday, + dateTimeExtractor::relativeToday, + dateTimeExtractor::relativeTomorrow, + dateTimeExtractor::relativeDayOfWeekDuration + ) + if (days == null) return null + return now.toLocalDate().plusDays(days.toLong()) + } + + fun relativeDuration(): Duration? 
{ + return dateTimeExtractor.relativeIndicatorDuration( + { durationExtractor.duration() }, + { duration -> duration.multiply(-1) } + ) + } +} diff --git a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchFormatter.kt b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchFormatter.kt new file mode 100644 index 00000000..190ca606 --- /dev/null +++ b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchFormatter.kt @@ -0,0 +1,363 @@ +package org.dicio.numbers.lang.fr + +import org.dicio.numbers.formatter.Formatter +import org.dicio.numbers.unit.MixedFraction +import org.dicio.numbers.util.Utils +import java.time.LocalTime +import java.time.format.DateTimeFormatter +import java.util.Locale +import kotlin.math.abs + +class FrenchFormatter : Formatter("config/fr-fr") { + + override fun niceNumber(mixedFraction: MixedFraction, speech: Boolean): String { + if (speech) { + val sign = if (mixedFraction.negative) "moins " else "" + if (mixedFraction.numerator == 0) { + return sign + pronounceNumber(mixedFraction.whole.toDouble(), 0, true, false, false) + } + + val denominatorString = when (mixedFraction.denominator) { + 2 -> if (mixedFraction.numerator == 1L) "demi" else "demis" + 4 -> if (mixedFraction.numerator == 1L) "quart" else "quarts" + else -> { + val base = pronounceNumber( + mixedFraction.denominator.toDouble(), 0, true, false, true + ) + if (mixedFraction.numerator == 1L) base else base + "s" + } + } + + val numeratorString = pronounceNumber( + mixedFraction.numerator.toDouble(), 0, true, false, false + ) + + return if (mixedFraction.whole == 0L) { + "$sign$numeratorString $denominatorString" + } else { + (sign + pronounceNumber(mixedFraction.whole.toDouble(), 0, true, false, false) + + " et " + numeratorString + " " + denominatorString) + } + } else { + return niceNumberNotSpeech(mixedFraction) + } + } + + override fun pronounceNumber( + number: Double, + places: Int, + shortScale: Boolean, + scientific: Boolean, + ordinal: Boolean + ): String { + if 
(number == Double.POSITIVE_INFINITY) { + return "infini" + } else if (number == Double.NEGATIVE_INFINITY) { + return "moins infini" + } else if (java.lang.Double.isNaN(number)) { + return "pas un nombre" + } + + if (scientific || abs(number) > 999999999999999934463.0) { + val scientificFormatted = String.format(Locale.ENGLISH, "%E", number) + val parts = scientificFormatted.split("E".toRegex(), limit = 2).toTypedArray() + val power = parts[1].toInt().toDouble() + + if (power != 0.0) { + val n = parts[0].toDouble() + return String.format( + "%s fois dix à la %s", + pronounceNumber(n, places, shortScale, false, false), + pronounceNumber(power, places, shortScale, false, false) + ) + } + } + + val result = StringBuilder() + var varNumber = number + if (varNumber < 0) { + varNumber = -varNumber + if (places != 0 || varNumber >= 0.5) { + result.append("moins ") + } + } + + val realPlaces = Utils.decimalPlacesNoFinalZeros(varNumber, places) + val numberIsWhole = realPlaces == 0 + val realOrdinal = ordinal && numberIsWhole + val numberLong = varNumber.toLong() + (if (varNumber % 1 >= 0.5 && numberIsWhole) 1 else 0) + + if (realOrdinal && ORDINAL_NAMES.containsKey(numberLong)) { + result.append(ORDINAL_NAMES[numberLong]) + } else if (!realOrdinal && NUMBER_NAMES.containsKey(numberLong)) { + result.append(if (numberLong >= 1000000L) "un " + NUMBER_NAMES[numberLong] else NUMBER_NAMES[numberLong]) // "million", "milliard", ... are nouns and need "un"; "cent" and "mille" do not + } else { + val groups = Utils.splitByModulus(numberLong, 1000) + val groupNames: MutableList = ArrayList() + + for (i in groups.indices) { + val z = groups[i] + if (z == 0L) continue + + var groupName = subThousand(z) + + when { + i == 1 -> { + // thousands: "mille" is invariable in French, and "cents"/"vingts" drop their plural -s before it ("deux cent mille", "quatre-vingt mille") + groupName = if (z == 1L) "mille" else (if (groupName.endsWith("cents") || groupName.endsWith("vingts")) groupName.dropLast(1) else groupName) + " mille" + } + i != 0 -> { + val magnitude = Utils.longPow(1000, i) + val magnitudeName = NUMBER_NAMES[magnitude]!! 
+ // million/milliard/... are nouns: they take the plural -s whenever the count is > 1, even when followed by another number ("deux millions cinq cent mille") + val suffix = if (z > 1) "s" else "" + groupName = if (z == 1L) "un $magnitudeName$suffix" + else "$groupName $magnitudeName$suffix" + } + } + + groupNames.add(groupName) + } + + if (groupNames.isEmpty()) { + result.append("zéro") + } else { + // Groups are in ascending order of magnitude, reverse for output + for (i in groupNames.indices.reversed()) { + if (result.isNotEmpty() && !result.endsWith(' ')) result.append(" ") // the "moins " prefix already ends with a space + result.append(groupNames[i]) + } + } + + if (ordinal && numberIsWhole) { + return buildOrdinal(numberLong, result.toString()) + } + } + + if (realPlaces > 0) { + if (varNumber < 1.0 && (result.isEmpty() || "moins ".contentEquals(result))) { + result.append("zéro") + } + result.append(" virgule") + + val fractionalPart = String.format(Locale.ENGLISH, "%." + realPlaces + "f", varNumber % 1) // fixed locale: the digit lookup below needs ASCII digits and a one-character separator + for (i in 2 until fractionalPart.length) { + result.append(" ") + result.append(NUMBER_NAMES[(fractionalPart[i].code - '0'.code).toLong()]) + } + } + + return result.toString().trim() + } + + override fun niceTime( + time: LocalTime, + speech: Boolean, + use24Hour: Boolean, + showAmPm: Boolean + ): String { + if (speech) { + val result = StringBuilder() + + when (time.hour) { + 0 -> result.append("minuit") + 12 -> result.append("midi") + else -> { + val hour = if (use24Hour) time.hour else time.hour % 12 + result.append(if (hour == 1) "une" else pronounceNumberDuration(hour.toLong())) // "heure" is feminine: "une heure" + result.append(if (hour <= 1) " heure" else " heures") + } + } + + when (time.minute) { + 0 -> { /* nothing */ } + 15 -> result.append(" et quart") + 30 -> result.append(" et demie") + 45 -> { + // e.g. 
"moins le quart" relative to next hour + val nextHour = (time.hour + 1) % 24 + result.clear() + result.append( + when (nextHour) { + 0 -> "minuit" + 12 -> "midi" + else -> { + val h = if (use24Hour) nextHour else nextHour % 12 + (if (h == 1) "une" else pronounceNumberDuration(h.toLong())) + + (if (h <= 1) " heure" else " heures") + } + } + ) + result.append(" moins le quart") + } + else -> { + result.append(" ") + if (time.minute < 10) result.append("zéro ") + result.append(pronounceNumberDuration(time.minute.toLong())) + } + } + + if (!use24Hour && showAmPm && (if (time.minute == 45) (time.hour + 1) % 24 else time.hour) % 12 != 0) { // gate on the hour actually spoken (the next one for ":45"): no suffix after "midi"/"minuit" + result.append( + when { + time.hour >= 21 -> " du soir" + time.hour >= 17 -> " de l'après-midi" + time.hour >= 12 -> " de l'après-midi" + time.hour >= 4 -> " du matin" + else -> " du matin" + } + ) + } + + return result.toString() + } else { + if (use24Hour) { + return time.format(DateTimeFormatter.ofPattern("HH:mm", Locale.FRENCH)) + } else { + val result = time.format( + DateTimeFormatter.ofPattern( + if (showAmPm) "K:mm a" else "K:mm", Locale.ENGLISH + ) + ) + return if (result.startsWith("0:")) "12:" + result.substring(2) else result + } + } + } + + override fun pronounceNumberDuration(number: Long): String { + if (number == 1L) return "un" + return super.pronounceNumberDuration(number) + } + + /** + * Converts n (0-99) to French words. + */ + private fun subHundred(n: Long): String = when { + NUMBER_NAMES.containsKey(n) -> NUMBER_NAMES[n]!! + n < 70 -> { + val tens = n / 10 + val unit = n % 10 + when { + unit == 1L -> NUMBER_NAMES[tens * 10]!! + " et un" + else -> NUMBER_NAMES[tens * 10]!! + "-" + NUMBER_NAMES[unit]!! + } + } + n < 80 -> { + // 70-79: soixante + (dix..dix-neuf) + val addon = n - 60 // 10..19 + if (addon == 11L) "soixante et onze" + else "soixante-" + NUMBER_NAMES[addon]!! + } + n == 80L -> "quatre-vingts" + n < 90 -> "quatre-vingt-" + NUMBER_NAMES[n - 80]!! 
+ else -> { + // 90-99: quatre-vingt + (dix..dix-neuf) + val addon = n - 80 // 10..19 + "quatre-vingt-" + NUMBER_NAMES[addon]!! + } + } + + /** + * Converts n (0-999) to French words. + */ + private fun subThousand(n: Long): String { + val builder = StringBuilder() + if (n >= 100) { + val hundred = n / 100 + val rest = n % 100 + if (hundred == 1L) { + builder.append("cent") + } else { + builder.append(subHundred(hundred)) + builder.append(" cent") + if (rest == 0L) builder.append("s") // "deux cents" but "deux cent un" + } + if (rest > 0) { + builder.append(" ") + builder.append(subHundred(rest)) + } + } else { + builder.append(subHundred(n)) + } + return builder.toString() + } + + /** + * Converts a cardinal number string to ordinal by appending "-ième". + * Handles French-specific rules: strip the plural "s" (quatre-vingts, deux cents, deux millions) + * but never the final "s" of "trois"; add "u" after "cinq", change "f" to "v" for "neuf". + */ + private fun buildOrdinal(numberLong: Long, cardinal: String): String { + if (numberLong == 1L) return "premier" + // Strip a trailing plural "s" (e.g. 
"quatre-vingts" → "quatre-vingt"), but keep the "s" of "trois", which belongs to the word ("vingt-troisième") + val s = if (cardinal.endsWith("s") && !cardinal.endsWith("trois")) cardinal.dropLast(1) else cardinal + return when { + s.endsWith("cinq") -> s + "uième" + s.endsWith("neuf") -> s.dropLast(1) + "vième" + s.endsWith("e") -> s.dropLast(1) + "ième" + else -> s + "ième" + } + } + + companion object { + private val NUMBER_NAMES: Map = mapOf( + 0L to "zéro", + 1L to "un", + 2L to "deux", + 3L to "trois", + 4L to "quatre", + 5L to "cinq", + 6L to "six", + 7L to "sept", + 8L to "huit", + 9L to "neuf", + 10L to "dix", + 11L to "onze", + 12L to "douze", + 13L to "treize", + 14L to "quatorze", + 15L to "quinze", + 16L to "seize", + 17L to "dix-sept", + 18L to "dix-huit", + 19L to "dix-neuf", + 20L to "vingt", + 30L to "trente", + 40L to "quarante", + 50L to "cinquante", + 60L to "soixante", + 100L to "cent", + 1000L to "mille", + 1000000L to "million", + 1000000000L to "milliard", + 1000000000000L to "billion", + 1000000000000000L to "billiard", + 1000000000000000000L to "trillion", + ) + + private val ORDINAL_NAMES: Map = mapOf( + 1L to "premier", + 2L to "deuxième", + 3L to "troisième", + 4L to "quatrième", + 5L to "cinquième", + 6L to "sixième", + 7L to "septième", + 8L to "huitième", + 9L to "neuvième", + 10L to "dixième", + 11L to "onzième", + 12L to "douzième", + 13L to "treizième", + 14L to "quatorzième", + 15L to "quinzième", + 16L to "seizième", + 17L to "dix-septième", + 18L to "dix-huitième", + 19L to "dix-neuvième", + 1000L to "millième", + 1000000L to "millionième", + 1000000000L to "milliardième", + ) + } +} diff --git a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt new file mode 100644 index 00000000..aba634b9 --- /dev/null +++ b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt @@ -0,0 +1,408 @@ +package org.dicio.numbers.lang.fr + +import org.dicio.numbers.parser.lexer.TokenStream +import org.dicio.numbers.unit.Number +import 
org.dicio.numbers.util.NumberExtractorUtils + +class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { + fun numberPreferOrdinal(): Number? { + val number = numberSuffixMultiplier() + ?: numberSignPoint(true) + + return if (number == null) { + null + } else { + divideByDenominatorIfPossible(number) + } + } + + fun numberPreferFraction(): Number? { + val number = numberSuffixMultiplier() + ?: numberSignPoint(false) + + return if (number == null) { + numberSignPoint(true) + } else { + divideByDenominatorIfPossible(number) + } + } + + fun numberNoOrdinal(): Number? { + val number = numberSuffixMultiplier() + ?: numberSignPoint(false) + + return if (number == null) { + null + } else { + divideByDenominatorIfPossible(number) + } + } + + fun numberMustBeInteger(): Number? { + val number = numberSuffixMultiplierInteger() + ?: numberSignInteger(true) + + return if (number == null) { + null + } else { + val multiplier = numberSuffixMultiplierInteger() + if (multiplier == null) { + number + } else { + number.multiply(multiplier) + } + } + } + + fun divideByDenominatorIfPossible(numberToEdit: Number): Number { + if (!numberToEdit.isOrdinal && !numberToEdit.isDecimal + && !ts[0].hasCategory("ignore") + ) { + val originalPosition = ts.position + val denominator = numberInteger(true) + if (denominator == null) { + if (ts[0].hasCategory("suffix_multiplier")) { + ts.movePositionForwardBy(1) + val multiplier = ts[-1].number + if (multiplier?.isDecimal == true && + (1 / multiplier.decimalValue()).toLong().toDouble() + == (1 / multiplier.decimalValue()) + ) { + return numberToEdit.divide((1 / multiplier.decimalValue()).toLong()) + } + return numberToEdit.multiply(multiplier) + } + } else if (denominator.isOrdinal && denominator.moreThan(2)) { + return numberToEdit.divide(denominator) + } else { + ts.position = originalPosition + } + } + return numberToEdit + } + + fun numberSuffixMultiplier(): Number? 
{ + return if (ts[0].hasCategory("suffix_multiplier")) { + ts.movePositionForwardBy(1) + ts[-1].number + } else { + null + } + } + + fun numberSuffixMultiplierInteger(): Number? { + return if (ts[0].hasCategory("suffix_multiplier") && ts[0].number!!.isInteger) { + ts.movePositionForwardBy(1) + ts[-1].number + } else { + null + } + } + + fun numberSignPoint(allowOrdinal: Boolean): Number? { + return NumberExtractorUtils.signBeforeNumber(ts) { numberPoint(allowOrdinal) } + } + + fun numberSignInteger(allowOrdinal: Boolean): Number? { + return NumberExtractorUtils.signBeforeNumber(ts) { numberInteger(allowOrdinal) } + } + + fun numberPoint(allowOrdinal: Boolean): Number? { + var n = numberInteger(allowOrdinal).let { + if (it == null || it.isOrdinal) { + return@numberPoint it + } + it + } + + if (ts[0].hasCategory("point")) { + if (!ts[1].hasCategory("digit_after_point") + && (!NumberExtractorUtils.isRawNumber(ts[1]) || ts[2].hasCategory("ordinal_suffix")) + ) { + return n + } + ts.movePositionForwardBy(1) + + var magnitude = 0.1 + if (ts[0].value.length > 1 && NumberExtractorUtils.isRawNumber(ts[0])) { + for (i in 0 until ts[0].value.length) { + n = n.plus((ts[0].value[i].code - '0'.code) * magnitude) + magnitude /= 10.0 + } + ts.movePositionForwardBy(1) + } else { + while (true) { + if (ts[0].hasCategory("digit_after_point") + || (ts[0].value.length == 1 && NumberExtractorUtils.isRawNumber(ts[0]) + && !ts[1].hasCategory("ordinal_suffix")) + ) { + n = n.plus(ts[0].number!!.multiply(magnitude)) + magnitude /= 10.0 + } else { + break + } + ts.movePositionForwardBy(1) + } + } + } else if (ts[0].hasCategory("fraction_separator")) { + var separatorLength = 1 + if (ts[1].hasCategory("fraction_separator_secondary")) { + separatorLength = 2 + } + ts.movePositionForwardBy(separatorLength) + val denominator = numberInteger(false) + if (denominator == null) { + ts.movePositionForwardBy(-separatorLength) + } else { + return n.divide(denominator) + } + } + + return n + } + + fun 
numberInteger(allowOrdinal: Boolean): Number? { + var n = NumberExtractorUtils.numberMadeOfGroups(ts) { ts, lastMultiplier -> + numberGroupFr(ts, allowOrdinal, lastMultiplier) + } + if (n == null) { + return NumberExtractorUtils.numberBigRaw(ts, allowOrdinal) + } else if (n.isOrdinal) { + return n + } + + if (n.lessThan(1000)) { + if (NumberExtractorUtils.isRawNumber(ts[-1]) + && ts[0].hasCategory("thousand_separator") + && ts[1].value.length == 3 + && NumberExtractorUtils.isRawNumber(ts[1]) + ) { + val originalPosition = ts.position - 1 + while (ts[0].hasCategory("thousand_separator") + && ts[1].value.length == 3 + && NumberExtractorUtils.isRawNumber(ts[1]) + ) { + n = n!!.multiply(1000).plus(ts[1].number) + ts.movePositionForwardBy(2) + } + if (ts[0].hasCategory("ordinal_suffix")) { + if (allowOrdinal) { + ts.movePositionForwardBy(1) + return n!!.withOrdinal(true) + } else { + ts.position = originalPosition + return null + } + } + } + } + + return n + } + + /** + * Custom group parsing for French, handling "soixante + teen" = 70-79 + * and "quatre + vingt(s)" = 80-99 for non-hyphenated input. + */ + private fun numberGroupFr( + ts: TokenStream, + allowOrdinal: Boolean, + lastMultiplier: Double + ): Number? 
{ + if (lastMultiplier < 1000) return null + + val originalPosition = ts.position + val groupValue = numberLessThan1000Fr(allowOrdinal) + if (groupValue != null && groupValue.isOrdinal) { + return groupValue + } + + val nextNotIgnore = if (groupValue == null) 0 + else ts.indexOfWithoutCategory("ignore", 0) + val ordinal = ts[nextNotIgnore].hasCategory("ordinal") + if (ts[nextNotIgnore].hasCategory("multiplier") && (allowOrdinal || !ordinal)) { + val multiplier = ts[nextNotIgnore].number + if (multiplier!!.lessThan(lastMultiplier)) { + ts.movePositionForwardBy(nextNotIgnore + 1) + return if (groupValue == null) { + multiplier.withOrdinal(ordinal) + } else { + multiplier.multiply(groupValue).withOrdinal(ordinal) + } + } + } else { + return groupValue + } + + ts.position = originalPosition + return null + } + + /** + * Parses a number < 1000 with French-specific handling: + * - "soixante" (60) + teen (10-19) = 70-79 + * - "quatre" (4) + "vingt/vingts" (20) = 80, optionally + digit/teen = 81-99 + */ + private fun numberLessThan1000Fr(allowOrdinal: Boolean): Number? 
{ + var hundred: Long = -1 + var ten: Long = -1 + var digit: Long = -1 + var ordinal = false + var firstIteration = true + + while (true) { + val nextNotIgnore = if (firstIteration) { + firstIteration = false + 0 + } else { + ts.indexOfWithoutCategory("ignore", 0) + } + + if (!allowOrdinal && ts[nextNotIgnore].hasCategory("ordinal")) break + + when { + ts[nextNotIgnore].hasCategory("digit") -> { + val digitValue = ts[nextNotIgnore].number!!.integerValue() + + // French special: "quatre" (4) + "vingt(s)" (20) = 80 + if (digitValue == 4L && ten < 0 && digit < 0 && hundred < 0) { + val savedPos = ts.position + ts.movePositionForwardBy(nextNotIgnore + 1) + val nextIdx2 = ts.indexOfWithoutCategory("ignore", 0) + if (ts[nextIdx2].hasCategory("tens") + && ts[nextIdx2].number!!.integerValue() == 20L + ) { + ts.movePositionForwardBy(nextIdx2 + 1) + // Now check for additional digit or teen (81-99) + val nextIdx3 = ts.indexOfWithoutCategory("ignore", 0) + val addCat = when { + ts[nextIdx3].hasCategory("teen") -> "teen" + ts[nextIdx3].hasCategory("digit") + && !ts[nextIdx3].isNumberEqualTo(0) -> "digit" + else -> null + } + if (addCat != null) { + ts.movePositionForwardBy(nextIdx3 + 1) + return Number( + 80L + ts[-1].number!!.integerValue(), + ts[-1].hasCategory("ordinal") && allowOrdinal + ) + } + // Just 80 + ten = 80 + digit = 0 // block further digit + if (ts[-1].hasCategory("ordinal") && allowOrdinal) { + ordinal = true; break + } + continue + } + ts.position = savedPos + } + + if (digit < 0 && (!ts[nextNotIgnore].isNumberEqualTo(0) + || (ten < 0 && hundred < 0)) + ) { + digit = digitValue + } else break + } + + ts[nextNotIgnore].hasCategory("teen") -> { + if (ten < 0 && digit < 0) { + ten = ts[nextNotIgnore].number!!.integerValue() + digit = 0 + } else break + } + + ts[nextNotIgnore].hasCategory("tens") -> { + val tensValue = ts[nextNotIgnore].number!!.integerValue() + + // French special: "soixante" (60) + teen (10-19) = 70-79 + if (tensValue == 60L && ten < 0 && digit 
< 0) { + val savedPos = ts.position + ts.movePositionForwardBy(nextNotIgnore + 1) + val nextIdx2 = ts.indexOfWithoutCategory("ignore", 0) + if (ts[nextIdx2].hasCategory("teen")) { + val teenVal = ts[nextIdx2].number!!.integerValue() + if (teenVal >= 10) { + ts.movePositionForwardBy(nextIdx2 + 1) + ten = 60 + teenVal + digit = 0 + if (ts[-1].hasCategory("ordinal") && allowOrdinal) { + ordinal = true; break + } + continue + } + } + // Normal soixante = 60 + ts.position = savedPos + } + + if (ten < 0 && digit < 0) { + ten = tensValue + } else break + } + + ts[nextNotIgnore].hasCategory("hundred") -> { + if (hundred < 0 && ten < 0) { + if (digit < 0) { + hundred = 100 + } else if (digit == 0L) { + break + } else { + hundred = digit * 100 + digit = -1 + } + } else break + } + + NumberExtractorUtils.isRawNumber(ts[nextNotIgnore]) -> { + val rawNumber = ts[nextNotIgnore].number + if (rawNumber!!.isDecimal) break + + if (!allowOrdinal && ts[nextNotIgnore + 1].hasCategory("ordinal_suffix")) break + + when { + rawNumber.lessThan(10) -> { + if (digit < 0) digit = rawNumber.integerValue() + else break + } + rawNumber.lessThan(100) -> { + if (ten < 0 && digit < 0) { + ten = rawNumber.integerValue() + digit = 0 + } else break + } + rawNumber.lessThan(1000) -> { + if (hundred < 0 && ten < 0 && digit < 0) { + hundred = rawNumber.integerValue() + ten = 0; digit = 0 + } else break + } + else -> break + } + + ordinal = ts[nextNotIgnore + 1].hasCategory("ordinal_suffix") + if (ordinal) { + ts.movePositionForwardBy(nextNotIgnore + 2) + break + } + } + + else -> break + } + + ts.movePositionForwardBy(nextNotIgnore + 1) + if (ts[-1].hasCategory("ordinal")) { + ordinal = true; break + } + } + + return if (hundred < 0 && ten < 0 && digit < 0) null + else Number( + (if (hundred < 0) 0L else hundred) + + (if (ten < 0) 0L else ten) + + (if (digit < 0) 0L else digit), + ordinal + ) + } +} diff --git a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchParser.kt 
b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchParser.kt
new file mode 100644
index 00000000..ba476fcd
--- /dev/null
+++ b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchParser.kt
@@ -0,0 +1,41 @@
+package org.dicio.numbers.lang.fr
+
+import org.dicio.numbers.parser.Parser
+import org.dicio.numbers.parser.lexer.TokenStream
+import org.dicio.numbers.unit.Duration
+import org.dicio.numbers.unit.Number
+import org.dicio.numbers.util.DurationExtractorUtils
+import java.time.LocalDateTime
+
+/**
+ * [Parser] for French input, configured with the "config/fr-fr" resource folder.
+ *
+ * Each method builds the extractor(s) for the given token stream and hands back a reference to
+ * one of their extraction methods; that method only runs when the caller invokes the result.
+ */
+class FrenchParser : Parser("config/fr-fr") {
+    /**
+     * Selects which [FrenchNumberExtractor] method will read numbers from [tokenStream].
+     *
+     * [integerOnly] takes precedence over [preferOrdinal]; when neither is set, the
+     * fraction-preferring method is returned. [shortScale] is not used by this implementation.
+     */
+    override fun extractNumber(
+        tokenStream: TokenStream,
+        shortScale: Boolean,
+        preferOrdinal: Boolean,
+        integerOnly: Boolean
+    ): () -> Number? {
+        val extractor = FrenchNumberExtractor(tokenStream)
+        return if (integerOnly) {
+            extractor::numberMustBeInteger
+        } else if (preferOrdinal) {
+            extractor::numberPreferOrdinal
+        } else {
+            extractor::numberPreferFraction
+        }
+    }
+
+    /**
+     * Returns the duration extraction method of a [DurationExtractorUtils] that reads its
+     * numbers through [FrenchNumberExtractor.numberNoOrdinal].
+     * [shortScale] is not used by this implementation.
+     */
+    override fun extractDuration(
+        tokenStream: TokenStream,
+        shortScale: Boolean
+    ): () -> Duration? {
+        val numbers = FrenchNumberExtractor(tokenStream)
+        val durations = DurationExtractorUtils(tokenStream, numbers::numberNoOrdinal)
+        return durations::duration
+    }
+
+    /**
+     * Returns the date-time extraction method of a [FrenchDateTimeExtractor] built from
+     * [tokenStream] and the reference time [now].
+     * [shortScale] and [preferMonthBeforeDay] are not used by this implementation.
+     */
+    override fun extractDateTime(
+        tokenStream: TokenStream,
+        shortScale: Boolean,
+        preferMonthBeforeDay: Boolean,
+        now: LocalDateTime
+    ): () -> LocalDateTime? {
+        val dateTimes = FrenchDateTimeExtractor(tokenStream, now)
+        return dateTimes::dateTime
+    }
+}
diff --git a/numbers/src/main/resources/config/fr-fr/date_time.json b/numbers/src/main/resources/config/fr-fr/date_time.json
new file mode 100644
index 00000000..9fe1beb5
--- /dev/null
+++ b/numbers/src/main/resources/config/fr-fr/date_time.json
@@ -0,0 +1,176 @@
+{
+  "decade_format": {
+    "1": {"match": "^\\d$", "format": "{x}"},
+    "2": {"match": "^1[0-6]$", "format": "{xx}"},
+    "3": {"match": "^17$", "format": "dix-sept"},
+    "4": {"match": "^18$", "format": "dix-huit"},
+    "5": {"match": "^19$", "format": "dix-neuf"},
+    "6": {"match": "^[2-6]0$", "format": "{x0}"},
+    "7": {"match": "^[2-6]1$", "format": "{xx}"},
+    "8": {"match": "^[2-6][2-9]$", "format": "{x0}-{x}"},
+    "9": {"match": "^70$", "format": "soixante-dix"},
+    "10": {"match": "^71$", "format": "soixante et onze"},
+    "11": {"match": "^7[2-9]$", "format": "{xx}"},
+    "12": {"match": "^80$", "format": "quatre-vingts"},
+    "13": {"match": "^8[1-9]$", "format": "{xx}"},
+    "14": {"match": "^90$", "format": "quatre-vingt-dix"},
+    "15": {"match": "^9[1-9]$", "format": "{xx}"},
+    "default": "{number}"
+  },
+  "hundreds_format": {
+    "1": {"match": "^1\\d{2}$", "format": "cent"},
+    "2": {"match": "^\\d{3}$", "format": "{x_in_x00} cent"},
+    "default": "{number}"
+  },
+  "thousand_format": {
+    "1": {"match": "^1\\d{3}$", "format": "mille"},
+    "2": {"match": "^\\d{4}$", "format": "{x_in_x000} mille"},
+    "default": "{number}"
+  },
+  "year_format": {
+    "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"},
+    "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"},
+    "3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"},
+    "4": {"match": "^\\d000$", "format": "{formatted_thousand} {bc}"},
+    "5": {"match": "^1\\d00$", "format": "{formatted_thousand} {formatted_hundreds} {bc}"},
+    "6": {"match": "^[2-9]\\d00$", "format": "{formatted_thousand} {formatted_hundreds} {bc}"},
+    "7": {"match": "^10\\d{2}$",
"format": "{formatted_thousand} {formatted_decade} {bc}"}, + "8": {"match": "^1\\d{3}$", "format": "{formatted_thousand} {formatted_hundreds} {formatted_decade} {bc}"}, + "9": {"match": "^2\\d{3}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "10": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_hundreds} {formatted_decade} {bc}"}, + "default": "{number} {bc}", + "bc": "av. J.-C." + }, + "date_format": { + "date_full": "{weekday}, le {day} {month} {formatted_year}", + "date_full_no_year": "{weekday}, le {day} {month}", + "date_full_no_year_month": "{weekday}, le {day}", + "today": "aujourd'hui", + "tomorrow": "demain", + "yesterday": "hier" + }, + "date_time_format": { + "date_time": "{formatted_date} à {formatted_time}" + }, + "weekday": { + "0": "lundi", + "1": "mardi", + "2": "mercredi", + "3": "jeudi", + "4": "vendredi", + "5": "samedi", + "6": "dimanche" + }, + "date": { + "1": "premier", + "2": "deux", + "3": "trois", + "4": "quatre", + "5": "cinq", + "6": "six", + "7": "sept", + "8": "huit", + "9": "neuf", + "10": "dix", + "11": "onze", + "12": "douze", + "13": "treize", + "14": "quatorze", + "15": "quinze", + "16": "seize", + "17": "dix-sept", + "18": "dix-huit", + "19": "dix-neuf", + "20": "vingt", + "21": "vingt et un", + "22": "vingt-deux", + "23": "vingt-trois", + "24": "vingt-quatre", + "25": "vingt-cinq", + "26": "vingt-six", + "27": "vingt-sept", + "28": "vingt-huit", + "29": "vingt-neuf", + "30": "trente", + "31": "trente et un" + }, + "month": { + "1": "janvier", + "2": "février", + "3": "mars", + "4": "avril", + "5": "mai", + "6": "juin", + "7": "juillet", + "8": "août", + "9": "septembre", + "10": "octobre", + "11": "novembre", + "12": "décembre" + }, + "number": { + "0": "zéro", + "1": "un", + "2": "deux", + "3": "trois", + "4": "quatre", + "5": "cinq", + "6": "six", + "7": "sept", + "8": "huit", + "9": "neuf", + "10": "dix", + "11": "onze", + "12": "douze", + "13": "treize", + "14": "quatorze", + "15": 
"quinze", + "16": "seize", + "17": "dix-sept", + "18": "dix-huit", + "19": "dix-neuf", + "20": "vingt", + "21": "vingt et un", + "30": "trente", + "31": "trente et un", + "40": "quarante", + "41": "quarante et un", + "50": "cinquante", + "51": "cinquante et un", + "60": "soixante", + "61": "soixante et un", + "70": "soixante-dix", + "71": "soixante et onze", + "72": "soixante-douze", + "73": "soixante-treize", + "74": "soixante-quatorze", + "75": "soixante-quinze", + "76": "soixante-seize", + "77": "soixante-dix-sept", + "78": "soixante-dix-huit", + "79": "soixante-dix-neuf", + "80": "quatre-vingts", + "81": "quatre-vingt-un", + "82": "quatre-vingt-deux", + "83": "quatre-vingt-trois", + "84": "quatre-vingt-quatre", + "85": "quatre-vingt-cinq", + "86": "quatre-vingt-six", + "87": "quatre-vingt-sept", + "88": "quatre-vingt-huit", + "89": "quatre-vingt-neuf", + "90": "quatre-vingt-dix", + "91": "quatre-vingt-onze", + "92": "quatre-vingt-douze", + "93": "quatre-vingt-treize", + "94": "quatre-vingt-quatorze", + "95": "quatre-vingt-quinze", + "96": "quatre-vingt-seize", + "97": "quatre-vingt-dix-sept", + "98": "quatre-vingt-dix-huit", + "99": "quatre-vingt-dix-neuf", + "100": "cent", + "1000": "mille", + "2000": "deux mille" + } +} diff --git a/numbers/src/main/resources/config/fr-fr/day.word b/numbers/src/main/resources/config/fr-fr/day.word new file mode 100644 index 00000000..3e1393b8 --- /dev/null +++ b/numbers/src/main/resources/config/fr-fr/day.word @@ -0,0 +1 @@ +jour diff --git a/numbers/src/main/resources/config/fr-fr/days.word b/numbers/src/main/resources/config/fr-fr/days.word new file mode 100644 index 00000000..6a0300aa --- /dev/null +++ b/numbers/src/main/resources/config/fr-fr/days.word @@ -0,0 +1 @@ +jours diff --git a/numbers/src/main/resources/config/fr-fr/hour.word b/numbers/src/main/resources/config/fr-fr/hour.word new file mode 100644 index 00000000..cfa09b25 --- /dev/null +++ b/numbers/src/main/resources/config/fr-fr/hour.word @@ -0,0 +1 @@ +heure 
diff --git a/numbers/src/main/resources/config/fr-fr/hours.word b/numbers/src/main/resources/config/fr-fr/hours.word new file mode 100644 index 00000000..5afb41b1 --- /dev/null +++ b/numbers/src/main/resources/config/fr-fr/hours.word @@ -0,0 +1 @@ +heures diff --git a/numbers/src/main/resources/config/fr-fr/minute.word b/numbers/src/main/resources/config/fr-fr/minute.word new file mode 100644 index 00000000..cfcd96c9 --- /dev/null +++ b/numbers/src/main/resources/config/fr-fr/minute.word @@ -0,0 +1 @@ +minute diff --git a/numbers/src/main/resources/config/fr-fr/minutes.word b/numbers/src/main/resources/config/fr-fr/minutes.word new file mode 100644 index 00000000..5cf0e30b --- /dev/null +++ b/numbers/src/main/resources/config/fr-fr/minutes.word @@ -0,0 +1 @@ +minutes diff --git a/numbers/src/main/resources/config/fr-fr/second.word b/numbers/src/main/resources/config/fr-fr/second.word new file mode 100644 index 00000000..110f9689 --- /dev/null +++ b/numbers/src/main/resources/config/fr-fr/second.word @@ -0,0 +1 @@ +seconde diff --git a/numbers/src/main/resources/config/fr-fr/seconds.word b/numbers/src/main/resources/config/fr-fr/seconds.word new file mode 100644 index 00000000..7eac62af --- /dev/null +++ b/numbers/src/main/resources/config/fr-fr/seconds.word @@ -0,0 +1 @@ +secondes diff --git a/numbers/src/main/resources/config/fr-fr/tokenizer.json b/numbers/src/main/resources/config/fr-fr/tokenizer.json new file mode 100644 index 00000000..e045a6a5 --- /dev/null +++ b/numbers/src/main/resources/config/fr-fr/tokenizer.json @@ -0,0 +1,755 @@ +{ + "spaces": " \t\n\f\r:;_!?<>|=()[]{}»«*~^`'\"", + "characters_as_word": "%‰#-+.,/", + "raw_number_categories": [ + "number", + "raw" + ], + "plural_endings": [ + "s" + ], + "word_matches": [ + { + "categories": [ + "ignore", + "date_time_ignore" + ], + "values": [ + "et" + ] + }, + { + "categories": [ + "ignore", + "date_time_ignore", + "thousand_separator" + ], + "values": [ + "." 
+ ] + }, + { + "categories": [ + "point", + "ignore", + "date_time_ignore" + ], + "values": [ + "," + ] + }, + { + "categories": [ + "point" + ], + "values": [ + "virgule", + "point" + ] + }, + { + "categories": [ + "fraction_separator" + ], + "values": [ + "sur", + "divisé" + ] + }, + { + "categories": [ + "fraction_separator", + "date_time_ignore" + ], + "values": [ + "/" + ] + }, + { + "categories": [ + "sign", + "positive" + ], + "values": [ + "positif", + "plus", + "+" + ] + }, + { + "categories": [ + "sign", + "negative" + ], + "values": [ + "négatif" + ] + }, + { + "categories": [ + "sign", + "negative", + "special_minute_before", + "date_time_ignore" + ], + "values": [ + "moins" + ] + }, + { + "categories": [ + "ignore", + "date_time_ignore", + "sign", + "negative" + ], + "values": [ + "-" + ] + }, + { + "categories": [ + "duration_separator", + "date_time_ignore" + ], + "values": [ + "de" + ] + }, + { + "categories": [ + "date_time_ignore" + ], + "values": [ + "le", + "la", + "les", + "l", + "du", + "des", + "au", + "aux" + ] + }, + { + "categories": [ + "date_time_ignore", + "pre_hour", + "pre_special_hour" + ], + "values": [ + "à" + ] + }, + { + "categories": [ + "pre_hour" + ], + "values": [ + "heure", + "heures" + ] + }, + { + "categories": [ + "date_time_ignore", + "pre_special_hour" + ], + "values": [ + "ce", + "cet", + "cette", + "ces" + ] + }, + { + "categories": [ + "yesterday" + ], + "values": [ + "hier" + ] + }, + { + "categories": [ + "today" + ], + "values": [ + "aujourd'hui", + "aujourd", + "hui" + ] + }, + { + "categories": [ + "tomorrow" + ], + "values": [ + "demain" + ] + }, + { + "categories": [ + "pre_relative_indicator", + "post_relative_indicator", + "positive" + ], + "values": [ + "prochain", + "prochaine", + "prochains", + "prochaines", + "suivant", + "suivante", + "suivants", + "suivantes" + ] + }, + { + "categories": [ + "pre_relative_indicator", + "positive" + ], + "values": [ + "dans", + "en" + ] + }, + { + "categories": [ + 
"pre_relative_indicator", + "post_relative_indicator", + "positive" + ], + "values": [ + "après", + "apres" + ] + }, + { + "categories": [ + "post_relative_indicator", + "negative" + ], + "values": [ + "il y a", + "dernier", + "dernière", + "derniere" + ] + }, + { + "categories": [ + "pre_relative_indicator", + "post_relative_indicator", + "negative" + ], + "values": [ + "précédent", + "precedent", + "précédente", + "precedente", + "avant" + ] + }, + { + "categories": [ + "bcad_before_combined" + ], + "values": [ + "av", + "av.", + "av.j.-c.", + "av.j.c.", + "avjc" + ] + }, + { + "categories": [ + "bcad_after_combined" + ], + "values": [ + "ap", + "ap.", + "ap.j.-c.", + "ap.j.c.", + "apjc", + "ad", + "j.-c.", + "j.c." + ] + }, + { + "categories": [ + "bcad_identifier" + ], + "values": [ + "christ", + "jésus", + "jesus" + ] + }, + { + "categories": [ + "ampm_before_combined" + ], + "values": [ + "am" + ] + }, + { + "categories": [ + "ampm_after_combined" + ], + "values": [ + "pm" + ] + }, + { + "categories": [ + "special_minute_after", + "post_relative_indicator", + "negative" + ], + "values": [ + "passé", + "passe" + ] + }, + { + "categories": [ + "oclock_combined" + ], + "values": [ + "pile", + "exact", + "précis" + ] + } + ], + "number_mappings": [ + { + "categories": [ + "number", + "digit", + "digit_after_point" + ], + "values": { + "zéro": 0, + "zero": 0, + "un": 1, + "une": 1, + "deux": 2, + "trois": 3, + "quatre": 4, + "cinq": 5, + "six": 6, + "sept": 7, + "huit": 8, + "neuf": 9 + } + }, + { + "categories": [ + "number", + "teen" + ], + "values": { + "dix": 10, + "onze": 11, + "douze": 12, + "treize": 13, + "quatorze": 14, + "quinze": 15, + "seize": 16, + "dix-sept": 17, + "dix-huit": 18, + "dix-neuf": 19, + "soixante-dix": 70, + "soixante-et-onze": 71, + "soixante-onze": 71, + "soixante-douze": 72, + "soixante-treize": 73, + "soixante-quatorze": 74, + "soixante-quinze": 75, + "soixante-seize": 76, + "soixante-dix-sept": 77, + "soixante-dix-huit": 78, + 
"soixante-dix-neuf": 79, + "quatre-vingt-onze": 91, + "quatre-vingt-douze": 92, + "quatre-vingt-treize": 93, + "quatre-vingt-quatorze": 94, + "quatre-vingt-quinze": 95, + "quatre-vingt-seize": 96, + "quatre-vingt-dix-sept": 97, + "quatre-vingt-dix-huit": 98, + "quatre-vingt-dix-neuf": 99, + "quatre-vingt-dix": 90 + } + }, + { + "categories": [ + "number", + "tens" + ], + "values": { + "vingt": 20, + "trente": 30, + "quarante": 40, + "cinquante": 50, + "soixante": 60, + "quatre-vingts": 80, + "quatre-vingt": 80, + "quatre-vingt-un": 81, + "quatre-vingt-une": 81, + "quatre-vingt-deux": 82, + "quatre-vingt-trois": 83, + "quatre-vingt-quatre": 84, + "quatre-vingt-cinq": 85, + "quatre-vingt-six": 86, + "quatre-vingt-sept": 87, + "quatre-vingt-huit": 88, + "quatre-vingt-neuf": 89 + } + }, + { + "categories": [ + "number", + "hundred" + ], + "values": { + "cent": 100, + "cents": 100 + } + }, + { + "categories": [ + "number", + "multiplier" + ], + "values": { + "mille": 1000, + "million": 1000000, + "millions": 1000000, + "milliard": 1000000000, + "milliards": 1000000000, + "billion": 1000000000000, + "billions": 1000000000000, + "billiard": 1000000000000000, + "billiards": 1000000000000000, + "trillion": 1000000000000000000, + "trillions": 1000000000000000000 + } + }, + { + "categories": [ + "number", + "ordinal", + "digit" + ], + "values": { + "premier": 1, + "première": 1, + "premiere": 1, + "deuxième": 2, + "deuxieme": 2, + "second": 2, + "seconde": 2, + "troisième": 3, + "troisieme": 3, + "quatrième": 4, + "quatrieme": 4, + "cinquième": 5, + "cinquieme": 5, + "sixième": 6, + "sixieme": 6, + "septième": 7, + "septieme": 7, + "huitième": 8, + "huitieme": 8, + "neuvième": 9, + "neuvieme": 9 + } + }, + { + "categories": [ + "number", + "ordinal", + "teen" + ], + "values": { + "dixième": 10, + "dixieme": 10, + "onzième": 11, + "onzieme": 11, + "douzième": 12, + "douzieme": 12, + "treizième": 13, + "treizieme": 13, + "quatorzième": 14, + "quatorzieme": 14, + "quinzième": 
15, + "quinzieme": 15, + "seizième": 16, + "seizieme": 16, + "dix-septième": 17, + "dix-septieme": 17, + "dix-huitième": 18, + "dix-huitieme": 18, + "dix-neuvième": 19, + "dix-neuvieme": 19 + } + }, + { + "categories": [ + "number", + "ordinal", + "tens" + ], + "values": { + "vingtième": 20, + "vingtieme": 20, + "trentième": 30, + "trentieme": 30, + "quarantième": 40, + "quarantieme": 40, + "cinquantième": 50, + "cinquantieme": 50, + "soixantième": 60, + "soixantieme": 60, + "soixante-dixième": 70, + "soixante-dixieme": 70, + "quatre-vingtième": 80, + "quatre-vingtieme": 80, + "quatre-vingt-dixième": 90, + "quatre-vingt-dixieme": 90 + } + }, + { + "categories": [ + "number", + "ordinal", + "hundred" + ], + "values": { + "centième": 100, + "centieme": 100 + } + }, + { + "categories": [ + "number", + "ordinal", + "multiplier" + ], + "values": { + "millième": 1000, + "millieme": 1000, + "millionième": 1000000, + "millionieme": 1000000, + "milliardième": 1000000000, + "milliardieme": 1000000000 + } + }, + { + "categories": [ + "number", + "suffix_multiplier" + ], + "values": { + "demi": 0.5, + "demie": 0.5, + "moitié": 0.5, + "moitie": 0.5, + "quart": 0.25, + "paire": 2, + "paires": 2, + "couple": 2, + "couples": 2, + "douzaine": 12, + "douzaines": 12, + "pourcent": 0.01, + "%": 0.01, + "‰": 0.001 + } + }, + { + "categories": [ + "month_name" + ], + "values": { + "janvier": 1, + "jan": 1, + "février": 2, + "fevrier": 2, + "fév": 2, + "fev": 2, + "mars": 3, + "avril": 4, + "avr": 4, + "mai": 5, + "juin": 6, + "jun": 6, + "juillet": 7, + "jul": 7, + "août": 8, + "aout": 8, + "aoû": 8, + "septembre": 9, + "sep": 9, + "octobre": 10, + "oct": 10, + "novembre": 11, + "nov": 11, + "décembre": 12, + "decembre": 12, + "déc": 12, + "dec": 12 + } + }, + { + "categories": [ + "day_of_week" + ], + "values": { + "lundi": 0, + "lun": 0, + "mardi": 1, + "mar": 1, + "mercredi": 2, + "mer": 2, + "jeudi": 3, + "jeu": 3, + "vendredi": 4, + "ven": 4, + "samedi": 5, + "sam": 5, + 
"dimanche": 6, + "dim": 6 + } + }, + { + "categories": [ + "noon_midnight_like", + "moment_of_day" + ], + "values": { + "midi": 12, + "minuit": 0 + } + }, + { + "categories": [ + "moment_of_day" + ], + "values": { + "nuit": 3, + "nuits": 3, + "aube": 6, + "aurore": 6, + "matin": 9, + "matins": 9, + "matinée": 9, + "matinee": 9, + "déjeuner": 12, + "dejeuner": 12, + "après-midi": 15, + "apres-midi": 15, + "coucher": 18, + "crépuscule": 18, + "crepuscule": 18, + "dîner": 20, + "diner": 20, + "soirée": 21, + "soiree": 21, + "soir": 21, + "soirs": 21, + "ce soir": 23, + "nuitée": 23, + "nuitee": 23 + } + } + ], + "duration_words": { + "1 NANOS": [ + "nanoseconde", + "nanosecondes", + "ns" + ], + "1 MICROS": [ + "microseconde", + "microsecondes", + "μs" + ], + "1 MILLIS": [ + "milliseconde", + "millisecondes", + "ms" + ], + "1 SECONDS": [ + "seconde", + "secondes", + "s", + "sec" + ], + "1 MINUTES": [ + "minute", + "minutes", + "m", + "min" + ], + "1 HOURS": [ + "heure", + "heures", + "h" + ], + "1 DAYS": [ + "jour", + "jours", + "j" + ], + "1 WEEKS": [ + "semaine", + "semaines" + ], + "1 MONTHS": [ + "mois" + ], + "1 YEARS": [ + "an", + "ans", + "année", + "années", + "annee", + "annees" + ], + "1 DECADES": [ + "décennie", + "decennie", + "décennies", + "decennies" + ], + "1 CENTURIES": [ + "siècle", + "siecle", + "siècles", + "siecles" + ], + "1 MILLENNIA": [ + "millénaire", + "millenaire", + "millénaires", + "millenaires" + ] + }, + "duration_restrict_after_number": [ + "ns", + "μs", + "ms", + "s", + "m", + "h", + "j" + ] +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/DateTimeConfigTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/DateTimeConfigTest.java new file mode 100644 index 00000000..eaaf9ac0 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/DateTimeConfigTest.java @@ -0,0 +1,10 @@ +package org.dicio.numbers.lang.fr; + +import org.dicio.numbers.test.DateTimeConfigTestBase; + +public class DateTimeConfigTest extends 
DateTimeConfigTestBase {
+    // Points the inherited DateTimeConfigTestBase at the French (fr-fr) configuration folder.
+    @Override
+    public String configFolder() {
+        return "config/fr-fr";
+    }
+}
diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/DateTimeExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/DateTimeExtractorUtilsTest.java
new file mode 100644
index 00000000..93659e62
--- /dev/null
+++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/DateTimeExtractorUtilsTest.java
@@ -0,0 +1,55 @@
+package org.dicio.numbers.lang.fr;
+
+import static org.dicio.numbers.test.TestUtils.t;
+import static org.dicio.numbers.util.NumberExtractorUtils.signBeforeNumber;
+import static java.time.temporal.ChronoUnit.MONTHS;
+
+import org.dicio.numbers.parser.lexer.TokenStream;
+import org.dicio.numbers.test.DateTimeExtractorUtilsTestBase;
+import org.dicio.numbers.util.DateTimeExtractorUtils;
+import org.dicio.numbers.util.NumberExtractorUtils;
+import org.junit.Test;
+
+import java.time.LocalDateTime;
+
+/**
+ * Feeds French phrases to the relative month / day-of-week assertions inherited from
+ * {@link DateTimeExtractorUtilsTestBase}, using a fixed reference time.
+ */
+public class DateTimeExtractorUtilsTest extends DateTimeExtractorUtilsTestBase {
+
+    // Tuesday the 10th of May, 2022, 19:38:36
+    private static final LocalDateTime NOW = LocalDateTime.of(2022, 5, 10, 19, 38, 36, 295834726);
+
+    /** Returns the French (fr-fr) configuration folder. */
+    @Override
+    public String configFolder() {
+        return "config/fr-fr";
+    }
+
+    /**
+     * Builds the utils under test on top of {@code ts} with {@link #NOW} as reference time.
+     * Integers are read with {@code FrenchNumberExtractor.numberInteger(false)} (ordinals not
+     * allowed), wrapped in {@code signBeforeNumber} and passed through
+     * {@code extractOneIntegerInRange} with the requested inclusive bounds.
+     */
+    @Override
+    public DateTimeExtractorUtils build(final TokenStream ts) {
+        final FrenchNumberExtractor numberExtractor = new FrenchNumberExtractor(ts);
+        return new DateTimeExtractorUtils(ts, NOW, (fromInclusive, toInclusive) ->
+            NumberExtractorUtils.extractOneIntegerInRange(ts, fromInclusive, toInclusive,
+                () -> signBeforeNumber(ts, () -> numberExtractor.numberInteger(false)))
+        );
+    }
+
+    // NOW is in May: September is 4 months ahead, the following April 11, the previous April 1
+    // month back, the following January 8 months ahead.
+    // The trailing 2 is presumably the expected final token position -- confirm in the base class.
+    @Test
+    public void testRelativeMonthDuration() {
+        assertRelativeMonthDuration("septembre prochain", t(4, MONTHS), 2);
+        assertRelativeMonthDuration("avril prochain", t(11, MONTHS), 2);
+        assertRelativeMonthDuration("avril dernier", t(-1, MONTHS), 2);
+        assertRelativeMonthDuration("en janvier", t(8, MONTHS), 2);
+    }
+
+    // Inputs without both a month name and a relative indicator ("octobre" alone, a plain
+    // duration, unrelated words).
+    @Test
+    public void testRelativeMonthDurationNull() {
+        assertRelativeMonthDurationNull("bonjour comment va");
+        assertRelativeMonthDurationNull("octobre");
+        assertRelativeMonthDurationNull("dans deux mois");
+    }
+
+    // NOTE(review): NOW is a Tuesday, so plain day offsets would be +2 for the next Thursday,
+    // -5 for the last Thursday and +6 for the next Monday; the expected values below (3, -4, -6)
+    // differ from that -- confirm them against the semantics of the base-class helper.
+    @Test
+    public void testRelativeDayOfWeekDuration() {
+        assertRelativeDayOfWeekDuration("jeudi prochain", 3, 2);
+        assertRelativeDayOfWeekDuration("jeudi dernier", -4, 2);
+        assertRelativeDayOfWeekDuration("lundi prochain", -6, 2);
+    }
+}
diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/DateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/DateTimeTest.java
new file mode 100644
index 00000000..4deec17e
--- /dev/null
+++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/DateTimeTest.java
@@ -0,0 +1,36 @@
+package org.dicio.numbers.lang.fr;
+
+import org.dicio.numbers.formatter.Formatter;
+import org.dicio.numbers.test.DateTimeTestBase;
+import org.junit.Test;
+
+import java.time.LocalDate;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Runs {@link DateTimeTestBase} with the French configuration and formatter, plus a few
+ * explicit expectations for {@code niceDate}.
+ */
+public class DateTimeTest extends DateTimeTestBase {
+
+    /** Returns the French (fr-fr) configuration folder. */
+    @Override
+    public String configFolder() {
+        return "config/fr-fr";
+    }
+
+    /** Supplies the formatter under test. */
+    @Override
+    public Formatter buildNumberFormatter() {
+        return new FrenchFormatter();
+    }
+
+    // pf is not declared in this class; presumably it is provided by DateTimeTestBase.
+    @Test
+    public void testNiceDate() {
+        // no reference date given: the year is spelled out
+        assertEquals("mercredi, le vingt-huit avril deux mille vingt et un",
+                pf.niceDate(LocalDate.of(2021, 4, 28)).get());
+        // reference date in the same year: the expected text carries no year
+        assertEquals("dimanche, le treize août",
+                pf.niceDate(LocalDate.of(-84, 8, 13)).now(LocalDate.of(-84, 8, 23)).get());
+        // one day before / same day as / one day after the reference date
+        assertEquals("hier",
+                pf.niceDate(LocalDate.of(2021, 4, 27)).now(LocalDate.of(2021, 4, 28)).get());
+        assertEquals("aujourd'hui",
+                pf.niceDate(LocalDate.of(2021, 4, 28)).now(LocalDate.of(2021, 4, 28)).get());
+        assertEquals("demain",
+                pf.niceDate(LocalDate.of(2021, 4, 29)).now(LocalDate.of(2021, 4, 28)).get());
+    }
+}
diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/DurationExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/DurationExtractorUtilsTest.java
new file mode 100644
index 00000000..381189cf
--- /dev/null
+++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/DurationExtractorUtilsTest.java
@@ -0,0 +1,58 @@
+package org.dicio.numbers.lang.fr;
+
+import static org.dicio.numbers.test.TestUtils.DAY;
+import static org.dicio.numbers.test.TestUtils.HOUR;
+import static org.dicio.numbers.test.TestUtils.MILLIS;
+import static org.dicio.numbers.test.TestUtils.MINUTE;
+import static org.dicio.numbers.test.TestUtils.MONTH;
+import static org.dicio.numbers.test.TestUtils.WEEK;
+import static org.dicio.numbers.test.TestUtils.YEAR;
+import static org.dicio.numbers.test.TestUtils.t;
+
+import org.dicio.numbers.parser.lexer.TokenStream;
+import org.dicio.numbers.test.DurationExtractorUtilsTestBase;
+import org.dicio.numbers.unit.Duration;
+import org.dicio.numbers.util.DurationExtractorUtils;
+import org.junit.Test;
+
+/**
+ * Checks {@link DurationExtractorUtils} on French input, with numbers read through
+ * {@code FrenchNumberExtractor.numberNoOrdinal}.
+ */
+public class DurationExtractorUtilsTest extends DurationExtractorUtilsTestBase {
+
+    /** Returns the French (fr-fr) configuration folder. */
+    @Override
+    public String configFolder() {
+        return "config/fr-fr";
+    }
+
+    /**
+     * Extracts a duration from {@code ts}; {@code shortScale} is ignored by this implementation.
+     */
+    @Override
+    public Duration extractDuration(final TokenStream ts, final boolean shortScale) {
+        final FrenchNumberExtractor numberExtractor = new FrenchNumberExtractor(ts);
+        return new DurationExtractorUtils(ts, numberExtractor::numberNoOrdinal).duration();
+    }
+
+    // Convenience overloads that always pass true as the second argument of the
+    // three-argument helpers (presumably the shortScale flag of the base class -- confirm).
+    private void assertDuration(final String s, final java.time.Duration duration) {
+        assertDuration(s, true, duration);
+    }
+
+    private void assertNoDuration(final String s) {
+        assertNoDuration(s, true);
+    }
+
+    // One number followed by one unit, for each unit from milliseconds up to years.
+    @Test
+    public void testDurationNumberAndUnit() {
+        assertDuration("18s", t(18));
+        assertDuration("une seconde", t(1));
+        assertDuration("cinquante-neuf minutes", t(59 * MINUTE));
+        assertDuration("vingt-trois heures", t(23 * HOUR));
+        assertDuration("cinq jours", t(5 * DAY));
+        assertDuration("dix semaines", t(10 * WEEK));
+        assertDuration("six mois", t(6 * MONTH));
+        assertDuration("trois ans", t(3 * YEAR));
+        assertDuration("cinq millisecondes", t(0, 5 * MILLIS));
+    }
+
+    // Unrelated word, empty input, and a unit word without a number.
+    @Test
+    public void testNoDuration() {
+        assertNoDuration("bonjour");
+        assertNoDuration("");
+        assertNoDuration("mois");
+    }
+}
diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractDateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractDateTimeTest.java
new file mode 100644
index 00000000..1db48ccc
--- /dev/null
+++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractDateTimeTest.java
@@ -0,0 +1,36 @@
+package org.dicio.numbers.lang.fr;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import org.dicio.numbers.ParserFormatter;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.time.LocalDateTime;
+
+/**
+ * End-to-end date/time extraction through a {@link ParserFormatter} built from
+ * {@code FrenchFormatter} and {@code FrenchParser}.
+ */
+public class ExtractDateTimeTest {
+
+    // shared by all tests, created once in setup()
+    private static ParserFormatter pf;
+
+    @BeforeClass
+    public static void setup() {
+        pf = new ParserFormatter(new FrenchFormatter(), new FrenchParser());
+    }
+
+    // reference time passed to every extraction: 2022-05-10 19:38:36
+    private static final LocalDateTime NOW = LocalDateTime.of(2022, 5, 10, 19, 38, 36);
+
+    @Test
+    public void testExtractDateTime() {
+        // date only: the expected result keeps the time of day of NOW
+        assertEquals(LocalDateTime.of(2021, 4, 28, 19, 38, 36),
+                pf.extractDateTime("le vingt-huit avril deux mille vingt et un")
+                        .now(NOW).get());
+        // time only: the expected result keeps the date of NOW, minutes and seconds are zero
+        assertEquals(LocalDateTime.of(2022, 5, 10, 14, 0, 0),
+                pf.extractDateTime("à quatorze heures").now(NOW).get());
+    }
+
+    @Test
+    public void testExtractDateTimeNull() {
+        assertNull(pf.extractDateTime("bonjour le monde").now(NOW).get());
+    }
+}
diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractDurationTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractDurationTest.java
new file mode 100644
index 00000000..1ecc846f
--- /dev/null
+++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractDurationTest.java
@@ -0,0 +1,40 @@
+package org.dicio.numbers.lang.fr;
+
+import static org.dicio.numbers.test.TestUtils.DAY;
+import static org.dicio.numbers.test.TestUtils.HOUR;
+import static org.dicio.numbers.test.TestUtils.MINUTE;
+import static org.dicio.numbers.test.TestUtils.YEAR;
+import
static org.dicio.numbers.test.TestUtils.t; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import org.dicio.numbers.ParserFormatter; +import org.junit.BeforeClass; +import org.junit.Test; + +public class ExtractDurationTest { + + private static ParserFormatter pf; + + @BeforeClass + public static void setup() { + pf = new ParserFormatter(new FrenchFormatter(), new FrenchParser()); + } + + @Test + public void testExtractDuration() { + assertEquals(t(2 * MINUTE + 30), + pf.extractDuration("un minuteur de deux minutes et trente secondes").get()); + assertEquals(t(2 * YEAR), + pf.extractDuration("il y a deux ans").get()); + assertEquals(t(23 * HOUR), + pf.extractDuration("vingt-trois heures").get()); + assertEquals(t(5 * DAY), + pf.extractDuration("cinq jours").get()); + } + + @Test + public void testExtractDurationNull() { + assertNull(pf.extractDuration("bonjour le monde").get()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java new file mode 100644 index 00000000..a7e1d99f --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java @@ -0,0 +1,76 @@ +package org.dicio.numbers.lang.fr; + +import org.dicio.numbers.parser.lexer.TokenStream; +import org.dicio.numbers.test.WithTokenizerTestBase; +import org.dicio.numbers.unit.Number; +import org.junit.Test; + +import java.util.function.Function; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; +import static org.dicio.numbers.test.TestUtils.n; +import static org.dicio.numbers.test.TestUtils.numberDeduceType; +import static org.junit.Assert.assertEquals; + +public class ExtractNumbersTest extends WithTokenizerTestBase { + + @Override + public String configFolder() { + return "config/fr-fr"; + } + + private void assertNumberFunction(final String s, + final Number value, + final 
int finalTokenStreamPosition, + final Function numberFunction) { + final TokenStream ts = new TokenStream(tokenizer.tokenize(s)); + final Number number = numberFunction.apply(new FrenchNumberExtractor(ts)); + assertEquals("wrong value for string " + s, value, number); + assertEquals("wrong final token position for number " + value, + finalTokenStreamPosition, ts.position); + } + + private void assertNumberFunctionNull(final String s, + final Function numberFunction) { + assertNumberFunction(s, null, 0, numberFunction); + } + + private void assertNumberInteger(final String s, final boolean allowOrdinal, + final double value, final boolean isOrdinal, + final int finalTokenStreamPosition) { + assertNumberFunction(s, numberDeduceType(value).withOrdinal(isOrdinal), + finalTokenStreamPosition, (enp) -> enp.numberInteger(allowOrdinal)); + } + + private void assertNumberIntegerNull(final String s, final boolean allowOrdinal) { + assertNumberFunctionNull(s, (enp) -> enp.numberInteger(allowOrdinal)); + } + + @Test + public void testNumberInteger() { + assertNumberInteger("deux mille vingt et un", F, 2021, F, 5); + assertNumberInteger("cent soixante-quatre", F, 164, F, 2); + assertNumberInteger("neuf cent dix", T, 910, F, 3); + assertNumberInteger("soixante-dix", F, 70, F, 1); + assertNumberInteger("quatre-vingt-dix-neuf", F, 99, F, 1); + assertNumberInteger("soixante et onze", F, 71, F, 3); + assertNumberInteger("quatre-vingts", F, 80, F, 1); + assertNumberInteger("quatre-vingt-trois", F, 83, F, 1); + assertNumberInteger("premier", T, 1, T, 1); + assertNumberIntegerNull("premier", F); + assertNumberIntegerNull("bonjour", F); + } + + @Test + public void testSpecialFrenchNumbers() { + // Test soixante + teen = 70-79 + assertNumberInteger("soixante dix", F, 70, F, 2); + assertNumberInteger("soixante onze", F, 71, F, 2); + assertNumberInteger("soixante douze", F, 72, F, 2); + // Test quatre + vingt = 80 + assertNumberInteger("quatre vingts", F, 80, F, 2); + 
assertNumberInteger("quatre vingt un", F, 81, F, 3); + assertNumberInteger("quatre vingt dix", F, 90, F, 3); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceDurationTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceDurationTest.java new file mode 100644 index 00000000..ab1df0cf --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceDurationTest.java @@ -0,0 +1,46 @@ +package org.dicio.numbers.lang.fr; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; + +import org.dicio.numbers.formatter.Formatter; +import org.dicio.numbers.test.NiceDurationTestBase; +import org.junit.Test; + +public class NiceDurationTest extends NiceDurationTestBase { + + @Override + public Formatter buildNumberFormatter() { + return new FrenchFormatter(); + } + + @Test + public void zero() { + assertDuration("zéro secondes", T, 0, 0, 0, 0); + assertDuration("0:00", F, 0, 0, 0, 0); + } + + @Test + public void speechOne() { + // Note: pronounceNumberDuration(1) returns "un" in French (regardless of gender) + assertDuration("un seconde", T, 0, 0, 0, 1); + assertDuration("un minute", T, 0, 0, 1, 0); + assertDuration("un heure", T, 0, 1, 0, 0); + assertDuration("un jour", T, 1, 0, 0, 0); + } + + @Test + public void speechMultiple() { + assertDuration("deux secondes", T, 0, 0, 0, 2); + assertDuration("cinq minutes", T, 0, 0, 5, 0); + assertDuration("trois heures", T, 0, 3, 0, 0); + assertDuration("dix jours", T, 10, 0, 0, 0); + } + + @Test + public void noSpeech() { + assertDuration("0:01", F, 0, 0, 0, 1); + assertDuration("1:00", F, 0, 0, 1, 0); + assertDuration("1:00:00", F, 0, 1, 0, 0); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceNumberTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceNumberTest.java new file mode 100644 index 00000000..8978d601 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceNumberTest.java @@ -0,0 +1,36 @@ 
+package org.dicio.numbers.lang.fr; + +import org.dicio.numbers.ParserFormatter; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; +import static org.junit.Assert.assertEquals; + +public class NiceNumberTest { + + private static ParserFormatter pf; + + @BeforeClass + public static void setup() { + pf = new ParserFormatter(new FrenchFormatter(), null); + } + + @Test + public void speech() { + assertEquals("trente-quatre et un demi", pf.niceNumber(34.5).get()); + assertEquals("quatre cents soixante-cinq", pf.niceNumber(465).get()); + assertEquals("moins quatre-vingt-onze", pf.niceNumber(-91).get()); + assertEquals("zéro", pf.niceNumber(0).get()); + } + + @Test + public void noSpeech() { + assertEquals("34 1/2", pf.niceNumber(34.5).speech(F).get()); + assertEquals("-18 3/5", pf.niceNumber(-18.6).speech(F).get()); + assertEquals("465", pf.niceNumber(465).speech(F).get()); + assertEquals("-91", pf.niceNumber(-91).speech(F).get()); + assertEquals("0", pf.niceNumber(0).speech(F).get()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceTimeTest.java new file mode 100644 index 00000000..d28d9e02 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceTimeTest.java @@ -0,0 +1,83 @@ +package org.dicio.numbers.lang.fr; + +import org.dicio.numbers.ParserFormatter; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.time.LocalTime; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; +import static org.junit.Assert.assertEquals; + +public class NiceTimeTest { + + private static ParserFormatter pf; + + @BeforeClass + public static void setup() { + pf = new ParserFormatter(new FrenchFormatter(), null); + } + + @Test + public void random() { + final LocalTime dt = LocalTime.of(13, 22, 3); + 
assertEquals("un heure vingt-deux", pf.niceTime(dt).get()); + assertEquals("un heure vingt-deux de l'après-midi", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("treize heures vingt-deux", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("treize heures vingt-deux", pf.niceTime(dt).use24Hour(T).showAmPm(T).get()); + assertEquals("1:22", pf.niceTime(dt).speech(F).get()); + assertEquals("1:22 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("13:22", pf.niceTime(dt).speech(F).use24Hour(T).get()); + assertEquals("13:22", pf.niceTime(dt).speech(F).use24Hour(T).showAmPm(T).get()); + } + + @Test + public void oClock() { + final LocalTime dt = LocalTime.of(15, 0, 32); + assertEquals("quinze heures", pf.niceTime(dt).get()); + assertEquals("quinze heures de l'après-midi", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("quinze heures", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("3:00", pf.niceTime(dt).speech(F).get()); + assertEquals("3:00 PM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("15:00", pf.niceTime(dt).speech(F).use24Hour(T).get()); + } + + @Test + public void noon() { + final LocalTime dt = LocalTime.of(12, 0, 0); + assertEquals("midi", pf.niceTime(dt).get()); + assertEquals("midi", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("midi", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("12:00", pf.niceTime(dt).speech(F).get()); + } + + @Test + public void midnight() { + final LocalTime dt = LocalTime.of(0, 0, 0); + assertEquals("minuit", pf.niceTime(dt).get()); + assertEquals("minuit", pf.niceTime(dt).showAmPm(T).get()); + assertEquals("minuit", pf.niceTime(dt).use24Hour(T).get()); + assertEquals("12:00", pf.niceTime(dt).speech(F).get()); + assertEquals("12:00 AM", pf.niceTime(dt).speech(F).showAmPm(T).get()); + assertEquals("00:00", pf.niceTime(dt).speech(F).use24Hour(T).get()); + } + + @Test + public void quarterPast() { + final LocalTime dt = LocalTime.of(9, 15, 0); + assertEquals("neuf heures et quart", 
pf.niceTime(dt).get()); + } + + @Test + public void halfPast() { + final LocalTime dt = LocalTime.of(9, 30, 0); + assertEquals("neuf heures et demie", pf.niceTime(dt).get()); + } + + @Test + public void quarterTo() { + final LocalTime dt = LocalTime.of(9, 45, 0); + assertEquals("dix heures moins le quart", pf.niceTime(dt).get()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/NumberExtractorUtilsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/NumberExtractorUtilsTest.java new file mode 100644 index 00000000..7e21a502 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/NumberExtractorUtilsTest.java @@ -0,0 +1,36 @@ +package org.dicio.numbers.lang.fr; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; + +import org.dicio.numbers.test.NumberExtractorUtilsTestBase; +import org.junit.Test; + +public class NumberExtractorUtilsTest extends NumberExtractorUtilsTestBase { + + @Override + public String configFolder() { + return "config/fr-fr"; + } + + @Test + public void testNumberLessThan1000() { + assertNumberLessThan1000("zéro", T, 0, F, 1); + assertNumberLessThan1000("un", F, 1, F, 1); + assertNumberLessThan1000("cinq", T, 5, F, 1); + assertNumberLessThan1000("dix-neuf", F, 19, F, 1); + assertNumberLessThan1000("cent", T, 100, F, 1); + assertNumberLessThan1000("trois cent", T, 300, F, 2); + assertNumberLessThan1000("vingt-deux", T, 22, F, 1); + assertNumberLessThan1000("soixante-dix", T, 70, F, 1); + assertNumberLessThan1000("quatre-vingts", T, 80, F, 1); + assertNumberLessThan1000("quatre-vingt-dix", T, 90, F, 1); + assertNumberLessThan1000("quatre-vingt-onze", T, 91, F, 1); + } + + @Test + public void testNumberLessThan1000Null() { + assertNumberLessThan1000Null("bonjour", T); + assertNumberLessThan1000Null("", F); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java 
b/numbers/src/test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java new file mode 100644 index 00000000..e864353f --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java @@ -0,0 +1,117 @@ +package org.dicio.numbers.lang.fr; + +import static org.dicio.numbers.test.TestUtils.DAY; +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.HOUR; +import static org.dicio.numbers.test.TestUtils.MINUTE; +import static org.dicio.numbers.test.TestUtils.T; +import static org.dicio.numbers.test.TestUtils.YEAR; +import static org.dicio.numbers.test.TestUtils.n; +import static org.dicio.numbers.test.TestUtils.t; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.dicio.numbers.ParserFormatter; +import org.dicio.numbers.parser.Parser; +import org.dicio.numbers.parser.param.ExtractNumberParams; +import org.dicio.numbers.parser.param.ParserParamsTestBase; +import org.dicio.numbers.unit.Number; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +public class ParserParamsTest extends ParserParamsTestBase { + + @Override + protected Parser numberParser() { + return new FrenchParser(); + } + + protected void assertNumberFirst(final String s, final boolean preferOrdinal, + final boolean integerOnly, final Number expectedResult) { + assertNumberFirst(s, true, preferOrdinal, integerOnly, expectedResult); + } + + protected void assertNumberMixedWithText(final String s, final boolean preferOrdinal, + final boolean integerOnly, final Object... 
expectedResults) { + assertNumberMixedWithText(s, true, preferOrdinal, integerOnly, expectedResults); + } + + protected void assertDurationFirst(final String s, final java.time.Duration expectedResult) { + assertDurationFirst(s, true, expectedResult); + } + + private static String longNumberMixedWithText; + private static int partsOfLongNumberMixedWithText; + + @BeforeClass + public static void setupLongNumberMixedWithText() { + final ParserFormatter npf = new ParserFormatter(new FrenchFormatter(), null); + final List strings = new ArrayList<>(); + for (int i = 0; i < 1100000000;) { + if (i < 2200) { + ++i; + } else if (i < 1000000) { + i += 1207; + } else { + i += 299527; + } + + final double num = (i % 4 == 0) ? (1.0 / i) : i; + strings.add(npf.pronounceNumber(num).places(0).get()); + strings.add(npf.pronounceNumber(num).places(0).ordinal(T).get()); + strings.add(npf.niceNumber(num).speech(false).get()); + strings.add(npf.niceNumber(num).speech(true).get()); + strings.add(String.valueOf(num)); + strings.add(i % 2 == 0 ? " bonjour " : " de "); + strings.add(i % 2 == 0 ? "invalide" : "un centième"); + strings.add(i % 2 == 0 ? " et " : " un "); + strings.add(i % 2 == 0 ? "," : " ; "); + strings.add("-++-+--+-+-"); + strings.add(i % 2 == 0 ? 
" plus " : " moins "); + } + Collections.shuffle(strings, new Random(42)); + partsOfLongNumberMixedWithText = strings.size(); + longNumberMixedWithText = String.join("", strings); + } + + @Test + public void testNumberFirst() { + assertNumberFirst("un bonjour", F, F, n(1, F)); + assertNumberFirst("dix-neuf euros", F, F, n(19, F)); + assertNumberFirst("premier résultat", T, F, n(1, T)); + assertNumberFirst("premier résultat", F, F, n(1, T)); + assertNumberFirst("inconnu", F, F, null); + } + + @Test + public void testNumberMixedWithText() { + assertNumberMixedWithText("vingt et un et cent soixante-quatre", F, F, + n(21, F), " et ", n(164, F)); + assertNumberMixedWithText("bonjour trois monde", F, F, "bonjour ", n(3, F), " monde"); + } + + @Test + public void testDurationFirst() { + assertDurationFirst("un minuteur de deux minutes et trente secondes test", + t(2 * MINUTE + 30)); + assertDurationFirst("il y a deux ans", t(2 * YEAR)); + } + + @Test(timeout = 5000) + public void testNumberMixedWithTextPerformance() { + assertEquals(73667, partsOfLongNumberMixedWithText); + + for (int i = 0; i < (1 << 2); ++i) { + final List objects = new ExtractNumberParams(numberParser(), longNumberMixedWithText) + .integerOnly(i % 2 == 1).preferOrdinal((i / 2) % 2 == 1) + .parseMixedWithText(); + assertTrue(objects.size() / ((double) partsOfLongNumberMixedWithText) > 0.8); + } + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/PronounceNumberTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/PronounceNumberTest.java new file mode 100644 index 00000000..e7face6b --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/PronounceNumberTest.java @@ -0,0 +1,103 @@ +package org.dicio.numbers.lang.fr; + +import org.dicio.numbers.ParserFormatter; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; +import static org.junit.Assert.assertEquals; + +public 
class PronounceNumberTest { + + private static ParserFormatter pf; + + @BeforeClass + public static void setup() { + pf = new ParserFormatter(new FrenchFormatter(), null); + } + + @Test + public void smallIntegers() { + assertEquals("zéro", pf.pronounceNumber(0).get()); + assertEquals("un", pf.pronounceNumber(1).get()); + assertEquals("dix", pf.pronounceNumber(10).get()); + assertEquals("quinze", pf.pronounceNumber(15).get()); + assertEquals("vingt", pf.pronounceNumber(20).get()); + assertEquals("vingt-sept", pf.pronounceNumber(27).get()); + assertEquals("trente", pf.pronounceNumber(30).get()); + assertEquals("trente-trois", pf.pronounceNumber(33).get()); + } + + @Test + public void negativeSmallIntegers() { + assertEquals("moins un", pf.pronounceNumber(-1).get()); + assertEquals("moins dix", pf.pronounceNumber(-10).get()); + assertEquals("moins quinze", pf.pronounceNumber(-15).get()); + assertEquals("moins vingt", pf.pronounceNumber(-20).get()); + assertEquals("moins vingt-sept", pf.pronounceNumber(-27).get()); + assertEquals("moins trente", pf.pronounceNumber(-30).get()); + assertEquals("moins trente-trois", pf.pronounceNumber(-33).get()); + } + + @Test + public void decimals() { + assertEquals("zéro virgule zéro cinq", pf.pronounceNumber(0.05).get()); + assertEquals("moins zéro virgule zéro cinq", pf.pronounceNumber(-0.05).get()); + assertEquals("un virgule deux trois", pf.pronounceNumber(1.234).get()); + } + + @Test + public void specialFrenchNumbers() { + assertEquals("soixante-dix", pf.pronounceNumber(70).get()); + assertEquals("soixante et onze", pf.pronounceNumber(71).get()); + assertEquals("soixante-douze", pf.pronounceNumber(72).get()); + assertEquals("soixante-dix-neuf", pf.pronounceNumber(79).get()); + assertEquals("quatre-vingts", pf.pronounceNumber(80).get()); + assertEquals("quatre-vingt-un", pf.pronounceNumber(81).get()); + assertEquals("quatre-vingt-dix", pf.pronounceNumber(90).get()); + assertEquals("quatre-vingt-onze", 
pf.pronounceNumber(91).get()); + assertEquals("quatre-vingt-dix-neuf", pf.pronounceNumber(99).get()); + } + + @Test + public void hundreds() { + assertEquals("cent", pf.pronounceNumber(100).get()); + assertEquals("deux cents", pf.pronounceNumber(200).get()); + assertEquals("deux cent un", pf.pronounceNumber(201).get()); + assertEquals("cinq cent soixante-dix-huit", pf.pronounceNumber(578).get()); + } + + @Test + public void thousands() { + assertEquals("mille", pf.pronounceNumber(1000).get()); + assertEquals("deux mille", pf.pronounceNumber(2000).get()); + assertEquals("mille un", pf.pronounceNumber(1001).get()); + assertEquals("deux mille vingt et un", pf.pronounceNumber(2021).get()); + } + + @Test + public void ordinals() { + assertEquals("premier", pf.pronounceNumber(1).ordinal(T).get()); + assertEquals("deuxième", pf.pronounceNumber(2).ordinal(T).get()); + assertEquals("troisième", pf.pronounceNumber(3).ordinal(T).get()); + assertEquals("cinquième", pf.pronounceNumber(5).ordinal(T).get()); + assertEquals("neuvième", pf.pronounceNumber(9).ordinal(T).get()); + assertEquals("dixième", pf.pronounceNumber(10).ordinal(T).get()); + } + + @Test + public void specialValues() { + assertEquals("infini", pf.pronounceNumber(Double.POSITIVE_INFINITY).get()); + assertEquals("moins infini", pf.pronounceNumber(Double.NEGATIVE_INFINITY).get()); + assertEquals("pas un nombre", pf.pronounceNumber(Double.NaN).get()); + } + + @Test + public void millions() { + assertEquals("un million", pf.pronounceNumber(1000000).get()); + assertEquals("deux millions", pf.pronounceNumber(2000000).get()); + assertEquals("un milliard", pf.pronounceNumber(1000000000).get()); + assertEquals("deux milliards", pf.pronounceNumber(2000000000).get()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/TokenizerConfigTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/TokenizerConfigTest.java new file mode 100644 index 00000000..b7b62f8e --- /dev/null +++ 
b/numbers/src/test/java/org/dicio/numbers/lang/fr/TokenizerConfigTest.java @@ -0,0 +1,11 @@ +package org.dicio.numbers.lang.fr; + +import org.dicio.numbers.test.TokenizerConfigTestBase; + +public class TokenizerConfigTest extends TokenizerConfigTestBase { + + @Override + public String configFolder() { + return "config/fr-fr"; + } +} diff --git a/numbers/src/test/resources/config/fr-fr/date_time_test.json b/numbers/src/test/resources/config/fr-fr/date_time_test.json new file mode 100644 index 00000000..fdfb59bf --- /dev/null +++ b/numbers/src/test/resources/config/fr-fr/date_time_test.json @@ -0,0 +1,42 @@ +{ "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "un av. J.-C." }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "dix av. J.-C." }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "quatre-vingt-douze av. J.-C." }, + "4": {"datetime_param": "100, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "cent" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "huit cent onze" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "quatre cent cinquante-quatre" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mille cinq" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mille douze" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mille quarante-six" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille huit cent sept" }, + "11": {"datetime_param": "1700, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille sept cent" }, + "12": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille sept cent dix-sept" }, + "13": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille neuf cent quatre-vingt-huit"}, + 
"14": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "deux mille neuf"}, + "15": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "deux mille dix-huit"}, + "16": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "deux mille vingt et un"}, + "17": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "deux mille trente"}, + "18": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "deux mille cent" }, + "19": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille" }, + "20": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "deux mille" }, + "21": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "trois mille cent vingt av. J.-C." }, + "22": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "trois mille deux cent quarante et un av. J.-C." }, + "23": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "cinq mille deux cent" }, + "24": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mille cent" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "mardi, le trente et un janvier deux mille dix-sept"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "dimanche, le quatre février deux mille dix-huit"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "dimanche, le quatre février"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "dimanche, le quatre"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "demain"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "aujourd'hui"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": 
"2018, 2, 5, 0, 2, 3", "assertEqual": "hier"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "dimanche, le quatre février"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "dimanche, le quatre février deux mille dix-huit"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "mardi, le trente et un janvier deux mille dix-sept à un heure vingt-deux de l'après-midi"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "mardi, le trente et un janvier deux mille dix-sept à treize heures vingt-deux"} + } +} diff --git a/settings.gradle.kts b/settings.gradle.kts index d6e14ff6..7d5242b6 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -1,9 +1,14 @@ pluginManagement { repositories { + gradlePluginPortal() mavenCentral() } } +plugins { + id("org.gradle.toolchains.foojay-resolver-convention") version "0.8.0" +} + dependencyResolutionManagement { repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS) repositories { From 50b32fd4139db982bc38e09a07b17ed6607cf5b3 Mon Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:10:59 +0200 Subject: [PATCH 02/14] Add French (fr-fr) language support (some bug fixes) --- .../dicio/numbers/lang/fr/FrenchFormatter.kt | 6 +++--- .../numbers/lang/fr/FrenchNumberExtractor.kt | 8 ++++---- .../parser/param/ExtractDateTimeParams.kt | 4 ++++ .../parser/param/ExtractDurationParams.kt | 4 ++++ .../numbers/util/DateTimeExtractorUtils.kt | 18 ++++++++++++++++++ 5 files changed, 33 insertions(+), 7 deletions(-) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchFormatter.kt b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchFormatter.kt index 190ca606..83b0fa3a 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchFormatter.kt 
+++ b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchFormatter.kt @@ -18,13 +18,13 @@ class FrenchFormatter : Formatter("config/fr-fr") { } val denominatorString = when (mixedFraction.denominator) { - 2 -> if (mixedFraction.numerator == 1L) "demi" else "demis" - 4 -> if (mixedFraction.numerator == 1L) "quart" else "quarts" + 2 -> if (mixedFraction.numerator == 1) "demi" else "demis" + 4 -> if (mixedFraction.numerator == 1) "quart" else "quarts" else -> { val base = pronounceNumber( mixedFraction.denominator.toDouble(), 0, true, false, true ) - if (mixedFraction.numerator == 1L) base else base + "s" + if (mixedFraction.numerator == 1) base else base + "s" } } diff --git a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt index aba634b9..d0b4434b 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt @@ -70,7 +70,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { ) { return numberToEdit.divide((1 / multiplier.decimalValue()).toLong()) } - return numberToEdit.multiply(multiplier) + return numberToEdit.multiply(multiplier!!) 
} } else if (denominator.isOrdinal && denominator.moreThan(2)) { return numberToEdit.divide(denominator) @@ -126,7 +126,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { var magnitude = 0.1 if (ts[0].value.length > 1 && NumberExtractorUtils.isRawNumber(ts[0])) { for (i in 0 until ts[0].value.length) { - n = n.plus((ts[0].value[i].code - '0'.code) * magnitude) + n = n!!.plus((ts[0].value[i].code - '0'.code) * magnitude) magnitude /= 10.0 } ts.movePositionForwardBy(1) @@ -136,7 +136,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { || (ts[0].value.length == 1 && NumberExtractorUtils.isRawNumber(ts[0]) && !ts[1].hasCategory("ordinal_suffix")) ) { - n = n.plus(ts[0].number!!.multiply(magnitude)) + n = n!!.plus(ts[0].number!!.multiply(magnitude)) magnitude /= 10.0 } else { break @@ -154,7 +154,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { if (denominator == null) { ts.movePositionForwardBy(-separatorLength) } else { - return n.divide(denominator) + return n!!.divide(denominator) } } diff --git a/numbers/src/main/java/org/dicio/numbers/parser/param/ExtractDateTimeParams.kt b/numbers/src/main/java/org/dicio/numbers/parser/param/ExtractDateTimeParams.kt index 74fb1c8f..f204a4ac 100644 --- a/numbers/src/main/java/org/dicio/numbers/parser/param/ExtractDateTimeParams.kt +++ b/numbers/src/main/java/org/dicio/numbers/parser/param/ExtractDateTimeParams.kt @@ -45,6 +45,10 @@ class ExtractDateTimeParams(parser: Parser, utterance: String) : return this } + fun get(): LocalDateTime? { + return parseFirst() + } + override fun getExtractorAtCurrentPosition(tokenStream: TokenStream): () -> LocalDateTime? 
{ return parser.extractDateTime(tokenStream, shortScale, preferMonthBeforeDay, now) } diff --git a/numbers/src/main/java/org/dicio/numbers/parser/param/ExtractDurationParams.kt b/numbers/src/main/java/org/dicio/numbers/parser/param/ExtractDurationParams.kt index ac82e990..fdef8a56 100644 --- a/numbers/src/main/java/org/dicio/numbers/parser/param/ExtractDurationParams.kt +++ b/numbers/src/main/java/org/dicio/numbers/parser/param/ExtractDurationParams.kt @@ -22,6 +22,10 @@ class ExtractDurationParams(parser: Parser, utterance: String) : return this } + fun get(): Duration? { + return parseFirst() + } + override fun getExtractorAtCurrentPosition(tokenStream: TokenStream): () -> Duration? { return parser.extractDuration(tokenStream, shortScale) } diff --git a/numbers/src/main/java/org/dicio/numbers/util/DateTimeExtractorUtils.kt b/numbers/src/main/java/org/dicio/numbers/util/DateTimeExtractorUtils.kt index a25f4e42..b82ad43f 100644 --- a/numbers/src/main/java/org/dicio/numbers/util/DateTimeExtractorUtils.kt +++ b/numbers/src/main/java/org/dicio/numbers/util/DateTimeExtractorUtils.kt @@ -111,6 +111,24 @@ class DateTimeExtractorUtils( } } + fun relativeYesterday(): Int? { + if (ts[0].hasCategory("yesterday")) { + ts.movePositionForwardBy(1) + return -1 + } else { + return null + } + } + + fun relativeTomorrow(): Int? { + if (ts[0].hasCategory("tomorrow")) { + ts.movePositionForwardBy(1) + return 1 + } else { + return null + } + } + fun relativeDayOfWeekDuration(): Int? 
{ return relativeIndicatorDuration({ var number = extractIntegerInRange(1, Int.MAX_VALUE) From d532ad59019892f9813145102bc5a74c51f8bfa2 Mon Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:11:49 +0200 Subject: [PATCH 03/14] Add French (fr-fr) language support (some bug fixes) --- .../test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java index e864353f..a34d5d31 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java @@ -1,8 +1,6 @@ package org.dicio.numbers.lang.fr; -import static org.dicio.numbers.test.TestUtils.DAY; import static org.dicio.numbers.test.TestUtils.F; -import static org.dicio.numbers.test.TestUtils.HOUR; import static org.dicio.numbers.test.TestUtils.MINUTE; import static org.dicio.numbers.test.TestUtils.T; import static org.dicio.numbers.test.TestUtils.YEAR; @@ -19,7 +17,6 @@ import org.junit.BeforeClass; import org.junit.Test; -import java.time.LocalDateTime; import java.util.ArrayList; import java.util.Collections; import java.util.List; From 3bbd58e8404a5b0dc4b0b4dae57956c704e3c0e2 Mon Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:12:22 +0200 Subject: [PATCH 04/14] Add French (fr-fr) language support (some bug fixes) --- .../test/java/org/dicio/numbers/lang/fr/PronounceNumberTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/PronounceNumberTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/PronounceNumberTest.java index e7face6b..b5986fbb 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/fr/PronounceNumberTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/PronounceNumberTest.java @@ -4,7 +4,6 @@ import org.junit.BeforeClass; import org.junit.Test; 
-import static org.dicio.numbers.test.TestUtils.F; import static org.dicio.numbers.test.TestUtils.T; import static org.junit.Assert.assertEquals; From 78ce62ab682342ebab36d85eb0ccb8a60a6154f5 Mon Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:12:43 +0200 Subject: [PATCH 05/14] Add French (fr-fr) language support (some bug fixes) --- .../src/test/java/org/dicio/numbers/lang/fr/NiceNumberTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceNumberTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceNumberTest.java index 8978d601..217b255a 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceNumberTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/NiceNumberTest.java @@ -5,7 +5,6 @@ import org.junit.Test; import static org.dicio.numbers.test.TestUtils.F; -import static org.dicio.numbers.test.TestUtils.T; import static org.junit.Assert.assertEquals; public class NiceNumberTest { From 46ae396c48975cf0917c220cee1adcef41917f2b Mon Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:13:13 +0200 Subject: [PATCH 06/14] Add French (fr-fr) language support (some bug fixes) --- .../test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java index a7e1d99f..6a27d200 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java @@ -9,7 +9,6 @@ import static org.dicio.numbers.test.TestUtils.F; import static org.dicio.numbers.test.TestUtils.T; -import static org.dicio.numbers.test.TestUtils.n; import static org.dicio.numbers.test.TestUtils.numberDeduceType; import static org.junit.Assert.assertEquals; From 29200fd54f82e5e08c8c5ebfa4504d05ab7802f4 Mon Sep 17 00:00:00 2001 From: THO 
Date: Sun, 29 Mar 2026 18:13:35 +0200 Subject: [PATCH 07/14] Add French (fr-fr) language support (some bug fixes) --- settings.gradle.kts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/settings.gradle.kts b/settings.gradle.kts index 7d5242b6..62b26412 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -1,3 +1,5 @@ +@file:Suppress("UnstableApiUsage") + pluginManagement { repositories { gradlePluginPortal() From 1f6ea45ec76839db0eb5c7996bca6726cec44610 Mon Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:14:06 +0200 Subject: [PATCH 08/14] Add French (fr-fr) language support (some bug fixes) --- .../org/dicio/numbers/lang/fr/ExtractDurationTest.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractDurationTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractDurationTest.java index 1ecc846f..455ceb98 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractDurationTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractDurationTest.java @@ -4,11 +4,11 @@ import static org.dicio.numbers.test.TestUtils.HOUR; import static org.dicio.numbers.test.TestUtils.MINUTE; import static org.dicio.numbers.test.TestUtils.YEAR; -import static org.dicio.numbers.test.TestUtils.t; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import org.dicio.numbers.ParserFormatter; +import org.dicio.numbers.unit.Duration; import org.junit.BeforeClass; import org.junit.Test; @@ -23,13 +23,13 @@ public static void setup() { @Test public void testExtractDuration() { - assertEquals(t(2 * MINUTE + 30), + assertEquals(new Duration().plus(2 * MINUTE + 30, java.time.temporal.ChronoUnit.SECONDS), pf.extractDuration("un minuteur de deux minutes et trente secondes").get()); - assertEquals(t(2 * YEAR), + assertEquals(new Duration().plus(2 * YEAR, java.time.temporal.ChronoUnit.SECONDS), pf.extractDuration("il y a deux 
ans").get()); - assertEquals(t(23 * HOUR), + assertEquals(new Duration().plus(23 * HOUR, java.time.temporal.ChronoUnit.SECONDS), pf.extractDuration("vingt-trois heures").get()); - assertEquals(t(5 * DAY), + assertEquals(new Duration().plus(5 * DAY, java.time.temporal.ChronoUnit.SECONDS), pf.extractDuration("cinq jours").get()); } From 6970cef0d362832786388eb0117e92dbc53e35da Mon Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:14:31 +0200 Subject: [PATCH 09/14] Add French (fr-fr) language support (some bug fixes) --- .../numbers/lang/fr/ParserParamsTest.java | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java index a34d5d31..dd84c16e 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/ParserParamsTest.java @@ -29,14 +29,12 @@ protected Parser numberParser() { return new FrenchParser(); } - protected void assertNumberFirst(final String s, final boolean preferOrdinal, - final boolean integerOnly, final Number expectedResult) { - assertNumberFirst(s, true, preferOrdinal, integerOnly, expectedResult); + protected void assertNumberFirst(final String s, final Number expectedResult) { + assertNumberFirst(s, true, F, F, expectedResult); } - protected void assertNumberMixedWithText(final String s, final boolean preferOrdinal, - final boolean integerOnly, final Object... expectedResults) { - assertNumberMixedWithText(s, true, preferOrdinal, integerOnly, expectedResults); + protected void assertNumberMixedWithText(final String s, final Object... 
expectedResults) { + assertNumberMixedWithText(s, true, F, F, expectedResults); } protected void assertDurationFirst(final String s, final java.time.Duration expectedResult) { @@ -79,18 +77,17 @@ public static void setupLongNumberMixedWithText() { @Test public void testNumberFirst() { - assertNumberFirst("un bonjour", F, F, n(1, F)); - assertNumberFirst("dix-neuf euros", F, F, n(19, F)); - assertNumberFirst("premier résultat", T, F, n(1, T)); - assertNumberFirst("premier résultat", F, F, n(1, T)); - assertNumberFirst("inconnu", F, F, null); + assertNumberFirst("un bonjour", n(1, F)); + assertNumberFirst("dix-neuf euros", n(19, F)); + assertNumberFirst("premier résultat", n(1, T)); + assertNumberFirst("inconnu", null); } @Test public void testNumberMixedWithText() { - assertNumberMixedWithText("vingt et un et cent soixante-quatre", F, F, + assertNumberMixedWithText("vingt et un et cent soixante-quatre", n(21, F), " et ", n(164, F)); - assertNumberMixedWithText("bonjour trois monde", F, F, "bonjour ", n(3, F), " monde"); + assertNumberMixedWithText("bonjour trois monde", "bonjour ", n(3, F), " monde"); } @Test From b42471fdaa30c442acc03dedf87a97df2c7cc3c8 Mon Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:15:17 +0200 Subject: [PATCH 10/14] Add French (fr-fr) language support (some bug fixes) --- .../numbers/lang/fr/FrenchNumberExtractor.kt | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt index d0b4434b..e9a1e455 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt @@ -70,7 +70,9 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { ) { return numberToEdit.divide((1 / multiplier.decimalValue()).toLong()) } - return 
numberToEdit.multiply(multiplier!!) + if (multiplier != null) { + return numberToEdit.multiply(multiplier) + } } } else if (denominator.isOrdinal && denominator.moreThan(2)) { return numberToEdit.divide(denominator) @@ -126,7 +128,10 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { var magnitude = 0.1 if (ts[0].value.length > 1 && NumberExtractorUtils.isRawNumber(ts[0])) { for (i in 0 until ts[0].value.length) { - n = n!!.plus((ts[0].value[i].code - '0'.code) * magnitude) + val currentN = n + if (currentN != null) { + n = currentN.plus((ts[0].value[i].code - '0'.code) * magnitude) + } magnitude /= 10.0 } ts.movePositionForwardBy(1) @@ -136,7 +141,10 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { || (ts[0].value.length == 1 && NumberExtractorUtils.isRawNumber(ts[0]) && !ts[1].hasCategory("ordinal_suffix")) ) { - n = n!!.plus(ts[0].number!!.multiply(magnitude)) + val currentN = n + if (currentN != null) { + n = currentN.plus(ts[0].number!!.multiply(magnitude)) + } magnitude /= 10.0 } else { break @@ -154,7 +162,10 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { if (denominator == null) { ts.movePositionForwardBy(-separatorLength) } else { - return n!!.divide(denominator) + val currentN = n + if (currentN != null) { + return currentN.divide(denominator) + } } } @@ -182,13 +193,19 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { && ts[1].value.length == 3 && NumberExtractorUtils.isRawNumber(ts[1]) ) { - n = n!!.multiply(1000).plus(ts[1].number) + val currentN = n + if (currentN != null) { + n = currentN.multiply(1000).plus(ts[1].number) + } ts.movePositionForwardBy(2) } if (ts[0].hasCategory("ordinal_suffix")) { if (allowOrdinal) { ts.movePositionForwardBy(1) - return n!!.withOrdinal(true) + val currentN = n + if (currentN != null) { + return currentN.withOrdinal(true) + } } else { ts.position = originalPosition return null From 
e61e356acf23327bc4e0879aac4b58660d682e3d Mon Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:15:50 +0200 Subject: [PATCH 11/14] Add French (fr-fr) language support (some bug fixes) --- .../org/dicio/numbers/lang/fr/ExtractNumbersTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java b/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java index 6a27d200..448eb125 100644 --- a/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java +++ b/numbers/src/test/java/org/dicio/numbers/lang/fr/ExtractNumbersTest.java @@ -42,8 +42,8 @@ private void assertNumberInteger(final String s, final boolean allowOrdinal, finalTokenStreamPosition, (enp) -> enp.numberInteger(allowOrdinal)); } - private void assertNumberIntegerNull(final String s, final boolean allowOrdinal) { - assertNumberFunctionNull(s, (enp) -> enp.numberInteger(allowOrdinal)); + private void assertNumberIntegerNull(final String s) { + assertNumberFunctionNull(s, (enp) -> enp.numberInteger(F)); } @Test @@ -57,8 +57,8 @@ public void testNumberInteger() { assertNumberInteger("quatre-vingts", F, 80, F, 1); assertNumberInteger("quatre-vingt-trois", F, 83, F, 1); assertNumberInteger("premier", T, 1, T, 1); - assertNumberIntegerNull("premier", F); - assertNumberIntegerNull("bonjour", F); + assertNumberIntegerNull("premier"); + assertNumberIntegerNull("bonjour"); } @Test From c3d470e2a5b4df8ae0153cb63eb1e9dd36845b07 Mon Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:18:45 +0200 Subject: [PATCH 12/14] Add French (fr-fr) language support (some bug fixes) --- .../numbers/lang/fr/FrenchNumberExtractor.kt | 51 ++++++++----------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt index e9a1e455..fd40d8eb 100644 --- 
a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt @@ -93,7 +93,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { } fun numberSuffixMultiplierInteger(): Number? { - return if (ts[0].hasCategory("suffix_multiplier") && ts[0].number!!.isInteger) { + return if (ts[0].hasCategory("suffix_multiplier") && ts[0].number?.isInteger == true) { ts.movePositionForwardBy(1) ts[-1].number } else { @@ -128,10 +128,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { var magnitude = 0.1 if (ts[0].value.length > 1 && NumberExtractorUtils.isRawNumber(ts[0])) { for (i in 0 until ts[0].value.length) { - val currentN = n - if (currentN != null) { - n = currentN.plus((ts[0].value[i].code - '0'.code) * magnitude) - } + n = n?.plus((ts[0].value[i].code - '0'.code) * magnitude) magnitude /= 10.0 } ts.movePositionForwardBy(1) @@ -141,9 +138,9 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { || (ts[0].value.length == 1 && NumberExtractorUtils.isRawNumber(ts[0]) && !ts[1].hasCategory("ordinal_suffix")) ) { - val currentN = n - if (currentN != null) { - n = currentN.plus(ts[0].number!!.multiply(magnitude)) + val digitVal = ts[0].number + if (digitVal != null) { + n = n?.plus(digitVal.multiply(magnitude)) } magnitude /= 10.0 } else { @@ -162,10 +159,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { if (denominator == null) { ts.movePositionForwardBy(-separatorLength) } else { - val currentN = n - if (currentN != null) { - return currentN.divide(denominator) - } + n = n?.divide(denominator) } } @@ -193,19 +187,16 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { && ts[1].value.length == 3 && NumberExtractorUtils.isRawNumber(ts[1]) ) { - val currentN = n - if (currentN != null) { - n = currentN.multiply(1000).plus(ts[1].number) + 
val groupVal = ts[1].number + if (groupVal != null) { + n = n?.multiply(1000)?.plus(groupVal) } ts.movePositionForwardBy(2) } if (ts[0].hasCategory("ordinal_suffix")) { if (allowOrdinal) { ts.movePositionForwardBy(1) - val currentN = n - if (currentN != null) { - return currentN.withOrdinal(true) - } + n = n?.withOrdinal(true) } else { ts.position = originalPosition return null @@ -239,7 +230,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { val ordinal = ts[nextNotIgnore].hasCategory("ordinal") if (ts[nextNotIgnore].hasCategory("multiplier") && (allowOrdinal || !ordinal)) { val multiplier = ts[nextNotIgnore].number - if (multiplier!!.lessThan(lastMultiplier)) { + if (multiplier != null && multiplier.lessThan(lastMultiplier)) { ts.movePositionForwardBy(nextNotIgnore + 1) return if (groupValue == null) { multiplier.withOrdinal(ordinal) @@ -279,7 +270,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { when { ts[nextNotIgnore].hasCategory("digit") -> { - val digitValue = ts[nextNotIgnore].number!!.integerValue() + val digitValue = ts[nextNotIgnore].number?.integerValue() ?: break // French special: "quatre" (4) + "vingt(s)" (20) = 80 if (digitValue == 4L && ten < 0 && digit < 0 && hundred < 0) { @@ -287,7 +278,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { ts.movePositionForwardBy(nextNotIgnore + 1) val nextIdx2 = ts.indexOfWithoutCategory("ignore", 0) if (ts[nextIdx2].hasCategory("tens") - && ts[nextIdx2].number!!.integerValue() == 20L + && ts[nextIdx2].number?.integerValue() == 20L ) { ts.movePositionForwardBy(nextIdx2 + 1) // Now check for additional digit or teen (81-99) @@ -295,13 +286,13 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { val addCat = when { ts[nextIdx3].hasCategory("teen") -> "teen" ts[nextIdx3].hasCategory("digit") - && !ts[nextIdx3].isNumberEqualTo(0) -> "digit" + && ts[nextIdx3].isNumberEqualTo(0).not() -> 
"digit" else -> null } if (addCat != null) { ts.movePositionForwardBy(nextIdx3 + 1) return Number( - 80L + ts[-1].number!!.integerValue(), + 80L + (ts[-1].number?.integerValue() ?: 0L), ts[-1].hasCategory("ordinal") && allowOrdinal ) } @@ -316,7 +307,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { ts.position = savedPos } - if (digit < 0 && (!ts[nextNotIgnore].isNumberEqualTo(0) + if (digit < 0 && (ts[nextNotIgnore].isNumberEqualTo(0).not() || (ten < 0 && hundred < 0)) ) { digit = digitValue @@ -325,13 +316,13 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { ts[nextNotIgnore].hasCategory("teen") -> { if (ten < 0 && digit < 0) { - ten = ts[nextNotIgnore].number!!.integerValue() + ten = ts[nextNotIgnore].number?.integerValue() ?: break digit = 0 } else break } ts[nextNotIgnore].hasCategory("tens") -> { - val tensValue = ts[nextNotIgnore].number!!.integerValue() + val tensValue = ts[nextNotIgnore].number?.integerValue() ?: break // French special: "soixante" (60) + teen (10-19) = 70-79 if (tensValue == 60L && ten < 0 && digit < 0) { @@ -339,7 +330,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { ts.movePositionForwardBy(nextNotIgnore + 1) val nextIdx2 = ts.indexOfWithoutCategory("ignore", 0) if (ts[nextIdx2].hasCategory("teen")) { - val teenVal = ts[nextIdx2].number!!.integerValue() + val teenVal = ts[nextIdx2].number?.integerValue() ?: -1L if (teenVal >= 10) { ts.movePositionForwardBy(nextIdx2 + 1) ten = 60 + teenVal @@ -373,8 +364,8 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { } NumberExtractorUtils.isRawNumber(ts[nextNotIgnore]) -> { - val rawNumber = ts[nextNotIgnore].number - if (rawNumber!!.isDecimal) break + val rawNumber = ts[nextNotIgnore].number ?: break + if (rawNumber.isDecimal) break if (!allowOrdinal && ts[nextNotIgnore + 1].hasCategory("ordinal_suffix")) break From 9da5eb4e7d38b37c30c8424e4de1d5e1280d6804 Mon 
Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:29:31 +0200 Subject: [PATCH 13/14] Add French (fr-fr) language support (some bug fixes) --- .../numbers/lang/fr/FrenchNumberExtractor.kt | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt index fd40d8eb..5e06c7eb 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt @@ -110,11 +110,9 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { } fun numberPoint(allowOrdinal: Boolean): Number? { - var n = numberInteger(allowOrdinal).let { - if (it == null || it.isOrdinal) { - return@numberPoint it - } - it + var n = numberInteger(allowOrdinal) ?: return null + if (n.isOrdinal) { + return n } if (ts[0].hasCategory("point")) { @@ -128,7 +126,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { var magnitude = 0.1 if (ts[0].value.length > 1 && NumberExtractorUtils.isRawNumber(ts[0])) { for (i in 0 until ts[0].value.length) { - n = n?.plus((ts[0].value[i].code - '0'.code) * magnitude) + n = n.plus((ts[0].value[i].code - '0'.code) * magnitude) magnitude /= 10.0 } ts.movePositionForwardBy(1) @@ -140,7 +138,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { ) { val digitVal = ts[0].number if (digitVal != null) { - n = n?.plus(digitVal.multiply(magnitude)) + n = n.plus(digitVal.multiply(magnitude)) } magnitude /= 10.0 } else { @@ -159,7 +157,7 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { if (denominator == null) { ts.movePositionForwardBy(-separatorLength) } else { - n = n?.divide(denominator) + n = n.divide(denominator) } } @@ -189,14 +187,14 @@ class FrenchNumberExtractor internal constructor(private val 
ts: TokenStream) { ) { val groupVal = ts[1].number if (groupVal != null) { - n = n?.multiply(1000)?.plus(groupVal) + n = n.multiply(1000).plus(groupVal) } ts.movePositionForwardBy(2) } if (ts[0].hasCategory("ordinal_suffix")) { if (allowOrdinal) { ts.movePositionForwardBy(1) - n = n?.withOrdinal(true) + n = n.withOrdinal(true) } else { ts.position = originalPosition return null From 60d3e5eba676a5ee072c198d5f27d7922a6187af Mon Sep 17 00:00:00 2001 From: THO Date: Sun, 29 Mar 2026 18:31:31 +0200 Subject: [PATCH 14/14] Add French (fr-fr) language support (some bug fixes) --- .../java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt index 5e06c7eb..16e858f3 100644 --- a/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt +++ b/numbers/src/main/java/org/dicio/numbers/lang/fr/FrenchNumberExtractor.kt @@ -187,14 +187,14 @@ class FrenchNumberExtractor internal constructor(private val ts: TokenStream) { ) { val groupVal = ts[1].number if (groupVal != null) { - n = n.multiply(1000).plus(groupVal) + n = n!!.multiply(1000).plus(groupVal) } ts.movePositionForwardBy(2) } if (ts[0].hasCategory("ordinal_suffix")) { if (allowOrdinal) { ts.movePositionForwardBy(1) - n = n.withOrdinal(true) + n = n!!.withOrdinal(true) } else { ts.position = originalPosition return null