From 31f2bf8542d16f70dd43335b3f28e79be0f6a39a Mon Sep 17 00:00:00 2001
From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com>
Date: Thu, 7 Dec 2023 16:16:41 +0100
Subject: [PATCH] [DOCS] Puts lang ident example back. (#2608) (#2611)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

(cherry picked from commit 94237ee60b712ffdee56900453773679730325e6)

Co-authored-by: István Zoltán Szabó <istvan.szabo@elastic.co>
---
 .../stack/ml/nlp/ml-nlp-lang-ident.asciidoc   | 116 +++++++++++++++++-
 1 file changed, 114 insertions(+), 2 deletions(-)

diff --git a/docs/en/stack/ml/nlp/ml-nlp-lang-ident.asciidoc b/docs/en/stack/ml/nlp/ml-nlp-lang-ident.asciidoc
index 0b1878dcb..c6650e695 100644
--- a/docs/en/stack/ml/nlp/ml-nlp-lang-ident.asciidoc
+++ b/docs/en/stack/ml/nlp/ml-nlp-lang-ident.asciidoc
@@ -24,9 +24,10 @@ language traditionally uses. These languages are marked in the supported
 languages table (see below) with the `Latn` subtag. {lang-ident-cap} supports 
 Unicode input.
 
+
 [discrete]
 [[ml-lang-ident-supported-languages]]
-=== Supported languages
+== Supported languages
 
 The table below contains the ISO codes and the English names of the languages 
 that {lang-ident} supports. If a language has a 2-letter `ISO 639-1` code, the 
@@ -82,8 +83,119 @@ script.
 <!-- lint enable -->
 ////
 
+
+[discrete]
+[[ml-lang-ident-example]]
+== Example of {lang-ident}
+
+In the following example, we feed the {lang-ident} trained model a short 
+Hungarian text that contains diacritics and a couple of English words. The 
+model identifies the text correctly as Hungarian with high probability.
+
+[source,js]
+----------------------------------
+POST _ingest/pipeline/_simulate
+{
+   "pipeline":{
+      "processors":[
+         {
+            "inference":{
+               "model_id":"lang_ident_model_1", <1>
+               "inference_config":{
+                  "classification":{
+                     "num_top_classes":5 <2>
+                  }
+               },
+               "field_map":{
+               }
+            }
+         }
+      ]
+   },
+   "docs":[
+      {
+         "_source":{ <3>
+            "text":"Sziasztok! Ez egy rövid magyar szöveg. Nézzük, vajon sikerül-e azonosítania a language identification funkciónak? Annak ellenére is sikerülni fog, hogy a szöveg két angol szót is tartalmaz."
+         }
+      }
+   ]
+}
+----------------------------------
+//NOTCONSOLE
+
+<1> ID of the {lang-ident} trained model.
+<2> Specifies the number of languages to report in descending order of 
+probability.
+<3> The source object that contains the text to identify.
+
+
+In the example above, the `num_top_classes` value indicates that only the top 
+five languages (that is to say, the ones with the highest probability) are 
+reported.
+
+The request returns the following response:
+
+[source,js]
+----------------------------------
+{
+  "docs" : [
+    {
+      "doc" : {
+        "_index" : "_index",
+        "_type" : "_doc",
+        "_id" : "_id",
+        "_source" : {
+          "text" : "Sziasztok! Ez egy rövid magyar szöveg. Nézzük, vajon sikerül-e azonosítania a language identification funkciónak? Annak ellenére is sikerülni fog, hogy a szöveg két angol szót is tartalmaz.",
+          "ml" : {
+            "inference" : {
+              "top_classes" : [ <1>
+                {
+                  "class_name" : "hu",
+                  "class_probability" : 0.9999936063740517,
+                  "class_score" : 0.9999936063740517
+                },
+                {
+                  "class_name" : "lv",
+                  "class_probability" : 2.5020248433413966E-6,
+                  "class_score" : 2.5020248433413966E-6
+                },
+                {
+                  "class_name" : "is",
+                  "class_probability" : 1.0150420723037688E-6,
+                  "class_score" : 1.0150420723037688E-6
+                },
+                {
+                  "class_name" : "ga",
+                  "class_probability" : 6.67935962773335E-7,
+                  "class_score" : 6.67935962773335E-7
+                },
+                {
+                  "class_name" : "tr",
+                  "class_probability" : 5.591166324774555E-7,
+                  "class_score" : 5.591166324774555E-7
+                }
+              ],
+              "predicted_value" : "hu", <2>
+              "model_id" : "lang_ident_model_1"
+            }
+          }
+        },
+        "_ingest" : {
+          "timestamp" : "2020-01-22T14:25:14.644912Z"
+        }
+      }
+    }
+  ]
+}
+----------------------------------
+//NOTCONSOLE
+
+<1> Contains scores for the most probable languages.
+<2> The ISO identifier of the language with the highest probability.
+
+
 [discrete]
 [[ml-lang-ident-readings]]
-=== Further reading
+== Further reading
 
 * {blog-ref}multilingual-search-using-language-identification-in-elasticsearch[Multilingual search using {lang-ident} in {es}]