Azure-Samples
diff --git a/‎evals/results/baseline/README.md
Lines changed: 8 additions & 0 deletions b/‎evals/results/baseline/README.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎evals/results/baseline/config.json
Lines changed: 1 addition & 1 deletion b/‎evals/results/baseline/config.json
Lines changed: 1 addition & 1 deletion
diff --git a/‎evals/results/baseline/eval_results.jsonl
Lines changed: 50 additions & 50 deletions b/‎evals/results/baseline/eval_results.jsonl
Lines changed: 50 additions & 50 deletions
diff --git a/‎evals/results/baseline/evaluate_parameters.json
Lines changed: 1 addition & 1 deletion b/‎evals/results/baseline/evaluate_parameters.json
Lines changed: 1 addition & 1 deletion
diff --git a/‎evals/results/baseline/summary.json
Lines changed: 11 additions & 11 deletions b/‎evals/results/baseline/summary.json
Lines changed: 11 additions & 11 deletions
diff --git a/‎evals/results/gpt-35-turbo/config.json renamed to ‎evals/results/gpt35turbo-ada002/config.json b/‎evals/results/gpt-35-turbo/config.json renamed to ‎evals/results/gpt35turbo-ada002/config.json
diff --git a/‎evals/results/gpt-35-turbo/eval_results.jsonl renamed to ‎evals/results/gpt35turbo-ada002/eval_results.jsonl b/‎evals/results/gpt-35-turbo/eval_results.jsonl renamed to ‎evals/results/gpt35turbo-ada002/eval_results.jsonl
diff --git a/‎evals/results/gpt-35-turbo/evaluate_parameters.json renamed to ‎evals/results/gpt35turbo-ada002/evaluate_parameters.json b/‎evals/results/gpt-35-turbo/evaluate_parameters.json renamed to ‎evals/results/gpt35turbo-ada002/evaluate_parameters.json
diff --git a/‎evals/results/gpt-35-turbo/summary.json renamed to ‎evals/results/gpt35turbo-ada002/summary.json b/‎evals/results/gpt-35-turbo/summary.json renamed to ‎evals/results/gpt35turbo-ada002/summary.json
diff --git a/‎evals/results/gpt4omini-ada002/config.json
Lines changed: 28 additions & 0 deletions b/‎evals/results/gpt4omini-ada002/config.json
Lines changed: 28 additions & 0 deletions
diff --git a/‎evals/results/gpt4omini-ada002/eval_results.jsonl
Lines changed: 50 additions & 0 deletions b/‎evals/results/gpt4omini-ada002/eval_results.jsonl
Lines changed: 50 additions & 0 deletions
diff --git a/‎evals/results/gpt4omini-ada002/evaluate_parameters.json
Lines changed: 27 additions & 0 deletions b/‎evals/results/gpt4omini-ada002/evaluate_parameters.json
Lines changed: 27 additions & 0 deletions
diff --git a/‎evals/results/gpt4omini-ada002/summary.json
Lines changed: 33 additions & 0 deletions b/‎evals/results/gpt4omini-ada002/summary.json
Lines changed: 33 additions & 0 deletions
diff --git a/‎evals/results/o3-mini/config.json renamed to ‎evals/results/o3mini-ada002/config.json b/‎evals/results/o3-mini/config.json renamed to ‎evals/results/o3mini-ada002/config.json
diff --git a/‎evals/results/o3-mini/eval_results.jsonl renamed to ‎evals/results/o3mini-ada002/eval_results.jsonl b/‎evals/results/o3-mini/eval_results.jsonl renamed to ‎evals/results/o3mini-ada002/eval_results.jsonl
diff --git a/‎evals/results/o3-mini/evaluate_parameters.json renamed to ‎evals/results/o3mini-ada002/evaluate_parameters.json b/‎evals/results/o3-mini/evaluate_parameters.json renamed to ‎evals/results/o3mini-ada002/evaluate_parameters.json
diff --git a/‎evals/results/o3-mini/summary.json renamed to ‎evals/results/o3mini-ada002/summary.json b/‎evals/results/o3-mini/summary.json renamed to ‎evals/results/o3mini-ada002/summary.json
@@ -0,0 +1,8 @@
+# Baseline Evaluation
+
+This evaluation was done with the application using the following models:
+
+* Chat completion: gpt-4o-mini
+* Embedding: text-embedding-3-large (with binary quantization, 1024 dimension reduction, and oversampling)
+
+These are the default models and settings as of May 8, 2025.
@@ -1,6 +1,6 @@
 {
     "testdata_path": "ground_truth.jsonl",
-    "results_dir": "results/gpt-4o-mini",
+    "results_dir": "results/experiment<TIMESTAMP>",
     "requested_metrics": ["gpt_groundedness", "gpt_relevance", "answer_length", "latency", "citations_matched", "any_citation"],
     "target_url": "http://localhost:50505/chat",
     "target_parameters": {
 
@@ -1,6 +1,6 @@
 {
     "evaluation_gpt_model": "gpt-4o",
-    "evaluation_timestamp": 1744920281,
+    "evaluation_timestamp": 1746818372,
     "testdata_path": "/Users/pamelafox/azure-search-openai-demo/evals/ground_truth.jsonl",
     "target_url": "http://localhost:50505/chat",
     "target_parameters": {
 
@@ -1,27 +1,27 @@
 {
     "gpt_groundedness": {
-        "pass_count": 44,
-        "pass_rate": 0.88,
-        "mean_rating": 4.62
+        "pass_count": 43,
+        "pass_rate": 0.86,
+        "mean_rating": 4.5
     },
     "gpt_relevance": {
         "pass_count": 42,
         "pass_rate": 0.84,
-        "mean_rating": 4.12
+        "mean_rating": 4.22
     },
     "answer_length": {
-        "mean": 922.42,
-        "max": 1616,
+        "mean": 919.26,
+        "max": 1647,
         "min": 193
     },
     "latency": {
-        "mean": 3.14,
-        "max": 7.583068,
-        "min": 1.598833
+        "mean": 4.46,
+        "max": 15.129978,
+        "min": 2.465542
     },
     "citations_matched": {
-        "total": 25,
-        "rate": 0.5
+        "total": 24,
+        "rate": 0.49
     },
     "any_citation": {
         "total": 50,
 
@@ -0,0 +1,28 @@
+{
+    "testdata_path": "ground_truth.jsonl",
+    "results_dir": "results/gpt-4o-mini",
+    "requested_metrics": ["gpt_groundedness", "gpt_relevance", "answer_length", "latency", "citations_matched", "any_citation"],
+    "target_url": "http://localhost:50505/chat",
+    "target_parameters": {
+        "overrides": {
+            "top": 3,
+            "temperature": 0.3,
+            "minimum_reranker_score": 0,
+            "minimum_search_score": 0,
+            "retrieval_mode": "hybrid",
+            "semantic_ranker": true,
+            "semantic_captions": false,
+            "suggest_followup_questions": false,
+            "use_oid_security_filter": false,
+            "use_groups_security_filter": false,
+            "vector_fields": [
+                "embedding"
+            ],
+            "use_gpt4v": false,
+            "gpt4v_input": "textAndImages",
+            "seed": 1
+        }
+    },
+    "target_response_answer_jmespath": "message.content",
+    "target_response_context_jmespath": "context.data_points.text"
+}
@@ -0,0 +1,27 @@
+{
+    "evaluation_gpt_model": "gpt-4o",
+    "evaluation_timestamp": 1744920281,
+    "testdata_path": "/Users/pamelafox/azure-search-openai-demo/evals/ground_truth.jsonl",
+    "target_url": "http://localhost:50505/chat",
+    "target_parameters": {
+        "overrides": {
+            "top": 3,
+            "temperature": 0.3,
+            "minimum_reranker_score": 0,
+            "minimum_search_score": 0,
+            "retrieval_mode": "hybrid",
+            "semantic_ranker": true,
+            "semantic_captions": false,
+            "suggest_followup_questions": false,
+            "use_oid_security_filter": false,
+            "use_groups_security_filter": false,
+            "vector_fields": [
+                "embedding"
+            ],
+            "use_gpt4v": false,
+            "gpt4v_input": "textAndImages",
+            "seed": 1
+        }
+    },
+    "num_questions": null
+}
@@ -0,0 +1,33 @@
+{
+    "gpt_groundedness": {
+        "pass_count": 44,
+        "pass_rate": 0.88,
+        "mean_rating": 4.62
+    },
+    "gpt_relevance": {
+        "pass_count": 42,
+        "pass_rate": 0.84,
+        "mean_rating": 4.12
+    },
+    "answer_length": {
+        "mean": 922.42,
+        "max": 1616,
+        "min": 193
+    },
+    "latency": {
+        "mean": 3.14,
+        "max": 7.583068,
+        "min": 1.598833
+    },
+    "citations_matched": {
+        "total": 25,
+        "rate": 0.5
+    },
+    "any_citation": {
+        "total": 50,
+        "rate": 1.0
+    },
+    "num_questions": {
+        "total": 50
+    }
+}
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"testdata_path": "ground_truth.jsonl",`
`3`		`- "results_dir": "results/gpt-4o-mini",`
	`3`	`+ "results_dir": "results/experiment<TIMESTAMP>",`
`4`	`4`	`"requested_metrics": ["gpt_groundedness", "gpt_relevance", "answer_length", "latency", "citations_matched", "any_citation"],`
`5`	`5`	`"target_url": "http://localhost:50505/chat",`
`6`	`6`	`"target_parameters": {`
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"evaluation_gpt_model": "gpt-4o",`
`3`		`- "evaluation_timestamp": 1744920281,`
	`3`	`+ "evaluation_timestamp": 1746818372,`
`4`	`4`	`"testdata_path": "/Users/pamelafox/azure-search-openai-demo/evals/ground_truth.jsonl",`
`5`	`5`	`"target_url": "http://localhost:50505/chat",`
`6`	`6`	`"target_parameters": {`