From 97dfb62beb0ccd190ed1ebb4dd6699c767e5d6e0 Mon Sep 17 00:00:00 2001
From: Jiaxin Shan
Date: Fri, 14 Feb 2025 14:40:25 -0800
Subject: [PATCH] Increase the connection bufferLimit to avoid the end_of_stream=false case

ResponseBody.EndOfStream is sometimes false even when buffered mode is used.
The current implementation does not take this into consideration.

Signed-off-by: Jiaxin Shan
---
 config/gateway/gateway.yaml       | 2 +-
 test/e2e/routing_strategy_test.go | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/config/gateway/gateway.yaml b/config/gateway/gateway.yaml
index dae12ceb..8a8f9e18 100644
--- a/config/gateway/gateway.yaml
+++ b/config/gateway/gateway.yaml
@@ -29,7 +29,7 @@ spec:
     kind: Gateway
     name: aibrix-eg
   connection:
-    bufferLimit: 262144
+    bufferLimit: 1048576
 ---
 apiVersion: gateway.envoyproxy.io/v1alpha1
 kind: EnvoyExtensionPolicy
diff --git a/test/e2e/routing_strategy_test.go b/test/e2e/routing_strategy_test.go
index 41f41f98..cf8db9cb 100644
--- a/test/e2e/routing_strategy_test.go
+++ b/test/e2e/routing_strategy_test.go
@@ -26,6 +26,7 @@ import (
 	"github.com/openai/openai-go"
 	"github.com/openai/openai-go/option"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
 func TestPrefixCacheModelInference(t *testing.T) {
@@ -63,7 +64,7 @@ func getTargetPodFromChatCompletion(t *testing.T, message string) string {
 		}),
 		Model: openai.F(modelName),
 	})
-	assert.NoError(t, err, "chat completitions failed")
+	require.NoError(t, err, "chat completions failed %v", err)
 	assert.Equal(t, modelName, chatCompletion.Model)
 
 	return dst.Header.Get("target-pod")
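
Context for the change (illustrative note, not part of the patch): as the subject implies, a ResponseBody frame can arrive with end_of_stream=false even in buffered mode, and the patch mitigates this by raising the connection bufferLimit from 262144 to 1048576 (1 MiB). The Go sketch below shows the defensive pattern a body handler could use on top of that: accumulate chunks until end_of_stream is true instead of assuming the first frame carries the complete body. The bodyChunk type, responseBuffer helper, and the sample frames are hypothetical stand-ins mirroring the shape of an ext_proc ResponseBody message, not the AIBrix gateway implementation.

package main

import (
	"bytes"
	"fmt"
)

// bodyChunk mirrors the shape of an ext_proc ResponseBody frame:
// a byte slice plus the end_of_stream flag. Hypothetical type for
// illustration only.
type bodyChunk struct {
	Body        []byte
	EndOfStream bool
}

// responseBuffer accumulates chunks until end_of_stream is observed,
// rather than assuming the first frame contains the whole body.
type responseBuffer struct {
	buf      bytes.Buffer
	complete bool
}

// add appends a chunk and reports whether the full body is now available.
func (r *responseBuffer) add(c bodyChunk) bool {
	r.buf.Write(c.Body)
	if c.EndOfStream {
		r.complete = true
	}
	return r.complete
}

func main() {
	// Simulate a response that arrives in two frames because it exceeded
	// the connection buffer limit: only the second frame has
	// end_of_stream=true.
	frames := []bodyChunk{
		{Body: []byte(`{"model":"llama2-7b","choices":`), EndOfStream: false},
		{Body: []byte(`[{"index":0}]}`), EndOfStream: true},
	}

	var rb responseBuffer
	for i, f := range frames {
		if rb.add(f) {
			fmt.Printf("full body after frame %d: %s\n", i+1, rb.buf.String())
			return
		}
		fmt.Printf("frame %d: end_of_stream=false, buffering\n", i+1)
	}
}

Raising bufferLimit to 1 MiB, as the patch does, reduces how often the body is split across frames in the first place; the buffering pattern above is the complementary handling on the processor side.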