Skip to content

Commit d6930ea

Browse files
GenerQAQ and gusye1234 authored
feat(core, api, cli, ui, docs): integrate OpenTelemetry tracing in server (#46) (#47)
* feat(docker): add Jaeger service for OpenTelemetry tracing
* feat(api): integrate OpenTelemetry tracing
* feat(core): integrate OpenTelemetry tracing
* fix(docker): restore Jaeger service condition in docker-compose and remove unused server binary
* fix(api): initialize OpenTelemetry propagator in mock CoreClient to prevent nil pointer panic
* chore(api): update dependencies and integrate OpenTelemetry middleware for S3
* feat(core): enhance OpenTelemetry integration
* feat(core): add new OpenTelemetry instrumentation packages for enhanced monitoring
* fix(docker): update SeaweedFS image version to 3.96 in docker-compose
* feat(core): implement OpenTelemetry tracing for message processing and publishing in async_mq.py
* fix(api): use request context in project authentication middleware for improved database queries
* fix(docker): update RabbitMQ, SeaweedFS, AWS CLI, and Jaeger image versions in docker-compose
* fix(cli): enhance docker-compose with Jaeger service and update healthcheck syntax for improved readability
* feat(core): implement comprehensive OpenTelemetry configuration and error handling across services
* feat(core): add OpenTelemetry instrumentation for S3 client operations
* chore: update local_dev script
* fix(core): use core config for otel
* fix(core): remove unused
* feat(core): enhance OpenTelemetry instrumentation for LLM functions and streamline tracing setup in API
* fix(api): edit OpenTelemetry middleware
* feat(ui): add Jaeger API endpoints for availability check, services retrieval, and traces listing
* feat(docker): add Jaeger UI URLs to client and server docker-compose configurations
* feat(ui): add spacing to layout components and introduce IonLanguage icon for language switcher
* refactor(ui): skills temporarily hide dropdown menu for page type selection
* feat(docs): update dashboard images and add new traces viewer image
* feat(ui): add countdown timer and last refresh time display to traces page

---------

Co-authored-by: Gus <[email protected]>
1 parent 70a400c commit d6930ea

File tree

54 files changed

+4692
-1490
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

54 files changed

+4692
-1490
lines changed

docs/images/dashboard/BI.png

-95.5 KB
Loading
-54.4 KB
Loading
-92.1 KB
Loading
-46.5 KB
Loading
-59.2 KB
Loading
-160 KB
Loading
307 KB
Loading

src/client/acontext-cli/internal/docker/docker-compose.yaml

Lines changed: 47 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,11 @@ services:
1414
volumes:
1515
- ${DATABASE_LOCATION:-./acontext_data/pg}:/var/lib/postgresql/data
1616
healthcheck:
17-
test: ["CMD-SHELL", "pg_isready -U ${DATABASE_USER:-acontext} -d ${DATABASE_NAME:-acontext}"]
17+
test:
18+
[
19+
"CMD-SHELL",
20+
"pg_isready -U ${DATABASE_USER:-acontext} -d ${DATABASE_NAME:-acontext}",
21+
]
1822
interval: 30s
1923
timeout: 5s
2024
retries: 5
@@ -37,7 +41,7 @@ services:
3741

3842
# --- RabbitMQ ---
3943
acontext-server-rabbitmq:
40-
image: rabbitmq:3-management
44+
image: rabbitmq:4-management
4145
container_name: acontext-server-rabbitmq
4246
restart: unless-stopped
4347
environment:
@@ -57,7 +61,7 @@ services:
5761

5862
# --- SeaweedFS ---
5963
acontext-server-seaweedfs:
60-
image: chrislusf/seaweedfs:latest
64+
image: chrislusf/seaweedfs:4.00
6165
container_name: acontext-server-seaweedfs
6266
restart: unless-stopped
6367
user: root # https://github.com/seaweedfs/seaweedfs/pull/7399
@@ -96,7 +100,7 @@ services:
96100

97101
# init Bucket for SeaweedFS
98102
acontext-server-seaweedfs-setup:
99-
image: amazon/aws-cli:latest
103+
image: amazon/aws-cli:2.32.6
100104
container_name: acontext-server-seaweedfs-setup
101105
depends_on:
102106
acontext-server-seaweedfs:
@@ -113,6 +117,33 @@ services:
113117
"
114118
restart: "no"
115119

120+
# --- Jaeger (tracing backend; receives OpenTelemetry OTLP data) ---
121+
acontext-server-jaeger:
122+
image: jaegertracing/all-in-one:1.75.0
123+
container_name: acontext-server-jaeger
124+
restart: unless-stopped
125+
user: root # Run as root to ensure write permissions to mounted volume
126+
environment:
127+
COLLECTOR_OTLP_ENABLED: "true"
128+
# Use Badger as storage backend (local file storage, suitable for development)
129+
SPAN_STORAGE_TYPE: "badger"
130+
BADGER_EPHEMERAL: "false" # Disable ephemeral storage, enable persistence
131+
BADGER_DIRECTORY: "/badger/data"
132+
BADGER_SPAN_STORE_TTL: "168h" # Data retention: 7 days
133+
ports:
134+
- "${JAEGER_UI_EXPORT_PORT:-16686}:16686" # Jaeger UI
135+
- "${JAEGER_OTLP_GRPC_EXPORT_PORT:-4317}:4317" # OTLP gRPC receiver
136+
- "${JAEGER_OTLP_HTTP_EXPORT_PORT:-4318}:4318" # OTLP HTTP receiver
137+
- "14250:14250" # gRPC (model.proto)
138+
- "14268:14268" # HTTP (thrift)
139+
volumes:
140+
- ${JAEGER_STORAGE_LOCATION:-./acontext_data/jaeger}:/badger/data
141+
healthcheck:
142+
test: ["CMD", "wget", "--spider", "-q", "http://localhost:16686"]
143+
interval: 30s
144+
timeout: 5s
145+
retries: 5
146+
116147
# acontext-server-core
117148
acontext-server-core:
118149
image: ${CORE_IMAGE:-ghcr.io/memodb-io/acontext-core:latest}
@@ -127,6 +158,7 @@ services:
127158
MQ_URL: amqp://${RABBITMQ_USER:-acontext}:${RABBITMQ_PASSWORD:-helloworld}@acontext-server-rabbitmq:5672/
128159
REDIS_URL: redis://:${REDIS_PASSWORD:-helloworld}@acontext-server-redis:6379
129160
S3_ENDPOINT: http://acontext-server-seaweedfs:9000
161+
OTEL_EXPORTER_OTLP_ENDPOINT: acontext-server-jaeger:4317
130162
ports:
131163
- "${CORE_EXPORT_PORT:-8019}:8000"
132164
volumes:
@@ -142,6 +174,8 @@ services:
142174
condition: service_healthy
143175
acontext-server-seaweedfs-setup:
144176
condition: service_completed_successfully
177+
acontext-server-jaeger:
178+
condition: service_healthy
145179

146180
# acontext-server-api
147181
acontext-server-api:
@@ -173,10 +207,16 @@ services:
173207
S3_SECRET_KEY: ${S3_SECRET_KEY:-helloworld}
174208
S3_BUCKET: ${S3_BUCKET:-acontext-assets}
175209
CORE_BASE_URL: http://acontext-server-core:8000
210+
OTEL_EXPORTER_OTLP_ENDPOINT: acontext-server-jaeger:4317
211+
APP_ENV: ${APP_ENV:-development}
176212
ports:
177213
- "${API_EXPORT_PORT:-8029}:8029"
178214
healthcheck:
179-
test: ["CMD-SHELL", "wget -q -O- http://acontext-server-api:8029/health || exit 1"]
215+
test:
216+
[
217+
"CMD-SHELL",
218+
"wget -q -O- http://acontext-server-api:8029/health || exit 1",
219+
]
180220
interval: 30s
181221
timeout: 5s
182222
retries: 5
@@ -199,6 +239,8 @@ services:
199239
API_SERVER_URL: http://acontext-server-api:8029
200240
ROOT_API_BEARER_TOKEN: ${ROOT_API_BEARER_TOKEN:-your-root-api-bearer-token}
201241
DATABASE_URL: postgresql://${DATABASE_USER:-acontext}:${DATABASE_PASSWORD:-helloworld}@acontext-server-pg:5432/${DATABASE_NAME:-acontext}
242+
JAEGER_UI_URL: http://acontext-server-jaeger:16686
243+
NEXT_PUBLIC_JAEGER_UI_URL: http://acontext-server-jaeger:16686
202244
ports:
203245
- "${UI_EXPORT_PORT:-3000}:3000"
204246
depends_on:

src/server/api/go/cmd/server/main.go

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,13 @@ import (
2424
"github.com/gin-gonic/gin"
2525
"github.com/memodb-io/Acontext/internal/bootstrap"
2626
"github.com/memodb-io/Acontext/internal/config"
27+
"github.com/memodb-io/Acontext/internal/infra/cache"
28+
dbpkg "github.com/memodb-io/Acontext/internal/infra/db"
2729
"github.com/memodb-io/Acontext/internal/modules/handler"
2830
"github.com/memodb-io/Acontext/internal/pkg/tokenizer"
2931
"github.com/memodb-io/Acontext/internal/router"
32+
"github.com/memodb-io/Acontext/internal/telemetry"
33+
"github.com/redis/go-redis/v9"
3034
"github.com/samber/do"
3135
"go.uber.org/zap"
3236
"gorm.io/gorm"
@@ -39,12 +43,42 @@ func main() {
3943
cfg := do.MustInvoke[*config.Config](inj)
4044
log := do.MustInvoke[*zap.Logger](inj)
4145
db := do.MustInvoke[*gorm.DB](inj)
46+
rdb := do.MustInvoke[*redis.Client](inj)
4247

4348
// Initialize tokenizer (vocabulary is already embedded in the package)
4449
if err := tokenizer.Init(log); err != nil {
4550
log.Sugar().Fatalw("failed to initialize tokenizer", "err", err)
4651
}
4752

53+
// Set up OpenTelemetry tracing (driven by the loaded configuration)
54+
tp, err := telemetry.SetupTracing(cfg)
55+
if err != nil {
56+
log.Sugar().Warnw("failed to setup tracing, continuing without tracing", "err", err)
57+
} else if tp != nil {
58+
log.Sugar().Info("OpenTelemetry tracing enabled", "endpoint", cfg.Telemetry.OtlpEndpoint)
59+
defer func() {
60+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
61+
defer cancel()
62+
if err := telemetry.Shutdown(ctx); err != nil {
63+
log.Sugar().Errorw("failed to shutdown tracer", "err", err)
64+
}
65+
}()
66+
67+
// Register GORM OpenTelemetry plugin after tracer provider is set
68+
if err := dbpkg.RegisterOpenTelemetryPlugin(db); err != nil {
69+
log.Sugar().Warnw("failed to register GORM OpenTelemetry plugin, continuing without database tracing", "err", err)
70+
} else {
71+
log.Sugar().Info("GORM OpenTelemetry plugin registered")
72+
}
73+
74+
// Register Redis OpenTelemetry plugin after tracer provider is set
75+
if err := cache.RegisterOpenTelemetryPlugin(rdb); err != nil {
76+
log.Sugar().Warnw("failed to register Redis OpenTelemetry plugin, continuing without Redis tracing", "err", err)
77+
} else {
78+
log.Sugar().Info("Redis OpenTelemetry plugin registered")
79+
}
80+
}
81+
4882
// init gin
4983
gin.SetMode(cfg.App.Env)
5084

src/server/api/go/configs/config.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,3 +40,8 @@ s3:
4040

4141
core:
4242
baseURL: "${CORE_BASE_URL}"
43+
44+
telemetry:
45+
otlpEndpoint: "${OTEL_EXPORTER_OTLP_ENDPOINT}"
46+
enabled: true
47+
sampleRatio: 1.0 # Sampling ratio, 0.0-1.0, default 1.0 (100%)

0 commit comments

Comments
 (0)