
Commit 87fcdd2

add metric lightllm_request_mtp_avg_token_per_step (#1169)
1 parent: 320345d

File tree: 3 files changed, +14 −0 lines changed


lightllm/server/httpserver/manager.py

Lines changed: 3 additions & 0 deletions

```diff
@@ -645,6 +645,9 @@ async def _wait_to_token_package(
                 )
                 self.metric_client.histogram_observe("lightllm_request_generated_tokens", out_token_counter)
                 self.metric_client.counter_inc("lightllm_request_success")
+                self.metric_client.histogram_observe(
+                    "lightllm_request_mtp_avg_token_per_step", mtp_avg_token_per_step
+                )
 
                 return
             req_status.out_token_info_list.clear()
```
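The value observed here is the per-request ratio of generated tokens to executed decode steps under MTP (multi-token prediction) speculative decoding. This hunk only shows the observe call; the computation of `mtp_avg_token_per_step` in this file sits outside the diff context. A minimal sketch of the quantity, using a hypothetical helper name (the actual code computes it inline, as the second file below shows):

```python
# Hypothetical helper illustrating the observed value; not part of the commit.
def mtp_avg_token_per_step(generated_tokens: int, mtp_accepted_tokens: int) -> float:
    # Each MTP-accepted draft token saves one decode step, so the number of
    # steps actually executed is generated - accepted; max(..., 1) guards
    # against division by zero for empty outputs.
    steps = max(generated_tokens - mtp_accepted_tokens, 1)
    return generated_tokens / steps
```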

lightllm/server/httpserver_for_pd_master/manager.py

Lines changed: 7 additions & 0 deletions

```diff
@@ -320,6 +320,7 @@ async def _wait_to_token_package(
         group_request_id = sampling_params.group_request_id
         unfinished_count = sampling_params.best_of
         is_first_token = True
+        sub_req_id_to_mtp_accepted_token_num: Dict[int, int] = {}
 
         client_mode: NodeRole = NodeRole(d_node.mode)
 
@@ -333,6 +334,7 @@ async def _wait_to_token_package(
 
             prompt_tokens = metadata["prompt_tokens"]
             out_token_counter += 1
+            sub_req_id_to_mtp_accepted_token_num[sub_req_id] = metadata.get("mtp_accepted_token_num", 0)
             if is_first_token:
                 first_token_cost_ms = (time.time() - start_time) * 1000
                 is_first_token = False
@@ -351,6 +353,9 @@ async def _wait_to_token_package(
                 x_session_id = request.headers.get("X-Session-Id", "")
                 prompt_cache_len = metadata.pop("prompt_cache_len", 0)
                 prompt_cache_ratio = prompt_cache_len / prompt_tokens
+                mtp_avg_token_per_step = out_token_counter / max(
+                    (out_token_counter - sum(sub_req_id_to_mtp_accepted_token_num.values())), 1
+                )
                 format_start_time = datetime.datetime.fromtimestamp(start_time).strftime("%Y-%m-%d %H:%M:%S")
                 logger.info(
                     f"X-Request-Id:{x_request_id} "
@@ -361,6 +366,7 @@ async def _wait_to_token_package(
                     f"prompt_token_num:{prompt_tokens} "
                     f"prompt_cache_len:{prompt_cache_len} "
                     f"prompt_cache_ratio:{prompt_cache_ratio} "
+                    f"mtp_avg_token_per_step:{mtp_avg_token_per_step} "
                 )
                 self.metric_client.histogram_observe("lightllm_request_inference_duration", total_cost_time_ms / 1000.0)
                 self.metric_client.histogram_observe(
@@ -369,6 +375,7 @@ async def _wait_to_token_package(
                 self.metric_client.histogram_observe("lightllm_request_first_token_duration", first_token_cost_ms / 1000.0)
                 self.metric_client.histogram_observe("lightllm_request_generated_tokens", out_token_counter)
                 self.metric_client.counter_inc("lightllm_request_success")
+                self.metric_client.histogram_observe("lightllm_request_mtp_avg_token_per_step", mtp_avg_token_per_step)
                 return
 
     async def abort(
```
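The formula counts one executed decode step per generated token, minus one step for every MTP-accepted token, then divides the total output by that step count. Each incoming message overwrites its sub-request's dict entry, which suggests `metadata["mtp_accepted_token_num"]` carries a cumulative count (an assumption; the diff does not show the producer side). A small worked example with illustrative numbers:

```python
# Illustrative values, not from the commit: a request with two sub-requests.
out_token_counter = 120                  # tokens generated for the request
accepted = {0: 30, 1: 10}                # sub_req_id -> MTP-accepted token count
steps = max(out_token_counter - sum(accepted.values()), 1)  # 120 - 40 = 80 steps
print(out_token_counter / steps)         # 1.5 generated tokens per decode step
```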

lightllm/server/metrics/metrics.py

Lines changed: 4 additions & 0 deletions

```diff
@@ -26,6 +26,7 @@
     "lightllm_cache_length": "Length of tokens which hit prompt cache",
     "lightllm_cache_ratio": "cache length / input_length",
     "lightllm_batch_current_max_tokens": "dynamic max token used for current batch",
+    "lightllm_request_mtp_avg_token_per_step": "Average number of tokens per step",
 }
 
 
@@ -94,6 +95,9 @@ def init_metrics(self, args):
         ratio_buckets = [(i + 1) / 10.0 for i in range(-1, 10)]
         self.create_histogram("lightllm_cache_ratio", ratio_buckets)
 
+        mtp_avg_token_per_step_buckets = [i / 10.0 + 1.0 for i in range(0, 10 * args.mtp_step)]
+        self.create_histogram("lightllm_request_mtp_avg_token_per_step", mtp_avg_token_per_step_buckets)
+
     def create_histogram(self, name, buckets, labelnames=None):
         if labelnames is None:
             histogram = Histogram(name, MONITOR_INFO[name], buckets=buckets, registry=self.registry)
```
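The bucket comprehension covers [1.0, 1.0 + args.mtp_step) in steps of 0.1, so with `args.mtp_step = 1` the upper bounds are 1.0, 1.1, ..., 1.9, and prometheus_client appends an implicit +Inf bucket. A self-contained sketch of what `create_histogram` sets up, assuming `mtp_step = 1` for illustration:

```python
from prometheus_client import CollectorRegistry, Histogram

registry = CollectorRegistry()
mtp_step = 1  # illustrative stand-in for args.mtp_step

# Same comprehension as the diff: bucket bounds 1.0, 1.1, ..., 1.9.
buckets = [i / 10.0 + 1.0 for i in range(0, 10 * mtp_step)]
hist = Histogram(
    "lightllm_request_mtp_avg_token_per_step",
    "Average number of tokens per step",
    buckets=buckets,
    registry=registry,
)
hist.observe(1.5)  # one finished request that averaged 1.5 tokens per step
```

Requests whose average falls above the top bound still land in the implicit +Inf bucket, so the histogram counts them even if `mtp_step` understates the achievable acceptance rate.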
