Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions model_gateway/src/routers/http/pd_router.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,6 @@ impl PDRouter {
ports.push(prefill_worker.bootstrap_port());
rooms.push(super::pd_types::generate_room_id());
}
// Keys come from associated constants (e.g. `Self::BOOTSTRAP_HOST_KEY`); note that `.to_string()` still allocates an owned key on each insert
obj.insert(
Self::BOOTSTRAP_HOST_KEY.to_string(),
Value::Array(hosts.into_iter().map(Value::from).collect()),
Expand All @@ -256,7 +255,6 @@ impl PDRouter {
Value::Array(rooms.into_iter().map(Value::from).collect()),
);
} else {
// Keys come from associated constants (e.g. `Self::BOOTSTRAP_HOST_KEY`); note that `.to_string()` still allocates an owned key on each insert
obj.insert(
Self::BOOTSTRAP_HOST_KEY.to_string(),
Value::from(prefill_worker.bootstrap_host()),
Expand Down Expand Up @@ -422,7 +420,6 @@ impl PDRouter {
context,
Arc::clone(&prefill),
Arc::clone(&decode),
start_time,
)
.await;

Expand Down Expand Up @@ -603,7 +600,6 @@ impl PDRouter {
}

// Internal method that performs the actual dual dispatch (without retry logic)
#[expect(clippy::too_many_arguments)]
async fn execute_dual_dispatch_internal(
&self,
headers: Option<&HeaderMap>,
Expand All @@ -612,7 +608,6 @@ impl PDRouter {
context: PDRequestContext<'_>,
prefill: Arc<dyn Worker>,
decode: Arc<dyn Worker>,
_start_time: Instant,
) -> Response {
// For non-streaming: use guard for automatic load management
// For streaming: load will be managed in create_streaming_response
Expand Down Expand Up @@ -1277,8 +1272,6 @@ impl RouterTrait for PDRouter {
}

/// Handles a server-info request by delegating to
/// `proxy_to_first_prefill_worker` with the `"get_server_info"` endpoint name.
///
/// The incoming request is not inspected (`_req` is unused).
async fn get_server_info(&self, _req: Request<Body>) -> Response {
// NOTE(review): earlier comments here said the first *decode* server is
// queried, but the call below goes through the *prefill* proxy helper.
// Confirm which worker type is intended for sglang's server info format.
self.proxy_to_first_prefill_worker("get_server_info", None)
.await
}
Expand Down
44 changes: 0 additions & 44 deletions model_gateway/src/routers/http/pd_types.rs
Original file line number Diff line number Diff line change
@@ -1,39 +1,5 @@
//! Types and utilities for the prefill-decode (PD) disaggregated router.

/// Custom error type for PD router operations.
///
/// Each variant carries the context string that is interpolated into its
/// `Display` message by the `thiserror` `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error)]
pub enum PDRouterError {
/// A worker is already present under `url`.
#[error("Worker already exists: {url}")]
WorkerAlreadyExists { url: String },

/// No worker is known under `url`.
#[error("Worker not found: {url}")]
WorkerNotFound { url: String },

/// A lock could not be acquired; `operation` names what was being attempted.
#[error("Lock acquisition failed: {operation}")]
LockError { operation: String },

/// The health check for the worker at `url` failed.
#[error("Health check failed for worker: {url}")]
HealthCheckFailed { url: String },

/// The worker configuration was rejected; `reason` explains why.
#[error("Invalid worker configuration: {reason}")]
InvalidConfiguration { reason: String },

/// A network-level failure; `message` holds the details.
#[error("Network error: {message}")]
NetworkError { message: String },

/// Waiting on the worker at `url` timed out.
#[error("Timeout waiting for worker: {url}")]
Timeout { url: String },
}

/// Construct a full API URL from a base URL and path.
///
/// The result always has exactly one `/` between the base and the path:
///
/// * trailing slashes on `url` are dropped before joining, so
///   `("http://host/", "/v1")` yields `http://host/v1` rather than
///   `http://host//v1`;
/// * a separator is inserted when `api_path` does not start with `/`.
///
/// `api_path` itself is appended verbatim.
pub fn api_path(url: &str, api_path: &str) -> String {
    // Normalise the base so a trailing slash cannot double up with the
    // path's leading slash.
    let base = url.trim_end_matches('/');
    if api_path.starts_with('/') {
        format!("{base}{api_path}")
    } else {
        format!("{base}/{api_path}")
    }
}

use serde::Serialize;

/// Optimized bootstrap wrapper for single requests.
Expand All @@ -46,16 +12,6 @@ pub struct RequestWithBootstrap<'a, T: Serialize> {
pub bootstrap_room: u64,
}

/// Optimized bootstrap wrapper for batch requests.
///
/// Borrows the original request and serializes it together with the
/// bootstrap vectors, avoiding a copy of the request itself.
// NOTE(review): the three vectors presumably hold one entry per batch item,
// index-aligned with each other — confirm against the construction site.
#[derive(Serialize)]
pub struct BatchRequestWithBootstrap<'a, T: Serialize> {
/// The wrapped request; `flatten` emits its fields at the top level of the
/// serialized output instead of nesting them under an `original` key.
#[serde(flatten)]
pub original: &'a T,
/// Bootstrap host names.
pub bootstrap_host: Vec<String>,
/// Bootstrap ports; an entry is `None` when no port is available.
pub bootstrap_port: Vec<Option<u16>>,
/// Bootstrap room IDs.
pub bootstrap_room: Vec<u64>,
}

/// Generate a random bootstrap room ID.
pub fn generate_room_id() -> u64 {
// Generate a value in the range [0, 2^63 - 1] to match Python's random.randint(0, 2**63 - 1)
Expand Down
Loading