From 403cad00ef6de758db009533ee93154f5a3cdabf Mon Sep 17 00:00:00 2001
From: Richard Feldman
Date: Fri, 27 Sep 2024 13:30:25 -0400
Subject: [PATCH] Remove Qwen2 model (#18444)

Removed deprecated Qwen2 7B Instruct model from zed.dev provider (staff only).

Release Notes:

- N/A
---
 crates/collab/k8s/collab.template.yml         | 12 ---
 crates/collab/src/lib.rs                      |  4 -
 crates/collab/src/llm.rs                      | 36 --------
 crates/collab/src/llm/authorization.rs        |  2 -
 crates/collab/src/llm/db/seed.rs              |  9 --
 .../collab/src/llm/db/tests/provider_tests.rs |  1 -
 crates/collab/src/tests/test_server.rs        |  2 -
 .../language_model/src/model/cloud_model.rs   | 27 ------
 crates/language_model/src/provider/cloud.rs   | 84 +------------------
 crates/rpc/src/llm.rs                         |  1 -
 10 files changed, 2 insertions(+), 176 deletions(-)

diff --git a/crates/collab/k8s/collab.template.yml b/crates/collab/k8s/collab.template.yml
index 7ddb871503cccb..7d4ea6eb9a3cb2 100644
--- a/crates/collab/k8s/collab.template.yml
+++ b/crates/collab/k8s/collab.template.yml
@@ -149,18 +149,6 @@ spec:
               secretKeyRef:
                 name: google-ai
                 key: api_key
-          - name: RUNPOD_API_KEY
-            valueFrom:
-              secretKeyRef:
-                name: runpod
-                key: api_key
-                optional: true
-          - name: RUNPOD_API_SUMMARY_URL
-            valueFrom:
-              secretKeyRef:
-                name: runpod
-                key: summary
-                optional: true
           - name: BLOB_STORE_ACCESS_KEY
             valueFrom:
               secretKeyRef:
diff --git a/crates/collab/src/lib.rs b/crates/collab/src/lib.rs
index 81ff3ff21f6926..6c32023a97a287 100644
--- a/crates/collab/src/lib.rs
+++ b/crates/collab/src/lib.rs
@@ -170,8 +170,6 @@ pub struct Config {
     pub anthropic_api_key: Option<Arc<str>>,
     pub anthropic_staff_api_key: Option<Arc<str>>,
     pub llm_closed_beta_model_name: Option<Arc<str>>,
-    pub runpod_api_key: Option<Arc<str>>,
-    pub runpod_api_summary_url: Option<Arc<str>>,
     pub zed_client_checksum_seed: Option<String>,
     pub slack_panics_webhook: Option<String>,
     pub auto_join_channel_id: Option<ChannelId>,
@@ -235,8 +233,6 @@ impl Config {
             stripe_api_key: None,
             stripe_price_id: None,
             supermaven_admin_api_key: None,
-            runpod_api_key: None,
-            runpod_api_summary_url: None,
             user_backfiller_github_access_token: None,
         }
     }
diff --git a/crates/collab/src/llm.rs b/crates/collab/src/llm.rs
index 53f0bfdfd0130a..14f10342a78dd7 100644
--- a/crates/collab/src/llm.rs
+++ b/crates/collab/src/llm.rs
@@ -400,42 +400,6 @@ async fn perform_completion(
                 })
                 .boxed()
         }
-        LanguageModelProvider::Zed => {
-            let api_key = state
-                .config
-                .runpod_api_key
-                .as_ref()
-                .context("no Qwen2-7B API key configured on the server")?;
-            let api_url = state
-                .config
-                .runpod_api_summary_url
-                .as_ref()
-                .context("no Qwen2-7B URL configured on the server")?;
-            let chunks = open_ai::stream_completion(
-                &state.http_client,
-                api_url,
-                api_key,
-                serde_json::from_str(params.provider_request.get())?,
-                None,
-            )
-            .await?;
-
-            chunks
-                .map(|event| {
-                    event.map(|chunk| {
-                        let input_tokens =
-                            chunk.usage.as_ref().map_or(0, |u| u.prompt_tokens) as usize;
-                        let output_tokens =
-                            chunk.usage.as_ref().map_or(0, |u| u.completion_tokens) as usize;
-                        (
-                            serde_json::to_vec(&chunk).unwrap(),
-                            input_tokens,
-                            output_tokens,
-                        )
-                    })
-                })
-                .boxed()
-        }
     };

     Ok(Response::new(Body::wrap_stream(TokenCountingStream {
diff --git a/crates/collab/src/llm/authorization.rs b/crates/collab/src/llm/authorization.rs
index cc345579eca229..9f82af51c39b73 100644
--- a/crates/collab/src/llm/authorization.rs
+++ b/crates/collab/src/llm/authorization.rs
@@ -77,7 +77,6 @@ fn authorize_access_for_country(
         LanguageModelProvider::Anthropic => anthropic::is_supported_country(country_code),
         LanguageModelProvider::OpenAi => open_ai::is_supported_country(country_code),
         LanguageModelProvider::Google => google_ai::is_supported_country(country_code),
-        LanguageModelProvider::Zed => true,
     };
     if !is_country_supported_by_provider {
         Err(Error::http(
@@ -213,7 +212,6 @@ mod tests {
             (LanguageModelProvider::Anthropic, "T1"), // Tor
             (LanguageModelProvider::OpenAi, "T1"), // Tor
             (LanguageModelProvider::Google, "T1"), // Tor
-            (LanguageModelProvider::Zed, "T1"), // Tor
         ];

         for (provider, country_code) in cases {
diff --git a/crates/collab/src/llm/db/seed.rs b/crates/collab/src/llm/db/seed.rs
index 24bc224227c8d2..55c6c30cd5d8bf 100644
--- a/crates/collab/src/llm/db/seed.rs
+++ b/crates/collab/src/llm/db/seed.rs
@@ -40,15 +40,6 @@ pub async fn seed_database(_config: &Config, db: &mut LlmDatabase, _force: bool)
             price_per_million_input_tokens: 25, // $0.25/MTok
             price_per_million_output_tokens: 125, // $1.25/MTok
         },
-        ModelParams {
-            provider: LanguageModelProvider::Zed,
-            name: "Qwen/Qwen2-7B-Instruct".into(),
-            max_requests_per_minute: 5,
-            max_tokens_per_minute: 25_000, // These are arbitrary limits we've set to cap costs; we control this number
-            max_tokens_per_day: 300_000,
-            price_per_million_input_tokens: 25,
-            price_per_million_output_tokens: 125,
-        },
     ])
     .await
 }
diff --git a/crates/collab/src/llm/db/tests/provider_tests.rs b/crates/collab/src/llm/db/tests/provider_tests.rs
index ef0da1c373fca6..0bb55ee4b69a6c 100644
--- a/crates/collab/src/llm/db/tests/provider_tests.rs
+++ b/crates/collab/src/llm/db/tests/provider_tests.rs
@@ -26,7 +26,6 @@ async fn test_initialize_providers(db: &mut LlmDatabase) {
             LanguageModelProvider::Anthropic,
             LanguageModelProvider::Google,
             LanguageModelProvider::OpenAi,
-            LanguageModelProvider::Zed
         ]
     )
 }
diff --git a/crates/collab/src/tests/test_server.rs b/crates/collab/src/tests/test_server.rs
index 94c7d3907ff4ff..5ff4a720741bc8 100644
--- a/crates/collab/src/tests/test_server.rs
+++ b/crates/collab/src/tests/test_server.rs
@@ -679,8 +679,6 @@ impl TestServer {
                 stripe_api_key: None,
                 stripe_price_id: None,
                 supermaven_admin_api_key: None,
-                runpod_api_key: None,
-                runpod_api_summary_url: None,
                 user_backfiller_github_access_token: None,
             },
         })
diff --git a/crates/language_model/src/model/cloud_model.rs b/crates/language_model/src/model/cloud_model.rs
index 2ce48931f6d4db..9242f80e6e16c7 100644
--- a/crates/language_model/src/model/cloud_model.rs
+++ b/crates/language_model/src/model/cloud_model.rs
@@ -12,7 +12,6 @@ pub enum CloudModel {
     Anthropic(anthropic::Model),
     OpenAi(open_ai::Model),
     Google(google_ai::Model),
-    Zed(ZedModel),
 }

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema, EnumIter)]
@@ -21,26 +20,6 @@ pub enum ZedModel {
     Qwen2_7bInstruct,
 }

-impl ZedModel {
-    pub fn id(&self) -> &str {
-        match self {
-            ZedModel::Qwen2_7bInstruct => "Qwen/Qwen2-7B-Instruct",
-        }
-    }
-
-    pub fn display_name(&self) -> &str {
-        match self {
-            ZedModel::Qwen2_7bInstruct => "Qwen2 7B Instruct",
-        }
-    }
-
-    pub fn max_token_count(&self) -> usize {
-        match self {
-            ZedModel::Qwen2_7bInstruct => 28000,
-        }
-    }
-}
-
 impl Default for CloudModel {
     fn default() -> Self {
         Self::Anthropic(anthropic::Model::default())
     }
 }
@@ -53,7 +32,6 @@ impl CloudModel {
             Self::Anthropic(model) => model.id(),
             Self::OpenAi(model) => model.id(),
             Self::Google(model) => model.id(),
-            Self::Zed(model) => model.id(),
         }
     }

@@ -62,7 +40,6 @@ impl CloudModel {
             Self::Anthropic(model) => model.display_name(),
             Self::OpenAi(model) => model.display_name(),
             Self::Google(model) => model.display_name(),
-            Self::Zed(model) => model.display_name(),
         }
     }

@@ -78,7 +55,6 @@ impl CloudModel {
             Self::Anthropic(model) => model.max_token_count(),
             Self::OpenAi(model) => model.max_token_count(),
             Self::Google(model) => model.max_token_count(),
-            Self::Zed(model) => model.max_token_count(),
         }
     }

@@ -115,9 +91,6 @@ impl CloudModel {
                     LanguageModelAvailability::RequiresPlan(Plan::ZedPro)
                 }
             },
-            Self::Zed(model) => match model {
-                ZedModel::Qwen2_7bInstruct => LanguageModelAvailability::RequiresPlan(Plan::ZedPro),
-            },
         }
     }
 }
diff --git a/crates/language_model/src/provider/cloud.rs b/crates/language_model/src/provider/cloud.rs
index 3c407b77d929de..b81f6f9fba3363 100644
--- a/crates/language_model/src/provider/cloud.rs
+++ b/crates/language_model/src/provider/cloud.rs
@@ -3,7 +3,7 @@ use crate::provider::anthropic::map_to_language_model_completion_events;
 use crate::{
     settings::AllLanguageModelSettings, CloudModel, LanguageModel, LanguageModelCacheConfiguration,
     LanguageModelId, LanguageModelName, LanguageModelProviderId, LanguageModelProviderName,
-    LanguageModelProviderState, LanguageModelRequest, RateLimiter, ZedModel,
+    LanguageModelProviderState, LanguageModelRequest, RateLimiter,
 };
 use anthropic::AnthropicError;
 use anyhow::{anyhow, Result};
@@ -219,9 +219,6 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
                     models.insert(model.id().to_string(), CloudModel::Google(model));
                 }
             }
-            for model in ZedModel::iter() {
-                models.insert(model.id().to_string(), CloudModel::Zed(model));
-            }
         } else {
             models.insert(
                 anthropic::Model::Claude3_5Sonnet.id().to_string(),
@@ -472,7 +469,7 @@ impl LanguageModel for CloudLanguageModel {
                     min_total_token: cache.min_total_token,
                 })
             }
-            CloudModel::OpenAi(_) | CloudModel::Google(_) | CloudModel::Zed(_) => None,
+            CloudModel::OpenAi(_) | CloudModel::Google(_) => None,
         }
     }

@@ -502,9 +499,6 @@ impl LanguageModel for CloudLanguageModel {
                 }
                 .boxed()
             }
-            CloudModel::Zed(_) => {
-                count_open_ai_tokens(request, open_ai::Model::ThreePointFiveTurbo, cx)
-            }
         }
     }

@@ -603,35 +597,6 @@ impl LanguageModel for CloudLanguageModel {
                 }
                 .boxed()
             }
-            CloudModel::Zed(model) => {
-                let client = self.client.clone();
-                let mut request = request.into_open_ai(model.id().into(), None);
-                request.max_tokens = Some(4000);
-                let llm_api_token = self.llm_api_token.clone();
-                let future = self.request_limiter.stream(async move {
-                    let response = Self::perform_llm_completion(
-                        client.clone(),
-                        llm_api_token,
-                        PerformCompletionParams {
-                            provider: client::LanguageModelProvider::Zed,
-                            model: request.model.clone(),
-                            provider_request: RawValue::from_string(serde_json::to_string(
-                                &request,
-                            )?)?,
-                        },
-                        None,
-                    )
-                    .await?;
-                    Ok(open_ai::extract_text_from_events(response_lines(response)))
-                });
-                async move {
-                    Ok(future
-                        .await?
-                        .map(|result| result.map(LanguageModelCompletionEvent::Text))
-                        .boxed())
-                }
-                .boxed()
-            }
         }
     }

@@ -735,51 +700,6 @@ impl LanguageModel for CloudLanguageModel {
             CloudModel::Google(_) => {
                 future::ready(Err(anyhow!("tool use not implemented for Google AI"))).boxed()
             }
-            CloudModel::Zed(model) => {
-                // All Zed models are OpenAI-based at the time of writing.
-                let mut request = request.into_open_ai(model.id().into(), None);
-                request.tool_choice = Some(open_ai::ToolChoice::Other(
-                    open_ai::ToolDefinition::Function {
-                        function: open_ai::FunctionDefinition {
-                            name: tool_name.clone(),
-                            description: None,
-                            parameters: None,
-                        },
-                    },
-                ));
-                request.tools = vec![open_ai::ToolDefinition::Function {
-                    function: open_ai::FunctionDefinition {
-                        name: tool_name.clone(),
-                        description: Some(tool_description),
-                        parameters: Some(input_schema),
-                    },
-                }];
-
-                self.request_limiter
-                    .run(async move {
-                        let response = Self::perform_llm_completion(
-                            client.clone(),
-                            llm_api_token,
-                            PerformCompletionParams {
-                                provider: client::LanguageModelProvider::Zed,
-                                model: request.model.clone(),
-                                provider_request: RawValue::from_string(serde_json::to_string(
-                                    &request,
-                                )?)?,
-                            },
-                            None,
-                        )
-                        .await?;
-
-                        Ok(open_ai::extract_tool_args_from_events(
-                            tool_name,
-                            Box::pin(response_lines(response)),
-                        )
-                        .await?
-                        .boxed())
-                    })
-                    .boxed()
-            }
         }
     }
 }
diff --git a/crates/rpc/src/llm.rs b/crates/rpc/src/llm.rs
index 6cae54b3090d56..681f2d8db32d18 100644
--- a/crates/rpc/src/llm.rs
+++ b/crates/rpc/src/llm.rs
@@ -12,7 +12,6 @@ pub enum LanguageModelProvider {
     Anthropic,
     OpenAi,
     Google,
-    Zed,
 }

 #[derive(Debug, Serialize, Deserialize)]