Skip to content

Commit 7bfb52a

Browse files
author
Jithendra
committed
feat: add MiniMax model support via OpenAI-compatible API
- Add MiniMax patterns (MiniMax-M2.5, etc.) to the OpenAI provider. Users can now use MiniMax with LangExtract by specifying model_id='MiniMax-M2.5' (or another MiniMax model) and provider_kwargs={'api_key': ..., 'base_url': 'https://api.minimax.io/v1'}. Example: config = ModelConfig(model_id='MiniMax-M2.5', provider_kwargs={'api_key': '...', 'base_url': 'https://api.minimax.io/v1'}); model = create_model(config)
1 parent f48cdb2 commit 7bfb52a

File tree

9 files changed

+563
-1
lines changed

9 files changed

+563
-1
lines changed

langextract/providers/builtin_registry.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,9 @@ class ProviderConfig(TypedDict):
4848
'target': 'langextract.providers.openai:OpenAILanguageModel',
4949
'priority': patterns.OPENAI_PRIORITY,
5050
},
51+
{
52+
'patterns': patterns.MINIMAX_PATTERNS,
53+
'target': 'langextract.providers.minimax:MiniMaxLanguageModel',
54+
'priority': patterns.MINIMAX_PRIORITY,
55+
},
5156
]

langextract/providers/minimax.py

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
# Copyright 2025 Google LLC.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""MiniMax provider for LangExtract.
16+
17+
This provider uses MiniMax's OpenAI-compatible API to extract structured
18+
information from text.
19+
20+
Usage:
21+
# Using factory
22+
from langextract.factory import ModelConfig, create_model
23+
24+
config = ModelConfig(
25+
model_id="MiniMax-M2.5",
26+
provider="MiniMaxLanguageModel",
27+
provider_kwargs={
28+
"api_key": "your-minimax-api-key"
29+
}
30+
)
31+
model = create_model(config)
32+
33+
result = lx.extract(
34+
text_or_documents=text,
35+
prompt_description=instructions,
36+
model=model
37+
)
38+
"""
39+
40+
from __future__ import annotations

from collections.abc import Sequence
import dataclasses
from typing import Any

from langextract.core import base_model
from langextract.core import data
from langextract.providers import patterns
from langextract.providers import router
49+
50+
51+
_DEFAULT_MODEL_ID = "MiniMax-M2.5"
52+
_DEFAULT_BASE_URL = "https://api.minimax.io/v1"
53+
54+
55+
@router.register(
    *patterns.MINIMAX_PATTERNS,
    priority=patterns.MINIMAX_PRIORITY,
)
@dataclasses.dataclass(init=False)
class MiniMaxLanguageModel(base_model.BaseLanguageModel):
  """Language model inference using MiniMax's OpenAI-compatible API.

  Attributes:
    model_id: MiniMax model name to request (e.g. 'MiniMax-M2.5').
    api_key: API key for the MiniMax service.
    base_url: Endpoint of MiniMax's OpenAI-compatible API.
    organization: Optional organization identifier forwarded to the client.
    format_type: Output format; JSON requests ``response_format=json_object``.
    temperature: Sampling temperature forwarded to the API (None = server
      default).
    max_workers: Declared parallelism hint. NOTE(review): requests are issued
      via asyncio.gather, which imposes no explicit cap — confirm whether this
      field should bound concurrency.
  """

  model_id: str = _DEFAULT_MODEL_ID
  api_key: str | None = None
  base_url: str = _DEFAULT_BASE_URL
  organization: str | None = None
  format_type: data.FormatType = data.FormatType.JSON
  temperature: float | None = None
  max_workers: int = 10
  _client: Any = dataclasses.field(default=None, repr=False, compare=False)
  _extra_kwargs: dict[str, Any] = dataclasses.field(
      default_factory=dict, repr=False, compare=False
  )

  def __init__(
      self,
      model_id: str = _DEFAULT_MODEL_ID,
      api_key: str | None = None,
      base_url: str = _DEFAULT_BASE_URL,
      organization: str | None = None,
      format_type: data.FormatType = data.FormatType.JSON,
      temperature: float | None = None,
      max_workers: int = 10,
      **kwargs: Any,
  ) -> None:
    """Initialize the model and create the underlying OpenAI client.

    Bug fix: with ``@dataclasses.dataclass(init=False)`` no ``__init__`` is
    generated, so the original class could not be constructed with keyword
    arguments (e.g. via the factory's provider_kwargs) and ``__post_init__``
    was never invoked automatically — the client was never created. This
    explicit ``__init__`` restores construction and calls ``__post_init__``
    itself.

    Args:
      model_id: MiniMax model name to request.
      api_key: MiniMax API key.
      base_url: OpenAI-compatible endpoint URL.
      organization: Optional organization identifier.
      format_type: Desired output format.
      temperature: Sampling temperature, or None for the server default.
      max_workers: Parallelism hint (see class docstring).
      **kwargs: Extra keyword arguments forwarded verbatim to AsyncOpenAI.
    """
    self.model_id = model_id
    self.api_key = api_key
    self.base_url = base_url
    self.organization = organization
    self.format_type = format_type
    self.temperature = temperature
    self.max_workers = max_workers
    self._client = None
    self._extra_kwargs = dict(kwargs)
    self.__post_init__()

  @property
  def requires_fence_output(self) -> bool:
    """MiniMax returns raw JSON without fences."""
    if self.format_type == data.FormatType.JSON:
      return False
    return super().requires_fence_output

  def __post_init__(self):
    """Initialize the OpenAI client with MiniMax configuration.

    Raises:
      ImportError: If the optional ``openai`` dependency is not installed.
    """
    try:
      from openai import AsyncOpenAI
    except ImportError as e:
      raise ImportError(
          "OpenAI package is required for MiniMax provider. "
          "Install with: pip install langextract[openai]"
      ) from e

    if self._client is None:
      self._client = AsyncOpenAI(
          api_key=self.api_key,
          base_url=self.base_url,
          organization=self.organization,
          **self._extra_kwargs,
      )

  async def _generate(
      self,
      texts: list[str],
      prompt_description: str,
      extra_params: dict[str, Any] | None = None,
  ) -> list[list[base_model.ExtractionCandidate]]:
    """Generate extractions for the given texts (one request per text).

    Args:
      texts: Input texts to extract from.
      prompt_description: Instructions prepended to each request.
      extra_params: Optional extra kwargs for chat.completions.create.

    Returns:
      One list of ExtractionCandidate per input text, in input order.
    """
    import asyncio
    import json

    extra_params = extra_params or {}

    async def process_single(text: str) -> list[base_model.ExtractionCandidate]:
      request_kwargs: dict[str, Any] = dict(extra_params)
      if self.format_type == data.FormatType.JSON:
        # Only send response_format when JSON is requested; passing an
        # explicit None can be rejected by OpenAI-compatible endpoints.
        request_kwargs["response_format"] = {"type": "json_object"}

      response = await self._client.chat.completions.create(
          model=self.model_id,
          messages=[
              {
                  "role": "system",
                  "content": "You are a helpful assistant that extracts structured information from text.",
              },
              {
                  "role": "user",
                  "content": f"{prompt_description}\n\nText: {text}",
              },
          ],
          temperature=self.temperature,
          **request_kwargs,
      )

      content = response.choices[0].message.content
      if not content:
        return []

      try:
        # Bug fix: the parsed result must NOT be named ``data`` — that would
        # make ``data`` function-local throughout process_single, shadowing
        # the langextract.core.data module and raising UnboundLocalError on
        # the format_type check above.
        payload = json.loads(content)
        if isinstance(payload, list):
          # List of extraction objects: one candidate per item.
          return [
              base_model.ExtractionCandidate(
                  extraction_text=item.get("text", str(item)),
                  extraction_class=item.get("class", "unknown"),
                  extraction_index=i,
              )
              for i, item in enumerate(payload)
          ]
        if isinstance(payload, dict):
          # Single-object extraction: one candidate per key/value pair.
          return [
              base_model.ExtractionCandidate(
                  extraction_text=str(value),
                  extraction_class=key,
                  extraction_index=i,
              )
              for i, (key, value) in enumerate(payload.items())
          ]
      except (json.JSONDecodeError, AttributeError):
        pass  # Fall through: return the raw content as one candidate.

      # Non-JSON (or unexpected-shape) content: wrap as a single candidate.
      return [
          base_model.ExtractionCandidate(
              extraction_text=content,
              extraction_class="extracted",
              extraction_index=0,
          )
      ]

    # Process all texts concurrently; gather preserves input order.
    tasks = [process_single(text) for text in texts]
    return await asyncio.gather(*tasks)

  def _generate_sync(
      self,
      texts: list[str],
      prompt_description: str,
      extra_params: dict[str, Any] | None = None,
  ) -> list[list[base_model.ExtractionCandidate]]:
    """Synchronous wrapper around the async generation path.

    Bug fixes vs. the original: ``asyncio.get_event_loop()`` is deprecated
    outside a running loop, and the original silently returned None when a
    non-running loop existed (no code path handled that case).
    """
    import asyncio
    import concurrent.futures

    try:
      asyncio.get_running_loop()
    except RuntimeError:
      # No event loop running in this thread: safe to drive one directly.
      return asyncio.run(
          self._generate(texts, prompt_description, extra_params)
      )

    # Already inside an event loop: asyncio.run() cannot nest, so run the
    # coroutine on a fresh loop in a worker thread and block on the result.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
      future = executor.submit(
          asyncio.run,
          self._generate(texts, prompt_description, extra_params),
      )
      return future.result()

  def __call__(
      self,
      texts: Sequence[str],
      prompt_description: str,
      extra_params: dict[str, Any] | None = None,
  ) -> list[list[base_model.ExtractionCandidate]]:
    """Synchronous interface for the model."""
    return self._generate_sync(list(texts), prompt_description, extra_params)

  async def _call_async(
      self,
      texts: Sequence[str],
      prompt_description: str,
      extra_params: dict[str, Any] | None = None,
  ) -> list[list[base_model.ExtractionCandidate]]:
    """Asynchronous interface for the model."""
    return await self._generate(list(texts), prompt_description, extra_params)

  def close(self):
    """Close the client connection.

    AsyncOpenAI manages its own transport; no explicit close is required.
    """
    pass

langextract/providers/patterns.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,15 @@
2222
GEMINI_PATTERNS = (r'^gemini',)
2323
GEMINI_PRIORITY = 10
2424

25-
# OpenAI provider patterns
25+
# OpenAI provider patterns. NOTE(review): the MiniMax patterns added below also appear in MINIMAX_PATTERNS with the same priority (10); registering them under both providers makes model-id resolution ambiguous — keep them in only one place.
2626
OPENAI_PATTERNS = (
2727
r'^gpt-4',
2828
r'^gpt4\.',
2929
r'^gpt-5',
3030
r'^gpt5\.',
31+
# MiniMax models (OpenAI-compatible API)
32+
r'^MiniMax',
33+
r'^minimax',
3134
)
3235
OPENAI_PRIORITY = 10
3336

@@ -62,3 +65,10 @@
6265
r'^WizardLM/',
6366
)
6467
OLLAMA_PRIORITY = 10
68+
69+
# MiniMax provider patterns (OpenAI-compatible API)
70+
MINIMAX_PATTERNS = (
71+
r'^MiniMax',
72+
r'^minimax',
73+
)
74+
MINIMAX_PRIORITY = 10

test_minimax.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""Test LangExtract with MiniMax using OpenAI-compatible API."""
2+
import os
3+
import langextract as lx
4+
5+
# Get MiniMax API key from environment
6+
MINIMAX_API_KEY = os.getenv("MINIMAX_API_KEY", "")
7+
8+
if not MINIMAX_API_KEY:
9+
print("ERROR: MINIMAX_API_KEY not set!")
10+
exit(1)
11+
12+
# Sample text to extract from
13+
sample_text = """
14+
Patient John Doe, age 45, was admitted to the hospital on March 15, 2026.
15+
He presented with symptoms of fever, cough, and shortness of breath.
16+
Medical history includes hypertension and diabetes type 2.
17+
Current medications: Metformin 500mg twice daily, Lisinopril 10mg once daily.
18+
"""
19+
20+
# Define extraction instructions
21+
instructions = "Extract patient information including name, age, symptoms, medical history, and medications."
22+
23+
# Try using MiniMax via OpenAI-compatible API
24+
try:
25+
print("Testing LangExtract with MiniMax (via OpenAI-compatible API)...")
26+
print(f"API Key: {MINIMAX_API_KEY[:10]}...")
27+
print()
28+
29+
result = lx.extract(
30+
text_or_documents=sample_text,
31+
prompt_description=instructions,
32+
model_id="MiniMax-M2.5", # This won't auto-detect, need to specify provider
33+
provider="OpenAILanguageModel",
34+
provider_kwargs={
35+
"api_key": MINIMAX_API_KEY,
36+
"base_url": "https://api.minimax.io/v1"
37+
}
38+
)
39+
40+
print("SUCCESS! Extraction result:")
41+
print(result)
42+
43+
except Exception as e:
44+
print(f"ERROR: {type(e).__name__}: {e}")
45+
print()
46+
print("Let's try a different approach...")

test_minimax2.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""Test LangExtract with MiniMax using OpenAI-compatible API - Factory approach."""
2+
import os
3+
import langextract as lx
4+
from langextract import factory
5+
6+
# Get MiniMax API key from environment
7+
MINIMAX_API_KEY = os.getenv("MINIMAX_API_KEY", "")
8+
9+
if not MINIMAX_API_KEY:
10+
print("ERROR: MINIMAX_API_KEY not set!")
11+
exit(1)
12+
13+
# Sample text to extract from
14+
sample_text = """
15+
Patient John Doe, age 45, was admitted to the hospital on March 15, 2026.
16+
He presented with symptoms of fever, cough, and shortness of breath.
17+
Medical history includes hypertension and diabetes type 2.
18+
Current medications: Metformin 500mg twice daily, Lisinopril 10mg once daily.
19+
"""
20+
21+
# Define extraction instructions
22+
instructions = "Extract patient information including name, age, symptoms, medical history, and medications."
23+
24+
# Try using MiniMax via OpenAI-compatible API using factory
25+
try:
26+
print("Testing LangExtract with MiniMax (via OpenAI-compatible API)...")
27+
print(f"API Key: {MINIMAX_API_KEY[:10]}...")
28+
print()
29+
30+
# Use factory to create model with custom provider
31+
config = factory.ModelConfig(
32+
model_id="MiniMax-M2.5",
33+
provider="OpenAILanguageModel",
34+
provider_kwargs={
35+
"api_key": MINIMAX_API_KEY,
36+
"base_url": "https://api.minimax.io/v1"
37+
}
38+
)
39+
model = factory.create_model(config)
40+
41+
# Now use the model
42+
extract = lx.LangExtract(model=model)
43+
result = extract.extract(
44+
text_or_documents=sample_text,
45+
prompt_description=instructions,
46+
)
47+
48+
print("SUCCESS! Extraction result:")
49+
print(result)
50+
51+
except Exception as e:
52+
import traceback
53+
print(f"ERROR: {type(e).__name__}: {e}")
54+
traceback.print_exc()
55+
print()
56+
print("Trying alternative approach...")

0 commit comments

Comments
 (0)