google · Mowenhao13 · Feb 13, 2026 · Feb 25, 2026
diff --git a/check_env.py b/check_env.py
@@ -0,0 +1,2 @@
+import os
+print('MATRIXAI_API_KEY:', 'set' if os.environ.get('MATRIXAI_API_KEY') else 'not set')  # report presence only; never echo the secret
diff --git a/langextract-doubao/.gitignore b/langextract-doubao/.gitignore
@@ -0,0 +1,50 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+
+# Distribution / packaging
+build/
+dist/
+*.egg-info/
+.eggs/
+*.egg
+
+# Virtual environments
+.env
+.venv
+env/
+venv/
+ENV/
+
+# Testing & coverage
+.pytest_cache/
+.tox/
+htmlcov/
+.coverage
+.coverage.*
+
+# Type checking
+.mypy_cache/
+.dmypy.json
+dmypy.json
+.pytype/
+
+# IDEs
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# OS-specific
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
+
+# Temp files
+*.tmp
+*.bak
+*.backup
diff --git a/langextract-doubao/LICENSE b/langextract-doubao/LICENSE
@@ -0,0 +1,13 @@
+# LICENSE
+
+TODO: Add your license here.
+
+This is a placeholder license file for your provider plugin.
+Please replace this with your actual license before distribution.
+
+Common options include:
+- Apache License 2.0
+- MIT License
+- BSD License
+- GPL License
+- Proprietary/Commercial License
diff --git a/langextract-doubao/README.md b/langextract-doubao/README.md
@@ -0,0 +1,41 @@
+# LangExtract doubao Provider
+
+A provider plugin for LangExtract that supports doubao models.
+
+## Installation
+
+```bash
+pip install -e .
+```
+
+## Supported Model IDs
+
+- `doubao*`: any model ID matching the regular expression `^doubao` (that is, IDs starting with `doubao`)
+
+## Environment Variables
+
+- `ARK_API_KEY`: API key for authentication (used when `api_key` is not passed explicitly)
+
+## Usage
+
+```python
+import langextract as lx
+
+result = lx.extract(
+    text="Your document here",
+    model_id="doubao-model",
+    prompt_description="Extract entities",
+    examples=[...]
+)
+```
+
+## Development
+
+1. Install in development mode: `pip install -e .`
+2. Run tests: `python test_plugin.py`
+3. Build package: `python -m build`
+4. Publish to PyPI: `twine upload dist/*`
+
+## License
+
+Apache License 2.0
diff --git a/langextract-doubao/langextract_doubao/__init__.py b/langextract-doubao/langextract_doubao/__init__.py
@@ -0,0 +1,6 @@
+"""Package entry point for the doubao LangExtract provider plugin."""
+
+from langextract_doubao.provider import doubaoLanguageModel
+
+__version__ = "0.1.0"  # package version string
+__all__ = ["doubaoLanguageModel"]  # public API: the provider class imported above
diff --git a/langextract-doubao/langextract_doubao/provider.py b/langextract-doubao/langextract_doubao/provider.py
@@ -0,0 +1,104 @@
+"""Provider implementation for doubao."""
+
+import logging
+import os
+import langextract as lx
+from langextract_doubao.schema import doubaoSchema
+from langextract.core.base_model import BaseLanguageModel
+from langextract.core.types import ScoredOutput
+from volcenginesdkarkruntime import Ark
+
+logger = logging.getLogger(__name__)
+
+# Default Volcengine Ark endpoint (cn-beijing region).
+_DEFAULT_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
+
+
+@lx.providers.registry.register(r'^doubao', priority=10)
+class doubaoLanguageModel(BaseLanguageModel):
+    """LangExtract provider for doubao.
+
+    Registered for model IDs matching the regex ``^doubao``. Requests are sent
+    through the Volcengine Ark SDK chat-completions client.
+    """
+
+    def __init__(self, model_id: str, api_key: str | None = None, **kwargs):
+        """Initialize the doubao provider.
+
+        Args:
+            model_id: The model identifier.
+            api_key: API key for authentication. When omitted, the
+                ARK_API_KEY and then DOUBAO_API_KEY environment variables
+                are consulted.
+            **kwargs: Additional provider-specific parameters. Recognized
+                keys: response_schema, structured_output, base_url.
+        """
+        super().__init__()
+        self.model_id = model_id
+        # An explicit argument wins; DOUBAO_API_KEY is a plugin-specific alias.
+        self.api_key = (
+            api_key
+            or os.environ.get('ARK_API_KEY')
+            or os.environ.get('DOUBAO_API_KEY')
+        )
+        self.response_schema = kwargs.get('response_schema')
+        self.structured_output = kwargs.get('structured_output', False)
+
+        self.client = Ark(
+            base_url=kwargs.get('base_url') or _DEFAULT_BASE_URL,
+            api_key=self.api_key,
+        )
+        self._extra_kwargs = kwargs
+
+    @classmethod
+    def get_schema_class(cls):
+        """Tell LangExtract about our schema support."""
+        return doubaoSchema
+
+    def apply_schema(self, schema_instance):
+        """Apply or clear schema configuration.
+
+        Args:
+            schema_instance: Schema to apply, or a falsy value to clear it.
+        """
+        super().apply_schema(schema_instance)
+        if schema_instance:
+            config = schema_instance.to_provider_config()
+            self.response_schema = config.get('response_schema')
+            self.structured_output = config.get('structured_output', False)
+        else:
+            self.response_schema = None
+            self.structured_output = False
+
+    def infer(self, batch_prompts, **kwargs):
+        """Run inference on a batch of prompts.
+
+        Args:
+            batch_prompts: List of prompts to process.
+            **kwargs: Additional inference parameters. Accepted for interface
+                compatibility; not currently forwarded to the API.
+
+        Yields:
+            Lists of ScoredOutput objects, one per prompt.
+
+        Raises:
+            RuntimeError: If the API returns no choices or empty content.
+        """
+        for prompt in batch_prompts:
+            api_params = {
+                "model": self.model_id,
+                # The system message goes first so it frames the user prompt.
+                "messages": [
+                    {"role": "system", "content": "You are an ai assistant"},
+                    {"role": "user", "content": prompt},
+                ],
+            }
+
+            completion = self.client.chat.completions.create(**api_params)
+            choices = getattr(completion, "choices", None)
+            text = getattr(choices[0].message, "content", "") if choices else ""
+            # Log instead of print so library users control the verbosity.
+            logger.debug("Doubao raw output: %r", text)
+            if not text:
+                raise RuntimeError("Doubao returned empty output")
+            yield [ScoredOutput(score=1.0, output=text)]
diff --git a/langextract-doubao/langextract_doubao/schema.py b/langextract-doubao/langextract_doubao/schema.py
@@ -0,0 +1,100 @@
+"""Schema implementation for doubao provider."""
+
+import langextract as lx
+from langextract.core.schema import BaseSchema
+
+
+class doubaoSchema(BaseSchema):
+    """Schema implementation for doubao structured output."""
+
+    def __init__(self, schema_dict: dict):
+        """Initialize the schema with a dictionary."""
+        self._schema_dict = schema_dict
+
+    @property
+    def schema_dict(self) -> dict:
+        """Return the schema dictionary."""
+        return self._schema_dict
+
+    @classmethod
+    def from_examples(cls, examples_data, attribute_suffix="_attributes"):
+        """Build schema from example extractions.
+
+        Each extraction class seen in the examples becomes a string property
+        named after the class, plus an object property named
+        ``<class><attribute_suffix>`` listing the attribute names seen for it.
+        With no extractions the item schema is a bare ``{"type": "object"}``.
+
+        Args:
+            examples_data: Sequence of ExampleData objects.
+            attribute_suffix: Suffix for attribute fields.
+
+        Returns:
+            A configured doubaoSchema instance.
+        """
+        # class name -> {attribute name -> True if any example value is a list}.
+        # Dicts keep first-seen order, so the generated schema is deterministic.
+        extraction_types = {}
+        for example in examples_data:
+            for extraction in example.extractions:
+                attrs = extraction_types.setdefault(extraction.extraction_class, {})
+                for name, value in (extraction.attributes or {}).items():
+                    attrs[name] = attrs.get(name, False) or isinstance(value, list)
+
+        item_properties = {}
+        for class_name, attrs in extraction_types.items():
+            item_properties[class_name] = {"type": "string"}
+            item_properties[f"{class_name}{attribute_suffix}"] = {
+                "type": "object",
+                "properties": {
+                    name: (
+                        {"type": "array", "items": {"type": "string"}}
+                        if is_list
+                        else {"type": "string"}
+                    )
+                    for name, is_list in attrs.items()
+                },
+            }
+
+        item_schema = {"type": "object"}
+        if item_properties:
+            item_schema["properties"] = item_properties
+
+        schema_dict = {
+            "type": "object",
+            "properties": {
+                "extractions": {
+                    "type": "array",
+                    "items": item_schema,
+                },
+            },
+            "required": ["extractions"],
+        }
+
+        return cls(schema_dict)
+
+    def to_provider_config(self) -> dict:
+        """Convert to provider-specific configuration.
+
+        Returns:
+            Dictionary with the schema under "response_schema" and
+            "structured_output" set to True.
+        """
+        return {
+            "response_schema": self._schema_dict,
+            "structured_output": True,
+        }
+
+    @property
+    def supports_strict_mode(self) -> bool:
+        """Whether this schema guarantees valid structured output.
+
+        Returns:
+            True if the provider enforces valid JSON output.
+        """
+        return False  # Set to True only if your provider guarantees valid JSON
+
+    @property
+    def requires_raw_output(self) -> bool:
+        """Return True to signal that the model emits raw JSON (no fences)."""
+        return True  # Or False; adjust to match the Doubao API's behavior.
diff --git a/langextract-doubao/pyproject.toml b/langextract-doubao/pyproject.toml
@@ -0,0 +1,22 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "langextract-doubao"
+version = "0.1.0"
+description = "LangExtract provider plugin for doubao"
+readme = "README.md"
+requires-python = ">=3.10"
+license = {text = "Apache-2.0"}
+dependencies = [
+    "langextract>=1.0.0",
+    "volcengine-python-sdk[ark]",  # provides the `volcenginesdkarkruntime` module imported by provider.py
+]
+
+[project.entry-points."langextract.providers"]
+doubao = "langextract_doubao.provider:doubaoLanguageModel"
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["langextract_doubao*"]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		import os
		print('MATRIXAI_API_KEY:', 'set' if os.environ.get('MATRIXAI_API_KEY') else 'not set')  # report presence only; never echo the secret