Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions check_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import os


def mask_secret(value):
    """Return a display-safe description of a secret value.

    The secret itself is never included in the result, so the output can be
    pasted into logs, terminals, or bug reports without leaking the key.

    Args:
        value: The secret string, or None when the variable is not set.

    Returns:
        "<unset>" for None, "<empty>" for an empty string, otherwise a
        marker containing only the length of the secret.
    """
    if value is None:
        return "<unset>"
    if not value:
        return "<empty>"
    return f"<set, {len(value)} chars>"


# Report whether the key is configured without echoing its value.
print('MATRIXAI_API_KEY:', mask_secret(os.environ.get('MATRIXAI_API_KEY')))
50 changes: 50 additions & 0 deletions langextract-doubao/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so

# Distribution / packaging
build/
dist/
*.egg-info/
.eggs/
*.egg

# Virtual environments
.env
.venv
env/
venv/
ENV/

# Testing & coverage
.pytest_cache/
.tox/
htmlcov/
.coverage
.coverage.*

# Type checking
.mypy_cache/
.dmypy.json
dmypy.json
.pytype/

# IDEs
.idea/
.vscode/
*.swp
*.swo

# OS-specific
.DS_Store
Thumbs.db

# Logs
*.log

# Temp files
*.tmp
*.bak
*.backup
13 changes: 13 additions & 0 deletions langextract-doubao/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# LICENSE

TODO: Add your license here.

This is a placeholder license file for your provider plugin.
Please replace this with your actual license before distribution.

Common options include:
- Apache License 2.0
- MIT License
- BSD License
- GPL License
- Proprietary/Commercial License
41 changes: 41 additions & 0 deletions langextract-doubao/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# LangExtract doubao Provider

A provider plugin for LangExtract that supports doubao models.

## Installation

```bash
pip install -e .
```

## Supported Model IDs

- `doubao*`: Models matching pattern ^doubao

## Environment Variables

- `ARK_API_KEY`: API key for authentication (read by the provider when `api_key` is not passed explicitly)

## Usage

```python
import langextract as lx

result = lx.extract(
text="Your document here",
model_id="doubao-model",
prompt_description="Extract entities",
examples=[...]
)
```

## Development

1. Install in development mode: `pip install -e .`
2. Run tests: `python test_plugin.py`
3. Build package: `python -m build`
4. Publish to PyPI: `twine upload dist/*`

## License

Apache License 2.0
6 changes: 6 additions & 0 deletions langextract-doubao/langextract_doubao/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""LangExtract provider plugin for doubao."""

from langextract_doubao.provider import doubaoLanguageModel

# Public API of the package: only the provider class is re-exported.
__all__ = ['doubaoLanguageModel']
# Package version string; pyproject.toml declares the same value ("0.1.0").
__version__ = "0.1.0"
80 changes: 80 additions & 0 deletions langextract-doubao/langextract_doubao/provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""Provider implementation for doubao."""

import logging
import os

import langextract as lx
from langextract.core.base_model import BaseLanguageModel
from langextract.core.types import ScoredOutput
from volcenginesdkarkruntime import Ark

from langextract_doubao.schema import doubaoSchema


@lx.providers.registry.register(r'^doubao', priority=10)
class doubaoLanguageModel(BaseLanguageModel):
    """LangExtract provider for doubao.

    Registered for model IDs matching the pattern ``^doubao``. Each prompt is
    sent through the Ark client's chat-completions call and the returned
    message content is wrapped in a ``ScoredOutput``.
    """

    def __init__(self, model_id: str, api_key: str | None = None, **kwargs):
        """Initialize the doubao provider.

        Args:
            model_id: The model identifier passed as ``model`` on every request.
            api_key: API key for authentication. When omitted, the
                ``ARK_API_KEY`` environment variable is used, then
                ``DOUBAO_API_KEY`` (the name the README documents).
            **kwargs: Additional provider-specific parameters.
                ``response_schema`` and ``structured_output`` are read from
                here; the full mapping is kept on ``self._extra_kwargs``.
        """
        super().__init__()
        self.model_id = model_id
        # ARK_API_KEY keeps its original precedence; DOUBAO_API_KEY is only a
        # fallback so the variable named in the README also works.
        self.api_key = (
            api_key
            or os.environ.get('ARK_API_KEY')
            or os.environ.get('DOUBAO_API_KEY')
        )
        self.response_schema = kwargs.get('response_schema')
        self.structured_output = kwargs.get('structured_output', False)

        self.client = Ark(
            base_url="https://ark.cn-beijing.volces.com/api/v3",
            api_key=self.api_key,
        )
        self._extra_kwargs = kwargs

    @classmethod
    def get_schema_class(cls):
        """Tell LangExtract about our schema support.

        Returns:
            The ``doubaoSchema`` class.
        """
        return doubaoSchema

    def apply_schema(self, schema_instance):
        """Apply or clear schema configuration.

        Args:
            schema_instance: A schema object exposing ``to_provider_config()``,
                or a falsy value to clear any previously applied schema.
        """
        super().apply_schema(schema_instance)
        if schema_instance:
            config = schema_instance.to_provider_config()
            self.response_schema = config.get('response_schema')
            self.structured_output = config.get('structured_output', False)
        else:
            self.response_schema = None
            self.structured_output = False

    def infer(self, batch_prompts, **kwargs):
        """Run inference on a batch of prompts.

        Prompts are processed one at a time, in order.

        Args:
            batch_prompts: List of prompts to process.
            **kwargs: Additional inference parameters. Accepted for interface
                compatibility; currently not forwarded to the API call.

        Yields:
            Lists of ScoredOutput objects, one per prompt.

        Raises:
            RuntimeError: If the response has no choices or empty content.
        """
        log = logging.getLogger(__name__)
        for prompt in batch_prompts:
            api_params = {
                "model": self.model_id,
                "messages": [
                    # The system message goes first so it frames the user
                    # prompt instead of trailing it.
                    {"role": "system", "content": "You are an ai assistant"},
                    {"role": "user", "content": prompt},
                ],
            }

            completion = self.client.chat.completions.create(**api_params)
            choices = completion.choices
            if not choices:
                # Surface a missing choice list as the same error type as
                # empty content rather than a bare IndexError.
                raise RuntimeError("Doubao returned empty output")
            text = getattr(choices[0].message, "content", "")
            # Raw output is only emitted when DEBUG logging is enabled.
            log.debug("Doubao raw output: %r", text)
            if not text:
                raise RuntimeError("Doubao returned empty output")
            yield [ScoredOutput(score=1.0, output=text)]
75 changes: 75 additions & 0 deletions langextract-doubao/langextract_doubao/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Schema implementation for doubao provider."""

import langextract as lx
from langextract.core.schema import BaseSchema


class doubaoSchema(BaseSchema):
    """Schema implementation for doubao structured output.

    Wraps a JSON-schema-style dictionary describing the expected
    ``{"extractions": [...]}`` payload.
    """

    def __init__(self, schema_dict: dict):
        """Initialize the schema with a dictionary.

        Args:
            schema_dict: The schema dictionary to wrap; stored as-is.
        """
        self._schema_dict = schema_dict

    @property
    def schema_dict(self) -> dict:
        """Return the schema dictionary."""
        return self._schema_dict

    @classmethod
    def from_examples(cls, examples_data, attribute_suffix="_attributes"):
        """Build schema from example extractions.

        Every extraction class seen in the examples becomes a string property
        on the extraction item, and its attributes are described under the
        property named ``<class><attribute_suffix>``. With no extractions the
        result is the generic schema (array of unconstrained objects).

        Args:
            examples_data: Sequence of ExampleData objects.
            attribute_suffix: Suffix for attribute fields.

        Returns:
            A configured doubaoSchema instance.
        """
        # class name -> {attribute name -> True if any example value is a list}
        extraction_types = {}
        for example in examples_data:
            for extraction in example.extractions:
                attrs = extraction_types.setdefault(
                    extraction.extraction_class, {}
                )
                for attr_name, attr_value in (extraction.attributes or {}).items():
                    attrs[attr_name] = (
                        attrs.get(attr_name, False)
                        or isinstance(attr_value, list)
                    )

        item_properties = {}
        for class_name, attrs in extraction_types.items():
            item_properties[class_name] = {"type": "string"}
            item_properties[f"{class_name}{attribute_suffix}"] = {
                "type": "object",
                "properties": {
                    attr_name: (
                        {"type": "array", "items": {"type": "string"}}
                        if is_list
                        else {"type": "string"}
                    )
                    for attr_name, is_list in attrs.items()
                },
            }

        item_schema = {"type": "object"}
        if item_properties:
            # Only constrain items when the examples actually told us something.
            item_schema["properties"] = item_properties

        schema_dict = {
            "type": "object",
            "properties": {
                "extractions": {
                    "type": "array",
                    "items": item_schema,
                }
            },
            "required": ["extractions"],
        }

        return cls(schema_dict)

    def to_provider_config(self) -> dict:
        """Convert to provider-specific configuration.

        Returns:
            Dictionary with ``response_schema`` (the wrapped schema dict) and
            ``structured_output`` set to True.
        """
        return {
            "response_schema": self._schema_dict,
            "structured_output": True,
        }

    @property
    def supports_strict_mode(self) -> bool:
        """Whether this schema guarantees valid structured output.

        Returns:
            True if the provider enforces valid JSON output.
        """
        return False  # Set to True only if your provider guarantees valid JSON

    @property
    def requires_raw_output(self) -> bool:
        """Return True to indicate the model emits native JSON (no fences)."""
        return True  # or False; adjust to match the Doubao API's behavior
22 changes: 22 additions & 0 deletions langextract-doubao/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "langextract-doubao"
version = "0.1.0"
description = "LangExtract provider plugin for doubao"
readme = "README.md"
requires-python = ">=3.10"
license = {text = "Apache-2.0"}
dependencies = [
    "langextract>=1.0.0",
    # Ark runtime SDK: provides the `volcenginesdkarkruntime` module that
    # langextract_doubao/provider.py imports.
    "volcengine-python-sdk[ark]",
]

[project.entry-points."langextract.providers"]
doubao = "langextract_doubao.provider:doubaoLanguageModel"

[tool.setuptools.packages.find]
where = ["."]
include = ["langextract_doubao*"]
Loading
Loading