From 94b7f1a352677fef257d582c97de294c0fbd8a0d Mon Sep 17 00:00:00 2001 From: Raahul Dutta Date: Mon, 18 May 2026 23:46:26 +0200 Subject: [PATCH 1/2] docs(examples): add Bindu A2A integration example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run ContractGuard as a discoverable, DID-identified A2A microservice without touching core analyzer code. Peers send a `text` or base64 `file` part (PDF/DOCX/TXT/MD/RTF) and get back the existing AnalysisResult JSON. Optional pay-per-scan via x402 (USDC on Base) — uncomment one block in agent.py. - examples/bindu/agent.py: handler wrapping analyze_contract() - examples/bindu/README.md: what/why, setup, curl examples, response shape - examples/bindu/.env.example: minimal config (OPENROUTER_API_KEY) - README.md: new Integrations section linking the example Purely additive: no changes to contractguard/, no new required deps, CLI and Gradio UI unaffected. --- README.md | 4 + examples/bindu/.env.example | 14 +++ examples/bindu/README.md | 179 +++++++++++++++++++++++++++++++++++ examples/bindu/agent.py | 182 ++++++++++++++++++++++++++++++++++++ 4 files changed, 379 insertions(+) create mode 100644 examples/bindu/.env.example create mode 100644 examples/bindu/README.md create mode 100644 examples/bindu/agent.py diff --git a/README.md b/README.md index 2a1d9f4..ae1da06 100644 --- a/README.md +++ b/README.md @@ -385,6 +385,10 @@ ContractGuard supports contracts up to ~30,000 tokens (~120,000 characters / ~60 **Can I use it in CI/CD or automated pipelines?** Yes. Use `--json` to get structured output that can be parsed by other tools. Exit code is 0 on success, 1 on error. Example: `contractguard scan contract.pdf --json | jq '.red_flags | length'` +## Integrations + +- **Bindu (A2A agent)** — run ContractGuard as a discoverable, DID-identified microservice that other AI agents can call over the [A2A protocol](https://github.com/getbindu/Bindu). Optional pay-per-scan via x402 (USDC on Base). See [examples/bindu](examples/bindu/). + ## Roadmap - [ ] OCR support for scanned PDF contracts diff --git a/examples/bindu/.env.example b/examples/bindu/.env.example new file mode 100644 index 0000000..0299f4f --- /dev/null +++ b/examples/bindu/.env.example @@ -0,0 +1,14 @@ +# Required: API key for the LLM used to analyse the contract. +# OpenRouter gives you access to Claude, GPT-4o, Gemini, DeepSeek, and many +# more behind a single key — https://openrouter.ai/keys +OPENROUTER_API_KEY=sk-or-... + +# Optional: override the model (default: anthropic/claude-sonnet-4) +# CONTRACTGUARD_MODEL=openai/gpt-4o + +# Optional: analysis language (en | zh). Default: en. +# CONTRACTGUARD_LANG=en + +# Optional: identity that signs the agent's DID document. Use a real email +# you control if you plan to expose this agent publicly. +# CONTRACTGUARD_AUTHOR=you@example.com diff --git a/examples/bindu/README.md b/examples/bindu/README.md new file mode 100644 index 0000000..5a8fb91 --- /dev/null +++ b/examples/bindu/README.md @@ -0,0 +1,179 @@ +# ContractGuard × Bindu — A2A agent integration + +Run ContractGuard as a discoverable, DID-identified microservice that other AI +agents can call over the [A2A](https://github.com/getbindu/Bindu) JSON-RPC +protocol. Same analyzer, same `AnalysisResult` schema — now reachable on the +network with a verifiable identity and (optionally) pay-per-scan via x402. + +## Why pair them? + +ContractGuard is a great library, CLI, and Gradio app — but every integration +beyond that (Slack bot, VS Code extension, an orchestrator that chains it after +a "find me a lease" agent) ends up rebuilding the same plumbing: an HTTP +endpoint, an identity, an auth story, a way to charge for it. + +[Bindu](https://github.com/getbindu/Bindu) is the plumbing. Wrap the analyzer +in one `bindufy(config, handler)` call and you get: + +- **Discoverable agent card** at `/.well-known/agent.json` — agent + marketplaces and orchestrators can find ContractGuard and know what it does. +- **DID-based identity** (`did:bindu:…`) — every analysis is attributable to a + cryptographically-verifiable agent. Each result artifact is signed with the + agent's Ed25519 key, so a contract review can be presented as + tamper-evidence: "ContractGuard `did:bindu:…` said this at timestamp T." +- **A2A JSON-RPC** over HTTP — peers call `message/send` with either a `text` + part (paste the contract) or a base64 `file` part (PDF / DOCX / TXT / MD / + RTF), and get back the existing structured `AnalysisResult` as JSON. +- **Pay-per-scan via x402** — uncomment the `execution_cost` block in + `agent.py` and the agent demands a USDC micropayment on Base before + responding. No Stripe account, no login flow, no SaaS dashboard. +- **OAuth2 / mTLS** (optional) for B2B deployments. + +The integration is purely additive: nothing in `contractguard/` changes, +`bindu` is not a required dependency, and the CLI / Python API / Gradio UI all +still work exactly as before. + +## Setup + +```bash +# From the ContractGuard repo root +pip install -e . # core contractguard +pip install bindu # adds the bindufy() wrapper + +cp examples/bindu/.env.example .env +# edit .env: set OPENROUTER_API_KEY=sk-or-... +``` + +## Run + +```bash +python examples/bindu/agent.py +``` + +You should see Bindu's startup banner and the agent listening on +. Three useful endpoints: + +| Endpoint | What it is | +|---|---| +| `GET /.well-known/agent.json` | Agent card — name, description, DID, skills, capabilities | +| `GET /.well-known/did.json` | DID document with the agent's public key | +| `POST /` | A2A JSON-RPC endpoint (use `method: "message/send"`) | + +## Talk to it + +### Inline text + +```bash +curl -sS http://localhost:3773/ \ + -H "Content-Type: application/json" \ + -d @- <<'EOF' +{ + "jsonrpc": "2.0", + "id": "1", + "method": "message/send", + "params": { + "message": { + "role": "user", + "kind": "message", + "messageId": "00000000-0000-0000-0000-000000000001", + "contextId": "00000000-0000-0000-0000-000000000002", + "taskId": "00000000-0000-0000-0000-000000000003", + "parts": [ + { "kind": "text", "text": "review this contract" }, + { "kind": "text", "text": "" } + ] + }, + "configuration": { "acceptedOutputModes": ["application/json"] } + } +} +EOF +``` + +The first short `text` part is treated as a prompt and echoed back under +`prompt` in the result. Any subsequent text parts are concatenated and sent +through the analyzer. + +### PDF / DOCX upload + +A2A carries binary files as base64 inside a `file` part. One-liner to build +the payload from a real PDF: + +```bash +B64=$(base64 -i my-lease.pdf | tr -d '\n') +cat > /tmp/req.json <120k chars are truncated (see + `MAX_CONTRACT_CHARS` in `contractguard.analyzer`). For 60+ page contracts, + pick a long-context model via `CONTRACTGUARD_MODEL=google/gemini-2.5-pro`. +- **Streaming** isn't enabled — analyses come back as a single artifact. +- **Auth defaults are off.** Suitable for localhost / trusted networks. For a + public deployment set `AUTH__ENABLED=true` and follow Bindu's + [`AUTH.md`](https://github.com/getbindu/Bindu/blob/main/docs/AUTH.md). diff --git a/examples/bindu/agent.py b/examples/bindu/agent.py new file mode 100644 index 0000000..58ef2b7 --- /dev/null +++ b/examples/bindu/agent.py @@ -0,0 +1,182 @@ +"""ContractGuard as a Bindu A2A agent. + +Wraps `contractguard.analyzer.analyze_contract()` in a Bindu handler so the +analyzer is reachable as a networked, DID-identified microservice over the +A2A JSON-RPC protocol. Peers send either: + + - a `text` part with the raw contract text, or + - a `file` part with a base64-encoded PDF / DOCX / TXT / MD, + +and get back the structured `AnalysisResult` (red flags, warnings, +protections, fairness score) as JSON. + +Run: + + export OPENROUTER_API_KEY=sk-or-... + pip install -e . + pip install bindu + python examples/bindu/agent.py + +Agent card: http://localhost:3773/.well-known/agent.json +DID doc: http://localhost:3773/.well-known/did.json +JSON-RPC: POST http://localhost:3773/ (method: message/send) +""" + +from __future__ import annotations + +import base64 +import io +import json +import os +import tempfile +from pathlib import Path + +from bindu.penguin.bindufy import bindufy + +from contractguard.analyzer import DEFAULT_MODEL, analyze_contract +from contractguard.parser import extract_text + +# Map MIME types we accept to the suffix `contractguard.parser.extract_text` +# expects. The parser dispatches off the file suffix, so we materialise file +# bytes to a temp file with the right extension and let the existing +# pdfplumber / python-docx code paths do their thing. +MIME_TO_SUFFIX = { + "application/pdf": ".pdf", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx", + "text/plain": ".txt", + "text/markdown": ".md", + "application/rtf": ".rtf", + "text/rtf": ".rtf", +} + + +def _decode_file_part(part: dict) -> tuple[bytes, str]: + """Return (file_bytes, mime_type) for a file part. Raises on malformed input.""" + file_info = part.get("file") or {} + payload = file_info.get("bytes") or file_info.get("data") + if not payload: + raise ValueError("file part is missing 'bytes' / 'data'") + file_bytes = base64.b64decode(payload) if isinstance(payload, str) else payload + mime_type = file_info.get("mimeType") or "" + return file_bytes, mime_type + + +def _extract_from_file_part(part: dict) -> str: + file_bytes, mime_type = _decode_file_part(part) + suffix = MIME_TO_SUFFIX.get(mime_type) + if not suffix: + raise ValueError( + f"unsupported mimeType {mime_type!r}; expected one of {sorted(MIME_TO_SUFFIX)}" + ) + with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as fh: + fh.write(file_bytes) + tmp_path = Path(fh.name) + try: + return extract_text(tmp_path) + finally: + tmp_path.unlink(missing_ok=True) + + +def _collect_inputs(messages: list[dict]) -> tuple[str | None, str]: + """Walk A2A messages and return (prompt, contract_text). + + `prompt` is currently informational (the analyzer doesn't take a free-form + prompt — the schema is fixed), but we surface it in the response so callers + can pass through their own context for downstream agents in a chain. + `contract_text` is the concatenated content of all text-after-prompt and + file parts found across user messages. + """ + prompt: str | None = None + text_chunks: list[str] = [] + text_part_count = 0 + + for msg in messages or []: + role = msg.get("role") + if role is not None and role != "user": + continue + for part in msg.get("parts") or []: + kind = part.get("kind") + if kind == "text": + text = part.get("text") or "" + text_part_count += 1 + # First text part is treated as a prompt (e.g. "review this + # lease"). Subsequent text parts are appended as contract text + # — lets callers paste the contract inline without a file + # upload. + if text_part_count == 1 and len(text) < 500: + prompt = text.strip() or None + else: + text_chunks.append(text) + elif kind == "file": + try: + text_chunks.append(_extract_from_file_part(part)) + except Exception as exc: # noqa: BLE001 — surfaced to caller below + text_chunks.append(f"[file part error: {exc}]") + + return prompt, "\n\n".join(c for c in text_chunks if c) + + +def handler(messages: list[dict]) -> str: + """A2A handler: extract contract → analyse → return JSON string. + + Returning a `str` keeps the manifest worker happy; we encode the structured + `AnalysisResult` as JSON so peers can `JSON.parse()` it on the wire. The + agent card surfaces that the agent emits structured output (see config + below). + """ + prompt, contract_text = _collect_inputs(messages) + + if not contract_text or len(contract_text.strip()) < 50: + return json.dumps( + { + "error": "no_contract", + "message": ( + "Send the contract as a `text` part (>= 50 chars) or as a " + "`file` part with mimeType pdf / docx / txt / md / rtf." + ), + } + ) + + model = os.environ.get("CONTRACTGUARD_MODEL", DEFAULT_MODEL) + lang = os.environ.get("CONTRACTGUARD_LANG", "en") + + try: + result = analyze_contract(contract_text=contract_text, model=model, lang=lang) + except Exception as exc: # noqa: BLE001 — propagate to caller as structured error + return json.dumps({"error": "analysis_failed", "message": str(exc)}) + + payload = result.model_dump(mode="json") + if prompt: + payload["prompt"] = prompt + return json.dumps(payload) + + +config = { + "author": os.environ.get("CONTRACTGUARD_AUTHOR", "contractguard@example.com"), + "name": "contractguard", + "description": ( + "AI contract review agent. Sends a PDF/DOCX/TXT contract or raw text " + "and returns structured red flags, warnings, protections, and a " + "fairness score (0-100, A+ to F)." + ), + "deployment": { + "url": "http://localhost:3773", + "expose": True, + "cors_origins": ["http://localhost:5173", "http://localhost:3000"], + }, + # Pay-per-scan via x402 (USDC on Base Sepolia). Uncomment and fill in a + # pay-to address to charge peers per analysis. Leave commented for free / + # local development. + # + # "execution_cost": { + # "amount": "0.10", + # "token": "USDC", + # "network": "base-sepolia", + # "pay_to_address": "0xYOUR_ADDRESS_HERE", + # }, + "enable_system_message": False, +} + + +if __name__ == "__main__": + bindufy(config, handler) From 9b674be3b69a29b29ed5415c8cc69b208871b7af Mon Sep 17 00:00:00 2001 From: Raahul Dutta Date: Tue, 19 May 2026 09:20:01 +0200 Subject: [PATCH 2/2] fixup(examples/bindu): match actual Bindu wire shape; add sample req/resp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit End-to-end testing against bindu 2026.21.1 revealed three issues in the first version: 1. Handler walked `parts[]`, but Bindu's manifest worker normalises A2A messages to OpenAI-style `{role, content}` before invoking the handler (text parts joined with " "). Result: the handler never received any contract text and always returned `no_contract`. Rewritten to read `m["content"]` directly — drops ~80 lines of dead code. 2. README's curl examples used `"id": "1"` for the JSON-RPC envelope. Bindu's pydantic validator rejects non-UUID ids with a 400. Switched all examples to proper zero-UUIDs. 3. File-upload path didn't actually work. Bindu's `FileInterceptor` reads flat `{kind, mimeType, data}` while the A2A `FilePart` schema (and the JSON-RPC validator) expect nested `{kind, file: {bytes, mimeType}}` — round-trip is broken in this version. Cut the file section; users pre-extract with `contractguard.parser.extract_text()` and send text instead. Also added a complete try-it-out section: - One-shot bash script that sends `examples/sample_lease.txt`, polls `tasks/get`, and prints the DID signature + parsed analysis JSON. - Sample request body (annotated A2A envelope). - Sample response — real `tasks/get` output captured by running this agent against the sample lease (lease type, F grade, 15/100, with trimmed red_flags / warnings / good_clauses / missing_protections). Verified end-to-end: server starts, agent card resolves with the right DID, `message/send` is accepted, handler runs, real OpenRouter call succeeds, result artifact carries a valid Ed25519 signature. --- examples/bindu/README.md | 311 +++++++++++++++++++++++++++++---------- examples/bindu/agent.py | 123 +++------------- 2 files changed, 251 insertions(+), 183 deletions(-) diff --git a/examples/bindu/README.md b/examples/bindu/README.md index 5a8fb91..1a7b2b2 100644 --- a/examples/bindu/README.md +++ b/examples/bindu/README.md @@ -8,30 +8,33 @@ network with a verifiable identity and (optionally) pay-per-scan via x402. ## Why pair them? ContractGuard is a great library, CLI, and Gradio app — but every integration -beyond that (Slack bot, VS Code extension, an orchestrator that chains it after -a "find me a lease" agent) ends up rebuilding the same plumbing: an HTTP -endpoint, an identity, an auth story, a way to charge for it. +beyond that (Slack bot, VS Code extension, an orchestrator that chains it +after a "find me a lease" agent) ends up rebuilding the same plumbing: an +HTTP endpoint, an identity, an auth story, a way to charge for it. [Bindu](https://github.com/getbindu/Bindu) is the plumbing. Wrap the analyzer in one `bindufy(config, handler)` call and you get: - **Discoverable agent card** at `/.well-known/agent.json` — agent - marketplaces and orchestrators can find ContractGuard and know what it does. -- **DID-based identity** (`did:bindu:…`) — every analysis is attributable to a - cryptographically-verifiable agent. Each result artifact is signed with the - agent's Ed25519 key, so a contract review can be presented as - tamper-evidence: "ContractGuard `did:bindu:…` said this at timestamp T." -- **A2A JSON-RPC** over HTTP — peers call `message/send` with either a `text` - part (paste the contract) or a base64 `file` part (PDF / DOCX / TXT / MD / - RTF), and get back the existing structured `AnalysisResult` as JSON. + marketplaces and orchestrators can find ContractGuard and know what it + does. +- **DID-based identity** (`did:bindu:…`) — every analysis is attributable to + a cryptographically-verifiable agent. Bindu signs each result artifact + with the agent's Ed25519 key (the signature shows up as + `did.message.signature` on the artifact part), so a contract review can be + presented as tamper-evidence: "ContractGuard `did:bindu:…` said this at + timestamp T." +- **A2A JSON-RPC** over HTTP — peers call `message/send` with the contract + as a `text` part and the handler returns the existing structured + `AnalysisResult` as JSON. - **Pay-per-scan via x402** — uncomment the `execution_cost` block in `agent.py` and the agent demands a USDC micropayment on Base before responding. No Stripe account, no login flow, no SaaS dashboard. - **OAuth2 / mTLS** (optional) for B2B deployments. The integration is purely additive: nothing in `contractguard/` changes, -`bindu` is not a required dependency, and the CLI / Python API / Gradio UI all -still work exactly as before. +`bindu` is not a required dependency, and the CLI / Python API / Gradio UI +all still work exactly as before. ## Setup @@ -55,101 +58,250 @@ You should see Bindu's startup banner and the agent listening on | Endpoint | What it is | |---|---| -| `GET /.well-known/agent.json` | Agent card — name, description, DID, skills, capabilities | +| `GET /.well-known/agent.json` | Agent card — name, description, DID, capabilities | | `GET /.well-known/did.json` | DID document with the agent's public key | | `POST /` | A2A JSON-RPC endpoint (use `method: "message/send"`) | -## Talk to it +Quick health check: -### Inline text +```bash +curl -s http://localhost:3773/health | python -m json.tool +``` + +## Try it out + +The A2A JSON-RPC `id` and all message-level IDs must be UUIDs. Examples +below use zero-UUIDs for readability — generate real ones with `uuidgen` for +production. + +### One-shot end-to-end script + +This script analyses the included `examples/sample_lease.txt` and prints the +final analysis JSON. Copy-paste it after starting the agent: ```bash -curl -sS http://localhost:3773/ \ - -H "Content-Type: application/json" \ - -d @- <<'EOF' +TASK_ID="00000000-0000-0000-0000-000000000013" + +# 1. Send the contract +python3 -c " +import json +contract = open('examples/sample_lease.txt').read() +print(json.dumps({ + 'jsonrpc':'2.0','id':'00000000-0000-0000-0000-000000000001', + 'method':'message/send', + 'params':{ + 'message':{ + 'role':'user','kind':'message', + 'messageId':'00000000-0000-0000-0000-000000000011', + 'contextId':'00000000-0000-0000-0000-000000000012', + 'taskId':'$TASK_ID', + 'parts':[{'kind':'text','text': contract}], + }, + 'configuration':{'acceptedOutputModes':['application/json']}, + } +}))" | curl -sS http://localhost:3773/ -H 'Content-Type: application/json' -d @- > /dev/null + +# 2. Poll until completed (typically <10s) +while true; do + state=$(curl -sS http://localhost:3773/ -H 'Content-Type: application/json' -d "{ + \"jsonrpc\":\"2.0\",\"id\":\"00000000-0000-0000-0000-000000000002\", + \"method\":\"tasks/get\",\"params\":{\"taskId\":\"$TASK_ID\"}}" \ + | python3 -c "import json,sys;print(json.load(sys.stdin)['result']['status']['state'])") + echo "state: $state" + [ "$state" = "completed" ] && break + sleep 2 +done + +# 3. Pretty-print the analysis JSON from the result artifact +curl -sS http://localhost:3773/ -H 'Content-Type: application/json' -d "{ + \"jsonrpc\":\"2.0\",\"id\":\"00000000-0000-0000-0000-000000000003\", + \"method\":\"tasks/get\",\"params\":{\"taskId\":\"$TASK_ID\"}}" \ + | python3 -c " +import json,sys +r = json.load(sys.stdin) +art = r['result']['artifacts'][0]['parts'][0] +print('DID signature:', art['metadata']['did.message.signature'][:40], '...') +print() +print(json.dumps(json.loads(art['text']), indent=2)) +" +``` + +### Sample request body + +`message/send` accepts an A2A JSON-RPC envelope. The handler reads every +`text` part on the user message and treats it as the contract: + +```json { "jsonrpc": "2.0", - "id": "1", + "id": "00000000-0000-0000-0000-000000000001", "method": "message/send", "params": { "message": { "role": "user", "kind": "message", - "messageId": "00000000-0000-0000-0000-000000000001", - "contextId": "00000000-0000-0000-0000-000000000002", - "taskId": "00000000-0000-0000-0000-000000000003", + "messageId": "00000000-0000-0000-0000-000000000011", + "contextId": "00000000-0000-0000-0000-000000000012", + "taskId": "00000000-0000-0000-0000-000000000013", "parts": [ - { "kind": "text", "text": "review this contract" }, - { "kind": "text", "text": "" } + { "kind": "text", "text": "" } ] }, "configuration": { "acceptedOutputModes": ["application/json"] } } } -EOF ``` -The first short `text` part is treated as a prompt and echoed back under -`prompt` in the result. Any subsequent text parts are concatenated and sent -through the analyzer. +`message/send` is async — the immediate response is a task object with +state `submitted`. The actual analysis arrives on the result artifact a few +seconds later; fetch it with `tasks/get` (params: `{"taskId": "..."}`). -### PDF / DOCX upload +### Sample response -A2A carries binary files as base64 inside a `file` part. One-liner to build -the payload from a real PDF: +After polling, `tasks/get` returns the completed task. The analysis JSON +lives on the result artifact's first text part; the part metadata carries +the DID signature over the result: -```bash -B64=$(base64 -i my-lease.pdf | tr -d '\n') -cat > /tmp/req.json <", + "name": "result", + "parts": [ + { + "kind": "text", + "text": "", + "metadata": { + "did.message.signature": "AA1sdxDhbTkDDHKD3tJWCDGDo9Lk5VdLjWYg…" + } + } + ] + } + ] } } -EOF +``` -curl -sS http://localhost:3773/ -H "Content-Type: application/json" -d @/tmp/req.json +Parsing the artifact's `text` field yields the `AnalysisResult` — captured +below by running this agent against `examples/sample_lease.txt`. Arrays +trimmed for readability; the full output contained 7 red flags, 4 +warnings, 2 protections, and 7 missing protections. + +```json +{ + "contract_type": "lease", + "summary": "This is a 12-month residential lease for an apartment in San Francisco with automatic renewal. The contract heavily favors the landlord with numerous tenant-unfriendly terms including non-refundable deposits, unlimited landlord access, and broad tenant liability.", + "parties": [ + "Apex Property Management LLC (Landlord)", + "Tenant" + ], + "key_terms": [ + "Duration: 12 months with auto-renewal", + "Rent: $3,200/month", + "Security deposit: $6,400 (non-refundable)", + "..." + ], + "red_flags": [ + { + "title": "Non-refundable security deposit", + "severity": "red", + "clause": "Section 3", + "quote": "The security deposit is non-refundable and shall be retained by Landlord upon termination of this Lease for any reason, including normal wear and tear.", + "explanation": "This violates California law. Security deposits must be refundable minus actual damages beyond normal wear and tear. A blanket non-refundable deposit is illegal in California.", + "suggestion": "Demand this clause be removed and replaced with standard California security deposit terms allowing refund minus legitimate damages." + }, + { + "title": "Unlimited landlord access without notice", + "severity": "red", + "clause": "Section 5", + "quote": "Landlord and Landlord's agents shall have the right to enter the Property at any time, with or without notice…", + "explanation": "This violates California Civil Code 1954, which requires 24-hour notice except for emergencies.", + "suggestion": "Replace with California-compliant language requiring 24-hour notice except for emergencies." + }, + "..." + ], + "warnings": [ + { + "title": "High early termination penalty", + "severity": "yellow", + "clause": "Section 1", + "quote": "Early termination by Tenant shall result in a penalty equal to three (3) months' rent.", + "explanation": "A 3-month penalty ($9,600) is quite steep and may exceed actual damages to landlord from early termination.", + "suggestion": "Negotiate for lower penalty (1-2 months) or ability to mitigate by helping find replacement tenant." + }, + "..." + ], + "good_clauses": [ + { + "title": "Clear rent amount and due date", + "clause": "Section 2", + "explanation": "The lease clearly states the monthly rent ($3,200) and due date (1st of each month)." + }, + { + "title": "Defined lease term", + "clause": "Section 1", + "explanation": "The lease has a clear start and end date, providing certainty about the rental period." + } + ], + "missing_protections": [ + "Habitability warranty from landlord", + "Tenant's right to make necessary repairs and deduct from rent", + "Protection against retaliatory eviction", + "..." + ], + "fairness_score": 15, + "fairness_grade": "F" +} ``` -Supported `mimeType` values match the existing parser: +### PDF / DOCX contracts -| MIME type | File type | -|---|---| -| `application/pdf` | PDF (parsed with `pdfplumber`) | -| `application/vnd.openxmlformats-officedocument.wordprocessingml.document` | DOCX | -| `text/plain` | TXT | -| `text/markdown` | MD | -| `text/rtf` / `application/rtf` | RTF | +This example accepts the contract as `text` only. To analyse a PDF or DOCX, +pre-extract the text on the client side and send it as a text part — +ContractGuard's own parser is the cleanest tool for the job: + +```python +from contractguard.parser import extract_text +contract_text = extract_text("my-lease.pdf") +# … then send contract_text as the `text` value of a text part +``` + +Bindu does ship a native file-extraction interceptor, but it does not +round-trip cleanly with the current A2A `FilePart` wire shape in this +version. Pre-extracting client-side is the reliable path until that's fixed +upstream. ### Response shape -The handler returns the existing -[`AnalysisResult`](../../contractguard/models.py) serialised to JSON inside -the A2A response artifact. The full structure — `contract_type`, `summary`, -`parties`, `key_terms`, `red_flags[]`, `warnings[]`, `good_clauses[]`, -`missing_protections[]`, `fairness_score`, `fairness_grade` — is unchanged -from the CLI's `--json` output, so any code that already consumes -`contractguard scan --json` works without modification. The `prompt` field is -added when the caller supplied a leading prompt text part. +The result artifact's text part contains the existing +[`AnalysisResult`](../../contractguard/models.py) serialised to JSON. The +full structure — `contract_type`, `summary`, `parties`, `key_terms`, +`red_flags[]`, `warnings[]`, `good_clauses[]`, `missing_protections[]`, +`fairness_score`, `fairness_grade` — is unchanged from the CLI's `--json` +output, so any code that already consumes `contractguard scan --json` works +without modification. The artifact part carries a `did.message.signature` +in its metadata: Ed25519 over the result, verifiable against the public key +in `/.well-known/did.json`. + +If the analyser raises (no API key, model error, schema mismatch), the +result is wrapped in a structured error instead: + +```json +{ "error": "analysis_failed", "message": "Error code: 401 - …" } +``` ## Charging per scan (x402) -[x402](https://www.x402.org/) is an open micropayment protocol — Bindu speaks -it natively. Uncomment the `execution_cost` block in +[x402](https://www.x402.org/) is an open micropayment protocol — Bindu +speaks it natively. Uncomment the `execution_cost` block in [`agent.py`](agent.py) and fill in your wallet: ```python @@ -162,18 +314,19 @@ it natively. Uncomment the `execution_cost` block in ``` The agent now responds with HTTP 402 to unauthenticated calls; the caller -attaches a USDC payment proof and the agent verifies + analyses. This is the -shortest path from "open-source CLI" to "monetised hosted service" without -adding a SaaS layer. +attaches a USDC payment proof and the agent verifies + analyses. This is +the shortest path from "open-source CLI" to "monetised hosted service" +without adding a SaaS layer. ## Limits and notes -- **Not legal advice.** Same caveat as the upstream CLI — this is a first-pass - filter, not a lawyer. +- **Not legal advice.** Same caveat as the upstream CLI — this is a + first-pass filter, not a lawyer. - **Contract length.** Inputs >120k chars are truncated (see - `MAX_CONTRACT_CHARS` in `contractguard.analyzer`). For 60+ page contracts, - pick a long-context model via `CONTRACTGUARD_MODEL=google/gemini-2.5-pro`. + `MAX_CONTRACT_CHARS` in `contractguard.analyzer`). For 60+ page + contracts, pick a long-context model via + `CONTRACTGUARD_MODEL=google/gemini-2.5-pro`. - **Streaming** isn't enabled — analyses come back as a single artifact. -- **Auth defaults are off.** Suitable for localhost / trusted networks. For a - public deployment set `AUTH__ENABLED=true` and follow Bindu's +- **Auth defaults are off.** Suitable for localhost / trusted networks. For + a public deployment set `AUTH__ENABLED=true` and follow Bindu's [`AUTH.md`](https://github.com/getbindu/Bindu/blob/main/docs/AUTH.md). diff --git a/examples/bindu/agent.py b/examples/bindu/agent.py index 58ef2b7..b48733f 100644 --- a/examples/bindu/agent.py +++ b/examples/bindu/agent.py @@ -2,13 +2,13 @@ Wraps `contractguard.analyzer.analyze_contract()` in a Bindu handler so the analyzer is reachable as a networked, DID-identified microservice over the -A2A JSON-RPC protocol. Peers send either: +A2A JSON-RPC protocol. Peers send the contract as `text` parts in a +`message/send` call and get back the structured `AnalysisResult` (red flags, +warnings, protections, fairness score) as JSON on the result artifact. - - a `text` part with the raw contract text, or - - a `file` part with a base64-encoded PDF / DOCX / TXT / MD, - -and get back the structured `AnalysisResult` (red flags, warnings, -protections, fairness score) as JSON. +For PDF / DOCX inputs, pre-extract on the client with +`contractguard.parser.extract_text()` and send the result as text — see the +README for why. Run: @@ -24,115 +24,34 @@ from __future__ import annotations -import base64 -import io import json import os -import tempfile -from pathlib import Path from bindu.penguin.bindufy import bindufy from contractguard.analyzer import DEFAULT_MODEL, analyze_contract -from contractguard.parser import extract_text - -# Map MIME types we accept to the suffix `contractguard.parser.extract_text` -# expects. The parser dispatches off the file suffix, so we materialise file -# bytes to a temp file with the right extension and let the existing -# pdfplumber / python-docx code paths do their thing. -MIME_TO_SUFFIX = { - "application/pdf": ".pdf", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx", - "text/plain": ".txt", - "text/markdown": ".md", - "application/rtf": ".rtf", - "text/rtf": ".rtf", -} - - -def _decode_file_part(part: dict) -> tuple[bytes, str]: - """Return (file_bytes, mime_type) for a file part. Raises on malformed input.""" - file_info = part.get("file") or {} - payload = file_info.get("bytes") or file_info.get("data") - if not payload: - raise ValueError("file part is missing 'bytes' / 'data'") - file_bytes = base64.b64decode(payload) if isinstance(payload, str) else payload - mime_type = file_info.get("mimeType") or "" - return file_bytes, mime_type - - -def _extract_from_file_part(part: dict) -> str: - file_bytes, mime_type = _decode_file_part(part) - suffix = MIME_TO_SUFFIX.get(mime_type) - if not suffix: - raise ValueError( - f"unsupported mimeType {mime_type!r}; expected one of {sorted(MIME_TO_SUFFIX)}" - ) - with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as fh: - fh.write(file_bytes) - tmp_path = Path(fh.name) - try: - return extract_text(tmp_path) - finally: - tmp_path.unlink(missing_ok=True) - - -def _collect_inputs(messages: list[dict]) -> tuple[str | None, str]: - """Walk A2A messages and return (prompt, contract_text). - - `prompt` is currently informational (the analyzer doesn't take a free-form - prompt — the schema is fixed), but we surface it in the response so callers - can pass through their own context for downstream agents in a chain. - `contract_text` is the concatenated content of all text-after-prompt and - file parts found across user messages. - """ - prompt: str | None = None - text_chunks: list[str] = [] - text_part_count = 0 - - for msg in messages or []: - role = msg.get("role") - if role is not None and role != "user": - continue - for part in msg.get("parts") or []: - kind = part.get("kind") - if kind == "text": - text = part.get("text") or "" - text_part_count += 1 - # First text part is treated as a prompt (e.g. "review this - # lease"). Subsequent text parts are appended as contract text - # — lets callers paste the contract inline without a file - # upload. - if text_part_count == 1 and len(text) < 500: - prompt = text.strip() or None - else: - text_chunks.append(text) - elif kind == "file": - try: - text_chunks.append(_extract_from_file_part(part)) - except Exception as exc: # noqa: BLE001 — surfaced to caller below - text_chunks.append(f"[file part error: {exc}]") - - return prompt, "\n\n".join(c for c in text_chunks if c) def handler(messages: list[dict]) -> str: - """A2A handler: extract contract → analyse → return JSON string. + """A2A handler: pull contract text out of `messages`, analyse, return JSON. - Returning a `str` keeps the manifest worker happy; we encode the structured - `AnalysisResult` as JSON so peers can `JSON.parse()` it on the wire. The - agent card surfaces that the agent emits structured output (see config - below). + Bindu normalises A2A messages to `[{"role": "user"|"assistant", + "content": "..."}]` before calling us. Text parts are concatenated with + spaces; supported file parts (PDF / DOCX / text/plain) are extracted and + inlined. We just take everything the user said in this turn and treat it + as the contract. """ - prompt, contract_text = _collect_inputs(messages) + user_content = " ".join( + (m.get("content") or "") for m in (messages or []) if m.get("role") == "user" + ).strip() - if not contract_text or len(contract_text.strip()) < 50: + if len(user_content) < 50: return json.dumps( { "error": "no_contract", "message": ( "Send the contract as a `text` part (>= 50 chars) or as a " - "`file` part with mimeType pdf / docx / txt / md / rtf." + "`file` part with mimeType pdf, docx, or text/plain." ), } ) @@ -141,14 +60,11 @@ def handler(messages: list[dict]) -> str: lang = os.environ.get("CONTRACTGUARD_LANG", "en") try: - result = analyze_contract(contract_text=contract_text, model=model, lang=lang) + result = analyze_contract(contract_text=user_content, model=model, lang=lang) except Exception as exc: # noqa: BLE001 — propagate to caller as structured error return json.dumps({"error": "analysis_failed", "message": str(exc)}) - payload = result.model_dump(mode="json") - if prompt: - payload["prompt"] = prompt - return json.dumps(payload) + return json.dumps(result.model_dump(mode="json")) config = { @@ -174,7 +90,6 @@ def handler(messages: list[dict]) -> str: # "network": "base-sepolia", # "pay_to_address": "0xYOUR_ADDRESS_HERE", # }, - "enable_system_message": False, }