From 37405083755afa94e638d74da08bae30859c62c1 Mon Sep 17 00:00:00 2001 From: Char15Xu Date: Fri, 9 Aug 2024 17:59:05 -0700 Subject: [PATCH 1/6] add anthropic model support, testing needed --- .DS_Store | Bin 0 -> 6148 bytes Makefile | 4 + benchmark/.DS_Store | Bin 0 -> 6148 bytes examples/model/claude.py | 34 +++++ libem/core/model/__init__.py | 6 +- libem/core/model/claude.py | 247 +++++++++++++++++++++++++++++++++++ libem/parameter.py | 2 +- 7 files changed, 291 insertions(+), 2 deletions(-) create mode 100644 .DS_Store create mode 100644 benchmark/.DS_Store create mode 100644 examples/model/claude.py create mode 100644 libem/core/model/claude.py diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2108caa4ed676c146b85d919c6d3217b69c16dd1 GIT binary patch literal 6148 zcmeHKJ5EC}5S%3`f@o4w`U>2@ioyxFKz>L_1R{abzlw8lG-f|Vq6b}QqFHG@_Ik&b zr+E7ofGv)@2Vf3hMt8)Q4`cIv_mSOH#E5jB@q#z3@qyRf=P3L4fOBuL!86{mJ>_rr zo5Nx6+cGI21*Cu!kOETRf(le|o!wpVOdTZ!q`-A3;NOQvckG2jVthI{L<>NiG91Qv z^b*A80b(y45*eXcQi(~mYB4P7jJL|`g+pS}Vby$C-E7sNSlrI@TcpE!qDCno1N(xAUt5U$G%ctdnPpaBFdYsqVMqklA=bP@vc~Ce+IVMIq=EBSI deI#XG^Evl>;gA?~#)D4O&w%S9lLG&(zz<2<6@LH# literal 0 HcmV?d00001 diff --git a/Makefile b/Makefile index 6314069..dbf84c8 100644 --- a/Makefile +++ b/Makefile @@ -153,3 +153,7 @@ duckdb: python examples/apps/integration/duckdb_cluster.py mongodb: python examples/apps/integration/mongodb_cluster.py + +.PHONY: claude +claude: + python examples/model/claude.py \ No newline at end of file diff --git a/benchmark/.DS_Store b/benchmark/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..601d5e8aa0d9ab99cb009600950a5548a9560fbb GIT binary patch literal 6148 zcmeHKJ5EC}5S)cbM50NV(pTUHRuoRa1rQ|NlOpj*|0>SK(K7ofh#quF5i~2U$6oK) z@)U310@7;iWrg3GX{KO+>P(U+hLadd%(F5_`n_`jz9T_ z!M9{mKnh3!DIf);z=aj40=s;?@VPop3P^$5SHQmyjqcbBr^NVlaEKOwxL`Pp^XMgr z%>%?=eJ0Q^+b(QKnk2IaG%SS z*Z*hwAN~J1Nh>KJ1^$%+w%ERGH+)jn*4g8{);9Vp-E+R_Zkz{&LzH7;lw&Tu9IqoO 
a^P11O-wUV2pfetHqJ9Qk7nu~ewgM;M))mA6 literal 0 HcmV?d00001 diff --git a/examples/model/claude.py b/examples/model/claude.py new file mode 100644 index 0000000..940ea22 --- /dev/null +++ b/examples/model/claude.py @@ -0,0 +1,34 @@ +import libem + +from libem.match.prompt import rules + +def positive(): + e1 = "Dyson Hot+Cool AM09 Jet Focus heater and fan, White/Silver" + e2 = "Dyson AM09 Hot + Cool Jet Focus Fan Heater - W/S" + + is_match = libem.match(e1, e2) + + print("Entity 1:", e1) + print("Entity 2:", e2) + print("Match:", is_match['answer']) + +def negative(): + e1 = "Dyson Hot+Cool AM09 Jet Focus heater and fan, White/Silver" + e2 = "Dyson AM09 Hot + Cool Jet Focus Fan Heater - Black japan" + + rules.add("Color differentiates entities.") + is_match = libem.match(e1, e2) + + print("Entity 1:", e1) + print("Entity 2:", e2) + print("Match:", is_match['answer']) + +def main(): + libem.calibrate({ + "libem.match.parameter.model": "claude-3-5-sonnet-20240620", + }, verbose=True) + positive() + negative() + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/libem/core/model/__init__.py b/libem/core/model/__init__.py index 79f6d6c..51cac24 100644 --- a/libem/core/model/__init__.py +++ b/libem/core/model/__init__.py @@ -1,5 +1,5 @@ from libem.core.model import ( - openai, llama + openai, llama, claude ) from libem.core import exec import libem @@ -15,6 +15,10 @@ async def async_call(*args, **kwargs) -> dict: return llama.call(*args, **kwargs) elif kwargs.get("model", "") == "llama3.1": return llama.call(*args, **kwargs) + elif kwargs.get("model", "") == "llama3.1": + return llama.call(*args, **kwargs) + elif kwargs.get("model", "") == "claude-3-5-sonnet-20240620": + return await claude.call(*args, **kwargs) else: return await openai.async_call(*args, **kwargs) diff --git a/libem/core/model/claude.py b/libem/core/model/claude.py new file mode 100644 index 0000000..e3d7b4f --- /dev/null +++ b/libem/core/model/claude.py @@ -0,0 +1,247 @@ 
+import os
+import json
+import httpx
+import importlib
+import inspect
+import asyncio
+from anthropic import (
+    AsyncAnthropic, APITimeoutError
+)
+
+import libem
+from libem.core import exec
+
+os.environ.setdefault(
+    "CLAUDE_API_KEY",
+    libem.LIBEM_CONFIG.get("CLAUDE_API_KEY", "")
+)
+
+_client = None
+
+
+def get_client():
+    """Return the lazily built client; EnvironmentError if the key is unset."""
+    global _client
+
+    if not os.environ.get("CLAUDE_API_KEY"):
+        raise EnvironmentError("CLAUDE_API_KEY is not set.")
+
+    if not _client:
+        _client = AsyncAnthropic(
+            api_key=os.environ["CLAUDE_API_KEY"],
+            http_client=httpx.AsyncClient(
+                limits=httpx.Limits(
+                    max_connections=1000,
+                    max_keepalive_connections=100
+                )
+            )
+        )
+    return _client
+
+
+def call(*args, **kwargs) -> dict:
+    """Synchronous entry point: run async_call() via exec.run_async_task."""
+    return exec.run_async_task(
+        async_call(*args, **kwargs)
+    )
+
+
+def _split_messages(prompt, context):
+    """Return (system text or None, chat messages) for prompt and context.
+
+    The system prompt is a separate request argument, so system entries
+    are joined with newlines (prompt first, then context). Context
+    messages keep their order and are placed before the prompt messages.
+    """
+    match prompt:
+        case list():
+            entries = prompt
+        case dict():
+            entries = [{"role": r, "content": c} for r, c in prompt.items()]
+        case str():
+            entries = [{"role": "user", "content": prompt}]
+        case _:
+            raise ValueError(f"Invalid prompt type: {type(prompt)}")
+
+    system_parts, prompt_messages, context_messages = [], [], []
+    for source, bucket in ((entries, prompt_messages),
+                           (context, context_messages)):
+        for msg in source:
+            if msg["role"] == "system":
+                system_parts.append(msg["content"])
+            else:
+                bucket.append(msg)
+
+    system_message = "\n".join(system_parts) if system_parts else None
+    return system_message, context_messages + prompt_messages
+
+
+def _to_anthropic_tool(schema):
+    """Return a tool schema in the name/description/input_schema layout.
+
+    NOTE(review): assumes tool modules may ship OpenAI-style schemas
+    ({"type": "function", "function": {...}}) -- confirm against the tool
+    modules. Schemas without a "function" entry are passed through as-is.
+    """
+    function = schema.get("function") if isinstance(schema, dict) else None
+    if not function:
+        return schema
+    return {
+        "name": function["name"],
+        "description": function.get("description", ""),
+        "input_schema": function.get(
+            "parameters", {"type": "object", "properties": {}}
+        ),
+    }
+
+
+async def _create(client, **params):
+    """Send one request, mapping SDK timeouts to libem's exception."""
+    try:
+        return await client.messages.create(**params)
+    except APITimeoutError as e:  # catch timeout error
+        raise libem.ModelTimedoutException(e)
+
+
+# Model call with multiple rounds of tool use
+async def async_call(
+        prompt: str | list | dict,
+        tools: list[str] = None,
+        context: list = None,
+        model: str = "claude-3-5-sonnet-20240620",
+        temperature: float = 0.0,
+        seed: int = None,
+        max_model_call: int = 3,
+        max_tokens: int = 1000,
+) -> dict:
+    """Query Claude and run the tools it requests, round by round.
+
+    Args:
+        prompt: a user string, a {role: content} dict, or a list of
+            {"role": ..., "content": ...} messages.
+        tools: importable module paths; each module provides `name`,
+            `schema`, `func` and optionally `async_func`.
+        context: earlier messages, placed before the prompt messages.
+        model: model identifier sent with every request.
+        temperature: sampling temperature sent with every request.
+        seed: accepted for signature parity only; it is never sent.
+        max_model_call: upper bound on the number of model requests.
+        max_tokens: completion token limit sent with every request.
+
+    Returns:
+        dict with "output" (the final response text), "tool_outputs",
+        "messages" and "stats" (model call and token counters).
+
+    Raises:
+        ValueError: if prompt is not a str, list or dict.
+        libem.ModelTimedoutException: if a request times out.
+    """
+    client = get_client()
+    system_message, messages = _split_messages(prompt, context or [])
+
+    params = {
+        "model": model,
+        "temperature": temperature,
+        "max_tokens": max_tokens,
+    }
+    if system_message is not None:
+        # `system` is optional; leave it out instead of sending None
+        params["system"] = system_message
+
+    available_functions = {}
+    if tools:
+        # Load the tool modules and prefer async functions if available
+        modules = [importlib.import_module(tool) for tool in tools]
+        available_functions = {
+            tool.name: getattr(tool, 'async_func', tool.func)
+            for tool in modules
+        }
+        params["tools"] = [_to_anthropic_tool(m.schema) for m in modules]
+        # tool_choice is an object in this API, not the string "auto"
+        params["tool_choice"] = {"type": "auto"}
+
+    # trace variables
+    num_model_calls = 0
+    num_input_tokens, num_output_tokens = 0, 0
+    tool_usages, tool_outputs = [], []
+
+    # Call the model, then answer its tool requests until it stops asking
+    while True:
+        response = await _create(client, messages=messages, **params)
+        num_model_calls += 1
+        num_input_tokens += response.usage.input_tokens
+        num_output_tokens += response.usage.output_tokens
+
+        # content is a list of blocks: text and/or tool_use requests
+        blocks = response.content
+        response_message = "".join(b.text for b in blocks if b.type == "text")
+        tool_uses = [b for b in blocks if b.type == "tool_use"]
+        if not tool_uses:
+            messages.append({"role": "assistant", "content": response_message})
+            break
+
+        # Call tools
+        messages.append({"role": "assistant", "content": blocks})
+        tool_results = []
+        for tool_use in tool_uses:
+            function_name = tool_use.name
+            function_to_call = available_functions[function_name]
+            # `input` is normally a dict already; tolerate a JSON string
+            function_args = tool_use.input
+            if isinstance(function_args, str):
+                function_args = json.loads(function_args)
+
+            libem.debug(f"[{function_name}] {function_args}")
+
+            if inspect.iscoroutinefunction(function_to_call):
+                function_response = await function_to_call(**function_args)
+            else:
+                function_response = function_to_call(**function_args)
+
+            tool_results.append({
+                "type": "tool_result",
+                "tool_use_id": tool_use.id,
+                "content": str(function_response),
+            })
+            tool_usages.append({
+                "id": tool_use.id,
+                'name': function_name,
+                "arguments": function_args,
+                "response": function_response,
+            })
+            tool_outputs.append({function_name: function_response})
+
+        # tool results are returned to the model in a single user turn
+        messages.append({"role": "user", "content": tool_results})
+
+        if num_model_calls >= max_model_call:
+            libem.debug(f"[model] max call reached: "
+                        f"{messages}\n{response_message}")
+            break
+
+    libem.trace.add({
+        "model": {
+            "messages": messages,
+            "tool_usages": tool_usages,
+            "num_model_calls": num_model_calls,
+            "num_input_tokens": num_input_tokens,
+            "num_output_tokens": num_output_tokens,
+        }
+    })
+
+    return {
+        "output": response_message,
+        "tool_outputs": tool_outputs,
+        "messages": messages,
+        "stats": {
+            "num_model_calls": num_model_calls,
+            "num_input_tokens": num_input_tokens,
+            "num_output_tokens": num_output_tokens,
+        }
+    }
+
+
+def reset():
+    """Forget the cached client so the next get_client() builds a new one."""
+    global _client
+    _client = None
\ No newline at end of file
diff --git a/libem/parameter.py b/libem/parameter.py
index 60b50c2..1535013 100644
--- a/libem/parameter.py
+++ b/libem/parameter.py
@@ -4,7 +4,7 @@
     default="gpt-4o-2024-08-06",
     options=["gpt-4o","gpt-4o-mini", "gpt-4", "gpt-4-turbo", "gpt-3.5-turbo",
-             "llama3", "llama3.1"]
+             "llama3", "llama3.1", "claude-3-5-sonnet-20240620"]
 )
 
 temperature = Parameter(
     default=0,
From
eb1f549375ee083b43cf1e71f1e3db91e39e33e7 Mon Sep 17 00:00:00 2001 From: Char15Xu Date: Fri, 9 Aug 2024 18:02:43 -0700 Subject: [PATCH 2/6] fixed conflict error in libem/core/model/init.py --- libem/core/model/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/libem/core/model/__init__.py b/libem/core/model/__init__.py index 51cac24..4e68086 100644 --- a/libem/core/model/__init__.py +++ b/libem/core/model/__init__.py @@ -15,8 +15,6 @@ async def async_call(*args, **kwargs) -> dict: return llama.call(*args, **kwargs) elif kwargs.get("model", "") == "llama3.1": return llama.call(*args, **kwargs) - elif kwargs.get("model", "") == "llama3.1": - return llama.call(*args, **kwargs) elif kwargs.get("model", "") == "claude-3-5-sonnet-20240620": return await claude.call(*args, **kwargs) else: From 963c3b1655a8f4d8c1b47cab2445c6794908ad67 Mon Sep 17 00:00:00 2001 From: Char15Xu Date: Tue, 13 Aug 2024 00:05:41 -0700 Subject: [PATCH 3/6] merge changes --- .DS_Store | Bin 6148 -> 0 bytes benchmark/.DS_Store | Bin 6148 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store delete mode 100644 benchmark/.DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 2108caa4ed676c146b85d919c6d3217b69c16dd1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKJ5EC}5S%3`f@o4w`U>2@ioyxFKz>L_1R{abzlw8lG-f|Vq6b}QqFHG@_Ik&b zr+E7ofGv)@2Vf3hMt8)Q4`cIv_mSOH#E5jB@q#z3@qyRf=P3L4fOBuL!86{mJ>_rr zo5Nx6+cGI21*Cu!kOETRf(le|o!wpVOdTZ!q`-A3;NOQvckG2jVthI{L<>NiG91Qv z^b*A80b(y45*eXcQi(~mYB4P7jJL|`g+pS}Vby$C-E7sNSlrI@TcpE!qDCno1N(xAUt5U$G%ctdnPpaBFdYsqVMqklA=bP@vc~Ce+IVMIq=EBSI deI#XG^Evl>;gA?~#)D4O&w%S9lLG&(zz<2<6@LH# diff --git a/benchmark/.DS_Store b/benchmark/.DS_Store deleted file mode 100644 index 601d5e8aa0d9ab99cb009600950a5548a9560fbb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 
zcmeHKJ5EC}5S)cbM50NV(pTUHRuoRa1rQ|NlOpj*|0>SK(K7ofh#quF5i~2U$6oK) z@)U310@7;iWrg3GX{KO+>P(U+hLadd%(F5_`n_`jz9T_ z!M9{mKnh3!DIf);z=aj40=s;?@VPop3P^$5SHQmyjqcbBr^NVlaEKOwxL`Pp^XMgr z%>%?=eJ0Q^+b(QKnk2IaG%SS z*Z*hwAN~J1Nh>KJ1^$%+w%ERGH+)jn*4g8{);9Vp-E+R_Zkz{&LzH7;lw&Tu9IqoO a^P11O-wUV2pfetHqJ9Qk7nu~ewgM;M))mA6 From 7d2d09140e2822fb4bd9e55a3551fdc40a93d489 Mon Sep 17 00:00:00 2001 From: Char15Xu Date: Tue, 13 Aug 2024 00:12:49 -0700 Subject: [PATCH 4/6] add claude key configure support in libem.cli and syntax revisions --- Makefile | 11 ++++++----- cli/libem | 20 +++++++++++++++----- libem/core/model/__init__.py | 16 ++++++++-------- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index dbf84c8..dd49133 100644 --- a/Makefile +++ b/Makefile @@ -60,6 +60,11 @@ async_batch: python examples/optimize/async_batch.py batch: async_batch +# claude model example +.PHONY: claude +claude: + python examples/model/claude.py + # opensource model examples .PHONY: mlx_lm llama_cpp llama_ex local mlx_lm: @@ -152,8 +157,4 @@ link: duckdb: python examples/apps/integration/duckdb_cluster.py mongodb: - python examples/apps/integration/mongodb_cluster.py - -.PHONY: claude -claude: - python examples/model/claude.py \ No newline at end of file + python examples/apps/integration/mongodb_cluster.py \ No newline at end of file diff --git a/cli/libem b/cli/libem index 81b870d..11c8b3f 100644 --- a/cli/libem +++ b/cli/libem @@ -82,13 +82,23 @@ def configure(args): config = {} # Prompt for OPENAI_API_KEY - existing_key = config.get('OPENAI_API_KEY', '') - new_key = input(f"Enter OPENAI_API_KEY (press Enter to keep the existing key: " - f"'{mask_key(existing_key)}'): ").strip() + existing_openai_key = config.get('OPENAI_API_KEY', '') + new_openai_key = input(f"Enter OPENAI_API_KEY (press Enter to keep the existing key: " + f"'{mask_key(existing_openai_key)}'): ").strip() # If no input, keep the existing key; otherwise, update - if new_key: - 
config['OPENAI_API_KEY'] = new_key
+    if new_openai_key:
+        config['OPENAI_API_KEY'] = new_openai_key
+    else:
+        print("No input provided. Using existing API key.")
+
+    existing_claude_key = config.get('CLAUDE_API_KEY', '')
+    new_claude_key = input(f"Enter CLAUDE_API_KEY (press Enter to keep the existing key: "
+                           f"'{mask_key(existing_claude_key)}'): ").strip()
+
+    # If no input, keep the existing key; otherwise, update
+    if new_claude_key:
+        config['CLAUDE_API_KEY'] = new_claude_key
     else:
         print("No input provided. Using existing API key.")
diff --git a/libem/core/model/__init__.py b/libem/core/model/__init__.py
index 4e68086..807e9d5 100644
--- a/libem/core/model/__init__.py
+++ b/libem/core/model/__init__.py
@@ -11,16 +11,16 @@ def call(*args, **kwargs) -> dict:
 
 
 async def async_call(*args, **kwargs) -> dict:
-    if kwargs.get("model", "") == "llama3":
-        return llama.call(*args, **kwargs)
-    elif kwargs.get("model", "") == "llama3.1":
-        return llama.call(*args, **kwargs)
-    elif kwargs.get("model", "") == "claude-3-5-sonnet-20240620":
-        return await claude.call(*args, **kwargs)
-    else:
-        return await openai.async_call(*args, **kwargs)
+    match kwargs.get("model", ""):
+        case "llama3" | "llama3.1":
+            return llama.call(*args, **kwargs)
+        case "claude-3-5-sonnet-20240620":
+            return await claude.async_call(*args, **kwargs)
+        case _:
+            return await openai.async_call(*args, **kwargs)
 
 
 def reset():
     openai.reset()
+    claude.reset()
     llama.reset()
From d2e5c9e9781fbfb20e30f33dcdc1ea5ea12e8c87 Mon Sep 17 00:00:00 2001
From: Char15Xu
Date: Tue, 20 Aug 2024 15:17:53 -0700
Subject: [PATCH 5/6] claude successfully work on examples/model/claude.py

---
 libem/core/model/claude.py | 2 +-
 libem/match/prompt.py      | 2 +-
 requirements.txt           | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/libem/core/model/claude.py b/libem/core/model/claude.py
index e3d7b4f..59c5e7b 100644
--- a/libem/core/model/claude.py
+++ b/libem/core/model/claude.py
@@ -3,7 +3,7 @@
 import httpx
 import importlib
import inspect -import asyncio + from anthropic import ( AsyncAnthropic, APITimeoutError ) diff --git a/libem/match/prompt.py b/libem/match/prompt.py index c4548d8..ba4f58c 100644 --- a/libem/match/prompt.py +++ b/libem/match/prompt.py @@ -26,7 +26,7 @@ lambda: "strict" if model() in { "llama3", "llama3.1", - "gpt-4o-2024-08-06" + "gpt-4o-2024-08-06", "claude-3-5-sonnet-20240620", } else "standard" ), diff --git a/requirements.txt b/requirements.txt index 34e7c90..976590b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,4 @@ tqdm recordlinkage duckdb pymongo +anthropic From 9b4c0a5ed9297e90b236765afe0015886b7efc00 Mon Sep 17 00:00:00 2001 From: Char15Xu Date: Sat, 24 Aug 2024 15:17:54 -0700 Subject: [PATCH 6/6] delete print statement in cli/libem --- benchmark/README.md | 2 +- cli/libem | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/benchmark/README.md b/benchmark/README.md index e2936a6..77a2e7b 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -176,7 +176,7 @@ python -m benchmark.run -s ### Meta-Llama3-8B-Instruct-8bit -> Llama model runs on Apple M2 silicon +> Llama model runs on Apple M1 silicon | Benchmark | Precision | Recall | F1 | Cost ($) | Pairs per $ | Throughput (pps) | |:--------------:|:---------:|:------:|:-----:|:----------------:|:----------------:|:----------------:| diff --git a/cli/libem b/cli/libem index 11c8b3f..4db115d 100644 --- a/cli/libem +++ b/cli/libem @@ -89,8 +89,6 @@ def configure(args): # If no input, keep the existing key; otherwise, update if new_openai_key: config['OPENAI_API_KEY'] = new_openai_key - else: - print("No input provided. 
Using existing API key.") existing_claude_key = config.get('CLAUDE_API_KEY', '') new_claude_key = input(f"Enter CLAUDE_API_KEY (press Enter to keep the existing key: " @@ -99,8 +97,6 @@ def configure(args): # If no input, keep the existing key; otherwise, update if new_claude_key: config['CLAUDE_API_KEY'] = new_claude_key - else: - print("No input provided. Using existing API key.") ...