-
Notifications
You must be signed in to change notification settings - Fork 291
/
Copy pathcreate_benchmark.py
58 lines (52 loc) · 2.29 KB
/
create_benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""Test our agent against a benchmark dataset.
This script creates the dataset on LangSmith. Please create and set your LangSmith API key.
The run_benchmark module runs against this dataset.
"""
import os
from config import set_environment
from langsmith import Client
# Project-local helper from config; presumably exports the API keys the
# LangSmith client needs -- confirm in config.set_environment.
set_environment()

# Both variables are set before the client is constructed. They are
# presumably consumed by LangChain/LangSmith tracing -- see their docs.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "My Project",
    }
)

# Client used below to create the dataset and upload its examples.
client = Client()
# Benchmark prompts. Each entry is one question string; multi-line entries are
# wrapped in parentheses so the string concatenation is explicit and a missing
# comma cannot silently merge two questions.
questions = [
    # The Ship of Theseus Paradox
    (
        "A ship's parts are replaced over time until no original parts remain. "
        "Is it still the same ship? Why or why not?"
    ),
    # Plato's Allegory of the Cave
    (
        "If someone lived their whole life chained in a cave seeing only shadows, "
        "how would they react if freed and shown the real world?"
    ),
    # Appeal to Nature Fallacy
    (
        "Is something good because it is natural, or bad because it is unnatural? "
        "Why can this be a faulty argument?"
    ),
    # Gambler's Fallacy
    (
        "If a coin is flipped 8 times and lands on heads each time, what are the odds "
        "it will be tails next flip? Explain your reasoning."
    ),
    # False Dilemma
    (
        "Present two choices as the only options when others exist. Is the statement \"You're "
        'either with us or against us" an example of false dilemma? Why?'
    ),
    # Mere Exposure Effect
    (
        "Do people tend to develop a preference for things simply because they are "
        "familiar with them? Does this impact reasoning?"
    ),
    # Anthropic Principle
    (
        "Is it surprising that the universe is suitable for intelligent life since if "
        "it weren't, no one would be around to observe it?"
    ),
    # Theseus' Paradox
    (
        "If Theseus' ship is restored by replacing each plank, is it still the same ship? "
        "What is identity based on?"
    ),
    # Slippery Slope Fallacy
    (
        "Does doing one thing really mean that a chain of increasingly negative events will "
        "follow? Why is this a problematic argument?"
    ),
    # Appeal to Ignorance
    "Is a claim true because it hasn't been proven false? Why could this impede reasoning?",
]  # noqa: E501
# Name the dataset is created under on LangSmith.
shared_dataset_name = "Reasoning and Bias"

# Create the dataset on LangSmith; the returned object's id is what the
# examples below are attached to.
ds = client.create_dataset(
    description="A few reasoning and cognitive bias questions",
    dataset_name=shared_dataset_name,
)

# Add each question as its own example, with the question text stored under
# the "input" key.
for q in questions:
    client.create_example(dataset_id=ds.id, inputs={"input": q})

if __name__ == "__main__":
    # Nothing further to do: all the work above runs at module level.
    pass