Automated Example Selection
How can we improve the performance of few-shot CoT?
While few-shot CoT reasoning is effective, it relies on manually crafted examples. Moreover, choosing diverse examples has been shown to reduce CoT reasoning errors.
Here, we automate CoT to select diverse examples. Given a list of candidate examples, we:
- Cluster: group the candidate examples into clusters
- Sample: for each cluster,
  - sort its examples by distance from the cluster center
  - select the first example that satisfies a predefined selection criterion
- Prompt: include the question selected from each cluster as an example in the LLM prompt
Info
One possible selection criterion is to cap the reasoning at 5 steps, which encourages sampling examples with simpler rationales.
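The complete example below sketches this pipeline end to end: it embeds the candidate questions with sentence-transformers, clusters them with scikit-learn's KMeans, generates structured reasoning through an instructor-patched OpenAI client, and keeps the first question per cluster whose response has at most five reasoning steps.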
import instructor
import numpy as np
from openai import OpenAI
from pydantic import BaseModel
from sklearn.cluster import KMeans
from sentence_transformers import SentenceTransformer
client = instructor.patch(OpenAI())
NUM_CLUSTERS = 2

class Example(BaseModel):
    question: str
    reasoning_steps: list[str]


class FinalAnswer(BaseModel):
    reasoning_steps: list[str]
    answer: int

def cluster_and_sort(questions, n_clusters=NUM_CLUSTERS):
    # Cluster
    embeddings = SentenceTransformer('all-MiniLM-L6-v2').encode(questions)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10).fit(embeddings)

    # Sort
    sorted_clusters = [[] for _ in range(kmeans.n_clusters)]
    for question, embedding, label in zip(questions, embeddings, kmeans.labels_):
        center = kmeans.cluster_centers_[label]
        distance = np.linalg.norm(embedding - center)
        sorted_clusters[label].append((distance, question))
    for cluster in sorted_clusters:
        cluster.sort()  # Sort by distance
    return sorted_clusters

def sample(cluster):
    for _, question in cluster:  # Each item is a (distance, question) tuple
        response = client.chat.completions.create(
            model="gpt-4o",
            response_model=Example,
            messages=[
                {
                    "role": "system",
                    "content": "You are an AI assistant that generates step-by-step reasoning for mathematical questions.",
                },
                {
                    "role": "user",
                    "content": f"Q: {question}\nA: Let's think step by step.",
                },
            ],
        )
        if (
            len(response.reasoning_steps) <= 5
        ):  # If we satisfy the selection criteria, we've found our question for this cluster
            return response

if __name__ == "__main__":
    questions = [
        "How many apples are left if you have 10 apples and eat 3?",
        "What's the sum of 5 and 7?",
        "If you have 15 candies and give 6 to your friend, how many do you have left?",
        "What's 8 plus 4?",
        "You start with 20 stickers and use 8. How many stickers remain?",
        "Calculate 6 added to 9.",
    ]

    # Cluster and sort the questions
    sorted_clusters = cluster_and_sort(questions)

    # Sample questions that match selection criteria for each cluster
    selected_examples = [sample(cluster) for cluster in sorted_clusters]
    print(selected_examples)
"""
[
Example(
question='If you have 15 candies and give 6 to your friend, how many do you have left?',
reasoning_steps=[
'Start with the total number of candies you have, which is 15.',
'Subtract the number of candies you give to your friend, which is 6, from the total candies.',
'15 - 6 = 9, so you are left with 9 candies.',
],
),
Example(
question="What's the sum of 5 and 7?",
reasoning_steps=[
'Identify the numbers to be added: 5 and 7.',
'Perform the addition: 5 + 7.',
'The sum is 12.',
],
),
]
"""
    # Use selected questions as examples for the LLM
    response = client.chat.completions.create(
        model="gpt-4o",
        response_model=FinalAnswer,
        messages=[
            {
                "role": "user",
                "content": f"""
                {selected_examples}
                If there are 10 books in my bag and I read 8 of them, how many books do I have left? Let's think step by step.
                """,
            }
        ],
    )
    print(response.reasoning_steps)
    """
    [
        'Start with the total number of books in the bag, which is 10.',
        "Subtract the number of books you've read, which is 8, from the total books.",
        '10 - 8 = 2, so you have 2 books left.',
    ]
    """
    print(response.answer)
    #> 2
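Note that the final prompt above interpolates the selected Example objects' Python repr directly into the user message. If you prefer plain-text demonstrations, a small helper along the lines of the sketch below could render them first; format_examples is a hypothetical name and not part of instructor or the example above.

# Hypothetical helper (an assumption, not part of the example above): render the
# selected examples as plain-text Q/A demonstrations instead of their Python repr.
def format_examples(examples: list[Example]) -> str:
    blocks = []
    for example in examples:
        steps = "\n".join(example.reasoning_steps)
        blocks.append(f"Q: {example.question}\nA: Let's think step by step.\n{steps}")
    return "\n\n".join(blocks)

The formatted string could then stand in for {selected_examples} in the prompt above.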