跳到内容

自动化示例选择

如何提高 few-shot CoT 的性能?

虽然 few-shot CoT 推理有效,但其有效性依赖于手动制作的示例。此外,选择多样化的示例已被证明能有效减少 CoT 的推理错误。

在这里,我们将 CoT 示例的选择过程自动化,以选出多样化的示例。给定一个潜在示例列表,步骤如下:

  1. 聚类: 对潜在示例进行聚类
  2. 采样: 对于每个聚类,
     1. 按与聚类中心的距离排序示例
     2. 选择满足预定义选择标准的第一个示例
  3. 提示: 将从每个聚类中选出的问题作为示例纳入 LLM 提示中

信息

例如,可以把选择标准设为推理步骤最多 5 步,以鼓励采样推理过程更简单的示例。

import instructor
import numpy as np
from openai import OpenAI
from pydantic import BaseModel
from sklearn.cluster import KMeans
from sentence_transformers import SentenceTransformer

# OpenAI client patched by instructor; the chat-completion calls below pass a
# `response_model` to it and read typed attributes off the returned object.
client = instructor.patch(OpenAI())
# Default number of K-means clusters used by cluster_and_sort(); the script
# samples one example per cluster.
NUM_CLUSTERS = 2


# Structured output for one few-shot example: a question together with the
# step-by-step reasoning generated for it (the `response_model` used in sample()).
class Example(BaseModel):
    question: str  # text of the question the reasoning belongs to
    reasoning_steps: list[str]  # reasoning steps; sample() checks how many there are


# Structured output for the final query: the reasoning steps plus an integer
# answer (the `response_model` of the last completion in the __main__ block).
class FinalAnswer(BaseModel):
    reasoning_steps: list[str]  # step-by-step reasoning leading to the answer
    answer: int  # final numeric answer


def cluster_and_sort(questions, n_clusters=NUM_CLUSTERS):
    """Group questions into clusters and order each cluster by centrality.

    The questions are embedded with the ``all-MiniLM-L6-v2`` sentence
    transformer and partitioned with K-means (fixed ``random_state`` of 42).

    Args:
        questions: List of question strings to cluster.
        n_clusters: Number of K-means clusters to fit.

    Returns:
        A list with one entry per cluster. Each entry is a list of
        ``(distance, question)`` tuples in ascending order, where
        ``distance`` is the Euclidean distance between the question's
        embedding and the centre of the cluster it was assigned to.
    """
    # Embed and cluster
    encoder = SentenceTransformer('all-MiniLM-L6-v2')
    vectors = encoder.encode(questions)
    model = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    model.fit(vectors)

    # Bucket every question under its assigned cluster label
    buckets = [[] for _ in range(model.n_clusters)]
    for text, vector, cluster_id in zip(questions, vectors, model.labels_):
        offset = vector - model.cluster_centers_[cluster_id]
        buckets[cluster_id].append((np.linalg.norm(offset), text))

    # Tuples compare on distance first, so the most central question leads.
    return [sorted(bucket) for bucket in buckets]


def sample(cluster, max_steps=5):
    """Return the first example in ``cluster`` that meets the selection criterion.

    For each candidate, in the order given, the LLM is asked to produce
    step-by-step reasoning. The first response with at most ``max_steps``
    reasoning steps is returned.

    Args:
        cluster: Candidates in priority order. Each item is either a
            ``(distance, question)`` tuple, as produced by
            ``cluster_and_sort``, or a plain question string.
        max_steps: Maximum number of reasoning steps an example may have
            to be selected.

    Returns:
        The first ``Example`` satisfying the criterion, or ``None`` when the
        cluster is empty or no candidate qualifies.
    """
    for item in cluster:
        # cluster_and_sort() yields (distance, question) tuples; only the
        # question text belongs in the prompt, not the tuple's repr.
        question = item[1] if isinstance(item, tuple) else item
        response = client.chat.completions.create(
            model="gpt-4o",
            response_model=Example,
            messages=[
                {
                    "role": "system",
                    "content": "You are an AI assistant that generates step-by-step reasoning for mathematical questions.",
                },
                {
                    "role": "user",
                    "content": f"Q: {question}\nA: Let's think step by step.",
                },
            ],
        )
        # If we satisfy the selection criteria, we've found our example for this cluster
        if len(response.reasoning_steps) <= max_steps:
            return response

    # No candidate met the criterion.
    return None


if __name__ == "__main__":
    # Candidate questions from which the few-shot examples are selected.
    questions = [
        "How many apples are left if you have 10 apples and eat 3?",
        "What's the sum of 5 and 7?",
        "If you have 15 candies and give 6 to your friend, how many do you have left?",
        "What's 8 plus 4?",
        "You start with 20 stickers and use 8. How many stickers remain?",
        "Calculate 6 added to 9.",
    ]

    # Cluster and sort the questions
    sorted_clusters = cluster_and_sort(questions)

    # Sample questions that match selection criteria for each cluster
    selected_examples = [sample(cluster) for cluster in sorted_clusters]
    # sample() yields None when no question in a cluster qualifies; drop those
    # so the literal text "None" never ends up in the few-shot prompt.
    selected_examples = [
        example for example in selected_examples if example is not None
    ]
    print(selected_examples)
    """
    [
        Example(
            question='If you have 15 candies and give 6 to your friend, how many do you have left?',
            reasoning_steps=[
                'Start with the total number of candies you have, which is 15.',
                'Subtract the number of candies you give to your friend, which is 6, from the total candies.',
                '15 - 6 = 9, so you are left with 9 candies.',
            ],
        ),
        Example(
            question="What's the sum of 5 and 7?",
            reasoning_steps=[
                'Identify the numbers to be added: 5 and 7.',
                'Perform the addition: 5 + 7.',
                'The sum is 12.',
            ],
        ),
    ]
    """

    # Use selected questions as examples for the LLM
    response = client.chat.completions.create(
        model="gpt-4o",
        response_model=FinalAnswer,
        messages=[
            {
                "role": "user",
                "content": f"""
                {selected_examples}
                If there are 10 books in my bag and I read 8 of them, how many books do I have left? Let's think step by step.
                """,
            }
        ],
    )

    print(response.reasoning_steps)
    """
    [
        'Start with the total number of books in the bag, which is 10.',
        "Subtract the number of books you've read, which is 8, from the total books.",
        '10 - 8 = 2, so you have 2 books left.',
    ]
    """
    print(response.answer)
    #> 2

参考文献

1: 大型语言模型中的自动思维链提示

*: 提示报告:提示技术系统性调查