使用不同的示例子集

我们可以多次提示模型，每次使用不同的示例子集，从而充分利用所有示例；然后将这些输出聚合起来，生成最终响应。这种方法被称为演示集成（Demonstration Ensembling，DENSE）¹。

为了简化本例，我们只是按顺序遍历示例，并将它们平均划分为大小相等的批次。不过，根据您的用例，您也可以考虑使用某种基于嵌入的聚类方法来对这些示例进行采样。

我们可以使用 instructor 实现此功能，如下所示。

import instructor
from pydantic import BaseModel
from openai import AsyncOpenAI
import asyncio
from collections import Counter
from typing import Literal
from textwrap import dedent


# Structured output schema for one classification call. It is the type passed
# as `response_model` when the completion is requested, and callers read the
# predicted label from `correct_answer`.
# NOTE(review): documented with `#` comments instead of a class docstring on
# purpose — pydantic copies a model's docstring into its JSON schema
# description, which would presumably change the schema sent with the
# request. Confirm before converting this to a docstring.
class DemonstrationResponse(BaseModel):
    # The predicted label; validation restricts it to these three values.
    correct_answer: Literal["Positive", "Negative", "Neutral"]


# Shared module-level client: an AsyncOpenAI client wrapped by instructor.
# The wrapped client is the one awaited with a `response_model` argument in
# the completion call in this file.
# NOTE(review): AsyncOpenAI() is built with no arguments, so credentials are
# presumably read from the environment — confirm for your setup.
client = instructor.from_openai(AsyncOpenAI())


async def generate_self_consistent_response(prompt: str, examples: list[str]):
    """Classify ``prompt`` using a single subset of labelled examples.

    The examples are joined one per line and embedded in the system message;
    the query itself is sent as the user message.

    Args:
        prompt: The user query to classify.
        examples: The labelled demonstrations shown to the model for this call.

    Returns:
        The structured result produced by the client for
        ``response_model=DemonstrationResponse``.
    """
    concatenated_examples = "\n".join(examples)

    # Dedent the template BEFORE inserting the examples. If the examples are
    # interpolated first, every example after the first starts at column 0,
    # so dedent() finds no common margin and leaves the whole prompt with its
    # source-code indentation (and the first example indented differently
    # from the rest). str.format only parses the template, so braces inside
    # the examples themselves are inserted verbatim.
    system_prompt = dedent(
        """
        You are an intelligent AI System that excels
        at classifying user queries into three
        possible labels:
        - Positive
        - Negative
        - Neutral

        You are about to be given a user query and
        asked to classify it into one of the three
        categories. Make sure to refer closely to
        the examples provided to you, examining each
        individual example before coming up with the
        final answer.

        Here are the examples:
        {examples}
        """
    ).format(examples=concatenated_examples)

    return await client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        response_model=DemonstrationResponse,
        temperature=0,
    )


async def generate_self_consistent_responses(
    prompt: str, num_responses: int, examples: list[str]
):
    """Query the model ``num_responses`` times, each with a different example batch.

    ``examples`` is split, in order, into ``num_responses`` contiguous batches
    of equal size. One classification request is issued per batch and all
    requests are awaited concurrently.

    Args:
        prompt: The user query to classify.
        num_responses: How many batches (and therefore responses) to produce.
        examples: The full pool of labelled demonstrations.

    Returns:
        A list with one response per batch, in batch order.

    Raises:
        ValueError: If ``num_responses`` is not positive, ``examples`` is
            empty, or the examples cannot be split evenly.
    """
    # Explicit checks instead of `assert`: assertions are stripped under
    # `python -O`, which would let an uneven split silently produce the wrong
    # number of batches.
    if num_responses <= 0:
        raise ValueError("num_responses must be a positive integer")
    if not examples:
        # Would otherwise yield batch_size == 0 and an opaque range() error.
        raise ValueError("examples must not be empty")
    if len(examples) % num_responses != 0:
        raise ValueError(
            "The number of examples must be evenly divisible by num_responses"
        )

    # Batch the examples into num_responses batches
    batch_size = len(examples) // num_responses

    coros = [
        generate_self_consistent_response(prompt, examples[start : start + batch_size])
        for start in range(0, len(examples), batch_size)
    ]

    return await asyncio.gather(*coros)


if __name__ == "__main__":
    # The query to classify.
    query = "What is the weather like today?"

    # Ten labelled demonstrations; with five responses requested below, each
    # request sees a batch of two.
    labelled_examples = [
        "I love this product! [Positive]",
        "This is the worst service ever. [Negative]",
        "The movie was okay, not great but not terrible. [Neutral]",
        "I'm so happy with my new phone! [Positive]",
        "The food was terrible and the service was slow. [Negative]",
        "It's an average day, nothing special. [Neutral]",
        "Fantastic experience, will come again! [Positive]",
        "I wouldn't recommend this to anyone. [Negative]",
        "The book was neither good nor bad. [Neutral]",
        "Absolutely thrilled with the results! [Positive]",
    ]

    results = asyncio.run(
        generate_self_consistent_responses(query, 5, labelled_examples)
    )

    # Majority vote over the per-batch predictions.
    votes = Counter(result.correct_answer for result in results)
    top_label = votes.most_common(1)[0][0]
    print(top_label)
    #> Neutral

参考文献

¹: 《探索上下文学习中的演示集成》（Exploring Demonstration Ensembling for In-context Learning）