优先处理复杂示例
我们可以通过选择更复杂的示例来提高语言模型的性能。这些示例是指推理步骤更多或响应更长(当推理步骤不可用时)的示例。
如果没有任何示例可用,我们可以采样多个响应,并根据其中最复杂的前几个响应生成答案。复杂性可以根据各响应推理步骤的长度来确定,这种方法被称为基于复杂性的一致性 (Complexity Based Consistency) [1]。
我们可以使用 `instructor` 实现基于复杂性的一致性 (Complexity Based Consistency),如下所示。
import instructor
from openai import AsyncOpenAI
from pydantic import BaseModel, Field
from textwrap import dedent
import asyncio
from collections import Counter
import random
# Async OpenAI client wrapped by instructor; the functions below call it with
# `response_model=...` to receive structured `Response` objects.
client = instructor.from_openai(AsyncOpenAI())
# A single step of the structured reasoning chain requested from the model.
# NOTE: documented with comments rather than a class docstring so that the
# schema generated from this model is left exactly as it was.
class ReasoningStep(BaseModel):
    # Position of this step within the chain.
    step: int = Field(..., description="The step number")
    # The sub-problem this step addresses.
    subquestion: str = Field(..., description="Subquestion to solve")
    # Intermediate computation, if any. The description text is spelled out
    # line by line so it does not depend on source indentation.
    procedure: str = Field(
        description=(
            "Any intermediate computation\n"
            "that was done in the reasoning process. Leave\n"
            "empty if no computation is needed"
        ),
    )
    # Outcome of this step.
    result: str
# Structured reply: the reasoning chain followed by the final integer answer.
# NOTE: documented with comments rather than a class docstring so that the
# schema generated from this model is left exactly as it was.
class Response(BaseModel):
    # Ordered reasoning steps; their count is used elsewhere as the
    # complexity measure for ranking responses.
    reasoning: list[ReasoningStep] = Field(
        description="reasoning steps to derive answer"
    )
    # Final answer produced after the reasoning steps.
    correct_answer: int
async def generate_single_response(query: str, context: str) -> Response:
    """Request one structured reasoning chain and answer for ``query``.

    Args:
        query: The question to answer.
        context: Background text inserted into the prompt ahead of the query.

    Returns:
        The ``Response`` produced by ``client`` for this single request.
    """
    # Each prompt line is its own literal starting at column 0, so the text
    # handed to ``dedent`` does not depend on how this function is indented.
    system_prompt = dedent(
        "\n"
        "You are an expert Question Answering system. Make sure\n"
        "to output your reasoning in structured reasoning steps\n"
        "before generating a response to the user's query.\n"
        "Context:\n"
        f"{context}\n"
        "Query:\n"
        f"{query}\n"
    )
    system_message = {"role": "system", "content": system_prompt}
    return await client.chat.completions.create(
        model="gpt-4o",
        response_model=Response,
        messages=[system_message],
    )
async def complexity_based_consistency(
    query: str, context: str, samples: int, top_k: int
):
    """Sample several reasoning chains and keep the most complex ones.

    Args:
        query: The question passed to every sampled request.
        context: Background text passed to every sampled request.
        samples: Number of responses to request.
        top_k: Number of responses to keep after ranking.

    Returns:
        The first ``top_k`` responses after ordering them from most to
        fewest reasoning steps.
    """
    # Create every request up front so gather can await them together.
    pending = [generate_single_response(query, context) for _ in range(samples)]
    ranked = list(await asyncio.gather(*pending))
    # Complexity is measured as the number of reasoning steps in a response.
    ranked.sort(key=lambda candidate: len(candidate.reasoning), reverse=True)
    return ranked[:top_k]
if __name__ == "__main__":
    # Example run: sample 5 reasoning chains, keep the 3 most complex, then
    # take a majority vote over their answers.
    query = "How many loaves of bread did they have left?"
    # Adjacent literals keep the prompt text independent of code indentation.
    context = (
        "\n"
        "The bakers at the Beverly Hills Bakery baked\n"
        "200 loaves of bread on Monday morning. They\n"
        "sold 93 loaves in the morning and 39 loaves\n"
        "in the afternoon. A grocery store returned 6\n"
        "unsold loaves.\n"
    )

    number_of_reasoning_chains = 5
    top_k_to_sample = 3

    response = asyncio.run(
        complexity_based_consistency(
            query, context, number_of_reasoning_chains, top_k_to_sample
        )
    )

    # Tally the answers of the retained responses.
    answer_counts = Counter(res.correct_answer for res in response)
    # Only the highest count is needed, so ask for the single top entry
    # instead of a full ranking of every answer.
    most_common_count = answer_counts.most_common(1)[0][1]
    # Several answers may share the top count; collect them all and break the
    # tie at random.
    max_answers = [
        answer for answer, count in answer_counts.items() if count == most_common_count
    ]

    final_answer = random.choice(max_answers)

    print(final_answer)
    #> 74
参考文献
1: 基于复杂性的多步推理提示 (Complexity-Based Prompting for Multi-Step Reasoning)