跳到内容

独立验证响应

验证链(CoVe)1是一种允许我们验证 LLM 生成的响应的方法。我们可以使用以下步骤来完成:

  1. 首先,我们让 LLM 生成对查询的响应。
  2. 然后,我们生成一组需要回答的后续问题,以验证响应。
  3. 接着,我们独立生成对这些问题的响应集。
  4. 最后,我们使用最终的 LLM 调用,根据我们生成的新问题和答案对来验证响应。
import instructor
from openai import AsyncOpenAI
from pydantic import BaseModel, Field
import asyncio

client = instructor.from_openai(AsyncOpenAI())


class QueryResponse(BaseModel):
    correct_answer: str


class ValidationQuestions(BaseModel):
    question: list[str] = Field(
        description="""A list of questions that need to be
        answered to validate the response"""
    )


class ValidationAnswer(BaseModel):
    answer: str


class FinalResponse(BaseModel):
    correct_answer: str


async def generate_initial_response(query: str):
    return await client.chat.completions.create(
        model="gpt-4o",
        response_model=QueryResponse,
        messages=[
            {
                "role": "system",
                "content": "You are an expert question answering system",
            },
            {"role": "user", "content": query},
        ],
    )


async def generate_verification_questions(llm_response: str):
    return await client.chat.completions.create(
        model="gpt-4o",
        response_model=ValidationQuestions,
        messages=[
            {
                "role": "system",
                "content": """You are an expert AI system that excels at
                generating follow up questions to validate a response.
                These questions should validate key assumptions, facts
                and other important portions of the generated response""",
            },
            {"role": "user", "content": llm_response},
        ],
    )


async def generate_verification_response(questions: list[str]):
    async def verify_question(question: str) -> tuple[ValidationAnswer, str]:
        return (
            await client.chat.completions.create(
                model="gpt-4o",
                response_model=ValidationAnswer,
                messages=[
                    {
                        "role": "system",
                        "content": """You are an expert AI system that
                        excels at answering validation questions.""",
                    },
                    {"role": "user", "content": question},
                ],
            ),
            question,
        )

    coros = [verify_question(question) for question in questions]
    return await asyncio.gather(*coros)


async def generate_final_response(
    answers: list[tuple[ValidationAnswer, str]],
    initial_response: QueryResponse,
    original_query: str,
):
    formatted_answers = "\n".join(
        [f"Q: {question}\nA: {answer.answer}" for answer, question in answers]
    )
    return await client.chat.completions.create(
        model="gpt-4o",
        response_model=FinalResponse,
        messages=[
            {
                "role": "system",
                "content": """You are an expert AI system that excels at
                validating and verifying if an initial answer answers an
                initial query based off some Verification Questions and
                Answers provided. Return the original answer if it is
                valid else generate a new response off the verification
                questions and answers provided.""",
            },
            {
                "role": "user",
                "content": f"""
                Initial query: {original_query}
                Initial Answer : {initial_response.correct_answer}
                Verification Questions and Answers:
                {formatted_answers}
            """,
            },
        ],
    )


if __name__ == "__main__":
    query = "What was the primary cause of the Mexican-American war and how long did it last?"
    initial_response = asyncio.run(generate_initial_response(query))
    print(initial_response.model_dump_json())
    """
    {"correct_answer":"The primary cause of the Mexican-American War was
    the annexation of Texas by the United States and the dispute over
    whether Texas ended at the Nueces River (as the Mexicans claimed) or
    the Rio Grande (as the U.S. claimed). The war lasted from April 25,
    1846, to February 2, 1848, totaling nearly two years."}
    """

    verification_questions = asyncio.run(
        generate_verification_questions(initial_response.correct_answer)
    )
    print(verification_questions.model_dump_json())
    """
    {"question":["Is it accurate that the primary cause of the
    Mexican-American War was the annexation of Texas by the United
    States?","Was there a dispute over whether Texas ended at the Nueces
    River or the Rio Grande?","Did the Mexican-American War last from
    April 25, 1846, to February 2, 1848?","Is it correct to state that
    the disagreement over the Texas border was between the Nueces River
    and the Rio Grande?","Was the Mexican claim that Texas ended at the
    Nueces River while the U.S. claimed it was at the Rio Grande?"]}
    """

    responses = asyncio.run(
        generate_verification_response(verification_questions.question)
    )

    final_answer = asyncio.run(
        generate_final_response(responses, initial_response, query)
    )
    print(final_answer.model_dump_json())
    """
    {"correct_answer":"The primary cause of the Mexican-American War was
    the annexation of Texas by the United States and the dispute over
    whether Texas ended at the Nueces River (as the Mexicans claimed) or
    the Rio Grande (as the U.S. claimed). The war lasted from April 25,
    1846, to February 2, 1848, totaling nearly two years."}
    """

参考文献

1: 验证链减少大型语言模型的幻觉