从推理步骤重构提示

我们可以使用一种称为逆向思维链（Reverse Chain Of Thought）¹ 的方法，根据给定的解决方案逆向工程出问题。这有助于我们发现模型所采取的推理步骤中的具体不一致之处，并提供有针对性的反馈，从而提高解决方案的质量。
这通过以下三个步骤完成：
重构问题：我们首先尝试根据生成的解决方案和推理步骤重构原始问题
识别不一致：识别原始问题与重构问题之间的不一致之处
生成反馈：提供细粒度反馈，指导 LLM 修改其解决方案
我们可以使用 instructor 实现这一点，如下所示。
import instructor
from openai import OpenAI
from pydantic import BaseModel, Field

client = instructor.from_openai(OpenAI())


class ReconstructedPrompt(BaseModel):
    chain_of_thought: str
    reconstructed_prompt: str = Field(
        description="""Reconstruction of a potential prompt
        that could have been used to generate the reasoning
        and final solution provided by the user"""
    )


class ConditionList(BaseModel):
    conditions: list[str] = Field(
        description="""Key information and conditions present
        in the reasoning steps which are relevant to answering
        the question"""
    )


class ModelFeedback(BaseModel):
    detected_inconsistencies: list[str] = Field(
        description="""Inconsistencies that were detected between
        the original condition list and the reconstructed condition
        list"""
    )
    feedback: str = Field(
        description="""Feedback on how to fix the inconsistencies
        detected in the original condition list and the reconstructed
        condition list"""
    )
    is_equal: bool


class ModelResponse(BaseModel):
    chain_of_thought: str = Field(
        description="""Logical Steps that were taken to derive
        the final concluding statement"""
    )
    correct_answer: str


def generate_response(query: str):
    return client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": """
                You are a helpful AI Question Answerer. You are
                about to be passed a query by a User.

                Make sure to generate a series of logical steps
                and reason about the problem before generating
                a solution.
                """,
            },
            {"role": "user", "content": query},
        ],
        response_model=ModelResponse,
    )


def reconstruct_prompt(model_response: ModelResponse):
    return client.chat.completions.create(
        model="gpt-4o",
        response_model=ReconstructedPrompt,
        messages=[
            {
                "role": "system",
                "content": f"""
                    Give the concrete prompt (problem) that can
                    generate this answer. The problem should
                    contain all basic and necessary information
                    and correspond to the answer. The problem
                    can only ask for one result

                    Reasoning: {model_response.chain_of_thought}
                    Response: {model_response.correct_answer}
                    """,
            }
        ],
    )


def deconstruct_prompt_into_condition_list(prompt: str):
    return client.chat.completions.create(
        model="gpt-4o",
        response_model=ConditionList,
        messages=[
            {
                "role": "system",
                "content": """
                You are an expert AI system that excels at
                analyzing and decomposing questions into their
                constituent parts.

                Please list the conditions of the problem given
                below. There might be multiple conditions in the
                problem so make sure to navigate through the
                prompt incrementally, indentifying and extracting
                the conditions necessary to answer the question
                in your final response.
                """,
            },
            {"role": "user", "content": prompt},
        ],
    )


def generate_feedback(
    original_condition_list: list[str], final_condition_list: list[str]
):
    formatted_original_conditions = "\n- ".join(original_condition_list)
    formatted_final_conditions = "\n- ".join(final_condition_list)
    return client.chat.completions.create(
        model="gpt-4o",
        response_model=ModelFeedback,
        messages=[
            {
                "role": "system",
                "content": f"""
                You are an expert AI system that excels at
                analyzing and comparing two lists of conditions.

                Original Condition List:
                {formatted_original_conditions}

                Reconstructed Condition List:
                {formatted_final_conditions}

                Determine if the two condition lists are roughly
                equivalent. If they are not, give targetted
                feedback on what is missing from the reconstructed
                condition list as compared to the original condition
                list and how it can be fixed.
                """,
            }
        ],
    )


def revise_response(response: ModelResponse, feedback: ModelFeedback):
    formatted_inconsistencies = "\n- ".join(feedback.detected_inconsistencies)
    return client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": f"""
                Here are the mistakes and reasons in your answer
                to the prompt

                Original Response: {response.correct_answer}
                You have overlooked some real conditions:
                {formatted_inconsistencies}

                Here are detailed reasons:
                {feedback.feedback}

                Generate a revised response that takes into account
                the detailed feedback and includes the ignored
                conditions
                """,
            }
        ],
        response_model=ModelResponse,
    )


if __name__ == "__main__":
    query = """
    Mary is an avid gardener. Yesterday, she received 18 new
    potted plants from her favorite plant nursery. She already
    has 2 potted plants on each of the 40 window ledges of her
    large backyard. How many potted plants will Mary remain
    with?
    """
    response = generate_response(query)
    reconstructed_prompt = reconstruct_prompt(response)
    print(reconstructed_prompt.reconstructed_prompt)
    """
    Mary received 18 new potted plants. She already has 2 potted plants on each
    of the 40 window ledges in her backyard. How many potted plants does she have now?
    """

    original_condition_list = deconstruct_prompt_into_condition_list(query)
    new_condition_list = deconstruct_prompt_into_condition_list(
        reconstructed_prompt.reconstructed_prompt
    )
    print(original_condition_list.model_dump_json(indent=2))
    """
    {
      "conditions": [
        "Mary received 18 new potted plants.",
        "Mary has 2 potted plants on each of the 40 window ledges in her backyard.",
        "We are required to find the total number of potted plants Mary will have."
      ]
    }
    """
    print(new_condition_list.model_dump_json(indent=2))
    """
    {
      "conditions": [
        "Mary received 18 new potted plants.",
        "She already has 2 potted plants on each of the 40 window ledges in her backyard."
      ]
    }
    """

    feedback = generate_feedback(
        original_condition_list.conditions, new_condition_list.conditions
    )
    print(feedback.model_dump_json(indent=2))
    """
    {
      "detected_inconsistencies": [
        "The reconstructed list is missing the requirement
        to find the total number of potted plants Mary will
        have."
      ],
      "feedback": "Add the requirement of finding the total
      number of potted plants Mary will have to the
      reconstructed condition list to match the original
      condition list.",
      "is_equal": false
    }
    """

    if not feedback.is_equal:
        response = revise_response(response, feedback)

    print(response.model_dump_json(indent=2))
    """
    {
      "chain_of_thought": "First, we note that Mary starts
      with 18 potted plants. According to the problem, she
      bought 2 packs of 40 new potted plants. So, to find
      the total number of plants she will have, we add the
      number of plants she initially has to the number she
      bought. This gives us 18 (initial) + 2 * 40 (new) =
      18 + 80 = 98 potted plants.",
      "correct_answer": "98 potted plants"
    }
    """
参考文献¶

¹: RCoT：通过逆向思维链检测和纠正推理中的事实不一致性 (RCoT: Detecting And Rectifying Factual Inconsistency In Reasoning By Reversing Chain-Ofthought)