跳到主要内容

使用 Anthropic 实现结构化输出,Instructor 完整指南

现在我们有了 Anthropic 客户端,我们可以将其与 instructor 客户端一起使用来发出请求。

让我们首先安装支持 Anthropic 的 instructor 客户端

pip install "instructor[anthropic]"

安装完成后,入门非常简单,只需使用我们的 from_anthropic 方法来修补客户端即可。

基本用法

# Standard library imports
import os
from typing import List

# Third-party imports
import anthropic
import instructor
from pydantic import BaseModel, Field

# The ANTHROPIC_API_KEY environment variable is expected to be set beforehand.
# os.environ["ANTHROPIC_API_KEY"] = "your-api-key"  # Uncomment and replace with your API key if not set

# Response models: a user record made up of arbitrary key/value properties.
class Properties(BaseModel):
    """Model representing a key-value property."""
    name: str = Field(description="The name of the property")
    value: str = Field(description="The value of the property")


class User(BaseModel):
    """Model representing a user with properties."""
    name: str = Field(description="The user's full name")
    age: int = Field(description="The user's age in years")
    properties: List[Properties] = Field(description="List of user properties")

# Patch the Anthropic client so create() accepts a response_model argument.
client = instructor.from_anthropic(
    anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")),
    mode=instructor.Mode.ANTHROPIC_TOOLS  # Structured output via Anthropic's tool calling API
)

try:
    # Ask the model for a structured User; instructor validates the reply
    # against the pydantic model before returning it.
    extraction_messages = [
        {
            "role": "system",
            "content": "Extract structured information based on the user's request."
        },
        {
            "role": "user",
            "content": "Create a user for a model with a name, age, and properties.",
        }
    ]
    extracted_user = client.chat.completions.create(
        model="claude-3-haiku-20240307",
        max_tokens=1024,
        messages=extraction_messages,
        response_model=User,
    )

    # Emit the validated result as pretty-printed JSON.
    print(extracted_user.model_dump_json(indent=2))

    # Expected output:
    # {
    #   "name": "John Doe",
    #   "age": 35,
    #   "properties": [
    #     {
    #       "name": "City",
    #       "value": "New York"
    #     },
    #     {
    #       "name": "Occupation",
    #       "value": "Software Engineer"
    #     }
    #   ]
    # }
except instructor.exceptions.InstructorError as err:
    print(f"Validation error: {err}")
except Exception as err:
    print(f"Unexpected error: {err}")

多模态

我们提供了几个不同的样本文件供您测试这些新功能。以下所有示例都使用了这些文件。

  • (图像) : 一些蓝莓植株的图片 image.jpg
  • (PDF) : 一个包含虚假发票的样本 PDF 文件 invoice.pdf

Instructor 提供了一个统一的、与提供商无关的接口,用于处理图像、PDF 和音频文件等多模态输入。借助 Instructor 的多模态对象,您可以使用跨不同 AI 提供商(OpenAI、Anthropic、Mistral 等)工作的统一 API 轻松从 URL、本地文件或 base64 字符串加载媒体。

Instructor 在幕后处理所有提供商特定的格式要求,确保您的代码随着提供商 API 的发展保持干净和面向未来。

让我们看看如何使用 Image 和 PDF 类。

图像

有关图像组件的更深入演练,请查看此处文档

Instructor 可以使用 Anthropic 的 claude 模型轻松分析和提取图像中的语义信息。点击此处查看您想使用的模型是否具备视觉能力。

请看下面使用上述样本图像的示例,我们将使用 from_url 方法加载它。

请注意,我们也支持本地文件和 base64 字符串,分别使用 from_path 和 from_base64 类方法。

from instructor.multimodal import Image
from pydantic import BaseModel, Field
import instructor
from anthropic import Anthropic


class ImageDescription(BaseModel):
    objects: list[str] = Field(..., description="The objects in the image")
    scene: str = Field(..., description="The scene of the image")
    colors: list[str] = Field(..., description="The colors in the image")


# Patched client; default mode is used here.
client = instructor.from_anthropic(Anthropic())
image_url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/image.jpg"

# The content list mixes plain text with instructor's multimodal Image object.
# Several loaders are available:
#   Image.from_url(...)      - remote URL (used below)
#   Image.from_path(...)     - local file
#   Image.from_base64(...)   - base64-encoded string
#   Image.autodetect(...)    - figure out the input kind automatically
prompt_content = [
    "What is in this image?",
    Image.from_url(image_url),
]

description = client.chat.completions.create(
    model="claude-3-5-sonnet-20240620",
    response_model=ImageDescription,
    max_tokens=1000,
    messages=[
        {
            "role": "user",
            "content": prompt_content,
        },
    ],
)

print(description)
# Example output:
# ImageDescription(
#     objects=['blueberries', 'leaves'],
#     scene='A blueberry bush with clusters of ripe blueberries and some unripe ones against a cloudy sky',
#     colors=['green', 'blue', 'purple', 'white']
# )

PDF

Instructor 可以使用 Anthropic 的 Claude 模型系列轻松分析和提取 PDF 中的语义信息。

请看下面使用上述样本 PDF 的示例,我们将使用 from_url 方法加载它。

请注意,我们也支持本地文件和 base64 字符串,分别使用 from_path 和 from_base64 类方法。

from instructor.multimodal import PDF
from pydantic import BaseModel, Field
import instructor
from anthropic import Anthropic


class Receipt(BaseModel):
    total: int
    items: list[str]


# Patched client; default mode is used here.
client = instructor.from_anthropic(Anthropic())
invoice_url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/invoice.pdf"

# Several loaders exist for PDFs, mirroring the Image API:
#   PDF.from_url(...)      - remote URL (used below)
#   PDF.from_path(...)     - local file
#   PDF.from_base64(...)   - base64-encoded string
#   PDF.autodetect(...)    - figure out the input kind automatically
receipt = client.chat.completions.create(
    model="claude-3-5-sonnet-20240620",
    response_model=Receipt,
    max_tokens=1000,
    messages=[
        {
            "role": "user",
            "content": [
                "Extract out the total and line items from the invoice",
                PDF.from_url(invoice_url),
            ],
        },
    ],
)

print(receipt)
# > Receipt(total=220, items=['English Tea', 'Tofu'])

如果您想缓存 PDF 并在多个不同请求中重复使用,我们支持使用 PdfWithCacheControl 类来实现,如下所示。

from instructor.multimodal import PdfWithCacheControl
from pydantic import BaseModel
import instructor
from anthropic import Anthropic


class Receipt(BaseModel):
    total: int
    items: list[str]


client = instructor.from_anthropic(Anthropic())
invoice_url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/invoice.pdf"

# PdfWithCacheControl marks the PDF for Anthropic's prompt cache so repeated
# requests reuse the uploaded document. The usual loaders still apply:
#   from_url / from_path / from_base64 / autodetect
# create_with_completion also returns the raw completion so we can inspect
# the cache token counters.
receipt, completion = client.chat.completions.create_with_completion(
    model="claude-3-5-sonnet-20240620",
    response_model=Receipt,
    max_tokens=1000,
    messages=[
        {
            "role": "user",
            "content": [
                "Extract out the total and line items from the invoice",
                PdfWithCacheControl.from_url(invoice_url),
            ],
        },
    ],
)

# Either the cache was just written (first call) or read from (later calls).
cache_was_used = (
    completion.usage.cache_creation_input_tokens > 0
    or completion.usage.cache_read_input_tokens > 0
)
assert cache_was_used
print(receipt)
# > Receipt(total=220, items=['English Tea', 'Tofu'])

流式传输支持

Instructor 提供两种主要方式来流式传输响应

  1. 可迭代对象:当您想流式传输同类型对象列表时非常有用(例如,使用结构化输出提取多个用户)
  2. 局部流式传输:当您想流式传输单个对象并希望在响应到达时立即开始处理时非常有用。

局部流

您可以使用我们的 create_partial 方法来流式传输单个对象。请注意,在流式传输对象时,不应在响应模型中声明验证器,因为它会破坏流式传输过程。

# Standard library imports
import os

# Third-party imports
import anthropic
import instructor  # FIX: required for instructor.Mode below; the original
                   # snippet only imported from_anthropic, so the bare name
                   # `instructor` raised a NameError at client construction.
from instructor import from_anthropic
from pydantic import BaseModel, Field

# Set up environment (typically handled before script execution)
# os.environ["ANTHROPIC_API_KEY"] = "your-api-key"  # Uncomment and replace with your API key if not set

# Initialize client with explicit mode
client = from_anthropic(
    anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")),
    mode=instructor.Mode.ANTHROPIC_TOOLS
)

# Define your model with proper annotations.
# Note: do not attach validators when streaming partials -- intermediate
# objects are intentionally incomplete and would fail validation.
class User(BaseModel):
    """Model representing a user profile."""
    name: str = Field(description="The user's full name")
    age: int = Field(description="The user's age in years")
    bio: str = Field(description="A biographical description of the user")

try:
    # Stream partial objects as they're generated; each iteration yields the
    # model with whatever fields have been filled in so far (missing -> None).
    for partial_user in client.chat.completions.create_partial(
        model="claude-3-haiku-20240307",  # Use latest stable model
        messages=[
            {"role": "system", "content": "Create a detailed user profile based on the information provided."},
            {"role": "user", "content": "Create a user profile for Jason, age 25"},
        ],
        response_model=User,
        max_tokens=4096,
    ):
        print(f"Current state: {partial_user}")

    # Expected output:
    # > Current state: name='Jason' age=None bio=None
    # > Current state: name='Jason' age=25 bio='Jason is a 25-year-old with an adventurous spirit and a love for technology. He is'
    # > Current state: name='Jason' age=25 bio='Jason is a 25-year-old with an adventurous spirit and a love for technology. He is always on the lookout for new challenges and opportunities to grow both personally and professionally.'
except Exception as e:
    print(f"Error during streaming: {e}")

可迭代示例

您还可以使用我们的 create_iterable 方法来流式传输对象列表。当您想从单个提示中提取同一响应模型的多个实例时,这很有帮助。

# Standard library imports
import os

# Third-party imports
import anthropic
import instructor  # FIX: required for instructor.Mode below; the original
                   # snippet only imported from_anthropic, so the bare name
                   # `instructor` raised a NameError at client construction.
from instructor import from_anthropic
from pydantic import BaseModel, Field

# Set up environment (typically handled before script execution)
# os.environ["ANTHROPIC_API_KEY"] = "your-api-key"  # Uncomment and replace with your API key if not set

# Initialize client with explicit mode
client = from_anthropic(
    anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")),
    mode=instructor.Mode.ANTHROPIC_TOOLS
)

# Define your model with proper annotations
class User(BaseModel):
    """Model representing a basic user."""
    name: str = Field(description="The user's full name")
    age: int = Field(description="The user's age in years")

try:
    # create_iterable yields one validated User per entity found in the text,
    # streamed as they are extracted rather than all at once.
    users = client.chat.completions.create_iterable(
        model="claude-3-haiku-20240307",  # Use latest stable model
        messages=[
            {
                "role": "system",
                "content": "Extract all users from the provided text into structured format."
            },
            {
                "role": "user",
                "content": """
                Extract users:
                1. Jason is 25 years old
                2. Sarah is 30 years old
                3. Mike is 28 years old
                """,
            },
        ],
        max_tokens=4096,
        response_model=User,
    )

    # Process each user as it's extracted
    for user in users:
        print(user)

    # Expected output:
    # > name='Jason' age=25
    # > name='Sarah' age=30
    # > name='Mike' age=28
except Exception as e:
    print(f"Error during iteration: {e}")

Instructor 模式

我们提供了几种模式,以便于处理 Anthropic 支持的不同响应模型

  1. instructor.Mode.ANTHROPIC_JSON : 这使用 Anthropic API 的文本补全 API,然后从文本补全模型中提取所需的响应模型
  2. instructor.Mode.ANTHROPIC_TOOLS : 这使用 Anthropic 的工具调用 API 向客户端返回结构化输出

总的来说,我们建议使用 Mode.ANTHROPIC_TOOLS,因为它是确保您拥有所需响应模式的最佳方式。

缓存

如果您想对 Anthropic 客户端使用缓存,我们也支持图像和文本输入。

缓存文本输入

下面是您可以如何实现文本输入缓存(假设您有一个巨大的 book.txt 文件需要读取)。

我们写了一篇全面的演练,介绍如何使用缓存实现 Anthropic 新的上下文检索方法,该方法能显著提高检索准确性。

# Standard library imports
import os

# Third-party imports
import instructor
from anthropic import Anthropic
from pydantic import BaseModel, Field

# Set up environment (typically handled before script execution)
# os.environ["ANTHROPIC_API_KEY"] = "your-api-key"  # Uncomment and replace with your API key if not set

# Define your Pydantic model with proper annotations
class Character(BaseModel):
    """Model representing a character extracted from text."""
    name: str = Field(description="The character's full name")
    description: str = Field(description="A description of the character")

# Initialize client with explicit mode and prompt caching
client = instructor.from_anthropic(
    Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")),
    mode=instructor.Mode.ANTHROPIC_TOOLS,
    enable_prompt_caching=True  # Enable prompt caching
)

try:
    # Load your large context
    with open("./book.txt", "r") as f:
        book = f.read()

    # Make multiple calls using the cached context
    for _ in range(2):
        # The first time processes the large text, subsequent calls use the cache
        resp, completion = client.chat.completions.create_with_completion(
            model="claude-3-haiku-20240307",  # Use latest stable model
            messages=[
                {
                    "role": "system",
                    "content": "Extract character information from the provided text."
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "<book>" + book + "</book>",
                            "cache_control": {"type": "ephemeral"},  # Mark for caching
                        },
                        {
                            "type": "text",
                            "text": "Extract a character from the text given above",
                        },
                    ],
                },
            ],
            response_model=Character,
            max_tokens=1000,
        )

        # Process the result
        print(f"Character: {resp.name}")
        print(f"Description: {resp.description}")

        # FIX: `completion` is an Anthropic Message object, which has no
        # __len__, so the original len(completion) raised a TypeError that the
        # broad except below silently swallowed. Print the usage metadata
        # instead -- it includes the cache token counters this example is
        # meant to demonstrate.
        print(f"Raw completion usage: {completion.usage}")

    # Note: Second iteration should be faster due to cache hit

except Exception as e:
    print(f"Error: {e}")

缓存图像

我们也支持图像缓存。这帮助很大,特别是当您重复使用图像以节省成本时。在此处阅读更多相关信息:此处

# Standard library imports
import os

# Third-party imports
import instructor
from anthropic import Anthropic
from pydantic import BaseModel, Field

# The ANTHROPIC_API_KEY environment variable is expected to be set beforehand.
# os.environ["ANTHROPIC_API_KEY"] = "your-api-key"  # Uncomment and replace with your API key if not set

# Response model describing what the vision model saw across the images.
class ImageAnalyzer(BaseModel):
    """Model for analyzing image content."""
    content_description: str = Field(description="Description of what appears in the images")
    objects: list[str] = Field(description="List of objects visible in the images")
    scene_type: str = Field(description="Type of scene shown in the images (indoor, outdoor, etc.)")

# Patched client with prompt caching switched on.
client = instructor.from_anthropic(
    Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")),
    mode=instructor.Mode.ANTHROPIC_TOOLS,
    enable_prompt_caching=True  # Enable prompt caching
)

try:
    # Shared cache-control marker reused for every image part below.
    ephemeral = {"type": "ephemeral"}

    # One remote and one local image, both flagged for caching; repeating the
    # same request later should hit the cache instead of re-uploading.
    image_parts = [
        "What is in these two images?",
        {
            "type": "image",
            "source": "https://example.com/image.jpg",
            "cache_control": ephemeral
        },
        {
            "type": "image",
            "source": "path/to/image.jpg",
            "cache_control": ephemeral
        },
    ]

    analysis = client.chat.completions.create(
        model="claude-3-haiku-20240307",
        response_model=ImageAnalyzer,
        messages=[
            {
                "role": "system",
                "content": "Analyze the content of the provided images in detail."
            },
            {
                "role": "user",
                "content": image_parts
            }
        ],
        autodetect_images=True  # Let instructor convert the plain sources above
    )

    # Report what the model extracted.
    print(f"Description: {analysis.content_description}")
    print(f"Objects: {', '.join(analysis.objects)}")
    print(f"Scene type: {analysis.scene_type}")

    # Subsequent identical requests will use cached images

except Exception as err:
    print(f"Error during image analysis: {err}")

思考

Anthropic 最近为其 sonnet-3.7 模型系列发布了扩展思考功能支持。在 instructor 中,我们支持使用 instructor.Mode.ANTHROPIC_REASONING_TOOLS 模式获取经过验证的工具调用,如下所示。

from anthropic import Anthropic
import instructor
from pydantic import BaseModel


class Answer(BaseModel):
    answer: float


# Wrap the raw client in reasoning-tools mode so extended thinking and
# validated tool calls can be combined in one request.
client = instructor.from_anthropic(
    Anthropic(),
    mode=instructor.Mode.ANTHROPIC_REASONING_TOOLS,
)

# Extended thinking requires temperature=1 and an explicit thinking budget.
response = client.chat.completions.create(
    model="claude-3-7-sonnet-latest",
    response_model=Answer,
    messages=[
        {
            "role": "user",
            "content": "Which is larger, 9.11 or 9.8",
        },
    ],
    temperature=1,
    max_tokens=2000,
    thinking={"type": "enabled", "budget_tokens": 1024},
)


# The reply comes back as a validated Answer instance.
assert isinstance(response, Answer)
assert response.answer == 9.8

然后返回经过验证的 Answer 对象作为响应。