跳到内容

多任务和流式处理

使用 `Iterable` 可以让你从单次 LLM 调用中提取多个结构化对象,并在它们到达时进行流式处理。这对于实体提取、多任务输出等非常有用。

对于大多数用例,我们推荐使用 `create_iterable` 方法。它比手动指定 `Iterable[...]` 和 `stream=True` 更简单且不易出错。

这里有一个简单的例子,展示了如何从一个句子中提取多个用户。你可以使用推荐的 `create_iterable` 方法,或者使用带有 `Iterable[User]` 的 `create` 方法:

import instructor
import openai
from pydantic import BaseModel

# Element type of the extraction; one call may produce several of these.
# (Kept free of a docstring so the schema sent to the model is unchanged.)
class User(BaseModel):
    name: str
    age: int


# Instructor-wrapped OpenAI client; its completion calls accept `response_model`.
client = instructor.from_openai(openai.OpenAI())

# The sentence to extract users from, split only for line length.
prompt = (
    "Ivan is 28, lives in Moscow and his friends are Alex, John and Mary "
    "who are 25, 30 and 27 respectively"
)

# `create_iterable` receives the element type itself (no `Iterable[...]`
# wrapper and no explicit `stream=True`) and is iterated for the results.
users = client.chat.completions.create_iterable(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": prompt}],
    response_model=User,
)

for extracted in users:
    print(extracted)
推荐用于大多数用例。它为你处理流式处理和迭代。

import instructor
import openai
from pydantic import BaseModel
from typing import Iterable

# Instructor-wrapped OpenAI client; its completion calls accept `response_model`.
client = instructor.from_openai(openai.OpenAI())


# Element type of the extraction; the request below asks for an iterable of it.
class User(BaseModel):
    name: str
    age: int


# The conversation: a single user turn containing the text to extract from.
conversation = [
    {
        "role": "user",
        "content": "Ivan is 28, lives in Moscow and his friends are Alex, John and Mary who are 25, 30 and 27 respectively",
    }
]

# With plain `create`, the multi-object request is expressed by wrapping the
# model in `Iterable[...]`.
extracted_users = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=conversation,
    response_model=Iterable[User],
)

for person in extracted_users:
    print(person)
如果你需要更多手动控制或兼容旧代码,请使用此方法。


我们也原生支持更复杂的提取模式,例如你将在下面看到的联合类型(Unions)。

警告

Unions 不适用于 Gemini,因为当前响应 schema 不支持 AnyOf。

同步用法

import instructor
import openai
from typing import Iterable, Union, Literal
from pydantic import BaseModel

# First of the two result types the model may emit.
class Weather(BaseModel):
    location: str
    units: Literal["imperial", "metric"]


# Second of the two result types the model may emit.
class GoogleSearch(BaseModel):
    query: str


# Every item produced by the call is an instance of one of these two models.
Action = Union[Weather, GoogleSearch]

client = instructor.from_openai(openai.OpenAI(), mode=instructor.Mode.TOOLS)

system_turn = {"role": "system", "content": "You must always use tools"}
user_turn = {
    "role": "user",
    "content": "What is the weather in toronto and dallas and who won the super bowl?",
}

# Manual form: wrap the union in `Iterable[...]` and pass `stream=True`.
results = client.chat.completions.create(
    model="gpt-4",
    messages=[system_turn, user_turn],
    response_model=Iterable[Action],
    stream=True,
)

for action in results:
    print(action)
import instructor
import openai
from typing import Union, Literal
from pydantic import BaseModel

# First of the two result types the model may emit.
class Weather(BaseModel):
    location: str
    units: Literal["imperial", "metric"]


# Second of the two result types the model may emit.
class GoogleSearch(BaseModel):
    query: str


client = instructor.from_openai(openai.OpenAI(), mode=instructor.Mode.TOOLS)

# Conversation: a system instruction followed by the user's question.
conversation = [
    {"role": "system", "content": "You must always use tools"},
    {
        "role": "user",
        "content": "What is the weather in toronto and dallas and who won the super bowl?",
    },
]

# `create_iterable` is given the bare union; no `Iterable[...]` wrapper and no
# `stream=True` are passed here.
results = client.chat.completions.create_iterable(
    model="gpt-4",
    messages=conversation,
    response_model=Union[Weather, GoogleSearch],
)

for action in results:
    print(action)

异步用法

import instructor
import openai
from typing import Iterable, Union, Literal
from pydantic import BaseModel
import asyncio

# First of the two result types the model may emit.
class Weather(BaseModel):
    location: str
    units: Literal["imperial", "metric"]


# Second of the two result types the model may emit.
class GoogleSearch(BaseModel):
    query: str


# Async variant of the client, built on `openai.AsyncOpenAI`.
aclient = instructor.from_openai(openai.AsyncOpenAI(), mode=instructor.Mode.TOOLS)


async def main():
    """Request a stream of `Weather` / `GoogleSearch` items and print each one."""
    conversation = [
        {"role": "system", "content": "You must always use tools"},
        {
            "role": "user",
            "content": "What is the weather in toronto and dallas and who won the super bowl?",
        },
    ]
    # The `create` call is awaited first; its result is then consumed with
    # `async for`.
    stream = await aclient.chat.completions.create(
        model="gpt-4",
        messages=conversation,
        response_model=Iterable[Union[Weather, GoogleSearch]],
        stream=True,
    )
    async for action in stream:
        print(action)


asyncio.run(main())
import instructor
import openai
from typing import Union, Literal
from pydantic import BaseModel
import asyncio

# First of the two result types the model may emit.
class Weather(BaseModel):
    location: str
    units: Literal["imperial", "metric"]


# Second of the two result types the model may emit.
class GoogleSearch(BaseModel):
    query: str


# Async variant of the client, built on `openai.AsyncOpenAI`.
aclient = instructor.from_openai(openai.AsyncOpenAI(), mode=instructor.Mode.TOOLS)


async def main():
    """Iterate `Weather` / `GoogleSearch` items from `create_iterable` and print them."""
    # On the async client, `create_iterable` is an async generator function:
    # calling it returns an async generator, which is not awaitable. It must
    # be consumed directly with `async for`; `await`ing it raises TypeError.
    results = aclient.chat.completions.create_iterable(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You must always use tools"},
            {"role": "user", "content": "What is the weather in toronto and dallas and who won the super bowl?"},
        ],
        response_model=Union[Weather, GoogleSearch],
    )
    async for item in results:
        print(item)


asyncio.run(main())