令牌用量
获取非流式请求的令牌用量,最简单的方法是访问原始响应。
import instructor
from openai import OpenAI
from pydantic import BaseModel
# Wrap the OpenAI client so chat completions accept a `response_model`.
client = instructor.from_openai(OpenAI())


class UserExtract(BaseModel):
    """Fields to extract from the user's message."""

    name: str
    age: int


# `create_with_completion` returns the parsed model together with the raw
# completion, which is where the token usage is read from below.
user, completion = client.chat.completions.create_with_completion(
    model="gpt-3.5-turbo",
    response_model=UserExtract,
    messages=[
        {"role": "user", "content": "Extract jason is 25 years old"},
    ],
)

print(completion.usage)
"""
CompletionUsage(
completion_tokens=10,
prompt_tokens=82,
total_tokens=92,
completion_tokens_details=CompletionTokensDetails(
audio_tokens=0, reasoning_tokens=0
),
prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0),
)
"""
每当超出上下文长度时,您可以捕获 IncompleteOutputException 并做出相应处理,例如根据超出的令牌数量裁剪提示。
from instructor.exceptions import IncompleteOutputException
import openai
import instructor
from pydantic import BaseModel
# Wrap the OpenAI client so chat completions accept a `response_model`.
client = instructor.from_openai(openai.OpenAI())


class UserExtract(BaseModel):
    """Fields to extract from the user's message."""

    name: str
    age: int


try:
    client.chat.completions.create_with_completion(
        model="gpt-3.5-turbo",
        response_model=UserExtract,
        messages=[
            {"role": "user", "content": "Extract jason is 25 years old"},
        ],
    )
except IncompleteOutputException as e:
    # Read the token usage from the last raw completion attached to the
    # exception. Both the completion and its `usage` are treated as optional
    # here, so check each before dereferencing instead of silencing the type
    # checker.
    last_completion = e.last_completion
    usage = last_completion.usage if last_completion is not None else None
    token_count = usage.total_tokens if usage is not None else None
    # your logic here