Streaming + Async
Streaming Responses
Pass stream=True to stream the model response back chunk by chunk.
Usage
from openai import OpenAI

# Client pointed at the HaiMaker-hosted OpenAI-compatible endpoint.
client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.haimaker.ai/v1"
)

# stream=True makes the server deliver the completion incrementally,
# one chunk per token group, instead of a single final response.
response = client.chat.completions.create(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    stream=True
)

# Print tokens as they arrive. Guard against chunks whose `choices` list is
# empty (e.g. a trailing usage chunk) before indexing, and skip chunks whose
# delta carries no content (role/finish-reason deltas have content=None).
for chunk in response:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
cURL
# Stream a chat completion over raw HTTP. "stream": true makes the server
# respond with Server-Sent Events (one JSON chunk per delta) rather than a
# single JSON body. Replace YOUR_API_KEY with your real key. The '\''
# sequence escapes a single quote inside the single-quoted JSON payload.
curl https://api.haimaker.ai/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_API_KEY" \
-d '{
"model": "openai/gpt-4o",
"messages": [{"role": "user", "content": "Hey, how'\''s it going?"}],
"stream": true
}'
Async Completion
Use the AsyncOpenAI client for asynchronous completions.
Usage
import asyncio

from openai import AsyncOpenAI

# Async client against the HaiMaker OpenAI-compatible endpoint.
client = AsyncOpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.haimaker.ai/v1"
)


async def test_get_response():
    """Request a single (non-streamed) chat completion and return it."""
    return await client.chat.completions.create(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Hello, how are you?"}]
    )


# asyncio.run() spins up an event loop, drives the coroutine to
# completion, and tears the loop down again.
response = asyncio.run(test_get_response())
print(response)
Async Streaming
Combine async and streaming for non-blocking streamed responses.
Usage
import asyncio

from openai import AsyncOpenAI

# Async client for the HaiMaker OpenAI-compatible endpoint.
client = AsyncOpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.haimaker.ai/v1"
)


async def completion_call():
    """Stream a chat completion asynchronously, printing tokens as they arrive."""
    print("test async streaming")
    response = await client.chat.completions.create(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        stream=True
    )
    # Guard against chunks whose `choices` list is empty (e.g. a trailing
    # usage chunk) before indexing, and skip deltas with no content
    # (role/finish-reason deltas have content=None).
    async for chunk in response:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")


asyncio.run(completion_call())