Skip to main content

Streaming + Async

Streaming Responses

Pass stream=True to stream the model response back chunk by chunk.

Usage

from openai import OpenAI

# Client pointed at the HAI Maker OpenAI-compatible endpoint.
client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.haimaker.ai/v1"
)

# stream=True makes create() return an iterator of incremental chunks
# instead of one completed response object.
response = client.chat.completions.create(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    stream=True
)

# Each chunk carries a delta; content may be None (e.g. the role-only first
# chunk or the final chunk), so guard before printing.
for chunk in response:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")

cURL

# Request a streamed chat completion ("stream": true) from the
# OpenAI-compatible endpoint; chunks arrive as server-sent events.
curl https://api.haimaker.ai/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_API_KEY" \
-d '{
"model": "openai/gpt-4o",
"messages": [{"role": "user", "content": "Hey, how'\''s it going?"}],
"stream": true
}'

Async Completion

Use the AsyncOpenAI client for asynchronous completions.

Usage

from openai import AsyncOpenAI
import asyncio

# AsyncOpenAI exposes the same API surface as OpenAI, but every request
# method is awaitable.
client = AsyncOpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.haimaker.ai/v1"
)


async def test_get_response():
    """Request a single (non-streamed) chat completion and return it."""
    response = await client.chat.completions.create(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Hello, how are you?"}]
    )
    return response


# asyncio.run creates an event loop, runs the coroutine to completion,
# and closes the loop.
response = asyncio.run(test_get_response())
print(response)

Async Streaming

Combine async and streaming for non-blocking streamed responses.

Usage

from openai import AsyncOpenAI
import asyncio

client = AsyncOpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.haimaker.ai/v1"
)


async def completion_call():
    """Stream a chat completion asynchronously, printing chunks as they arrive."""
    print("test async streaming")
    response = await client.chat.completions.create(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        stream=True
    )

    # With stream=True the awaited result is an async iterator of chunks;
    # delta.content may be None on role-only/final chunks, so guard it.
    async for chunk in response:
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")


asyncio.run(completion_call())