SDK

Streaming

Streaming responses must be consumed inside a trace group context. Without a trace group, tracking may end before the stream completes and only a partial response is captured. The trace group waits for streaming to finish before recording the full response.
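
A concrete illustration of the failure mode, as a minimal sketch (the function and group names are illustrative, not part of the SDK): the stream is created inside a trace group but consumed outside it, so the group closes as soon as the function returns and tracking ends with an incomplete response.

import nora
from openai import OpenAI

nora_client = nora.init(api_key="your-nora-api-key")
client = OpenAI(api_key="your-openai-key")

def broken_streaming():
    with nora_client.trace_group(name="broken_stream"):
        # Anti-pattern: the trace group exits when this function returns,
        # before any chunks have been read from the stream.
        return client.responses.create(
            model="gpt-5",
            input=[{"role": "user", "content": "Write a short story about a robot"}],
            stream=True
        )

# The stream is consumed here, outside the trace group, so tracking
# may already have ended before the response finished streaming.
for event in broken_streaming():
    pass

The provider examples below follow the correct pattern: each stream is created and consumed inside the trace group, with chunks yielded from within the with block.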

OpenAI Streaming

import nora
from openai import OpenAI

nora_client = nora.init(api_key="your-nora-api-key")
client = OpenAI(api_key="your-openai-key")

def streaming_conversation():
    with nora_client.trace_group(name="streaming_chat"):
        # responses.stream() is a context manager that yields streaming events
        with client.responses.stream(
            model="gpt-5",
            input=[
                {
                    "role": "user",
                    "content": "Write a short story about a robot"
                }
            ]
        ) as stream:
            for event in stream:
                # yield text deltas only
                if event.type == "response.output_text.delta":
                    yield event.delta

for chunk in streaming_conversation():
    print(chunk, end="", flush=True)

Anthropic Streaming

import nora
from anthropic import Anthropic

nora_client = nora.init(api_key="your-nora-api-key")
client = Anthropic(api_key="your-anthropic-key")

def stream_with_claude():
    with nora_client.trace_group(name="claude_stream"):
        with client.messages.stream(
            model="claude-opus-4-5-20251101",
            max_tokens=1024,
            messages=[
                {"role": "user", "content": "Explain machine learning"}
            ]
        ) as stream:
            for text in stream.text_stream:
                yield text

for chunk in stream_with_claude():
    print(chunk, end="", flush=True)
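
If you also need the fully assembled response after streaming (for example to log token usage), the Anthropic streaming helper exposes get_final_message() once the stream has been consumed. A minimal variant of the example above, kept inside the same trace group (the group name is illustrative):

import nora
from anthropic import Anthropic

nora_client = nora.init(api_key="your-nora-api-key")
client = Anthropic(api_key="your-anthropic-key")

def stream_and_collect():
    with nora_client.trace_group(name="claude_stream_collect"):
        with client.messages.stream(
            model="claude-opus-4-5-20251101",
            max_tokens=1024,
            messages=[
                {"role": "user", "content": "Explain machine learning"}
            ]
        ) as stream:
            for text in stream.text_stream:
                print(text, end="", flush=True)

            # After the stream is exhausted, get_final_message() returns the
            # complete Message object, including usage counts.
            final_message = stream.get_final_message()
            print(f"\n\nOutput tokens: {final_message.usage.output_tokens}")

stream_and_collect()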

Gemini Streaming

import nora
import google.generativeai as genai

nora_client = nora.init(api_key="your-nora-api-key")

genai.configure(api_key="your-gemini-key")
model = genai.GenerativeModel("gemini-2.5-pro")

def stream_with_gemini():
    with nora_client.trace_group(name="gemini_stream"):
        stream = model.generate_content(
            "Explain machine learning in 3 sentences",
            stream=True
        )

        for chunk in stream:
            if chunk.text:
                yield chunk.text

for chunk in stream_with_gemini():
    print(chunk, end="", flush=True)

Ollama Streaming

import nora
import ollama

nora_client = nora.init(api_key="your-nora-api-key")

def stream_with_ollama():
    with nora_client.trace_group(name="ollama_stream"):
        stream = ollama.chat(
            model="llama3.1",
            messages=[
                {"role": "user", "content": "Count from 1 to 5"}
            ],
            stream=True
        )

        for chunk in stream:
            content = chunk.get("message", {}).get("content", "")
            if content:
                yield content

for chunk in stream_with_ollama():
    print(chunk, end="", flush=True)

Async Streaming

import nora
from openai import AsyncOpenAI
import asyncio

nora_client = nora.init(api_key="your-nora-api-key")
client = AsyncOpenAI(api_key="your-openai-key")

async def async_stream_chat():
    with nora_client.trace_group(name="async_streaming"):
        # With AsyncOpenAI, responses.stream() is an async context manager
        async with client.responses.stream(
            model="gpt-5",
            input=[
                {
                    "role": "user",
                    "content": "Count from 1 to 10"
                }
            ]
        ) as stream:
            async for event in stream:
                if event.type == "response.output_text.delta":
                    yield event.delta

async def main():
    async for chunk in async_stream_chat():
        print(chunk, end="", flush=True)

asyncio.run(main())

Multiple Streaming Calls in One Group

import nora
from openai import OpenAI

nora_client = nora.init(api_key="your-nora-api-key")
client = OpenAI(api_key="your-openai-key")

def multi_streaming():
    with nora_client.trace_group(name="multi_stream_workflow"):
        topics = ["AI", "Robotics", "Quantum Computing"]

        for topic in topics:
            yield f"\n--- {topic} ---\n"

            with client.responses.stream(
                model="gpt-5",
                input=[
                    {
                        "role": "user",
                        "content": f"Briefly explain {topic} in 2 sentences."
                    }
                ],
                max_output_tokens=100
            ) as stream:
                for event in stream:
                    if event.type == "response.output_text.delta":
                        yield event.delta

            yield "\n"

for chunk in multi_streaming():
    print(chunk, end="", flush=True)
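
If you would rather track each topic as its own trace, you can open a separate trace group per iteration instead of one group for the whole workflow. This sketch assumes trace groups can be created repeatedly in a loop; the group names are illustrative.

import nora
from openai import OpenAI

nora_client = nora.init(api_key="your-nora-api-key")
client = OpenAI(api_key="your-openai-key")

def per_topic_streaming():
    topics = ["AI", "Robotics", "Quantum Computing"]

    for topic in topics:
        # One trace group per topic, so each explanation is captured separately
        with nora_client.trace_group(name=f"stream_{topic}"):
            with client.responses.stream(
                model="gpt-5",
                input=[
                    {"role": "user", "content": f"Briefly explain {topic} in 2 sentences."}
                ],
                max_output_tokens=100
            ) as stream:
                for event in stream:
                    if event.type == "response.output_text.delta":
                        yield event.delta

for chunk in per_topic_streaming():
    print(chunk, end="", flush=True)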
