Since LLMs can take a while to generate a complete response, streaming the response to the user as it is generated is generally a good way to improve the user experience.

Conceptual Overview

  • The C1 API supports a streaming mode, which lets the backend receive the response in chunks.
  • The backend forwards the API stream to the frontend in text/event-stream format.
  • The C1 React component consumes the streaming response and renders it to the user incrementally.

Implementation

1. Add streaming to the backend

src/api/route.ts
const llmStream = await client.chat.completions.create({
  ...
  stream: true,
  ...
});

// transformStream converts the LLM stream into a web ReadableStream,
// forwarding only the text content of each chunk.
const responseStream = transformStream(llmStream, (chunk) => {
  return chunk.choices[0]?.delta?.content || "";
});

return new Response(responseStream as ReadableStream, {
  headers: {
    "Content-Type": "text/event-stream",
    "Cache-Control": "no-cache, no-transform",
    Connection: "keep-alive",
  },
});
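
Put together, a minimal self-contained version of the route could look like the sketch below. The base URL, model id, and request shape are assumptions to verify against your setup, as is the `@crayonai/stream` import for the `transformStream` helper; the OpenAI SDK is used here as an OpenAI-compatible client.

// Minimal sketch of the full route — endpoint URL, model id, and request
// shape are assumptions; verify them against your C1 setup.
import OpenAI from "openai";
import { transformStream } from "@crayonai/stream"; // assumed helper package

const client = new OpenAI({
  baseURL: "https://api.thesys.dev/v1/embed", // assumed C1 API base URL
  apiKey: process.env.THESYS_API_KEY,
});

export async function POST(req: Request) {
  const { prompt } = await req.json(); // illustrative request shape

  const llmStream = await client.chat.completions.create({
    model: "<your-c1-model-id>", // placeholder
    messages: [{ role: "user", content: prompt }],
    stream: true,
  });

  // Keep only the text content of each chunk.
  const responseStream = transformStream(llmStream, (chunk) => {
    return chunk.choices[0]?.delta?.content || "";
  });

  return new Response(responseStream as ReadableStream, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      Connection: "keep-alive",
    },
  });
}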

2. Set up helpers to stream the response

src/app/page.tsx
const [c1Response, setC1Response] = useState<string>("");
const [isLoading, setIsLoading] = useState<boolean>(false);
const [abortController, setAbortController] = useState<AbortController | null>(null);

// Perform the streaming request and accumulate chunks into state.
const makeApiCall = async () => {
  try {
    // Cancel any ongoing request before starting a new one
    if (abortController) {
      abortController.abort();
    }

    // Create and set up a new abort controller for this request
    const newAbortController = new AbortController();
    setAbortController(newAbortController);
    setIsLoading(true);

    // Make the API request with the abort signal
    const response = await fetch("/api/ask", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        ...
      }),
      signal: newAbortController.signal,
    });

    // Set up stream reading utilities
    const decoder = new TextDecoder();
    const reader = response.body?.getReader();

    if (!reader) {
      throw new Error("response.body not found");
    }

    // Initialize accumulator for the streamed response
    let streamResponse = "";

    // Read the stream chunk by chunk
    while (true) {
      const { done, value } = await reader.read();
      // Decode the chunk, flushing the decoder on the final read
      const chunk = decoder.decode(value, { stream: !done });

      // Accumulate the response and update state so the UI re-renders
      streamResponse += chunk;
      setC1Response(streamResponse);

      // Break the loop when the stream is complete
      if (done) {
        break;
      }
    }
  } catch (error) {
    console.error("Error in makeApiCall:", error);
  } finally {
    // Clean up: reset loading state and abort controller
    setIsLoading(false);
    setAbortController(null);
  }
};
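
With the helper in place, it can be wired to the UI. The sketch below is illustrative only: the `query` state field and the form markup are assumptions, not part of the original snippet.

// Illustrative wiring — the query state and form markup are assumptions.
const [query, setQuery] = useState<string>("");

<form
  onSubmit={(e) => {
    e.preventDefault();
    makeApiCall(); // kicks off the streaming request defined above
  }}
>
  <input value={query} onChange={(e) => setQuery(e.target.value)} />
  <button type="submit" disabled={isLoading}>
    Ask
  </button>
</form>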

3. Pass the streaming response to the C1 component

src/app/page.tsx
<C1Component
  c1Response={c1Response}
  isStreaming={isLoading}
/>
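
If not already in place, the component needs the SDK import and, in most setups, the SDK's theme provider. The imports below assume the `@thesysai/genui-sdk` and `@crayonai/react-ui` packages; verify the paths against your installed versions.

// Assumed imports — verify against your installed SDK versions.
import { C1Component, ThemeProvider } from "@thesysai/genui-sdk";
import "@crayonai/react-ui/styles/index.css";

<ThemeProvider>
  <C1Component c1Response={c1Response} isStreaming={isLoading} />
</ThemeProvider>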