[Interactive Avatar] Sporadic 401 errors
Hello -- We are experimenting with the Interactive Avatar via the API. We have looked at the demo code at https://github.com/HeyGen-Official/InteractiveAvatarNextJSDemo and pretty much copied its functionality to replicate it (especially https://github.com/HeyGen-Official/InteractiveAvatarNextJSDemo/blob/main/components/InteractiveAvatar.tsx).
When we initialize the avatar and try to get it to process a task, we get a 401 error almost 9 times out of 10; if we are lucky, the avatar starts processing our streaming task. We fetch the access token properly (we can print it and see that a different token is generated every time) and the session seems to initialize properly, but we see the following errors:
Uncaught TypeError: debugStream is not a function
at peerConnection.oniceconnectionstatechange (StreamingAvatarApi.js:533:53)
api.heygen.com/v1/streaming.task:1
Failed to load resource: the server responded with a status of 401 ()
When this happens we see the avatar and it does its "default" movements like hand gestures, etc., but it doesn't speak the text we send via the streaming task.
We try almost 10 times (refreshing, switching avatars and voices) and somehow one of the attempts works.
We are not sure what we are doing wrong. When we call the "remaining quota" API we still see 60.
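For context, our /api/get-access-token route follows the demo repo's approach. Roughly like the sketch below (this is an illustration, not our exact file; it assumes the v1 streaming.create_token endpoint and a HEYGEN_API_KEY environment variable as used in the demo):

// app/api/get-access-token/route.ts -- sketch of the token-minting route (demo-style)
export async function POST() {
  try {
    // Exchange the server-side API key for a short-lived streaming access token
    const res = await fetch("https://api.heygen.com/v1/streaming.create_token", {
      method: "POST",
      headers: { "x-api-key": process.env.HEYGEN_API_KEY! },
    });
    const data = await res.json();
    // The client reads the raw token string from the response body
    return new Response(data.data.token, { status: 200 });
  } catch (error) {
    console.error("Error retrieving access token:", error);
    return new Response("Failed to retrieve access token", { status: 500 });
  }
}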
Here is our full code. Any help would be greatly appreciated!
import React, { useState, useEffect, useRef } from 'react';
import { Configuration, NewSessionData, StreamingAvatarApi } from "@heygen/streaming-avatar";
import OpenAI from "openai";
import { Button, Tooltip } from "@nextui-org/react";
import { Microphone, MicrophoneStage } from "@phosphor-icons/react";
const openai = new OpenAI({
apiKey: process.env.NEXT_PUBLIC_OPENAI_API_KEY,
dangerouslyAllowBrowser: true,
});
interface InteractiveAvatarProps {
avatarId: string;
voiceId: string;
onUserMessage: (message: string) => void;
aiResponse: string | null;
}
export default function InteractiveAvatar({ avatarId, voiceId, onUserMessage, aiResponse }: InteractiveAvatarProps) {
const [isLoadingSession, setIsLoadingSession] = useState(false);
const [stream, setStream] = useState<MediaStream>();
const [data, setData] = useState<NewSessionData>();
const [initialized, setInitialized] = useState(false);
const [recording, setRecording] = useState(false);
const [debug, setDebug] = useState<string>();
const mediaStream = useRef<HTMLVideoElement>(null);
const avatar = useRef<StreamingAvatarApi | null>(null);
const mediaRecorder = useRef<MediaRecorder | null>(null);
const audioChunks = useRef<Blob[]>([]);
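// Initialize the avatar on mount and end the session on unmount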
useEffect(() => {
init();
return () => {
endSession();
};
}, []);
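// Speak each new AI response once the avatar has been initialized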
useEffect(() => {
if (initialized && aiResponse) {
handleSpeak(aiResponse);
}
}, [initialized, aiResponse]);
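// Fetch an access token, create the StreamingAvatarApi client, and start a session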
async function init() {
const newToken = await fetchAccessToken();
console.log("Initializing with Access Token:", newToken);
avatar.current = new StreamingAvatarApi(
new Configuration({ accessToken: newToken, jitterBuffer: 200 })
);
setInitialized(true);
await startSession();
}
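// Fetch an access token from our /api/get-access-token route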
async function fetchAccessToken() {
try {
const response = await fetch("/api/get-access-token", {
method: "POST",
});
const token = await response.text();
console.log("Access Token:", token);
return token;
} catch (error) {
console.error("Error fetching access token:", error);
return "";
}
}
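// Start a new avatar session with the selected avatar and voice and store its media stream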
async function startSession() {
setIsLoadingSession(true);
if (!avatar.current) {
setDebug("Avatar API is not initialized");
return;
}
try {
const res = await avatar.current.createStartAvatar(
{
newSessionRequest: {
quality: "low",
avatarName: avatarId,
voice: { voiceId: voiceId },
},
},
);
setData(res);
setStream(avatar.current.mediaStream);
} catch (error) {
console.error("Error starting avatar session:", error);
setDebug(
`There was an error starting the session. ${voiceId ? "This custom voice ID may not be supported." : ""}`
);
}
setIsLoadingSession(false);
}
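// Send a streaming task so the avatar speaks the given text (the request that intermittently returns 401)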
async function handleSpeak(text: string) {
if (!initialized || !avatar.current) {
setDebug("Avatar API not initialized");
return;
}
await avatar.current
.speak({ taskRequest: { text: text, sessionId: data?.sessionId } })
.catch((e) => {
setDebug(e.message);
});
}
async function handleInterrupt() {
if (!initialized || !avatar.current) {
setDebug("Avatar API not initialized");
return;
}
await avatar.current
.interrupt({ interruptRequest: { sessionId: data?.sessionId } })
.catch((e) => {
setDebug(e.message);
});
}
async function endSession() {
if (!initialized || !avatar.current) {
setDebug("Avatar API not initialized");
return;
}
await avatar.current.stopAvatar(
{ stopSessionRequest: { sessionId: data?.sessionId } },
setDebug
);
setStream(undefined);
}
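// Record microphone audio and transcribe it with Whisper when recording stops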
function startRecording() {
navigator.mediaDevices
.getUserMedia({ audio: true })
.then((stream) => {
mediaRecorder.current = new MediaRecorder(stream);
mediaRecorder.current.ondataavailable = (event) => {
audioChunks.current.push(event.data);
};
mediaRecorder.current.onstop = () => {
const audioBlob = new Blob(audioChunks.current, {
type: "audio/wav",
});
audioChunks.current = [];
transcribeAudio(audioBlob);
};
mediaRecorder.current.start();
setRecording(true);
})
.catch((error) => {
console.error("Error accessing microphone:", error);
});
}
function stopRecording() {
if (mediaRecorder.current) {
mediaRecorder.current.stop();
setRecording(false);
}
}
async function transcribeAudio(audioBlob: Blob) {
try {
const audioFile = new File([audioBlob], "recording.wav", {
type: "audio/wav",
});
const response = await openai.audio.transcriptions.create({
model: "whisper-1",
file: audioFile,
});
const transcription = response.text;
console.log("Transcription: ", transcription);
onUserMessage(transcription);
} catch (error) {
console.error("Error transcribing audio:", error);
}
}
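// Attach the avatar's media stream to the video element once it is available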
useEffect(() => {
if (stream && mediaStream.current) {
mediaStream.current.srcObject = stream;
mediaStream.current.onloadedmetadata = () => {
mediaStream.current!.play();
setDebug("Playing");
};
}
}, [stream]);
return (
<div className="h-[500px] w-[900px] justify-center items-center flex rounded-lg overflow-hidden relative">
{isLoadingSession ? (
<div>Initializing avatar...</div>
) : (
<>
<video
ref={mediaStream}
autoPlay
playsInline
style={{
width: "100%",
height: "100%",
objectFit: "contain",
}}
>
<track kind="captions" />
</video>
<div className="flex flex-col gap-2 absolute bottom-3 right-3">
<Tooltip content={!recording ? "Start recording" : "Stop recording"}>
<Button
onClick={!recording ? startRecording : stopRecording}
isDisabled={!stream}
className={`mr-4 text-white ${!recording ? "bg-gradient-to-tr from-indigo-500 to-indigo-300" : ""}`}
size="sm"
>
{!recording ? (
<Microphone size={20} />
) : (
<>
<div className="absolute h-full w-full bg-gradient-to-tr from-indigo-500 to-indigo-300 animate-pulse -z-10"></div>
<MicrophoneStage size={20} />
</>
)}
</Button>
</Tooltip>
<Button
onClick={handleInterrupt}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
size="sm"
>
Interrupt task
</Button>
<Button
onClick={endSession}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
size="sm"
>
End session
</Button>
</div>
</>
)}
<p className="absolute bottom-0 left-0 font-mono text-xs">
<span className="font-bold">Debug:</span> {debug}
</p>
</div>
);
}