Discussions

Ask a Question
Back to All

Python with Next.js

Hello again! I am building a web application using Python for the backend and Next.js for the frontend. The project is a real-time conversation bot, and I need your help. Here is the relevant backend code:

from flask import Flask, request, jsonify
import os
from flask_cors import CORS
from flask_socketio import SocketIO, emit, disconnect  
import logging
from dotenv import load_dotenv  
import requests  
from enum import Enum
# Load variables from a .env file (if present) into the process environment
# before anything below reads them.
load_dotenv()

# HeyGen API key, sent as the "x-api-key" header on every HeyGen request.
# Indexing os.environ (rather than using .get) raises KeyError at import time
# when HEYGEN_API_TOKEN is not set.
heygen_api_token = os.environ["HEYGEN_API_TOKEN"]

def create_avatar_session():
    """Create a new HeyGen streaming avatar session.

    Posts the avatar configuration to the ``streaming.new`` endpoint,
    authenticated with the module-level ``heygen_api_token``, and stores the
    returned session id in the module-level ``avatar_session_id``.

    Returns:
        dict: ``session_id``, ``access_token`` and ``url`` taken from the
        ``data`` object of the API response. The last two are ``None`` when
        the response omits them.

    Raises:
        Exception: If the HTTP request fails, the API reports a ``code``
            other than 100, or the response carries no ``session_id``. The
            underlying error is attached as ``__cause__``.
    """
    global avatar_session_id
    try:
        url = "https://api.heygen.com/v1/streaming.new"
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "x-api-key": heygen_api_token
        }

        # Avatar configuration; keys stay snake_case because this is the raw
        # REST payload, not the camelCase SDK request object.
        data = {
            "avatar_id": "SilasHR_public",
            "quality": "low",
            "voice": {"rate": 1},
            "video_encoding": "VP8",
            "disable_idle_timeout": True
        }

        # Never write the API key (or any prefix of it) to the logs.
        redacted_headers = {**headers, "x-api-key": "***"}
        logger.info("Creating avatar session")
        logger.info(f"Full request headers: {redacted_headers}")
        logger.info(f"Full request data: {data}")

        # requests applies no timeout by default; bound the call so a stalled
        # connection cannot block the caller forever.
        response = requests.post(url, headers=headers, json=data, timeout=30)

        if response.status_code != 200:
            logger.error(f"HeyGen API error: Status {response.status_code}")
            logger.error(f"Response headers: {response.headers}")
            logger.error(f"Response content: {response.text}")
            response.raise_for_status()

        response_data = response.json()
        # The body contains the session access token, so log only the status
        # code instead of dumping the whole payload.
        logger.info(f"Avatar session response code: {response_data.get('code')}")

        if response_data.get('code') != 100:
            raise Exception(f"API error: {response_data.get('message', 'Unknown error')}")

        session_data = response_data.get('data', {})
        if not session_data or not session_data.get('session_id'):
            raise Exception("No session data in response")

        avatar_session_id = session_data['session_id']
        logger.info(f"Created avatar session: {avatar_session_id}")

        # NOTE(review): `access_token` here is presumably the token for joining
        # the media room of THIS session, not a token for creating sessions
        # through the browser SDK - confirm against the HeyGen API docs.
        return {
            'session_id': avatar_session_id,
            'access_token': session_data.get('access_token'),
            'url': session_data.get('url')
        }

    except requests.exceptions.RequestException as e:
        error_msg = f"Network error creating avatar session: {str(e)}"
        logger.error(error_msg)
        raise Exception(error_msg) from e
    except Exception as e:
        error_msg = f"Error creating avatar session: {str(e)}"
        logger.error(error_msg)
        raise Exception(error_msg) from e

def send_avatar_speak_task(text):
    """Ask the active avatar session to speak ``text``.

    Posts a task to the ``streaming.speak`` endpoint for the session stored in
    the module-level ``avatar_session_id``. The task type is
    ``TaskType.REPEAT.value`` (``TaskType`` is defined outside this excerpt).

    Args:
        text: The text the avatar should speak.

    Returns:
        The non-empty ``data`` object of the API response.

    Raises:
        Exception: If there is no active session, the HTTP request fails, the
            API reports a ``code`` other than 100, or the response has no
            ``data``. The underlying error is attached as ``__cause__``.
    """
    try:
        if not avatar_session_id:
            raise Exception("No active avatar session")

        url = "https://api.heygen.com/v1/streaming.speak"
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "x-api-key": heygen_api_token
        }
        data = {
            "session_id": avatar_session_id,
            "text": text,
            "task_type": TaskType.REPEAT.value,
            "voice": {
                "rate": 1
            }
        }

        # Only the first 50 characters are logged to keep log lines short.
        logger.info(f"Sending speak task: {text[:50]}...")
        # requests applies no timeout by default; bound the call so a stalled
        # connection cannot block the caller forever.
        response = requests.post(url, json=data, headers=headers, timeout=30)
        response.raise_for_status()

        response_data = response.json()
        logger.info(f"Speak task response: {response_data}")

        if response_data.get('code') != 100:
            raise Exception(f"API error: {response_data.get('message', 'Unknown error')}")

        speak_data = response_data.get('data', {})
        if not speak_data:
            raise Exception(f"No data in speak task response: {response_data}")

        return speak_data

    except Exception as e:
        error_msg = f"Error sending speak task: {str(e)}"
        logger.error(error_msg)
        raise Exception(error_msg) from e

def end_avatar_session():
    """End the current avatar session, if there is one.

    Posts the stored session id to the ``streaming.stop`` endpoint and resets
    the module-level ``avatar_session_id`` to ``None`` on success. Does nothing
    when no session id is stored.

    This is best-effort cleanup: every failure is logged and swallowed, so the
    function never raises. On failure the stored session id is left unchanged.
    """
    global avatar_session_id
    try:
        if avatar_session_id:
            url = "https://api.heygen.com/v1/streaming.stop"
            headers = {
                "accept": "application/json",
                "content-type": "application/json",
                "x-api-key": heygen_api_token
            }
            data = {
                "session_id": avatar_session_id
            }

            logger.info(f"Ending avatar session: {avatar_session_id}")
            # requests applies no timeout by default; bound the call so that
            # cleanup cannot hang on a stalled connection.
            response = requests.post(url, json=data, headers=headers, timeout=30)
            response.raise_for_status()

            response_data = response.json()
            if response_data.get('code') != 100:
                raise Exception(f"API error: {response_data.get('message', 'Unknown error')}")

            logger.info("Avatar session ended successfully")
            avatar_session_id = None

    except Exception as e:
        error_msg = f"Error ending avatar session: {str(e)}"
        logger.error(error_msg)
        # Don't raise the exception here as this is cleanup code

I also use my own STT and LLM; I did not include them in the code above because I don't think they are necessary for this question.
Here is my frontend code:

'use client';

import { useEffect, useState, useRef } from 'react';
import io, { Socket } from 'socket.io-client';
import StreamingAvatar, 
{ StreamingAvatarApiConfig,
  StartAvatarRequest,
  AvatarQuality,
  VoiceEmotion,
  SpeakRequest,
  TaskType
} from '@heygen/streaming-avatar';
import StreamingAvatarInstance from '@heygen/streaming-avatar';
import styles from './page.module.css';


/**
 * Shape of a voice configuration carrying a numeric `rate`.
 * Not referenced anywhere in the visible component code.
 */
interface VoiceSettings {
  rate: number;
}

/**
 * IELTS speaking-examiner page.
 *
 * Opens a Socket.IO connection to the backend at http://localhost:5000, asks
 * it for an avatar session, starts a HeyGen streaming avatar with the data
 * the backend sends back, streams microphone audio to the backend in chunks,
 * and makes the avatar speak every `ai_response` the backend emits.
 *
 * Renders an instructions screen until a session has been requested, then
 * the avatar container with the talk and "End Session" buttons.
 */
export default function Home() {
  const [isSessionStarted, setIsSessionStarted] = useState(false);
  const [isRecording, setIsRecording] = useState(false);
  const [isConnected, setIsConnected] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const socketRef = useRef<Socket | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const avatarRef = useRef<HTMLDivElement>(null);
  const streamingAvatarRef = useRef<StreamingAvatarInstance | null>(null);

  // Set up the socket and all of its handlers once, on mount.
  useEffect(() => {
    const socket = io('http://localhost:5000', {
      reconnection: true,
      reconnectionAttempts: 5,
      reconnectionDelay: 1000,
      reconnectionDelayMax: 5000,
      timeout: 120000,
      transports: ['websocket', 'polling'],
      withCredentials: true,
    });
    socketRef.current = socket;

    socket.on('connect', () => {
      console.log('Connected to server');
      setIsConnected(true);
      setError(null);
    });

    socket.on('disconnect', () => {
      console.error('Disconnected from server');
      setIsConnected(false);
    });

    // The backend answers 'request_avatar_session' with this event.
    socket.on('avatar_session', async (sessionData) => {
      try {
        if (!avatarRef.current) {
          console.error('Avatar container ref not found');
          return;
        }

        console.log('Received session data:', sessionData);

        // NOTE(review): the reported "POST .../v1/streaming.new 401" most
        // likely originates here. `newSession` below issues its own
        // streaming.new request authenticated with this token, while
        // `access_token` is presumably the media-room token of the session
        // the backend already created, not an SDK session token (those
        // appear to come from the `streaming.create_token` endpoint).
        // Confirm against the HeyGen docs; the fix spans backend and frontend.
        const config: StreamingAvatarApiConfig = {
          token: sessionData.access_token
        };
        const avatar = new StreamingAvatar(config);
        streamingAvatarRef.current = avatar;

        const startRequest: StartAvatarRequest = {
          avatarName: "SilasHR_public",
          quality: AvatarQuality.Low,
          voice: { rate: 1, emotion: VoiceEmotion.EXCITED },
          language: 'en',
          disableIdleTimeout: true
        };

        // Create the session first, then start it.
        console.log('Creating new session with request:', startRequest);
        await avatar.newSession(startRequest);
        console.log('Session created, starting avatar...');
        await avatar.startSession();

        // NOTE(review): nothing in this component attaches the avatar's
        // media stream to `avatarRef`; presumably a stream-ready handler that
        // feeds a <video> element is still needed - confirm against the SDK.
        console.log('Avatar session started successfully');
        setIsSessionStarted(true);
      } catch (error) {
        // Drop the half-initialised instance so later code (speak requests,
        // the "do we have a session?" check) does not treat it as live.
        streamingAvatarRef.current = null;
        console.error('Error connecting to avatar session:', error);
        setError('Failed to connect to avatar session. Please try refreshing the page.');
      }
    });

    socket.on('error', (data) => {
      // The payload is not guaranteed to carry a `message` field.
      const message = data?.message ?? 'Unknown server error';
      console.error('Server error:', message);
      setError(message);
    });

    socket.on('ai_response', (data) => {
      console.log('Received AI response:', data.response);
      // Send the response to the avatar to speak
      if (streamingAvatarRef.current) {
        streamingAvatarRef.current.speak({
          text: data.response,
          task_type: TaskType.REPEAT
        }).catch((error: any) => {
          console.error('Error making avatar speak:', error);
        });
      }
    });

    return () => {
      // Release the microphone if the page unmounts mid-recording.
      mediaRecorderRef.current?.stream.getTracks().forEach(track => track.stop());

      const avatar = streamingAvatarRef.current;
      streamingAvatarRef.current = null;
      if (avatar) {
        avatar.stopAvatar().catch((error: any) => {
          console.error('Error stopping avatar:', error);
        });
      }
      socket.disconnect();
    };
  }, []);

  /** Ask the backend for an avatar session and switch to the session view. */
  const initializeAvatarSession = async () => {
    try {
      // Request avatar session from backend
      socketRef.current?.emit('request_avatar_session');
      // Flip the view immediately: the 'avatar_session' handler bails out
      // unless the avatar container is already mounted.
      setIsSessionStarted(true);
      setError(null);
    } catch (error) {
      console.error('Error initializing avatar session:', error);
      setError('Failed to initialize avatar session');
    }
  };

  /** Start capturing microphone audio and stream it to the backend. */
  const startRecording = async () => {
    try {
      // Ensure we have an active avatar session
      if (!streamingAvatarRef.current) {
        socketRef.current?.emit('request_avatar_session');
        await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for session
      }

      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      const options = {
        mimeType: 'audio/webm;codecs=opus',
        audioBitsPerSecond: 16000
      };

      const recorder = new MediaRecorder(stream, options);
      mediaRecorderRef.current = recorder;
      chunksRef.current = [];

      recorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          chunksRef.current.push(event.data);
          // Forward each chunk to the backend as a plain array of byte values.
          const reader = new FileReader();
          reader.onload = () => {
            const audioData = new Uint8Array(reader.result as ArrayBuffer);
            socketRef.current?.emit('audio_data', Array.from(audioData));
          };
          reader.readAsArrayBuffer(event.data);
        }
      };

      // Deliver a chunk every 250 ms.
      recorder.start(250);
      setIsRecording(true);
      socketRef.current?.emit('start_stream');
    } catch (error) {
      console.error('Error starting recording:', error);
      setError('Please grant microphone permission to continue.');
    }
  };

  /** Stop capturing audio, release the microphone and notify the backend. */
  const stopRecording = () => {
    const recorder = mediaRecorderRef.current;
    // Check the recorder's own state rather than React state, so this is
    // safe to call from any handler regardless of render timing.
    if (recorder && recorder.state !== 'inactive') {
      recorder.stop();
      recorder.stream.getTracks().forEach(track => track.stop());
      socketRef.current?.emit('stop_stream');
    }
    setIsRecording(false);
  };

  /** Tear down recording and the avatar, then return to the start screen. */
  const endSession = () => {
    // Do not leave the microphone live after the session view is gone.
    stopRecording();

    // Clear the ref before stopping so nothing reuses a stopped avatar.
    const avatar = streamingAvatarRef.current;
    streamingAvatarRef.current = null;
    if (avatar) {
      avatar.stopAvatar().catch((error: any) => {
        console.error('Error stopping avatar:', error);
      });
    }
    setIsSessionStarted(false);
  };

  if (!isSessionStarted) {
    return (
      <main className={styles.main}>
        <div className={styles.titleContainer}>
          <span className={styles.playIcon}>▶</span>
          <h1 className={styles.title}>IELTS Speaking Examiner Simulation</h1>
        </div>

        <div className={styles.container}>
          <div className={styles.instructionsBox}>
            <ul className={styles.instructionsList}>
              <li>Click the "Start Session" button to begin.</li>
              <li>Once the instructor appears, simply greet them, and the process will start.</li>
              <li>If you haven&apos;t already allowed your browser to use the microphone, grant permission.</li>
              <li>After finishing the conversation, click the "End Session" button.</li>
            </ul>
          </div>

          <button
            className={styles.startButton}
            onClick={initializeAvatarSession}
          >
            Start Session
          </button>
        </div>
      </main>
    );
  }

  return (
    <main className={styles.main}>
      <div className={styles.titleContainer}>
        <span className={styles.playIcon}>▶</span>
        <h1 className={styles.title}>IELTS Speaking Examiner Simulation</h1>
      </div>

      <div className={styles.container}>
        <div
          ref={avatarRef}
          className={styles.avatarContainer}
          style={{ width: '100%', height: '400px' }}
        />

        <button
          className={`${styles.pushToTalkButton} ${isRecording ? styles.recording : ''}`}
          onClick={() => isRecording ? stopRecording() : startRecording()}
        >
          {isRecording ? 'Click to Stop' : 'Click to Talk'}
          <div className={styles.micIcon}>🎤</div>
        </button>

        <button
          className={styles.startButton}
          onClick={endSession}
        >
          End Session
        </button>
      </div>
    </main>
  );
}

I included all the frontend code. Well, here is the problem I faced:

  1. When I run the backend and frontend, I get the "avatar session created" message in the backend.
  2. However, I then get "POST https://api.heygen.com/v1/streaming.new 401 (Unauthorized)" followed by "Error connecting to avatar session: APIError: API request failed with status 401".
  3. There is no problem with API credits: when I was testing, I had 8 credits left.

So I think the error is in the frontend, since the backend created the avatar session successfully.
Please help me!