Discussions

Ask a Question
Back to All

Why my avatar doest respond when I send it Audo from mic?

Hello, I have a Flutter client that works well when I send text through HTTP using /sendtask, and the avatar responds correctly. However, I'm having trouble when sending audio from the microphone.

I understand that to send audio to the avatar, I need to connect to the WebSocket:
/v1/ws/streaming.chat

In my code, I have one button that records audio from the user's microphone and another button to stop the recording. Then, with another button, I send the last recording to HeyGen's WebSocket, but the avatar never responds. It's as if it doesn't hear me.

What could I be doing wrong?

Here is my complete code:



import 'dart:async';
import 'dart:convert';
import 'dart:io';
import 'dart:typed_data';
import 'package:appwrite/appwrite.dart';
import 'package:flutter/material.dart';
import 'package:flutter_sound/flutter_sound.dart';
import 'package:flutter_webrtc/flutter_webrtc.dart';
import 'package:livekit_client/livekit_client.dart';
import 'package:path_provider/path_provider.dart';
import 'package:permission_handler/permission_handler.dart' as ph;
import 'package:web_socket_channel/web_socket_channel.dart';
// import 'pipecat.pb.dart';

void main() async {
  WidgetsFlutterBinding.ensureInitialized();
  if (Platform.isAndroid) await _checkPermissions();
  runApp(MyApp());
}

Future<void> _checkPermissions() async {
  var status = await ph.Permission.bluetooth.request();
  if (status.isPermanentlyDenied) {
    print('Bluetooth Permission disabled');
  }
  status = await ph.Permission.bluetoothConnect.request();
  if (status.isPermanentlyDenied) {
    print('Bluetooth Connect Permission disabled');
  }
  status = await ph.Permission.camera.request();
  if (status.isPermanentlyDenied) {
    print('Camera Permission disabled');
  }
  status = await ph.Permission.microphone.request();
  if (status.isPermanentlyDenied) {
    print('Microphone Permission disabled');
  }
}

class MyApp extends StatelessWidget {
  @override
  Widget build(BuildContext context) {
    return MaterialApp(
      title: 'HeyGen Streaming API LiveKit (V2)',
      theme: ThemeData(
        primarySwatch: Colors.blue,
      ),
      home: StreamingPage2(),
    );
  }
}

class StreamingPage2 extends StatefulWidget {
  @override
  _StreamingPage2State createState() => _StreamingPage2State();
}

class _StreamingPage2State extends State<StreamingPage2> {
  // Appwrite Configuration
  final API_CONFIG = {
    'endpoint': 'https://cloud.appwrite.io/v1',
    'projectId': 'xxxxxxxxxx',
    'functionId': 'xxxxxxxxxx',
    'heyGenServerUrl': "https://api.heygen.com"
  };

  // Appwrite Client
  late Client client;
  late Functions functions;

  // Global variables
  SessionInfo? sessionInfo;
  Room? room;
  WebSocketChannel? _webSocketChannel;
  String? sessionToken;

  // Controllers
  final _avatarIDController = TextEditingController(text: '');
  final _voiceIDController = TextEditingController(text: '');
  final _taskInputController = TextEditingController();

  // Status
  final _status = ValueNotifier<String>('');

  // Media Element
  VideoTrackRenderer? _videoTrackRenderer;
  VideoTrack? _videoTrack;

  FlutterSoundRecorder? _recorder;
  bool _isRecording = false;


  String? _localFilePath;
  Uint8List? _lastRecordedAudio;

  @override
  void initState() {
    super.initState();
    _initializeAppwrite();
    _initialize();
  }

  void _initializeAppwrite() {
    client = Client()
      ..setEndpoint(API_CONFIG['endpoint']!)
      ..setProject(API_CONFIG['projectId']!);

    functions = Functions(client);
  }

  Future<void> _initialize() async {
    room = Room(
      roomOptions: RoomOptions(
        adaptiveStream: true,
        dynacast: true,
        defaultVideoPublishOptions: VideoPublishOptions(
          simulcast: false,
        ),
      ),
    );

    room!.addListener(_onChange);

    // Handle room events
    room!.events.on<RoomDisconnectedEvent>((event) {
      updateStatus('Room disconnected: ${event.reason}');
    });
    room!.events.on<DataReceivedEvent>((event) {
      final data = utf8.decode(event.data);
      print("Room message: ${jsonDecode(data)}");
    });

    // Handle media streams
    room!.events.on<TrackSubscribedEvent>((event) async {
      if (event.track.kind == TrackType.VIDEO ||
          event.track.kind == TrackType.AUDIO) {
        if (event.track.kind == TrackType.VIDEO) {
          _videoTrack = event.track as VideoTrack;
          _videoTrackRenderer = VideoTrackRenderer(_videoTrack!);
          setState(() {});
        }
        updateStatus('Media stream ready');
      }
    });

    // Handle media stream removal
    room!.events.on<TrackUnsubscribedEvent>((event) {
      if (event.track.kind == TrackType.VIDEO) {
        _videoTrackRenderer = null;
        setState(() {});
      }
    });

    // Inicializar el micrófono aquí
    await _initMicrophone();
  }

   Future<void> _initMicrophone() async {
    var status = await ph.Permission.microphone.request();
    if (status.isGranted) {
      try {
        _recorder = await FlutterSoundRecorder().openRecorder();
        await _recorder!.setSubscriptionDuration(const Duration(milliseconds: 10));
        print('Microphone initialized successfully');
      } catch (e) {
        print('Error initializing microphone: $e');
      }
    } else {
      print('Microphone permission denied');
      updateStatus('Microphone permission denied');
    }
  }

  Future<void> _startRecordingLocal() async {
      if (_recorder == null) return;

      try {
        // Obtener el directorio temporal
        Directory tempDir = await getTemporaryDirectory();
        _localFilePath = '${tempDir.path}/local_recording.pcm'; // Definir la ruta del archivo

        // Iniciar la grabación local en un archivo
        await _recorder!.startRecorder(
          toFile: _localFilePath,
          codec: Codec.pcm16,
          numChannels: 1,
          sampleRate: 16000,
        );

        setState(() {
          _isRecording = true;
        });
        updateStatus('Grabando localmente en: $_localFilePath');
      } catch (err) {
        print('Error starting local recording: $err');
      }
    }

    Future<void> _stopRecordingLocal() async {
      if (_recorder == null) return;

      try {
        String? filePath = await _recorder!.stopRecorder();

        setState(() {
          _isRecording = false;
        });

        // Leer el archivo grabado y guardarlo en _lastRecordedAudio
        if (filePath != null) {
          File audioFile = File(filePath);
          _lastRecordedAudio = await audioFile.readAsBytes();
          updateStatus('Grabación local detenida. Guardada en: $filePath');
        }
      } catch (err) {
        print('Error stopping local recording: $err');
      }
    }

  // Helper function to execute Appwrite function
  Future<Map<String, dynamic>> _executeFunction(
      String path, Map<String, dynamic> body) async {
    try {
      final execution = await functions.createExecution(
        functionId: API_CONFIG['functionId']!,
        body: jsonEncode({
          'path': path,
          'body': body,
        }),
      );

      if (execution.status == 'completed') {
        return jsonDecode(execution.responseBody);
      } else {
        throw Exception('Function execution failed: ${execution.status}');
      }
    } catch (e) {
      throw Exception('Error executing function: $e');
    }
  }

  void _onChange() {
    // perform computations and then call setState
    // setState will trigger a build
    setState(() {
      // your updates here
    });
  }

  // Helper function to update status
  void updateStatus(String message) {
    final timestamp = DateTime.now().toLocal().toString();
    _status.value += '[$timestamp] $message\n';
  }

  // Get session token from Appwrite function
  Future<void> getSessionToken() async {
    final data = await _executeFunction('/getSessionToken', {});
    sessionToken = data['data']['token'];
    updateStatus('Session token obtained');
  }

  void _connectWebSocket() {
    if (sessionInfo != null && sessionToken != null) {
      final wsUrl = Uri.parse(
          '${API_CONFIG['heyGenServerUrl']}/v1/ws/streaming.chat')
          .replace(
        scheme: 'wss',
        queryParameters: {
          'session_id': sessionInfo!.sessionId,
          'session_token': sessionToken,
          // 'silence_response': 'false', 
          
          'stt_language': 'en', 
        },
      ).toString();

      _webSocketChannel = WebSocketChannel.connect(Uri.parse(wsUrl));

      _webSocketChannel!.stream.listen(
            (event) {
          final eventData = jsonDecode(event);
          print('Raw WebSocket event: $eventData');
        },
        onError: (error) {
          print('WebSocket error: $error');
          updateStatus('WebSocket error: $error');
        },
        onDone: () {
          print('WebSocket connection closed');
          updateStatus('WebSocket connection closed');
          _webSocketChannel = null;
        },
      );
    }
  }

  void _closeWebSocket() {
    if (_webSocketChannel != null) {
      _webSocketChannel!.sink.close();
      _webSocketChannel = null;
    }
  }

  Future<void> createNewSession() async {
    if (sessionToken == null) {
      await getSessionToken();
    }

    final data = await _executeFunction('/createNewSession', {
      'quality': 'high',
      'avatar_name': _avatarIDController.text,
      'voice': {
        'voice_id': _voiceIDController.text,
        'rate': 2,
      },
      'version': 'v2',
      'video_encoding': 'H264',
    });

    if (data['data'] == null) {
      throw Exception('Session creation failed');
    }

    sessionInfo = SessionInfo.fromJson(data['data']);
    await room!.prepareConnection(sessionInfo!.url, sessionInfo!.accessToken);
    updateStatus('Connection prepared');

    // Connect WebSocket after room preparation
    _connectWebSocket();

    updateStatus('Session created successfully');
  }

  Future<void> _sendAudioThroughWebSocket() async {
    if (_lastRecordedAudio != null && _webSocketChannel != null && !_isRecording) {
      try {
        updateStatus('Enviando audio por WebSocket...');
        _webSocketChannel!.sink.add(_lastRecordedAudio);
        updateStatus('Audio enviado por WebSocket.');
      } catch (e) {
        updateStatus('Error al enviar audio por WebSocket: $e');
      }
    } else {
      if (_isRecording) {
        updateStatus('No se puede enviar audio mientras se está grabando.');
      } else if (_lastRecordedAudio == null) {
        updateStatus('No hay audio grabado para enviar.');
      } else {
        updateStatus('WebSocket no conectado.');
      }
    }
  }

  // Start streaming session through Appwrite function
  Future<void> startStreamingSession() async {
    await _executeFunction('/startStreamingSession', {
      'session_id': sessionInfo!.sessionId,
    });

    // Connect to LiveKit room
    await room!.connect(sessionInfo!.url, sessionInfo!.accessToken);
    updateStatus('Connected to room');

    setState(() {});
    updateStatus('Streaming started successfully');
  }

  // Send text to avatar through Appwrite function
  Future<void> sendText(String text, String taskType) async {
    if (sessionInfo == null) {
      updateStatus('No active session');
      return;
    }

    await _executeFunction('/sendTask', {
      'session_id': sessionInfo!.sessionId,
      'text': text,
      'task_type': taskType,
    });

    updateStatus('Sent text ($taskType): $text');
  }

  Future<void> closeSession() async {
    if (sessionInfo == null) {
      updateStatus('No active session');
      return;
    }

    await _executeFunction('/closeSession', {
      'session_id': sessionInfo!.sessionId,
    });

    _closeWebSocket();

    // Disconnect from LiveKit room
    if (room != null) {
      await room!.disconnect();
    }

    _videoTrackRenderer = null;
    // sessionInfo = null; 
    // room = null; 
    // sessionToken = null; 

    setState(() {});
    updateStatus('Session closed');
  }

  @override
  void dispose() {
    _status.dispose();
    _avatarIDController.dispose();
    _voiceIDController.dispose();
    _taskInputController.dispose();
    room?.dispose();
    _videoTrackRenderer = null;
    _closeWebSocket();
    _recorder?.closeRecorder();
    _recorder = null;
    // _recordingDataController?.close();
    super.dispose();
  }

  @override
  Widget build(BuildContext context) {
    return Scaffold(
      appBar: AppBar(
        title: Text('HeyGen Streaming API LiveKit (V2)'),
      ),
      body: SingleChildScrollView(
        child: Center(
          child: Container(
            padding: EdgeInsets.all(16.0),
            child: Column(
              mainAxisAlignment: MainAxisAlignment.center,
              children: <Widget>[
                TextField(
                  controller: _avatarIDController,
                  decoration: InputDecoration(
                    labelText: 'Avatar ID',
                  ),
                ),
                TextField(
                  controller: _voiceIDController,
                  decoration: InputDecoration(
                    labelText: 'Voice ID',
                  ),
                ),
                SizedBox(height: 16.0),
                Row(
                  mainAxisAlignment: MainAxisAlignment.spaceEvenly,
                  children: [
                    ElevatedButton(
                      onPressed: () async {
                        try {
                          await createNewSession();
                          await startStreamingSession();
                        } catch (e) {
                          updateStatus('Error: $e');
                        }
                      },
                      child: Text('Start'),
                    ),
                    ElevatedButton(
                      onPressed: closeSession,
                      child: Text('Close'),
                    ),
                  ],
                ),
                SizedBox(height: 16.0),
                TextField(
                  controller: _taskInputController,
                  decoration: InputDecoration(
                    labelText: 'Enter text for avatar to speak',
                  ),
                ),
                SizedBox(height: 16.0),
                Row(
                  mainAxisAlignment: MainAxisAlignment.spaceEvenly,
                  children: [
                    ElevatedButton(
                      onPressed: () {
                        final text = _taskInputController.text.trim();
                        if (text.isNotEmpty) {
                          sendText(text, 'talk');
                          _taskInputController.clear();
                        }
                      },
                      child: Text('Talk (LLM)'),
                    ),
                    ElevatedButton(
                      onPressed: () {
                        final text = _taskInputController.text.trim();
                        if (text.isNotEmpty) {
                          sendText(text, 'repeat');
                          _taskInputController.clear();
                        }
                      },
                      child: Text('Repeat'),
                    ),
                  ],
                ),
                SizedBox(height: 16.0),
                if (_videoTrackRenderer != null)
                  Container(
                    width: 320,
                    height: 240,
                    child: _videoTrackRenderer!,
                  )
                else
                  Container(
                    width: 320,
                    height: 240,
                    decoration: BoxDecoration(
                      border: Border.all(color: Colors.grey),
                      borderRadius: BorderRadius.circular(8.0),
                    ),
                    child: Center(
                      child: Text('Video will appear here'),
                    ),
                  ),
                SizedBox(height: 16.0),
                ValueListenableBuilder<String>(
                  valueListenable: _status,
                  builder: (context, value, child) {
                    return Container(
                      height: 100,
                      padding: EdgeInsets.all(8.0),
                      decoration: BoxDecoration(
                        border: Border.all(color: Colors.grey),
                        borderRadius: BorderRadius.circular(8.0),
                      ),
                      child: SingleChildScrollView(
                        child: Text(value),
                      ),
                    );
                  },
                ),
                SizedBox(height: 16.0),
                Row(
                  mainAxisAlignment: MainAxisAlignment.spaceEvenly,
                  children: [
                    Column(
                      children: [
                        ElevatedButton(
                          onPressed: _isRecording ? null : _startRecordingLocal,
                          child: Text('Grabar Local'),
                        ),
                        ElevatedButton(
                          onPressed: _isRecording ? _stopRecordingLocal : null,
                          child: Text('Detener Local'),
                        ),
                      ],
                    ),
                    Column(
                      children: [
                        ElevatedButton(
                          // Modificar la condición del botón
                          // onPressed: (_lastRecordedAudio != null && !_isRecording && _webSocketChannel != null)
                          //     ? _sendAudioThroughWebSocket
                          //     : null,
                          onPressed: () => _sendAudioThroughWebSocket(),
                          child: Text('Enviar por WS'),
                        ),
                      ],
                    ),
                  ],
                ),
              ],
            ),
          ),
        ),
      ),
    );
  }
}

class SessionInfo {
  final String url;
  final String accessToken;
  final String sessionId;

  SessionInfo({
    required this.url,
    required this.accessToken,
    required this.sessionId,
  });

  factory SessionInfo.fromJson(Map<String, dynamic> json) {
    return SessionInfo(
      url: json['url'],
      accessToken: json['access_token'],
      sessionId: json['session_id'],
    );
  }
}

Thanks in advanced