Discussions
Why doesn't my avatar respond when I send it audio from the mic?
25 days ago by Pablo Palma
Hello, I have a Flutter client that works well when I send text through HTTP using /sendtask, and the avatar responds correctly. However, I'm having trouble when sending audio from the microphone.
I understand that to send audio to the avatar, I need to connect to the WebSocket:
/v1/ws/streaming.chat
In my code, I have one button that records audio from the user's microphone and another button to stop the recording. Then, with another button, I send the last recording to HeyGen's WebSocket, but the avatar never responds. It's as if it doesn't hear me.
What could I be doing wrong?
Here is my complete code:
import 'dart:async';
import 'dart:convert';
import 'dart:io';
import 'dart:typed_data';
import 'package:appwrite/appwrite.dart';
import 'package:flutter/material.dart';
import 'package:flutter_sound/flutter_sound.dart';
import 'package:flutter_webrtc/flutter_webrtc.dart';
import 'package:livekit_client/livekit_client.dart';
import 'package:path_provider/path_provider.dart';
import 'package:permission_handler/permission_handler.dart' as ph;
import 'package:web_socket_channel/web_socket_channel.dart';
// import 'pipecat.pb.dart';
/// Application entry point.
///
/// Waits for the Flutter binding, requests runtime permissions on Android,
/// then launches the root widget.
Future<void> main() async {
  WidgetsFlutterBinding.ensureInitialized();
  if (Platform.isAndroid) {
    await _checkPermissions();
  }
  runApp(MyApp());
}
/// Requests the runtime permissions the app needs (Android only caller).
///
/// Each permission is requested in turn; a message is logged only when the
/// user has *permanently* denied it, matching the original behavior (a
/// plain denial is silently ignored).
Future<void> _checkPermissions() async {
  // Permission paired with the label used in its log message, in the
  // original request order.
  final requests = <MapEntry<ph.Permission, String>>[
    MapEntry(ph.Permission.bluetooth, 'Bluetooth'),
    MapEntry(ph.Permission.bluetoothConnect, 'Bluetooth Connect'),
    MapEntry(ph.Permission.camera, 'Camera'),
    MapEntry(ph.Permission.microphone, 'Microphone'),
  ];
  for (final entry in requests) {
    final status = await entry.key.request();
    if (status.isPermanentlyDenied) {
      // Interpolation reproduces the original message strings exactly.
      print('${entry.value} Permission disabled');
    }
  }
}
/// Root widget: a [MaterialApp] that hosts the streaming demo page.
class MyApp extends StatelessWidget {
  @override
  Widget build(BuildContext context) {
    final theme = ThemeData(primarySwatch: Colors.blue);
    return MaterialApp(
      title: 'HeyGen Streaming API LiveKit (V2)',
      theme: theme,
      home: StreamingPage2(),
    );
  }
}
/// Stateful page that drives the HeyGen avatar streaming session.
class StreamingPage2 extends StatefulWidget {
  @override
  State<StreamingPage2> createState() => _StreamingPage2State();
}
class _StreamingPage2State extends State<StreamingPage2> {
  // Appwrite Configuration (project/function IDs redacted by the author).
  // All HeyGen REST calls are proxied through a single Appwrite function.
  final API_CONFIG = {
    'endpoint': 'https://cloud.appwrite.io/v1',
    'projectId': 'xxxxxxxxxx',
    'functionId': 'xxxxxxxxxx',
    'heyGenServerUrl': "https://api.heygen.com"
  };
  // Appwrite Client
  late Client client;
  late Functions functions;
  // Global variables
  SessionInfo? sessionInfo; // active HeyGen session details, if any
  Room? room; // LiveKit room carrying the avatar's audio/video
  WebSocketChannel? _webSocketChannel; // HeyGen streaming.chat socket
  String? sessionToken; // HeyGen session token fetched via Appwrite
  // Controllers
  final _avatarIDController = TextEditingController(text: '');
  final _voiceIDController = TextEditingController(text: '');
  final _taskInputController = TextEditingController();
  // Status log: timestamped lines appended by updateStatus and rendered
  // in a scrollable box at the bottom of the page.
  final _status = ValueNotifier<String>('');
  // Media Element
  VideoTrackRenderer? _videoTrackRenderer;
  VideoTrack? _videoTrack;
  FlutterSoundRecorder? _recorder; // local microphone recorder
  bool _isRecording = false;
  String? _localFilePath; // file path of the in-progress/last recording
  Uint8List? _lastRecordedAudio; // raw PCM bytes of the last recording

  @override
  void initState() {
    super.initState();
    _initializeAppwrite();
    // Fire-and-forget: _initialize is async but not awaited here.
    _initialize();
  }

  // Configures the Appwrite client and Functions service used to proxy
  // every HeyGen API call.
  void _initializeAppwrite() {
    client = Client()
      ..setEndpoint(API_CONFIG['endpoint']!)
      ..setProject(API_CONFIG['projectId']!);
    functions = Functions(client);
  }

  // Creates the LiveKit room, registers its event handlers, and then
  // initializes the microphone recorder.
  Future<void> _initialize() async {
    room = Room(
      roomOptions: RoomOptions(
        adaptiveStream: true,
        dynacast: true,
        defaultVideoPublishOptions: VideoPublishOptions(
          simulcast: false,
        ),
      ),
    );
    room!.addListener(_onChange);
    // Handle room events
    room!.events.on<RoomDisconnectedEvent>((event) {
      updateStatus('Room disconnected: ${event.reason}');
    });
    room!.events.on<DataReceivedEvent>((event) {
      final data = utf8.decode(event.data);
      print("Room message: ${jsonDecode(data)}");
    });
    // Handle media streams: only the video track gets a renderer; audio
    // tracks are subscribed but need no explicit widget.
    room!.events.on<TrackSubscribedEvent>((event) async {
      if (event.track.kind == TrackType.VIDEO ||
          event.track.kind == TrackType.AUDIO) {
        if (event.track.kind == TrackType.VIDEO) {
          _videoTrack = event.track as VideoTrack;
          _videoTrackRenderer = VideoTrackRenderer(_videoTrack!);
          setState(() {});
        }
        updateStatus('Media stream ready');
      }
    });
    // Handle media stream removal
    room!.events.on<TrackUnsubscribedEvent>((event) {
      if (event.track.kind == TrackType.VIDEO) {
        _videoTrackRenderer = null;
        setState(() {});
      }
    });
    // Initialize the microphone here.
    await _initMicrophone();
  }

  // Requests microphone permission and opens the flutter_sound recorder.
  Future<void> _initMicrophone() async {
    var status = await ph.Permission.microphone.request();
    if (status.isGranted) {
      try {
        _recorder = await FlutterSoundRecorder().openRecorder();
        await _recorder!.setSubscriptionDuration(const Duration(milliseconds: 10));
        print('Microphone initialized successfully');
      } catch (e) {
        print('Error initializing microphone: $e');
      }
    } else {
      print('Microphone permission denied');
      updateStatus('Microphone permission denied');
    }
  }

  // Starts recording the microphone into a temporary PCM file.
  Future<void> _startRecordingLocal() async {
    if (_recorder == null) return;
    try {
      // Get the temporary directory.
      Directory tempDir = await getTemporaryDirectory();
      _localFilePath = '${tempDir.path}/local_recording.pcm'; // Define the output file path.
      // Start local recording into a file.
      // NOTE(review): this records 16 kHz mono PCM16. Verify this matches
      // the sample rate the HeyGen streaming socket expects (commonly
      // 24 kHz for streaming avatars) — TODO confirm against the docs.
      await _recorder!.startRecorder(
        toFile: _localFilePath,
        codec: Codec.pcm16,
        numChannels: 1,
        sampleRate: 16000,
      );
      setState(() {
        _isRecording = true;
      });
      updateStatus('Grabando localmente en: $_localFilePath');
    } catch (err) {
      print('Error starting local recording: $err');
    }
  }

  // Stops the local recording and loads the resulting bytes into memory.
  Future<void> _stopRecordingLocal() async {
    if (_recorder == null) return;
    try {
      String? filePath = await _recorder!.stopRecorder();
      setState(() {
        _isRecording = false;
      });
      // Read the recorded file and keep its bytes in _lastRecordedAudio.
      if (filePath != null) {
        File audioFile = File(filePath);
        _lastRecordedAudio = await audioFile.readAsBytes();
        updateStatus('Grabación local detenida. Guardada en: $filePath');
      }
    } catch (err) {
      print('Error stopping local recording: $err');
    }
  }

  // Helper function to execute Appwrite function
  // Sends {path, body} to the single configured Appwrite function and
  // returns its decoded JSON response. Throws on any non-completed status.
  Future<Map<String, dynamic>> _executeFunction(
      String path, Map<String, dynamic> body) async {
    try {
      final execution = await functions.createExecution(
        functionId: API_CONFIG['functionId']!,
        body: jsonEncode({
          'path': path,
          'body': body,
        }),
      );
      if (execution.status == 'completed') {
        return jsonDecode(execution.responseBody);
      } else {
        throw Exception('Function execution failed: ${execution.status}');
      }
    } catch (e) {
      throw Exception('Error executing function: $e');
    }
  }

  void _onChange() {
    // perform computations and then call setState
    // setState will trigger a build
    setState(() {
      // your updates here
    });
  }

  // Helper function to update status
  // Appends a timestamped line to the on-screen status log.
  void updateStatus(String message) {
    final timestamp = DateTime.now().toLocal().toString();
    _status.value += '[$timestamp] $message\n';
  }

  // Get session token from Appwrite function
  Future<void> getSessionToken() async {
    final data = await _executeFunction('/getSessionToken', {});
    sessionToken = data['data']['token'];
    updateStatus('Session token obtained');
  }

  // Opens the HeyGen streaming.chat WebSocket for the current session.
  // The listener only prints incoming events; nothing is dispatched on them.
  void _connectWebSocket() {
    if (sessionInfo != null && sessionToken != null) {
      // Build wss://api.heygen.com/v1/ws/streaming.chat?session_id=...&...
      final wsUrl = Uri.parse(
              '${API_CONFIG['heyGenServerUrl']}/v1/ws/streaming.chat')
          .replace(
        scheme: 'wss',
        queryParameters: {
          'session_id': sessionInfo!.sessionId,
          'session_token': sessionToken,
          // 'silence_response': 'false',
          'stt_language': 'en',
        },
      ).toString();
      _webSocketChannel = WebSocketChannel.connect(Uri.parse(wsUrl));
      _webSocketChannel!.stream.listen(
        (event) {
          final eventData = jsonDecode(event);
          print('Raw WebSocket event: $eventData');
        },
        onError: (error) {
          print('WebSocket error: $error');
          updateStatus('WebSocket error: $error');
        },
        onDone: () {
          print('WebSocket connection closed');
          updateStatus('WebSocket connection closed');
          _webSocketChannel = null;
        },
      );
    }
  }

  void _closeWebSocket() {
    if (_webSocketChannel != null) {
      _webSocketChannel!.sink.close();
      _webSocketChannel = null;
    }
  }

  // Creates a HeyGen streaming session (via the Appwrite proxy), prepares
  // the LiveKit connection, and opens the chat WebSocket.
  Future<void> createNewSession() async {
    if (sessionToken == null) {
      await getSessionToken();
    }
    final data = await _executeFunction('/createNewSession', {
      'quality': 'high',
      'avatar_name': _avatarIDController.text,
      'voice': {
        'voice_id': _voiceIDController.text,
        'rate': 2, // NOTE(review): 2x speech rate — intentional? TODO confirm.
      },
      'version': 'v2',
      'video_encoding': 'H264',
    });
    if (data['data'] == null) {
      throw Exception('Session creation failed');
    }
    sessionInfo = SessionInfo.fromJson(data['data']);
    await room!.prepareConnection(sessionInfo!.url, sessionInfo!.accessToken);
    updateStatus('Connection prepared');
    // Connect WebSocket after room preparation
    _connectWebSocket();
    updateStatus('Session created successfully');
  }

  // Sends the last local recording over the HeyGen WebSocket.
  // NOTE(review): this pushes the raw PCM bytes as a single binary
  // WebSocket frame. The streaming.chat socket is generally expected to
  // receive JSON-framed events (e.g. base64-encoded audio appended via an
  // "agent.audio_buffer_append" message, followed by a commit/"speak"
  // event) rather than raw binary — that protocol mismatch is the most
  // likely reason the avatar never responds. TODO: confirm the exact
  // event names and audio format against the HeyGen streaming API docs.
  Future<void> _sendAudioThroughWebSocket() async {
    if (_lastRecordedAudio != null && _webSocketChannel != null && !_isRecording) {
      try {
        updateStatus('Enviando audio por WebSocket...');
        _webSocketChannel!.sink.add(_lastRecordedAudio);
        updateStatus('Audio enviado por WebSocket.');
      } catch (e) {
        updateStatus('Error al enviar audio por WebSocket: $e');
      }
    } else {
      // Explain which precondition failed.
      if (_isRecording) {
        updateStatus('No se puede enviar audio mientras se está grabando.');
      } else if (_lastRecordedAudio == null) {
        updateStatus('No hay audio grabado para enviar.');
      } else {
        updateStatus('WebSocket no conectado.');
      }
    }
  }

  // Start streaming session through Appwrite function
  // Then joins the LiveKit room so the avatar's media starts flowing.
  Future<void> startStreamingSession() async {
    await _executeFunction('/startStreamingSession', {
      'session_id': sessionInfo!.sessionId,
    });
    // Connect to LiveKit room
    await room!.connect(sessionInfo!.url, sessionInfo!.accessToken);
    updateStatus('Connected to room');
    setState(() {});
    updateStatus('Streaming started successfully');
  }

  // Send text to avatar through Appwrite function
  // taskType is 'talk' (LLM reply) or 'repeat' (verbatim speech).
  Future<void> sendText(String text, String taskType) async {
    if (sessionInfo == null) {
      updateStatus('No active session');
      return;
    }
    await _executeFunction('/sendTask', {
      'session_id': sessionInfo!.sessionId,
      'text': text,
      'task_type': taskType,
    });
    updateStatus('Sent text ($taskType): $text');
  }

  // Tears down the HeyGen session, WebSocket, and LiveKit connection.
  Future<void> closeSession() async {
    if (sessionInfo == null) {
      updateStatus('No active session');
      return;
    }
    await _executeFunction('/closeSession', {
      'session_id': sessionInfo!.sessionId,
    });
    _closeWebSocket();
    // Disconnect from LiveKit room
    if (room != null) {
      await room!.disconnect();
    }
    _videoTrackRenderer = null;
    // sessionInfo = null;
    // room = null;
    // sessionToken = null;
    setState(() {});
    updateStatus('Session closed');
  }

  @override
  void dispose() {
    _status.dispose();
    _avatarIDController.dispose();
    _voiceIDController.dispose();
    _taskInputController.dispose();
    room?.dispose();
    _videoTrackRenderer = null;
    _closeWebSocket();
    _recorder?.closeRecorder();
    _recorder = null;
    // _recordingDataController?.close();
    super.dispose();
  }

  @override
  Widget build(BuildContext context) {
    return Scaffold(
      appBar: AppBar(
        title: Text('HeyGen Streaming API LiveKit (V2)'),
      ),
      body: SingleChildScrollView(
        child: Center(
          child: Container(
            padding: EdgeInsets.all(16.0),
            child: Column(
              mainAxisAlignment: MainAxisAlignment.center,
              children: <Widget>[
                // Session configuration inputs.
                TextField(
                  controller: _avatarIDController,
                  decoration: InputDecoration(
                    labelText: 'Avatar ID',
                  ),
                ),
                TextField(
                  controller: _voiceIDController,
                  decoration: InputDecoration(
                    labelText: 'Voice ID',
                  ),
                ),
                SizedBox(height: 16.0),
                // Start/Close session controls.
                Row(
                  mainAxisAlignment: MainAxisAlignment.spaceEvenly,
                  children: [
                    ElevatedButton(
                      onPressed: () async {
                        try {
                          await createNewSession();
                          await startStreamingSession();
                        } catch (e) {
                          updateStatus('Error: $e');
                        }
                      },
                      child: Text('Start'),
                    ),
                    ElevatedButton(
                      onPressed: closeSession,
                      child: Text('Close'),
                    ),
                  ],
                ),
                SizedBox(height: 16.0),
                TextField(
                  controller: _taskInputController,
                  decoration: InputDecoration(
                    labelText: 'Enter text for avatar to speak',
                  ),
                ),
                SizedBox(height: 16.0),
                // Text task buttons: LLM answer vs verbatim repeat.
                Row(
                  mainAxisAlignment: MainAxisAlignment.spaceEvenly,
                  children: [
                    ElevatedButton(
                      onPressed: () {
                        final text = _taskInputController.text.trim();
                        if (text.isNotEmpty) {
                          sendText(text, 'talk');
                          _taskInputController.clear();
                        }
                      },
                      child: Text('Talk (LLM)'),
                    ),
                    ElevatedButton(
                      onPressed: () {
                        final text = _taskInputController.text.trim();
                        if (text.isNotEmpty) {
                          sendText(text, 'repeat');
                          _taskInputController.clear();
                        }
                      },
                      child: Text('Repeat'),
                    ),
                  ],
                ),
                SizedBox(height: 16.0),
                // Avatar video (or a placeholder before the track arrives).
                if (_videoTrackRenderer != null)
                  Container(
                    width: 320,
                    height: 240,
                    child: _videoTrackRenderer!,
                  )
                else
                  Container(
                    width: 320,
                    height: 240,
                    decoration: BoxDecoration(
                      border: Border.all(color: Colors.grey),
                      borderRadius: BorderRadius.circular(8.0),
                    ),
                    child: Center(
                      child: Text('Video will appear here'),
                    ),
                  ),
                SizedBox(height: 16.0),
                // Scrollable status log.
                ValueListenableBuilder<String>(
                  valueListenable: _status,
                  builder: (context, value, child) {
                    return Container(
                      height: 100,
                      padding: EdgeInsets.all(8.0),
                      decoration: BoxDecoration(
                        border: Border.all(color: Colors.grey),
                        borderRadius: BorderRadius.circular(8.0),
                      ),
                      child: SingleChildScrollView(
                        child: Text(value),
                      ),
                    );
                  },
                ),
                SizedBox(height: 16.0),
                // Recording controls and manual WebSocket send.
                Row(
                  mainAxisAlignment: MainAxisAlignment.spaceEvenly,
                  children: [
                    Column(
                      children: [
                        ElevatedButton(
                          onPressed: _isRecording ? null : _startRecordingLocal,
                          child: Text('Grabar Local'),
                        ),
                        ElevatedButton(
                          onPressed: _isRecording ? _stopRecordingLocal : null,
                          child: Text('Detener Local'),
                        ),
                      ],
                    ),
                    Column(
                      children: [
                        ElevatedButton(
                          // Button-enable condition (modified by the author):
                          // onPressed: (_lastRecordedAudio != null && !_isRecording && _webSocketChannel != null)
                          // ? _sendAudioThroughWebSocket
                          // : null,
                          onPressed: () => _sendAudioThroughWebSocket(),
                          child: Text('Enviar por WS'),
                        ),
                      ],
                    ),
                  ],
                ),
              ],
            ),
          ),
        ),
      ),
    );
  }
}
/// Connection details for one HeyGen streaming session.
class SessionInfo {
  /// LiveKit server URL to connect to.
  final String url;

  /// LiveKit access token for joining the room.
  final String accessToken;

  /// HeyGen session identifier used by the follow-up API calls.
  final String sessionId;

  SessionInfo({
    required this.url,
    required this.accessToken,
    required this.sessionId,
  });

  /// Builds a [SessionInfo] from the payload returned by the
  /// session-creation endpoint.
  factory SessionInfo.fromJson(Map<String, dynamic> json) => SessionInfo(
        url: json['url'],
        accessToken: json['access_token'],
        sessionId: json['session_id'],
      );
}
Thanks in advance.