Support for all multiple generation API features #98

tomkail · 2024-10-25T15:12:28Z

Removes previous_text from the constructor and adds next_text, previous_request_ids, and next_request_ids.

…us_request_ids and next_request_ids

tomkail · 2024-10-25T15:13:18Z

Testing script here!

// Licensed under the MIT License. See LICENSE in the project root for license information.

using ElevenLabs.Models;
using ElevenLabs.Voices;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using ElevenLabs.TextToSpeech;
using UnityEngine;
using Utilities.Async;

namespace ElevenLabs.Demo
{
    /// <summary>
    /// Demo component that generates speech for every entry in <see cref="messages"/> and then
    /// plays the generated clips back to back through the attached <see cref="AudioSource"/>.
    /// Unless <see cref="mode"/> is <see cref="Mode.NoPreviousRequests"/>, each request is given the
    /// neighbouring message text and the request ids of already generated neighbours.
    /// </summary>
    [RequireComponent(typeof(AudioSource))]
    public class TextToSpeechDemo : MonoBehaviour
    {
        /// <summary>
        /// Maximum number of neighbouring request ids attached in each direction.
        /// </summary>
        private const int MaxContextRequestIds = 3;

        [SerializeField]
        private ElevenLabsConfiguration configuration;

        [SerializeField]
        private bool debug = true;

        [SerializeField]
        private Voice voice;

        [SerializeField]
        public Message[] messages;

        /// <summary>
        /// A single line of text together with the result of its generation.
        /// </summary>
        [Serializable]
        public class Message
        {
            /// <summary>Text to synthesize.</summary>
            [TextArea(3, 10)]
            public string message;

            /// <summary>Request id of the generated clip. Entries that already have an id are not regenerated.</summary>
            public string id;

            /// <summary>Generated audio. Stays unassigned when generation failed.</summary>
            public AudioClip audioClip;
        }

        [SerializeField]
        private AudioSource audioSource;

        private readonly Queue<AudioClip> streamClipQueue = new();

        public Mode mode = Mode.GenerateLinear;

        /// <summary>
        /// Controls the order in which messages are generated and whether context is attached.
        /// </summary>
        public enum Mode
        {
            /// <summary>Generate in array order, with context.</summary>
            GenerateLinear,

            /// <summary>Generate in a shuffled order, with context.</summary>
            GenerateRandom,

            /// <summary>Generate in array order, without any context.</summary>
            NoPreviousRequests,
        }

#if !UNITY_2022_3_OR_NEWER
        // Stand-in for MonoBehaviour.destroyCancellationToken on Unity versions that lack it.
        private readonly CancellationTokenSource lifetimeCts = new();
        private CancellationToken destroyCancellationToken => lifetimeCts.Token;
#endif

        private void OnValidate()
        {
            if (audioSource == null)
            {
                audioSource = GetComponent<AudioSource>();
            }
        }

        private void Start()
        {
            OnValidate();
            RunAllIndicesAsync();
        }

        /// <summary>
        /// Generates audio for every message that has no request id yet, in the order selected by
        /// <see cref="mode"/>, then plays all available clips in array order.
        /// </summary>
        private async void RunAllIndicesAsync()
        {
            // async void: nothing can observe a thrown exception, so report it here.
            try
            {
                if (messages == null || messages.Length == 0)
                {
                    return;
                }

                // Capture the token once. On the pre-2022.3 fallback the property reads
                // lifetimeCts.Token, which throws after OnDestroy has disposed the source.
                var cancellationToken = destroyCancellationToken;

                IEnumerable<int> generationOrder = Enumerable.Range(0, messages.Length);

                if (mode == Mode.GenerateRandom)
                {
                    generationOrder = generationOrder.OrderBy(_ => Guid.NewGuid()).ToArray();
                }

                foreach (var index in generationOrder)
                {
                    if (cancellationToken.IsCancellationRequested)
                    {
                        return;
                    }

                    if (!string.IsNullOrEmpty(messages[index].id))
                    {
                        continue;
                    }

                    await RunForIndexAsync(index);
                }

                if (cancellationToken.IsCancellationRequested)
                {
                    return;
                }

                streamClipQueue.Clear();

                foreach (var message in messages)
                {
                    // A failed generation leaves the clip unassigned; queuing it would break playback.
                    if (message.audioClip != null)
                    {
                        streamClipQueue.Enqueue(message.audioClip);
                    }
                }

                if (streamClipQueue.Count == 0)
                {
                    return;
                }

                using var streamQueueCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
                PlayStreamQueue(streamQueueCts.Token);

                // Cancellation is tested first so a destroyed AudioSource is never touched.
                await new WaitUntil(() =>
                    cancellationToken.IsCancellationRequested ||
                    (streamClipQueue.Count == 0 && !audioSource.isPlaying));

                streamQueueCts.Cancel();
            }
            catch (Exception e)
            {
                Debug.LogError(e);
            }
        }

        /// <summary>
        /// Requests speech for <c>messages[index]</c> and stores the returned request id and clip on it.
        /// Failures are logged and leave the message unchanged.
        /// </summary>
        /// <param name="index">Index into <see cref="messages"/> of the entry to generate.</param>
        private async Task RunForIndexAsync(int index)
        {
            try
            {
                // Read once, before any await (see RunAllIndicesAsync for why).
                var cancellationToken = destroyCancellationToken;

                var api = new ElevenLabsClient(configuration)
                {
                    EnableDebug = debug
                };

                if (voice == null)
                {
                    voice = (await api.VoicesEndpoint.GetAllVoicesAsync(cancellationToken)).FirstOrDefault();
                }

                if (voice == null)
                {
                    Debug.LogError("No voice is assigned and none could be retrieved from the ElevenLabs API.");
                    return;
                }

                var voiceSettings = await api.VoicesEndpoint.GetVoiceSettingsAsync(voice.Id, cancellationToken);
                var request = new TextToSpeechRequest(voice, messages[index].message, Encoding.UTF8, voiceSettings, OutputFormat.MP3_44100_128, null, Model.TurboV2_5);

                if (mode != Mode.NoPreviousRequests)
                {
                    request.PreviousText = index > 0 ? messages[index - 1].message : null;
                    request.NextText = index < messages.Length - 1 ? messages[index + 1].message : null;
                    request.PreviousRequestIds = CollectRequestIds(index, -1);
                    request.NextRequestIds = CollectRequestIds(index, 1);
                }

                var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(request, cancellationToken: cancellationToken);
                messages[index].id = voiceClip.Id;
                messages[index].audioClip = voiceClip.AudioClip;

                if (debug)
                {
                    Debug.Log($"Full clip: {voiceClip.Id}");
                }
            }
            catch (Exception e)
            {
                Debug.LogError(e);
            }
        }

        /// <summary>
        /// Collects the request ids of up to <see cref="MaxContextRequestIds"/> consecutive neighbours
        /// of <paramref name="index"/>, nearest first, stopping at the array bounds or at the first
        /// neighbour that has no id yet.
        /// </summary>
        /// <param name="index">Index of the message being generated.</param>
        /// <param name="step">-1 to walk towards earlier messages, +1 towards later ones.</param>
        /// <returns>The collected ids; empty when the nearest neighbour has none.</returns>
        private string[] CollectRequestIds(int index, int step)
        {
            var requestIds = new List<string>(MaxContextRequestIds);

            for (var offset = 1; offset <= MaxContextRequestIds; offset++)
            {
                var neighbourIndex = index + (offset * step);

                if (neighbourIndex < 0 || neighbourIndex >= messages.Length)
                {
                    break;
                }

                var requestId = messages[neighbourIndex].id;

                if (string.IsNullOrEmpty(requestId))
                {
                    break;
                }

                requestIds.Add(requestId);
            }

            return requestIds.ToArray();
        }

#if !UNITY_2022_3_OR_NEWER
        private void OnDestroy()
        {
            lifetimeCts.Cancel();
            lifetimeCts.Dispose();
        }
#endif

        /// <summary>
        /// Plays the queued clips one after another, starting the next clip once the audio source
        /// reports it is no longer playing, until <paramref name="cancellationToken"/> is cancelled.
        /// </summary>
        /// <param name="cancellationToken">Stops the playback loop when cancelled.</param>
        private async void PlayStreamQueue(CancellationToken cancellationToken)
        {
            try
            {
                // Also wake up on cancellation so an empty queue cannot keep this waiting forever.
                await new WaitUntil(() => cancellationToken.IsCancellationRequested || streamClipQueue.Count > 0);
                var endOfFrame = new WaitForEndOfFrame();

                while (!cancellationToken.IsCancellationRequested)
                {
                    if (!audioSource.isPlaying &&
                        streamClipQueue.TryDequeue(out var clip) &&
                        clip != null)
                    {
                        Debug.Log($"playing partial clip: {clip.name}");
                        audioSource.PlayOneShot(clip);
                    }

                    await endOfFrame;
                }
            }
            catch (Exception e)
            {
                Debug.LogError(e);
            }
        }
    }
}

ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TextToSpeechRequest.cs

Removes previous_text from the constructor and adds next_text, previo…

b37d54e

…us_request_ids and next_request_ids

tomkail requested a review from StephenHodgson as a code owner October 25, 2024 15:12

StephenHodgson reviewed Oct 25, 2024

View reviewed changes

ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TextToSpeechRequest.cs Outdated Show resolved Hide resolved

remove setters and add to .ctor

d610056

StephenHodgson changed the base branch from main to development November 2, 2024 18:10

Merge branch 'development' into multiple_generations

2a0cded

tomkail closed this Nov 3, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Support for all multiple generation API features #98

Support for all multiple generation API features #98

tomkail commented Oct 25, 2024

tomkail commented Oct 25, 2024 •

edited by StephenHodgson

Loading

Support for all multiple generation API features #98

Support for all multiple generation API features #98

Conversation

tomkail commented Oct 25, 2024

tomkail commented Oct 25, 2024 • edited by StephenHodgson Loading

tomkail commented Oct 25, 2024 •

edited by StephenHodgson

Loading