适用于.NET SDK的Amazon Transcribe流式语音转文本服务

时间:2019-01-22 07:26:01

标签: .net amazon-web-services aws-sdk speech-to-text aws-transcribe

我无法在AWS .NET SDK中找到任何有关转录流服务(从语音到文本)的引用。

Amazon Transcribe流服务在.NET SDK中是否可用?任何参考资料都会有帮助。

2 个答案:

答案 0 :(得分:2)

我是这样做的:

using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Threading.Tasks;
using Amazon;
using Amazon.S3;
using Amazon.S3.Model;
using Amazon.TranscribeService;
using Amazon.TranscribeService.Model;
using Newtonsoft.Json;
using QuickScreenHelper;

namespace CognitiveFace.Speech
{
    /// <summary>
    /// Transcribes a local audio file with the Amazon Transcribe batch API:
    /// the file is uploaded to a freshly created S3 bucket, a transcription job
    /// is started with that bucket as its output location, the job is polled
    /// until it finishes, and the resulting JSON document is downloaded and
    /// deserialized.
    /// </summary>
    public class AwsSpeech : IDisposable
    {
        /// <summary>
        /// Upper bound for the polling delay. The delay doubles after every poll;
        /// without a cap a long-running job would be checked less and less often.
        /// </summary>
        private const int MaxPollDelayMilliseconds = 60000;

        /// <summary>
        /// Creates the S3 and Transcribe clients for the given region.
        /// </summary>
        /// <param name="regionEndpoint">
        /// Region to use; <see cref="RegionEndpoint.APNortheast1"/> when null.
        /// </param>
        public AwsSpeech(RegionEndpoint regionEndpoint = null)
        {
            RegionEndpoint = regionEndpoint ?? RegionEndpoint.APNortheast1;
            //todo add region endpoint for AWS Face
            S3Client = new AmazonS3Client(RegionEndpoint);
            TranscribeClient = new AmazonTranscribeServiceClient(RegionEndpoint);
        }

        private RegionEndpoint RegionEndpoint { get; }

        private AmazonTranscribeServiceClient TranscribeClient { get; }

        private AmazonS3Client S3Client { get; }

        /// <summary>
        /// Disposes the S3 and Transcribe clients owned by this instance.
        /// </summary>
        public void Dispose()
        {
            //TODO remember to call
            S3Client.Dispose();
            TranscribeClient.Dispose();
            //TODO dispose for faceClient
            //todo dispose for gcp speech and azure speech
        }

        /// <summary>
        /// Uploads <paramref name="fileName"/> to a new, uniquely named S3 bucket,
        /// transcribes it and returns the parsed transcription document.
        /// </summary>
        /// <param name="fileName">Path of the local audio file to transcribe.</param>
        /// <param name="targetLanguageCode">Language code passed to the transcription job.</param>
        /// <returns>
        /// The deserialized transcription result, or null when the bucket could not
        /// be created or the upload did not return HTTP 200 (both cases are logged).
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="fileName"/> is null or blank.</exception>
        /// <exception cref="FileNotFoundException"><paramref name="fileName"/> does not exist.</exception>
        /// <exception cref="InvalidOperationException">The job could not be started or ended as FAILED.</exception>
        public async Task<TranscriptionJobResult> TranscribeInputFile(string fileName,
            string targetLanguageCode = "ja-JP")
        {
            // Validate before touching AWS so that a bad path does not leave an
            // orphan bucket behind.
            if (string.IsNullOrWhiteSpace(fileName))
            {
                throw new ArgumentException("A file name is required.", nameof(fileName));
            }

            if (!File.Exists(fileName))
            {
                throw new FileNotFoundException("The input file to transcribe was not found.", fileName);
            }

            var bucketName = "transcribe-" + Guid.NewGuid();
            var putBucketResponse = await CreateBucket(bucketName);
            if (putBucketResponse.HttpStatusCode != HttpStatusCode.OK)
            {
                Logger.WriteLine($"Fail to transcribe {fileName} because cannot create bucket {bucketName}",
                    putBucketResponse);
                return null;
            }

            var uploadInputFileToS3 = await UploadInputFileToS3(fileName, bucketName);
            if (uploadInputFileToS3.HttpStatusCode != HttpStatusCode.OK)
            {
                Logger.WriteLine($"Fail to transcribe {fileName} because cannot upload {fileName} to {bucketName}",
                    uploadInputFileToS3);
                return null;
            }

            //todo delete bucket (left in place on purpose: removing it is destructive
            // and callers may currently rely on the bucket surviving the call)
            return await TranscribeInputFile(fileName, bucketName, targetLanguageCode);
        }

        /// <summary>
        /// Starts a transcription job for the already uploaded object and waits
        /// for it to finish.
        /// </summary>
        /// <param name="fileName">Local path; only its file name part is used as the S3 key.</param>
        /// <param name="bucketName">Bucket holding the input and receiving the output.</param>
        /// <param name="targetLanguageCode">Language code passed to the transcription job.</param>
        /// <exception cref="InvalidOperationException">The start request did not return HTTP 200.</exception>
        private async Task<TranscriptionJobResult> TranscribeInputFile(string fileName, string bucketName,
            string targetLanguageCode)
        {
            var objectName = Path.GetFileName(fileName);

            var media = new Media
            {
                MediaFileUri = $"https://s3.{RegionEndpoint.SystemName}.amazonaws.com/{bucketName}/{objectName}"
            };

            var transcriptionJobRequest = new StartTranscriptionJobRequest
            {
                LanguageCode = targetLanguageCode,
                Media = media,
                // NOTE(review): format is fixed to WAV, matching the "audio/wav"
                // content type used on upload.
                MediaFormat = MediaFormat.Wav,
                TranscriptionJobName = $"transcribe-job-{bucketName}",
                OutputBucketName = bucketName
            };

            var startTranscriptionJobResponse =
                await TranscribeClient.StartTranscriptionJobAsync(transcriptionJobRequest);
            if (startTranscriptionJobResponse.HttpStatusCode != HttpStatusCode.OK)
            {
                throw new InvalidOperationException(
                    $"Starting transcription job {transcriptionJobRequest.TranscriptionJobName} " +
                    $"returned HTTP status {startTranscriptionJobResponse.HttpStatusCode}.");
            }

            return await WaitForTranscriptionJob(startTranscriptionJobResponse.TranscriptionJob, bucketName);
        }

        /// <summary>
        /// Polls the transcription job until it is COMPLETED or FAILED. On
        /// completion the "&lt;job name&gt;.json" object is downloaded from
        /// <paramref name="bucketName"/> and deserialized.
        /// </summary>
        /// <param name="transcriptionJob">Job snapshot to start from.</param>
        /// <param name="bucketName">Bucket the job writes its output to.</param>
        /// <param name="delayTime">
        /// Initial delay between polls in milliseconds; doubled after each poll up to
        /// <see cref="MaxPollDelayMilliseconds"/>.
        /// </param>
        /// <exception cref="InvalidOperationException">The job ended with status FAILED.</exception>
        private async Task<TranscriptionJobResult> WaitForTranscriptionJob(TranscriptionJob transcriptionJob,
            string bucketName, int delayTime = 16000)
        {
            var job = transcriptionJob;
            var delay = delayTime;

            // Iterative polling: avoids building an ever deeper chain of async
            // continuations for long-running jobs.
            while (true)
            {
                var status = job.TranscriptionJobStatus;
                Logger.WriteLine($"transcriptionJobTranscriptionJobStatus={status}");

                if (status == TranscriptionJobStatus.COMPLETED)
                {
                    var keyName = $"{job.TranscriptionJobName}.json";
                    Logger.WriteLine($"Downloading {keyName}");
                    var json = await GetFileFromS3(keyName, bucketName);
                    return JsonConvert.DeserializeObject<TranscriptionJobResult>(json);
                }

                if (status == TranscriptionJobStatus.FAILED)
                {
                    throw new InvalidOperationException(
                        $"Transcription job {job.TranscriptionJobName} failed: {job.FailureReason}");
                }

                await Task.Delay(delay);

                var getTranscriptionJobResponse = await TranscribeClient.GetTranscriptionJobAsync(
                    new GetTranscriptionJobRequest
                    {
                        TranscriptionJobName = job.TranscriptionJobName
                    });
                job = getTranscriptionJobResponse.TranscriptionJob;

                // Back off exponentially, but never beyond the cap. A caller-supplied
                // delay that is already above the cap is left untouched.
                if (delay < MaxPollDelayMilliseconds)
                {
                    delay = Math.Min(delay * 2, MaxPollDelayMilliseconds);
                }
            }
        }

        /// <summary>
        /// Creates an S3 bucket with the given name using the client's region.
        /// </summary>
        /// <param name="bucketName">Name of the bucket to create.</param>
        /// <returns>The raw S3 response.</returns>
        public async Task<PutBucketResponse> CreateBucket(string bucketName)
        {
            var putBucketRequest = new PutBucketRequest
            {
                BucketName = bucketName,
            };

            return await S3Client.PutBucketAsync(putBucketRequest);
        }

        /// <summary>
        /// Uploads a local file to the bucket, using the file name (without
        /// directory) as the object key and "audio/wav" as the content type.
        /// </summary>
        /// <param name="fileName">Path of the local file to upload.</param>
        /// <param name="bucketName">Destination bucket.</param>
        /// <returns>The raw S3 response.</returns>
        public async Task<PutObjectResponse> UploadInputFileToS3(string fileName, string bucketName)
        {
            var putObjectRequest = new PutObjectRequest
            {
                BucketName = bucketName,
                Key = Path.GetFileName(fileName),
                ContentType = "audio/wav",
                FilePath = fileName
            };

            return await S3Client.PutObjectAsync(putObjectRequest);
        }

        /// <summary>
        /// Downloads an S3 object and returns its whole body as text.
        /// </summary>
        /// <param name="keyName">Key of the object to read.</param>
        /// <param name="bucketName">Bucket containing the object.</param>
        /// <returns>The object content read to the end of the response stream.</returns>
        public async Task<string> GetFileFromS3(string keyName, string bucketName)
        {
            var request = new GetObjectRequest
            {
                BucketName = bucketName,
                Key = keyName
            };

            using var response = await S3Client.GetObjectAsync(request);
            using var responseStream = response.ResponseStream;
            using var reader = new StreamReader(responseStream);

            return await reader.ReadToEndAsync();
        }
    }

    //todo move
    /// <summary>
    /// Root object that the transcription output JSON is deserialized into.
    /// NOTE(review): the lower-case property names presumably mirror the JSON
    /// keys of the Amazon Transcribe output file so that Json.NET can bind them
    /// without attributes — confirm before renaming.
    /// </summary>
    public class TranscriptionJobResult
    {
        public string jobName { get; set; }
        public string accountId { get; set; }
        public string status { get; set; }
        // Container for the transcript texts and the per-item details.
        public TranscriptionResult results { get; set; }
    }

    /// <summary>
    /// Payload of a transcription document: a list of transcripts plus a list
    /// of individual transcript items.
    /// NOTE(review): property names presumably match the JSON keys — confirm before renaming.
    /// </summary>
    public class TranscriptionResult
    {
        public List<Transcript> transcripts { get; set; }
        public List<TranscriptItem> items { get; set; }
    }

    /// <summary>
    /// A single transcript entry holding the transcript text.
    /// NOTE(review): property name presumably matches the JSON key — confirm before renaming.
    /// </summary>
    public class Transcript
    {
        public string transcript { get; set; }
    }

    /// <summary>
    /// One item of the transcription with its start/end times, its type and
    /// the list of alternative transcriptions.
    /// NOTE(review): times are kept as strings exactly as deserialized; their
    /// unit and format are not established here — verify against the service output.
    /// </summary>
    public class TranscriptItem
    {
        public string start_time { get; set; }
        public string end_time { get; set; }
        public List<AlternativeTranscription> alternatives { get; set; }
        public string type { get; set; }
    }

    /// <summary>
    /// One alternative for a transcript item: the recognized content and its
    /// confidence, both kept as strings exactly as deserialized.
    /// </summary>
    public class AlternativeTranscription
    {
        public string confidence { get; set; }
        public string content { get; set; }
    }
}

答案 1 :(得分:0)

可能不可用,对于.NET我什么也没找到。我也搜索了JavaScript SDK,发现有一个GitHub issue,其中的回复说目前尚不支持。

基于this official blog post,实时转录功能是最近才出现的,这也许可以解释SDK的缺乏(2018年11月20日)

我仅使用Java SDK找到了一个示例: Example Java Application using AWS SDK creating streaming transcriptions via AWS Transcribe

UPDATE : 我取得了联系,他们给了我这个答案:

  

现在,只有Java和Ruby SDK支持流转录。如果您想使用.NET Framework或JavaScript,则需要编写自己的客户端。如果您决定这样做,则可能会在此页面上找到有用的文档:   https://docs.aws.amazon.com/transcribe/latest/dg/streaming-format.html