C#使用whisper.net实现语音识别（语音转文本）-Toy模板网

这篇具有很好参考价值的文章主要介绍了C#使用whisper.net实现语音识别（语音转文本）。希望对大家有所帮助。如果存在错误或未考虑完全的地方，请大家不吝赐教，您也可以点击"举报违法"按钮提交疑问。

介绍

效果

输出信息

项目

代码

下载

介绍

github地址：https://github.com/sandrohanea/whisper.net

Whisper.net. Speech to text made simple using Whisper Models

模型下载地址：https://huggingface.co/sandrohanea/whisper.net/tree/main/classic

效果

C#使用whisper.net实现语音识别（语音转文本）,C#人工智能实践,whisper,c#,人工智能,机器学习,深度学习,.net,语音识别

输出信息

whisper_init_from_file_no_state: loading model from 'ggml-small.bin'
whisper_model_load: loading model
whisper_model_load: n_vocab = 51865
whisper_model_load: n_audio_ctx = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx = 448
whisper_model_load: n_text_state = 768
whisper_model_load: n_text_head = 12
whisper_model_load: n_text_layer = 12
whisper_model_load: n_mels = 80
whisper_model_load: ftype = 1
whisper_model_load: qntvr = 0
whisper_model_load: type = 3
whisper_model_load: mem required = 743.00 MB (+ 16.00 MB per decoder)
whisper_model_load: adding 1608 extra tokens
whisper_model_load: model ctx = 464.68 MB
whisper_model_load: model size = 464.44 MB
whisper_init_state: kv self size = 15.75 MB
whisper_init_state: kv cross size = 52.73 MB
00:00:00->00:00:20: 皇鶴楼,崔昊,西人已成皇鶴去,此地空于皇鶴楼,皇鶴一去不复返,白云千载空悠悠。
00:00:20->00:00:39: 青川莉莉汉阳树,方草七七英五周,日暮相关何处事,燕泊江上世人愁。

项目

C#使用whisper.net实现语音识别（语音转文本）,C#人工智能实践,whisper,c#,人工智能,机器学习,深度学习,.net,语音识别

代码

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Whisper.net;
using static System.Net.Mime.MediaTypeNames;

namespace C_使用whisper.net实现语音转文本
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}

string fileFilter = "*.wav|*.wav";
string wavFileName = "";
WhisperFactory whisperFactory;
WhisperProcessor processor;
private async void button2_Click(object sender, EventArgs e)
{
if (wavFileName == "")
{
return;
}

try
{
button2.Enabled = false;
using var fileStream = File.OpenRead(wavFileName);
await foreach (var result in processor.ProcessAsync(fileStream))
{
Console.WriteLine($"{result.Start}->{result.End}: {result.Text}\r\n");
txtResult.Text += $"{result.Start}->{result.End}: {result.Text}\r\n";
}
}
catch (Exception ex)
{
MessageBox.Show(ex.Message);
}
finally
{
button2.Enabled = true;
}

}

private void Form1_Load(object sender, EventArgs e)
{
whisperFactory = WhisperFactory.FromPath("ggml-small.bin");

processor = whisperFactory.CreateBuilder()
.WithLanguage("zh")//.WithLanguage("auto")
.Build();

wavFileName = "085黄鹤楼.wav";
txtFileName.Text = wavFileName;
}

private void button1_Click(object sender, EventArgs e)
{
OpenFileDialog ofd = new OpenFileDialog();
ofd.Filter = fileFilter;
if (ofd.ShowDialog() != DialogResult.OK) return;

txtResult.Text = "";

wavFileName = ofd.FileName;
txtFileName.Text = wavFileName;
}
}
}

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Whisper.net;
using static System.Net.Mime.MediaTypeNames;

namespace C_使用whisper.net实现语音转文本
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        string fileFilter = "*.wav|*.wav";
        string wavFileName = "";
        WhisperFactory whisperFactory;
        WhisperProcessor processor;
        private async void button2_Click(object sender, EventArgs e)
        {
            if (wavFileName == "")
            {
                return;
            }

            try
            {
                button2.Enabled = false;
                using var fileStream = File.OpenRead(wavFileName);
                await foreach (var result in processor.ProcessAsync(fileStream))
                {
                    Console.WriteLine($"{result.Start}->{result.End}: {result.Text}\r\n");
                    txtResult.Text += $"{result.Start}->{result.End}: {result.Text}\r\n";
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
            finally
            {
                button2.Enabled = true;
            }

        }

        private void Form1_Load(object sender, EventArgs e)
        {
            whisperFactory = WhisperFactory.FromPath("ggml-small.bin");

            processor = whisperFactory.CreateBuilder()
               .WithLanguage("zh")//.WithLanguage("auto")
               .Build();

            wavFileName = "085黄鹤楼.wav";
            txtFileName.Text = wavFileName;
        }

        private void button1_Click(object sender, EventArgs e)
        {
            OpenFileDialog ofd = new OpenFileDialog();
            ofd.Filter = fileFilter;
            if (ofd.ShowDialog() != DialogResult.OK) return;

            txtResult.Text = "";

            wavFileName = ofd.FileName;
            txtFileName.Text = wavFileName;
        }
    }
}