Skip to content

GetcharZp/go-speech

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

10 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

license mit
pipeline_tag text-to-speech
tags
text-to-speech
automatic-speech-recognition
logo

go-speech forks go-speech stars go-speech pull-requests

go-speech 基于 Golang + ONNX 构建的轻量语音库,支持 TTS(文本转语音)与 ASR(语音转文字)。已集成 MeloTTS、Piper、达摩院 Paraformer 架构模型、Whisper 模型。

安装

# 下载包
go get -u github.com/getcharzp/go-speech

# 下载模型、动态链接库
git clone https://huggingface.co/getcharzp/go-speech

快速开始

TTS

MeloTTS 支持中英混合合成

package main

import (
	"github.com/getcharzp/go-speech/tts/melotts"
	"github.com/up-zero/gotool/fileutil"
	"log"
)

// main synthesizes one sentence with the MeloTTS engine and stores the
// returned WAV data in output.wav, relative to the working directory.
// Any failure is logged and terminates the program.
func main() {
	// The engine is built from the package's default configuration.
	engine, err := melotts.NewEngine(melotts.DefaultConfig())
	if err != nil {
		log.Fatalf("创建引擎失败: %v", err)
	}
	defer engine.Destroy()

	// The sample sentence mixes a date, a large number and a phone number.
	const sentence = "2019年12月30日,中国人口突破14亿人,联系电话: 13800138000。"

	// NOTE(review): the second argument is presumably the speaking rate
	// (1.0 = normal) — confirm against the melotts API.
	audio, err := engine.SynthesizeToWav(sentence, 1.0)
	if err != nil {
		log.Fatalf("合成失败: %v", err)
	}

	if err := fileutil.FileSave("output.wav", audio); err != nil {
		log.Fatalf("保存 WAV 失败: %v", err)
	}
}

Piper 支持中文合成

package main

import (
	"github.com/getcharzp/go-speech/tts/pipertts"
	"github.com/up-zero/gotool/fileutil"
	"log"
)

// main synthesizes one Chinese sentence with the Piper engine and stores the
// returned WAV data in pipertts_output.wav, relative to the working
// directory. Any failure is logged and terminates the program.
func main() {
	// All three paths are relative to the working directory. The runtime
	// library named here is a .dll; on other platforms this presumably has to
	// point at the matching ONNX Runtime shared library — confirm.
	engine, err := pipertts.NewEngine(pipertts.Config{
		OnnxRuntimeLibPath: "../lib/onnxruntime.dll",
		ModelPath:          "../pipertts_weights/zh_CN-xiao_ya-medium.onnx",
		ConfigPath:         "../pipertts_weights/zh_CN-xiao_ya-medium.onnx.json",
	})
	if err != nil {
		log.Fatalf("创建引擎失败: %v", err)
	}
	defer engine.Destroy()

	// The sample sentence mixes a date, a large number and a phone number.
	const sentence = "2019年12月30日,中国人口突破14亿人。联系电话: 13800138000。"

	audio, err := engine.SynthesizeToWav(sentence)
	if err != nil {
		log.Fatalf("合成失败: %v", err)
	}

	if err := fileutil.FileSave("pipertts_output.wav", audio); err != nil {
		log.Fatalf("保存失败: %v", err)
	}
}

ASR

Paraformer

package main

import (
	"fmt"
	"github.com/getcharzp/go-speech/asr/paraformer"
	"log"
)

// main transcribes ./zh-en.wav with the Paraformer engine and prints the
// recognized text to standard output.
func main() {
	// The engine is built from the package's default configuration.
	engine, err := paraformer.NewEngine(paraformer.DefaultConfig())
	if err != nil {
		log.Fatalf("创建引擎失败: %v", err)
	}
	defer engine.Destroy()

	const audioPath = "./zh-en.wav"

	transcript, err := engine.TranscribeFile(audioPath)
	if err != nil {
		// Log and return (rather than log.Fatalf) so the deferred Destroy
		// still runs.
		log.Printf("识别出错: %v", err)
		return
	}

	fmt.Printf("识别结果: %s\n", transcript)
}

Whisper

package main

import (
	"fmt"
	"github.com/getcharzp/go-speech/asr/whisper"
	"log"
)

// main transcribes ./zh-en.wav with the Whisper engine, requesting Chinese
// as the language and the transcribe task, and prints the recognized text to
// standard output.
func main() {
	// The engine is built from the package's default configuration.
	asrEngine, err := whisper.NewEngine(whisper.DefaultConfig())
	if err != nil {
		log.Fatalf("创建引擎失败: %v", err)
	}
	defer asrEngine.Destroy()

	opts := whisper.TranscribeOption{
		Language: whisper.LangZh,
		Task:     whisper.TaskTranscribe,
	}

	text, err := asrEngine.TranscribeFile("./zh-en.wav", opts)
	if err != nil {
		// log.Fatalf exits the process immediately, which made the following
		// return unreachable and skipped the deferred Destroy. Logging and
		// returning lets the cleanup run.
		log.Printf("识别出错: %v", err)
		return
	}

	// Sample output: Yesterday was星期一Today is Tuesday明天是星期三
	fmt.Printf("识别结果: %s\n", text)
}

About

go-speech 基于 Golang + ONNX 构建的轻量语音库,支持 TTS(文本转语音)与 ASR(语音转文字)。已集成 MeloTTS、Piper、达摩院 Paraformer 架构模型、Whisper 模型。

Topics

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors

Languages