之前的项目中需要把用户输入的文字转换为语音文件再播放出来,当时一头雾水,没搞明白该怎么做。查了很多资料,有的方案是使用在线服务合成语音文件,但这种方式有局限性,不能离线合成,所以放弃了。后来发现讯飞是提供离线语音合成包的,但是官网没有可供C#调用的离线包,Windows平台只有C++写的包,这就比较坑了。于是想到用C#调用C++ DLL的方式看能不能实现,参考了讯飞官网和论坛中大神们写的代码,最终实现了C#文字转语音的功能。现在把代码贴出来,小伙伴们可以直接使用,不过需要先去讯飞官网购买离线包。
/// <summary>
/// Offline text-to-speech helper built on the native iFlytek MSC library (msc.dll).
/// The constructor logs in to the engine; <see cref="MultiSpeek"/> synthesizes one
/// text into a WAV image, optionally saves it to a file, and then logs out again.
/// </summary>
/// <remarks>
/// The <c>SynthStatus</c> enum used by the audio polling loop is not declared in this
/// class; it has to be provided elsewhere in the project.
/// </remarks>
public class iFlyTTS
{
    /// <summary>Size in bytes of the WAV header placed in front of the audio data.</summary>
    private const int WaveHeaderSize = 44;

    /// <summary>
    /// P/Invoke declarations for the functions exported by msc.dll.
    /// </summary>
    private class TTSDll
    {
        #region TTS dll import
        /// <summary>Logs in to the MSC engine; returns 0 on success.</summary>
        [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
        public static extern int MSPLogin(string usr, string pwd, string configs);

        /// <summary>Logs out of the MSC engine; returns 0 on success.</summary>
        [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
        public static extern int MSPLogout();

        /// <summary>Opens a synthesis session and returns a pointer to its native session id string.</summary>
        [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
        public static extern IntPtr QTTSSessionBegin(string _params, ref int errorCode);

        /// <summary>Submits the text to synthesize; <paramref name="textLen"/> is the length in bytes.</summary>
        [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
        public static extern int QTTSTextPut(string sessionID, string textString, uint textLen, string _params);

        /// <summary>Fetches the next chunk of synthesized audio; the returned buffer is owned by the native library.</summary>
        [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
        public static extern IntPtr QTTSAudioGet(string sessionID, ref int audioLen, ref SynthStatus synthStatus, ref int errorCode);

        /// <summary>Returns a pointer to native information about the synthesized audio.</summary>
        [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
        public static extern IntPtr QTTSAudioInfo(string sessionID);

        /// <summary>Closes a synthesis session; returns 0 on success.</summary>
        [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
        public static extern int QTTSSessionEnd(string sessionID, string hints);

        /// <summary>
        /// Reads a session parameter. <paramref name="paramValue"/> is an output buffer, so it is
        /// marshalled as a <see cref="StringBuilder"/> whose capacity is passed in <paramref name="valueLen"/>.
        /// </summary>
        [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
        public static extern int QTTSGetParam(string sessionID, string paramName, StringBuilder paramValue, ref uint valueLen);
        #endregion
    }

    /// <summary>Id of the synthesis session currently open, or null when none is open.</summary>
    private string sessionID;

    /// <summary>
    /// Logs in to the TTS engine.
    /// </summary>
    /// <param name="configs">Login parameter string passed to MSPLogin (e.g. appid and work_dir).</param>
    /// <exception cref="InvalidOperationException">MSPLogin returned a non-zero error code.</exception>
    public iFlyTTS(string configs)
    {
        int ret = TTSDll.MSPLogin(null, null, configs);
        if (ret != 0)
        {
            throw new InvalidOperationException("初始化TTS引擎错误,错误代码:" + ret);
        }
    }

    /// <summary>
    /// Synthesizes <paramref name="SpeekText"/> into a WAV image and, when
    /// <paramref name="outWaveFlie"/> is given, writes it to that file.
    /// The engine is logged out when this method returns, whether it succeeded or not.
    /// </summary>
    /// <param name="SpeekText">Text to convert to speech.</param>
    /// <param name="outWaveFlie">Path of the WAV file to create, or null to skip writing a file.</param>
    /// <exception cref="ArgumentNullException"><paramref name="SpeekText"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A native TTS call reported an error.</exception>
    public void MultiSpeek(string SpeekText, string outWaveFlie = null)
    {
        MemoryStream mStream = new MemoryStream();
        try
        {
            if (SpeekText == null)
            {
                throw new ArgumentNullException("SpeekText");
            }

            // Reserve room for the header first; otherwise writing the header at
            // position 0 afterwards would overwrite the first 44 bytes of audio.
            mStream.Write(new byte[WaveHeaderSize], 0, WaveHeaderSize);

            speek(SpeekText, ref mStream);

            // Now that the audio length is known, fill in the reserved header.
            WAVE_Header header = getWave_Header((int)mStream.Length - WaveHeaderSize);
            byte[] headerByte = StructToBytes(header);
            mStream.Position = 0;
            mStream.Write(headerByte, 0, headerByte.Length);

            if (outWaveFlie != null)
            {
                using (FileStream ofs = new FileStream(outWaveFlie, FileMode.Create))
                {
                    mStream.WriteTo(ofs);
                }
            }
        }
        finally
        {
            // Errors are no longer swallowed here: they propagate to the caller
            // after the engine has been logged out and the buffer released.
            TTSDll.MSPLogout();
            mStream.Close();
        }
    }

    /// <summary>
    /// Converts text to audio and appends the audio bytes to the given memory stream.
    /// </summary>
    /// <param name="SpeekText">Text to convert to speech.</param>
    /// <param name="mStream">Stream that receives the synthesized audio, starting at its current position.</param>
    /// <exception cref="InvalidOperationException">A native TTS call reported an error.</exception>
    private void speek(string SpeekText, ref MemoryStream mStream)
    {
        // NOTE(review): the WAV header written by the caller describes 8 kHz 16-bit mono PCM;
        // confirm that the engine really returns PCM with these parameters (aue is set to speex-wb).
        string szParams = "engine_type = local, voice_name = xiaoyan, text_encoding = GB2312, tts_res_path = fo|res\\tts\\xiaoyan.jet;fo|res\\tts\\common.jet, sample_rate = 8000,aue=speex-wb;7, speed = 50, volume = 50, pitch = 50, rdn = 2";
        int ret = 0;

        IntPtr sessionPtr = TTSDll.QTTSSessionBegin(szParams, ref ret);
        if (ret != 0 || sessionPtr == IntPtr.Zero)
        {
            // No session was opened, so there is nothing to end.
            throw new InvalidOperationException("初始化TTS引会话错误,错误代码:" + ret);
        }
        sessionID = Ptr2Str(sessionPtr);

        bool completed = false;
        try
        {
            ret = TTSDll.QTTSTextPut(sessionID, SpeekText, (uint)Encoding.Default.GetByteCount(SpeekText), string.Empty);
            if (ret != 0)
            {
                throw new InvalidOperationException("向服务器发送数据,错误代码:" + ret);
            }

            int audio_len = 0;
            SynthStatus synth_status = SynthStatus.TTS_FLAG_STILL_HAVE_DATA;

            // Fixed delay kept from the original code before polling for audio.
            // NOTE(review): confirm whether the engine actually needs it.
            Thread.Sleep(1000);

            while (synth_status != SynthStatus.TTS_FLAG_DATA_END)
            {
                // This call can take a while.
                IntPtr audio_data = TTSDll.QTTSAudioGet(sessionID, ref audio_len, ref synth_status, ref ret);
                if (ret != 0)
                {
                    // Fail loudly instead of silently producing a truncated file.
                    throw new InvalidOperationException("获取合成音频错误,错误代码:" + ret);
                }

                if (audio_len > 0 && audio_data != IntPtr.Zero)
                {
                    byte[] data = new byte[audio_len];
                    Marshal.Copy(audio_data, data, 0, audio_len);
                    mStream.Write(data, 0, data.Length);
                }
            }

            completed = true;
        }
        finally
        {
            int endRet = TTSDll.QTTSSessionEnd(sessionID, "");
            sessionID = null;

            // Only report a session-end failure when nothing else went wrong,
            // so an earlier exception is never masked by this one.
            if (completed && endRet != 0)
            {
                throw new InvalidOperationException("结束TTS会话错误,错误代码:" + endRet);
            }
        }
    }

    /// <summary>
    /// 44-byte WAV (RIFF) file header. Packing is fixed to 1 so the marshalled size
    /// never depends on the platform's default alignment.
    /// </summary>
    [StructLayout(LayoutKind.Sequential, Pack = 1)]
    private struct WAVE_Header
    {
        public int RIFF_ID;           // 4 bytes, 'RIFF'
        public int File_Size;         // 4 bytes, file length minus 8
        public int RIFF_Type;         // 4 bytes, 'WAVE'
        public int FMT_ID;            // 4 bytes, 'fmt '
        public int FMT_Size;          // 4 bytes, 16 or 18; 18 means extra info follows
        public short FMT_Tag;         // 2 bytes, encoding, usually 0x0001 (PCM)
        public ushort FMT_Channel;    // 2 bytes, channel count: 1 = mono, 2 = stereo
        public int FMT_SamplesPerSec; // 4 bytes, sample rate
        public int AvgBytesPerSec;    // 4 bytes, bytes of audio data per second
        public ushort BlockAlign;     // 2 bytes, bytes per sample frame
        public ushort BitsPerSample;  // 2 bytes, bits per sample
        public int DATA_ID;           // 4 bytes, 'data'
        public int DATA_Size;         // 4 bytes, length of the audio data
    }

    /// <summary>
    /// Builds the WAV header for 8 kHz, 16-bit, mono audio of the given length.
    /// </summary>
    /// <param name="data_len">Length of the audio data in bytes.</param>
    /// <returns>The populated WAV header structure.</returns>
    WAVE_Header getWave_Header(int data_len)
    {
        WAVE_Header wav_Header = new WAVE_Header();
        wav_Header.RIFF_ID = 0x46464952;      // "RIFF"
        wav_Header.File_Size = data_len + 36; // header (44) + data - 8
        wav_Header.RIFF_Type = 0x45564157;    // "WAVE"
        wav_Header.FMT_ID = 0x20746D66;       // "fmt "
        wav_Header.FMT_Size = 16;
        wav_Header.FMT_Tag = 0x0001;
        wav_Header.FMT_Channel = 1;           // mono
        wav_Header.FMT_SamplesPerSec = 8000;  // sample rate
        wav_Header.AvgBytesPerSec = 16000;    // 8000 samples/s * 2 bytes
        wav_Header.BlockAlign = 2;            // 2 bytes per sample frame
        wav_Header.BitsPerSample = 16;        // 16 bits per sample
        wav_Header.DATA_ID = 0x61746164;      // "data"
        wav_Header.DATA_Size = data_len;
        return wav_Header;
    }

    /// <summary>
    /// Converts a structure to its marshalled byte sequence.
    /// </summary>
    /// <param name="structure">The structure to convert.</param>
    /// <returns>The bytes of the marshalled structure.</returns>
    Byte[] StructToBytes(Object structure)
    {
        Int32 size = Marshal.SizeOf(structure);
        IntPtr buffer = Marshal.AllocHGlobal(size);
        try
        {
            Marshal.StructureToPtr(structure, buffer, false);
            Byte[] bytes = new Byte[size];
            Marshal.Copy(buffer, bytes, 0, size);
            return bytes;
        }
        finally
        {
            Marshal.FreeHGlobal(buffer);
        }
    }

    /// <summary>
    /// Reads a zero-terminated native string and decodes it with <see cref="Encoding.Default"/>.
    /// </summary>
    /// <param name="p">Pointer to a zero-terminated string in unmanaged memory.</param>
    /// <returns>The decoded string, or null when <paramref name="p"/> is <see cref="IntPtr.Zero"/>.</returns>
    public static string Ptr2Str(IntPtr p)
    {
        if (p == IntPtr.Zero)
        {
            return null;
        }

        List<byte> lb = new List<byte>();
        byte current = Marshal.ReadByte(p);
        while (current != 0)
        {
            lb.Add(current);
            p = p + 1;
            current = Marshal.ReadByte(p);
        }
        return Encoding.Default.GetString(lb.ToArray());
    }
}
使用的时候直接调用下面的方法就行。如果要改变音色或者播放速度,都可以通过合成参数来配置,这个小伙伴们可以自己研究一下;我调试了好几种配置,感觉现在这一组效果挺好。
/// <summary>
/// Synthesizes the given text to a wav file named after the text.
/// Failures are reported through <see cref="System.Diagnostics.Trace"/> rather than thrown.
/// </summary>
/// <param name="sourcewav">Text to synthesize; it is also used as the output file name.</param>
private void 文字转语音(String sourcewav)
{
    if (String.IsNullOrWhiteSpace(sourcewav))
    {
        // Nothing to synthesize; return before logging in to the engine.
        return;
    }

    try
    {
        // The text doubles as the file name, so replace characters that are
        // not allowed in file names before building the path.
        string fileName = sourcewav;
        foreach (char invalid in System.IO.Path.GetInvalidFileNameChars())
        {
            fileName = fileName.Replace(invalid, '_');
        }

        // Fill in the directory where the file should be saved.
        // The path is built before logging in so a bad path cannot leave a login without a logout.
        string strPath = System.IO.Path.Combine(***, fileName + ".wav");

        // The appid is the one issued for the offline package purchased from the iFlytek site.
        string login_params = "appid = ******, work_dir = .";
        iFlyTTS tts = new iFlyTTS(login_params);
        tts.MultiSpeek(sourcewav.Trim(), strPath);
    }
    catch (Exception e)
    {
        // Keep the best-effort behavior, but make the failure visible instead of discarding it.
        System.Diagnostics.Trace.TraceError("Text-to-speech failed: " + e);
    }
}