Introduction
This project makes it easy to implement speech synthesis with PaddlePaddle. It includes simple sample code, a GUI program, and a Flask web interface that Android applications can call.
Source Code Address: Download
Video Tutorial Address: Bilibili
- First, let’s write a simple program to achieve speech synthesis with the following code:
import os
import warnings
import wave
import paddle
import pyaudio
import soundfile as sf
warnings.filterwarnings("ignore")
from paddlespeech.t2s.frontend.zh_frontend import Frontend
# ---- What to synthesize and where to put the result ----
# Sentence that will be turned into speech
text = '我是夜雨飘零,我爱深度学习!'
# Destination of the generated audio file
output_path = 'output/1.wav'

# ---- Model files ----
# Location of the acoustic model
am_model_path = 'models/fastspeech2/model'
# Location of the vocoder model
voc_model_path = 'models/wavegan/model'
# Phoneme dictionary that belongs to the acoustic model
phones_dict_path = 'models/fastspeech2/phone_id_map.txt'

# ---- Build the pipeline stages ----
# Text frontend: converts raw text into the ids the acoustic model consumes
frontend = Frontend(g2p_model='g2pM', phone_vocab_path=phones_dict_path)
# Acoustic model: its output is fed to the vocoder below
am_inference = paddle.jit.load(am_model_path)
# Vocoder: turns the acoustic model output into a waveform
voc_inference = paddle.jit.load(voc_model_path)
# Convert the text into model input. `phone_ids` is consumed below as a
# sequence of id tensors, one per chunk of text (presumably one per sentence,
# since merge_sentences=False -- confirm against the PaddleSpeech frontend).
input_ids = frontend.get_input_ids(text, merge_sentences=False)
phone_ids = input_ids['phone_ids']
print(phone_ids)

# Fail with a clear message instead of an AttributeError on `None.numpy()`
# when the frontend produced nothing to synthesize (e.g. empty text).
if len(phone_ids) == 0:
    raise ValueError(f'No phoneme ids were produced for text: {text!r}')

# Synthesize every chunk, then join the waveforms with a single concat.
# Collecting first avoids re-copying the accumulated audio on each iteration.
wav_parts = []
for part_phone_ids in phone_ids:
    # Acoustic model output for this chunk
    mel = am_inference(part_phone_ids)
    # Vocoder output (waveform) for this chunk
    wav_parts.append(voc_inference(mel))
wav_all = paddle.concat(wav_parts)

# Tensor to numpy so soundfile can write it
wav = wav_all.numpy()

# Only create the parent directory when the path actually has one;
# os.makedirs('') raises FileNotFoundError.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# NOTE(review): 24000 is presumably the sample rate the models generate at --
# confirm before swapping in different models.
sf.write(output_path, wav, samplerate=24000)
print(f'Audio saved: {output_path}')
# Play the saved wav file through the default audio output device.
# Number of frames read from the file and written to the device per iteration
chunk = 1024
# The context manager closes the wave reader even if playback fails.
with wave.open(output_path, 'rb') as wf:
    p = pyaudio.PyAudio()
    try:
        # Open an output stream whose format mirrors the wav file's header
        stream = p.open(format=p.get_format_from_width(width=wf.getsampwidth()),
                        channels=wf.getnchannels(),
                        rate=wf.getframerate(),
                        output=True)
        try:
            # readframes() returns empty bytes once the file is exhausted
            data = wf.readframes(chunk)
            while data:
                stream.write(data)
                data = wf.readframes(chunk)
            stream.stop_stream()
        finally:
            # Release the stream even when writing raised
            stream.close()
    finally:
        # Always release the PyAudio instance
        p.terminate()
- A `gui.py` interface program is provided for speech synthesis through a graphical interface.

- Additionally, `server.py` is provided, which uses Flask to provide a web interface that Android applications or applets can call for speech synthesis.
