Do I have an error in my MCD evaluation? (Python code)
I was trying to implement a script that evaluates synthesis quality using mel-cepstral distortion (MCD). I tried to code it according to the second equation from HERE. The problem is that when I evaluate noise I get 535, and when I evaluate a quite nice-sounding sample I get 515. I think something must be wrong in my calculation, because the difference should be far greater given that the quality of the synthesis is much, much better. Would anyone check my code? Maybe there is a mistake I am not seeing.
# MFCC features of the audio file stored at output_path (the path the
# synthesis script is told to write to further down in this file).
# NOTE(review): numcep (80) is larger than nfilt (26); confirm how many
# coefficients mfcc() actually returns with these settings.
rate, sig = wav.read(output_path)
mc = python_speech_features.base.mfcc(
    sig,
    samplerate=rate,
    winlen=0.025,
    winstep=0.01,
    numcep=80,
    nfilt=26,
    nfft=1024,
    lowfreq=0,
    highfreq=None,
    preemph=0.97,
    ceplifter=22,
    appendEnergy=True,
)

# MFCC features of the LJSpeech recording named by wavfilename, extracted
# with exactly the same settings so the two matrices are comparable.
rate, sig = wav.read("/content/LJSpeech-1.1/wavs/" + wavfilename + ".wav")
mc2 = python_speech_features.base.mfcc(
    sig,
    samplerate=rate,
    winlen=0.025,
    winstep=0.01,
    numcep=80,
    nfilt=26,
    nfft=1024,
    lowfreq=0,
    highfreq=None,
    preemph=0.97,
    ceplifter=22,
    appendEnergy=True,
)
import math
import numpy as np
def mcd(C, C_hat):
    """Return the mean mel-cepstral distortion between two feature matrices.

    Both inputs are treated as 2-D arrays with one row per frame and one
    column per cepstral coefficient. The longer input is truncated to the
    frame count of the shorter one; no time alignment is performed, so the
    frames are compared position by position.

    Every column takes part in the distance, including column 0.
    NOTE(review): MCD is commonly defined without the 0th (energy)
    coefficient -- confirm whether callers should drop that column first.

    Args:
        C: Array-like of shape (frames, coefficients).
        C_hat: Array-like of shape (frames, coefficients) with the same
            number of coefficients as ``C``.

    Returns:
        The per-frame Euclidean distance, averaged over the compared frames
        and scaled by ``10 / ln(10) * sqrt(2)``.

    Raises:
        ValueError: If an input is not 2-D, the coefficient counts differ,
            or there is no frame to compare.
    """
    # Work in float64: integer inputs (unsigned ones in particular) would
    # otherwise wrap around in the subtraction and overflow when squared.
    x = np.asarray(C, dtype=np.float64)
    y = np.asarray(C_hat, dtype=np.float64)

    if x.ndim != 2 or y.ndim != 2:
        raise ValueError(
            "mcd expects 2-D (frames, coefficients) inputs, got shapes "
            f"{x.shape} and {y.shape}"
        )
    if x.shape[1] != y.shape[1]:
        # Without this check a (T, 1) input would silently broadcast against
        # a (T, D) input and produce a meaningless score.
        raise ValueError(
            "coefficient counts differ: "
            f"{x.shape[1]} vs {y.shape[1]}"
        )

    minShape = min(x.shape[0], y.shape[0])
    if minShape == 0:
        # np.mean over zero frames would return NaN with only a warning.
        raise ValueError("mcd needs at least one frame in each input")

    K = 10 / np.log(10) * np.sqrt(2)
    diff = x[:minShape] - y[:minShape]
    return K * np.mean(np.sqrt(np.sum(diff ** 2, axis=1)))
# Evaluate the distortion between the two module-level feature matrices
# mc and mc2; left as a bare expression so a notebook cell displays it.
mcd(mc, mc2)
def generateAudioAndMCD(filePath, text):
    """Synthesize ``text`` and return its MCD against the audio at ``filePath``.

    Runs the TTS ``synthesize.py`` script so that it writes the generated
    audio to the module-level ``output_path``, extracts MFCCs from that file
    and from ``filePath`` with identical settings, and passes both matrices
    to ``mcd``.

    Relies on the module-level names ``model_path``, ``config_path``,
    ``vocoder_path``, ``output_path``, ``wav``, ``python_speech_features``
    and ``mcd``.

    Args:
        filePath: Path of the reference wav file.
        text: Text to synthesize.

    Returns:
        The value of ``mcd(synthesized_features, reference_features)``.

    Raises:
        subprocess.CalledProcessError: If the synthesis script exits with a
            non-zero status, instead of silently scoring a stale output file.
    """
    # Local import so this block does not depend on the file's import list.
    import subprocess

    # Pass an argument list instead of interpolating into a shell string:
    # text containing quotes or other shell metacharacters would otherwise
    # break (or alter) the command.
    # NOTE(review): unlike the notebook `!` magic, the script's console
    # output may not be echoed into the cell output.
    command = [
        "python", "/content/TTS/TTS/bin/synthesize.py",
        "--text", text,
        "--model_path", str(model_path),
        "--config_path", str(config_path),
    ]
    if vocoder_path != "":
        command += ["--vocoder_path", str(vocoder_path)]
    command += ["--out_path", str(output_path)]
    subprocess.run(command, check=True)

    # Shared extraction settings; only the sample rate differs per file.
    mfcc_options = dict(
        winlen=0.025,
        winstep=0.01,
        numcep=80,
        nfilt=26,
        nfft=1024,
        lowfreq=0,
        highfreq=None,
        preemph=0.97,
        ceplifter=22,
        appendEnergy=True,
    )

    # Features of the freshly synthesized audio. The original code stored
    # these in `mc2` and immediately overwrote them with the reference
    # features, so the comparison used a stale module-level `mc` instead.
    (rate, sig) = wav.read(output_path)
    mc = python_speech_features.base.mfcc(sig, samplerate=rate, **mfcc_options)

    # Features of the reference recording.
    (rate, sig) = wav.read(filePath)
    mc2 = python_speech_features.base.mfcc(sig, samplerate=rate, **mfcc_options)

    return mcd(mc, mc2)
# Average the MCD over every utterance listed in the validation metadata.
# Each non-blank line is expected to hold three '|'-separated fields; the
# first names the wav file and the third is the text to synthesize.
totalMCD = 0
count = 0
with open("LJSpeech-1.1/metadata_val.csv", 'r', encoding="utf-8") as f:
    # Iterate the file lazily rather than loading every line up front.
    for line in f:
        if not line.strip():
            # A blank (e.g. trailing) line would make the unpacking below fail.
            continue
        # Drop the line terminator so it does not end up inside `text`.
        wavfilename, _, text = line.rstrip("\r\n").split("|")
        filePath = "/content/LJSpeech-1.1/wavs/" + wavfilename + ".wav"
        totalMCD += generateAudioAndMCD(filePath, text)
        count += 1

if count == 0:
    # Fail with a clear message instead of a ZeroDivisionError below.
    raise ValueError("no utterances found in LJSpeech-1.1/metadata_val.csv")

averageMCD = totalMCD / count
print(averageMCD)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
