Getting a File Read Error using Python SpeechRecognition after converting mp4 to wav

I have been using the first script below to batch-convert mp4 files to wav, and the second script to transcribe the speech to text. This ran successfully for a while and converted some 2000 audio files. All files are less than 60 seconds long. Then speech_recognition suddenly stopped working and now gives the following error for every file.

"File Read Error: Audio file could not be read as PCM WAV, AIFF/AIFF-C, or Native FLAC; check if file is corrupted or in another format"

Any help would be greatly appreciated.

This is my code for converting mp4 to wav:

#!/usr/bin/env python3
#convert mp4 to wav
import os
import sys
import glob
from pydub import AudioSegment


# Ask for the root folder; every '.mp4' found beneath it is converted to a
# '.wav' file with the same base name, written next to the source file.
folder_path = input("Enter the path for the folder/directory : ")
print("\n Processing...")

# Remove surrounding quotes from the string. strip() is safe on an empty
# answer, where indexing folder_path[0] would raise IndexError.
folder_path = folder_path.strip('"')

if not os.path.isdir(folder_path):
    print("Not a folder: " + folder_path)
    sys.exit(1)

# Work with absolute paths so nothing depends on the current working
# directory (no os.chdir needed while walking the tree).
folder_path = os.path.abspath(folder_path)

# Passing "a" as the first command-line argument regenerates '.wav' files
# that already exist. sys.argv[0] is the script's own name, so the option
# has to be read from sys.argv[1].
overwrite = len(sys.argv) > 1 and sys.argv[1] == "a"

# Count files with the same recursive walk the conversion uses, so the
# numbers shown in the prompt describe what will actually be processed.
mp4List = []
wav_count = 0
for subdir, dirs, files in os.walk(folder_path):
    for srcfile in files:
        ext = os.path.splitext(srcfile)[1].lower()
        if ext == ".mp4":
            # Join with the directory being walked, not the root, so files
            # in subfolders resolve to their real location.
            mp4List.append(os.path.join(subdir, srcfile))
        elif ext == ".wav":
            wav_count += 1

if input(str(len(mp4List)) + r" '.mp4' files & " + str(wav_count) + r" '.wav' files found. Continue (y/n) : ") != "y":
    print('canceled by user')
    sys.exit()

# loop through files
print(folder_path)
for infile in mp4List:
    wavpath = os.path.splitext(infile)[0] + ".wav"

    # Keep an existing conversion unless the overwrite option was given.
    if os.path.isfile(wavpath) and not overwrite:
        continue

    print(infile)
    print(wavpath)
    audio = AudioSegment.from_file(infile, format="mp4")
    # Lowercase "wav" is the format name pydub documents as natively
    # supported, so the PCM data is written directly instead of being
    # re-encoded by ffmpeg. export() opens the target for writing, so an
    # existing file is replaced, and it returns the open handle, which is
    # closed here.
    # NOTE(review): whether the uppercase "WAV" name is what produced the
    # unreadable files is not confirmed - verify on a failing file.
    audio.export(wavpath, format="wav").close()

And this is my function for audio to text. I truncated it, as I had a lot of options for speech recognition engines, but it's not getting that far.

#!/usr/bin/env python3
import speech_recognition as sr
import os
import json
import atexit

# Module-level counters, all starting at zero. They are not referenced in the
# visible part of this script.
# NOTE(review): presumably tallies of files transcribed / failed / skipped,
# updated by the truncated code - confirm against the full script.
text_count = 0
fail_count = 0
skip_count = 0

def get_audio_text(audio_file, TRANSLATE_OPTION):
    """Return the text recognised in ``audio_file`` by the chosen engine.

    Args:
        audio_file: Audio file handed to ``sr.AudioFile``.
        TRANSLATE_OPTION: Engine selector. ``"s"`` or ``"sphinx"`` uses
            Sphinx; ``"g"`` or ``"google"`` uses Google Speech Recognition.

    Returns:
        The recognised text on success. If the file cannot be read, a
        string starting with ``"File Read Error: "`` (also printed). If the
        engine fails, a string describing that failure. For any other
        ``TRANSLATE_OPTION`` value, ``audio_file`` itself is returned.
    """
    recognizer = sr.Recognizer()

    # Load the audio from the file; any failure here is reported as text
    # rather than raised.
    try:
        with sr.AudioFile(audio_file) as source:
            recording = recognizer.record(source)
    except Exception as err:
        message = "File Read Error: " + str(err)
        print(message)
        return message

    if TRANSLATE_OPTION in ("s", "sphinx"):
        # Sphinx engine
        try:
            return recognizer.recognize_sphinx(recording)
        except sr.UnknownValueError:
            return "Sphinx could not understand audio"
        except sr.RequestError as err:
            return "Sphinx error; {0}".format(err)

    if TRANSLATE_OPTION in ("g", "google"):
        # Google Speech Recognition with the library's default API key; pass
        # `key="GOOGLE_SPEECH_RECOGNITION_API_KEY"` to recognize_google to
        # use another one.
        try:
            return recognizer.recognize_google(recording)
        except sr.UnknownValueError:
            return "Google Speech Recognition could not understand audio"
        except sr.RequestError as err:
            return "Could not request results from Google Speech Recognition service; {0}".format(err)

    # No engine matched: hand the input path back unchanged.
    return audio_file

I am running on Windows 10 and have tried with both Python 3.10 and Python 3.9.



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source