Fixed predictions when using recorded audio files

I've implemented a model that was trained on an audio dataset. It reaches 91% accuracy and gives mostly correct predictions when tested on different files. However, when I use the model to predict on freshly recorded files (I record a clip, and the file is immediately sent for prediction), I always get only 2 different results.

I'm not sure what I'm doing wrong. The recorded files are the same length as the files in the training dataset. Below are the recording code and the prediction code.

Recording code.

import pyaudio
import math
import struct
import wave
import time
import os

# Capture format shared by the PyAudio stream and the WAV writer.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
# Number of frames requested per stream read / per PyAudio buffer.
chunk = 1024
# Bytes per sample; Recorder.rms() divides the buffer length by this.
swidth = 2

# Factor each signed 16-bit sample is multiplied by in Recorder.rms().
SHORT_NORMALIZE = 1.0 / 32768.0

# Recorder.record() compares Recorder.rms() (which returns RMS * 1000)
# against this level; every chunk at or above it pushes the stop time
# another TIMEOUT_LENGTH seconds into the future.
Threshold = 10
TIMEOUT_LENGTH = 2.5

# Directory Recorder.write() counts files in and saves new WAV files to.
f_name_directory = r'C:\Users\x\recording'
# Not read by the code below; Recorder.write() uses its own local `filename`.
filename = ""

class Recorder:
    """Record audio from a PyAudio stream and save it as a numbered WAV file.

    The stream is opened in ``__init__`` using the module-level ``FORMAT``,
    ``CHANNELS``, ``RATE`` and ``chunk`` settings.  ``listen()`` captures one
    recording; ``close()`` releases the audio resources afterwards.
    """

    @staticmethod
    def rms(frame):
        """Return the RMS level of a buffer of 16-bit samples, times 1000.

        Args:
            frame: Raw bytes as returned by ``stream.read``; interpreted as
                native-endian signed 16-bit samples.

        Returns:
            The root-mean-square of the samples after scaling each one by
            ``SHORT_NORMALIZE``, multiplied by 1000.  An empty buffer yields
            ``0.0`` instead of dividing by zero.
        """
        # Integer division: the sample count must be an int, and a trailing
        # partial sample (odd byte count) is ignored rather than crashing.
        count = len(frame) // swidth
        if count == 0:
            return 0.0
        shorts = struct.unpack("%dh" % count, frame[:count * swidth])

        sum_squares = sum((sample * SHORT_NORMALIZE) ** 2 for sample in shorts)
        return math.sqrt(sum_squares / count) * 1000

    def __init__(self):
        """Create the PyAudio instance and open the audio stream."""
        self.p = pyaudio.PyAudio()
        # NOTE(review): output=True also opens a playback stream although
        # this class only ever reads from it; kept as in the original --
        # confirm whether it can be dropped.
        self.stream = self.p.open(format=FORMAT,
                                  channels=CHANNELS,
                                  rate=RATE,
                                  input=True,
                                  output=True,
                                  frames_per_buffer=chunk)

    def close(self):
        """Stop the stream and release the PyAudio instance."""
        self.stream.stop_stream()
        self.stream.close()
        self.p.terminate()

    def record(self):
        """Record until ``TIMEOUT_LENGTH`` seconds pass without loud audio.

        Every chunk whose ``rms`` is at least ``Threshold`` pushes the stop
        time ``TIMEOUT_LENGTH`` seconds ahead.  The collected chunks are
        then handed to ``write``.

        Returns:
            The path of the WAV file that was written.
        """
        print('Noise detected, recording beginning')
        rec = []
        # Monotonic clock: the timeout must not be affected by system clock
        # adjustments while recording.
        end = time.monotonic() + TIMEOUT_LENGTH

        while time.monotonic() <= end:
            data = self.stream.read(chunk)
            if self.rms(data) >= Threshold:
                end = time.monotonic() + TIMEOUT_LENGTH
            rec.append(data)

        return self.write(b''.join(rec))

    def write(self, recording):
        """Save raw audio bytes as ``<n>.wav`` inside ``f_name_directory``.

        ``n`` starts at the number of entries already in the directory and
        is increased until the name is unused, so an existing recording is
        never overwritten.

        Args:
            recording: Raw sample bytes in the stream's format.

        Returns:
            The path of the file that was written.
        """
        os.makedirs(f_name_directory, exist_ok=True)
        index = len(os.listdir(f_name_directory))
        filename = os.path.join(f_name_directory, '{}.wav'.format(index))
        while os.path.exists(filename):
            index += 1
            filename = os.path.join(f_name_directory, '{}.wav'.format(index))

        # The context manager closes the file even if a write fails.
        with wave.open(filename, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self.p.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(recording)

        print('Written to file: {}'.format(filename))
        print('Returning to listening')
        return filename

    def listen(self):
        """Read one chunk from the stream, then record a single clip.

        Returns:
            The path of the WAV file that was written.
        """
        print('Listening beginning')

        # The first chunk is read and dropped, as in the original code.
        # NOTE(review): recording starts unconditionally -- Threshold is only
        # used inside record() to extend the timeout.  If recording should
        # wait for sound, loop here until rms(chunk) >= Threshold.
        self.stream.read(chunk)

        return self.record()
        
   

# Build the recorder (this opens the audio stream) and capture one clip.
# There is no ``__main__`` guard, so this also runs when the module is
# imported, not only when it is executed as a script.
a = Recorder()

a.listen()

Prediction code

import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
from tensorflow.keras.models import load_model
import extract_features
import nn
import svm
import sys
import untitled2
import os
import ffmpeg
import glob
import subprocess
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense
from tensorflow.python.framework import ops
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd

# Reset TensorFlow's global default graph before any model is loaded.
ops.reset_default_graph()


# Recordings directory (same path the recording script uses).
f_name_directory = r'C:\Users\x\recording'
# Not read anywhere in the visible code.
filename2=""

def get_train_test(X, y):
    """Split features and labels into shuffled train and test partitions.

    Delegates to ``train_test_split`` with ``test_size=0.2``,
    ``shuffle=True`` and a fixed ``random_state`` of 42.

    Args:
        X: Feature collection accepted by ``train_test_split``.
        y: Labels aligned with ``X``.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    split_options = {"test_size": 0.2, "random_state": 42, "shuffle": True}
    train_X, test_X, train_y, test_y = train_test_split(X, y, **split_options)
    return train_X, test_X, train_y, test_y

    

if __name__ == "__main__":
    # Command-line entry point: ``python <script> cnn|svm``.
    #   cnn -- load trained_cnn.h5 and classify the newest file in recording/
    #   svm -- run svm.svm() on the features in final_features3.csv
    if len(sys.argv) < 2:
        # Without this check a missing argument surfaces as a bare IndexError.
        sys.exit("usage: {} cnn|svm".format(sys.argv[0]))
    mode = sys.argv[1]
    print(mode)

    # extract features
    print("Extracting features..")

    features_df = pd.read_csv("final_features3.csv")
    print(features_df.shape)
    print("*********************************************************")

    if mode == "cnn":
        # NOTE(review): this pattern is relative to the current working
        # directory, while the recorder saves into f_name_directory --
        # confirm both point at the same folder.
        list_of_files = glob.glob('recording/*')
        if not list_of_files:
            # max() on an empty list would raise an unhelpful ValueError.
            sys.exit("No recordings found in 'recording/'")
        latest_file = max(list_of_files, key=os.path.getctime)
        print(latest_file)

        model = load_model("trained_cnn.h5")
        prediction_feature = extract_features.get_features(latest_file)
        # Add a leading batch axis and two trailing singleton axes, exactly
        # as the original pair of expand_dims calls did.
        prediction_feature = np.expand_dims(np.array([prediction_feature]), axis=2)
        prediction_feature = np.expand_dims(prediction_feature, axis=3)

        # Run the model once and reuse the result for every report below.
        prob = model.predict(prediction_feature)
        print(np.argmax(prob))
        print(prob)
        # ndarray.sort() sorts in place and returns None, so printing its
        # result always showed "None"; np.sort returns the sorted copy.
        print(np.sort(prob))

    elif mode == "svm":
        svm.svm(features_df)

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source

Fixed predictions when using recorded audio files

Sources

Related Questions