Aim: To identify features that help us classify a given sound signal as an 'ahh' sound or not.
Attempt 1: Extract features using pyAudioAnalysis and try to classify each sound frame as 'ahh' or not 'ahh' based on a single feature.
# Experiment configuration.
# framesize       - length of one analysis window, in seconds
# slide_window_by - distance the window advances between frames, in seconds
# SOUND_PATH / FEATURE_PATH - base directories for the input sounds and the
#   generated feature files; file names are appended with plain string
#   concatenation, so both must keep their trailing slash.
framesize = 0.1
slide_window_by = 0.1
SOUND_PATH = '/home/vagabondtechie/Dropbox/Thesis/data/sounds/'
FEATURE_PATH = '/home/vagabondtechie/Dropbox/Thesis/data/features/'
# Echo the chosen settings. The parenthesised single-argument form prints the
# same text under the Python 2 print statement and also parses on Python 3,
# matching the print(...) calls used for the classification report.
print("Select window of size = %fs, then slide window by %fs." % (framesize, slide_window_by))
# Imports
from pyAudioAnalysis import audioBasicIO, audioFeatureExtraction
# Load the recording; the result is unpacked as (sampling rate, samples).
samplingRate, signal = audioBasicIO.readAudioFile(SOUND_PATH + 'ahh-sample1.wav')
# Express the window length and the slide step in samples
# (duration in seconds multiplied by the sampling rate).
window_in_samples = framesize * samplingRate
step_in_samples = slide_window_by * samplingRate
# Run short-term feature extraction over the whole signal with that window/step.
featureMatrix = audioFeatureExtraction.stFeatureExtraction(
    signal, samplingRate, window_in_samples, step_in_samples)
# Prepare data sets
import numpy
# Column index at which every row of the feature matrix is split: columns
# before it go to the training file, the remaining columns to the test file.
TRAIN_FRAME_COUNT = 600

# Dump the complete feature matrix unchanged.
numpy.savetxt(FEATURE_PATH + 'expt2-all.csv', featureMatrix, delimiter=',')

# Write one training/test file pair per row of the matrix; the files are
# numbered from 1 (expt2-trn-feature1.csv, expt2-tst-feature1.csv, ...).
# NOTE(review): the model-training code further down loads
# 'expt2-trn-all.csv' and 'expt2-tst-all.csv', which are not written here --
# confirm where those files are produced.
for feature_number, feature_row in enumerate(featureMatrix, start=1):
    # This data will be used to train classifiers
    numpy.savetxt(FEATURE_PATH + 'expt2-trn-feature%d.csv' % feature_number,
                  feature_row[:TRAIN_FRAME_COUNT], delimiter=',')
    # And this data, to test the classifier on.
    numpy.savetxt(FEATURE_PATH + 'expt2-tst-feature%d.csv' % feature_number,
                  feature_row[TRAIN_FRAME_COUNT:], delimiter=',')
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import numpy
# Create a model
model = LogisticRegression()
# model = SVC()

# Load the training features (X) and the matching labels (y).
# NOTE(review): presumably X holds one row per frame and one column per
# feature, and y one label per row -- confirm against how these CSV files are
# generated; 'expt2-trn-all.csv' is not written by the code visible above.
X = numpy.loadtxt(FEATURE_PATH + 'expt2-trn-all.csv', delimiter=',')
y = numpy.loadtxt(FEATURE_PATH + 'output-trn.csv')

# Optional visual check of the training data:
# from matplotlib import pyplot as plt
# plt.plot(X, y, marker='o',linestyle='None')
# plt.show()

# Train the model
model.fit(X, y)
# Optional inspection of the fitted model:
# print model.coef_
# print model.n_support_
# print model.intercept_

# Test the model: predict labels for the held-out data and keep the expected
# labels for comparison.
testDataset = numpy.loadtxt(FEATURE_PATH + 'expt2-tst-all.csv', delimiter=',')
expected = numpy.loadtxt(FEATURE_PATH + 'output-tst.csv')
predicted = model.predict(testDataset)
# Call form of print: same output as the Python 2 statement for a single
# argument, and consistent with the report printing below.
print(predicted)

# Summarise prediction quality against the expected labels.
from sklearn import metrics
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))