Expt 2: Identifying 'ahh' sounds in speech

Aim: To identify features that help classify a given sound frame as an 'ahh' sound or not

Attempt 1: Extract features using pyAudioAnalysis and try to classify a sound frame as 'ahh' or not 'ahh' based on a single feature

Steps

1. Initial data reading and dataset preparation

In [2]:
# Select framesize in seconds, sliding window size, base paths for sounds and features
framesize = 0.1
slide_window_by = 0.1
SOUND_PATH = '/home/vagabondtechie/Dropbox/Thesis/data/sounds/'
FEATURE_PATH = '/home/vagabondtechie/Dropbox/Thesis/data/features/'
print "Select window of size = %fs, then slide window by %fs." %(framesize, slide_window_by)
Select window of size = 0.100000s, then slide window by 0.100000s.
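
With the window advancing by its own length, frames do not overlap, so a recording yields roughly one frame per 0.1 s. A quick sketch of the arithmetic (the 600 training + 679 test frames used below thus correspond to roughly 128 s of audio):

# Number of complete frames in a recording of a given duration:
# floor((duration - framesize) / step) + 1
def approx_frame_count(duration_s, framesize_s, step_s):
    return int((duration_s - framesize_s) / step_s) + 1

print(approx_frame_count(128.0, framesize, slide_window_by))  # ~1279 frames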
In [3]:
# Imports
from pyAudioAnalysis import audioBasicIO, audioFeatureExtraction

# Read the audio file
[samplingRate, signal] = audioBasicIO.readAudioFile(SOUND_PATH + 'ahh-sample1.wav')

# Extract short-term features from the signal (window and step sizes are in samples)
featureMatrix = audioFeatureExtraction.stFeatureExtraction(signal, samplingRate, int(framesize * samplingRate), int(slide_window_by * samplingRate))
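
pyAudioAnalysis returns the short-term features as a matrix with one row per feature and one column per frame; in the version used here that is 34 rows (ZCR, energy and energy entropy, spectral shape features, 13 MFCCs and chroma features). Worth checking, because sklearn expects the transpose, with frames as rows:

# Shape is (num_features, num_frames); sklearn wants (num_frames, num_features)
print(featureMatrix.shape)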
In [27]:
# Prepare data sets
import numpy

# Save the full feature matrix (one row per feature, one column per frame)
numpy.savetxt(FEATURE_PATH + 'expt2-all.csv', featureMatrix, delimiter=',')

# Combined train/test splits, transposed so each row is a frame as sklearn
# expects; these are the files loaded by the training and testing cells below.
numpy.savetxt(FEATURE_PATH + 'expt2-trn-all.csv', featureMatrix[:, :600].T, delimiter=',')
numpy.savetxt(FEATURE_PATH + 'expt2-tst-all.csv', featureMatrix[:, 600:].T, delimiter=',')

for i in range(len(featureMatrix)):
    # Per-feature training split; used to train single-feature classifiers
    numpy.savetxt(FEATURE_PATH + 'expt2-trn-feature%d.csv' %(i+1), featureMatrix[i, :600], delimiter=',')

    # Per-feature test split, to test the classifiers on
    numpy.savetxt(FEATURE_PATH + 'expt2-tst-feature%d.csv' %(i+1), featureMatrix[i, 600:], delimiter=',')
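
The label files output-trn.csv and output-tst.csv loaded below are not produced in this notebook. A minimal sketch of how they could be generated, assuming hand-annotated (start, end) times of the 'ahh' segments in seconds (the interval values here are placeholders, not the actual annotations):

# Hypothetical hand-annotated 'ahh' intervals, in seconds
ahh_intervals = [(1.3, 2.1), (5.0, 5.6)]  # placeholder values

num_frames = featureMatrix.shape[1]
labels = numpy.zeros(num_frames)
for i in range(num_frames):
    # Label the frame 1 if its midpoint falls inside any 'ahh' interval
    midpoint = i * slide_window_by + framesize / 2.0
    if any(start <= midpoint <= end for start, end in ahh_intervals):
        labels[i] = 1

numpy.savetxt(FEATURE_PATH + 'output-trn.csv', labels[:600])
numpy.savetxt(FEATURE_PATH + 'output-tst.csv', labels[600:])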

2. Training a Logistic Regression Classifier

In [234]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import numpy

# Create a model (an SVM can be swapped in via the commented line)
model = LogisticRegression()
# model = SVC()

# Get the data: one row per frame, one column per feature
X = numpy.loadtxt(FEATURE_PATH + 'expt2-trn-all.csv', delimiter=',')
# Ground-truth labels, prepared separately: 1 = 'ahh' frame, 0 = anything else
y = numpy.loadtxt(FEATURE_PATH + 'output-trn.csv')

# Train the model
model.fit(X, y)
Out[234]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr',
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0)
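
Given the aim of finding useful features, the fitted weights offer a first, rough ranking: in logistic regression, a larger absolute coefficient means a stronger pull on the decision. The comparison is only fair if the features share a scale, which the raw pyAudioAnalysis features do not, so standardising X first (e.g. with sklearn's StandardScaler) would be needed for a trustworthy ranking. A sketch:

# Rank features by the absolute value of their logistic-regression weight.
# Caveat: standardise the features first for these magnitudes to be comparable.
weights = numpy.abs(model.coef_[0])
for idx in numpy.argsort(weights)[::-1][:5]:
    print('feature %d: |weight| = %f' % (idx + 1, weights[idx]))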

3. Making predictions on the test data

In [235]:
# Test the model on the held-out frames
testDataset = numpy.loadtxt(FEATURE_PATH + 'expt2-tst-all.csv', delimiter=',')
# Ground-truth labels for the test frames
expected = numpy.loadtxt(FEATURE_PATH + 'output-tst.csv')
predicted = model.predict(testDataset)
print predicted
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.  1.  0.  0.  0.
  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  1.  1.  1.  1.  0.  0.  0.  0.  1.  1.  1.  0.  1.  1.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.
  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  1.  1.  1.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  1.  1.  1.  1.  1.  1.  1.  0.  1.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.
  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  1.  1.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  0.
  1.  1.  1.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.
  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.
  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  1.  1.  1.  1.  1.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  1.]
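
The predictions are per frame; to locate 'ahh's in the speech itself, consecutive positive frames can be merged into time segments. A minimal sketch, using the frame timing defined above (frame i starts at i * slide_window_by seconds):

# Merge runs of consecutive 'ahh' frames into (start, end) segments in seconds
segments = []
start = None
for i, label in enumerate(predicted):
    if label == 1 and start is None:
        start = i * slide_window_by          # run of 'ahh' frames begins
    elif label == 0 and start is not None:
        segments.append((start, (i - 1) * slide_window_by + framesize))
        start = None
if start is not None:                        # run reaches the last frame
    segments.append((start, (len(predicted) - 1) * slide_window_by + framesize))
print(segments)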

4. Printing evaluation metrics

In [236]:
from sklearn import metrics
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
             precision    recall  f1-score   support

        0.0       0.93      0.95      0.94       576
        1.0       0.69      0.58      0.63       103

avg / total       0.89      0.90      0.89       679

[[549  27]
 [ 43  60]]
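
Overall accuracy is around 90%, but the model recovers only 60 of the 103 'ahh' frames (recall 0.58), so the question posed in the aim remains open: which individual features discriminate best? Since Attempt 1 sets out to classify on a single feature, one way to answer it is to train a classifier on each feature column alone and compare test scores. A sketch using the per-feature files saved in step 1 (assuming the 34 short-term features noted above):

from sklearn import metrics
from sklearn.linear_model import LogisticRegression
import numpy

num_features = 34  # assumed count of short-term features saved in step 1
scores = []
for i in range(1, num_features + 1):
    X1 = numpy.loadtxt(FEATURE_PATH + 'expt2-trn-feature%d.csv' % i).reshape(-1, 1)
    T1 = numpy.loadtxt(FEATURE_PATH + 'expt2-tst-feature%d.csv' % i).reshape(-1, 1)
    m = LogisticRegression()
    m.fit(X1, y)
    scores.append((metrics.f1_score(expected, m.predict(T1)), i))

# Highest F1 first: the single features most useful for spotting 'ahh' frames
for f1, i in sorted(scores, reverse=True)[:5]:
    print('feature %d: F1 = %.2f' % (i, f1))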