GMM.py
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 17 10:59:59 2017
@author: harryholt
GMM.py
Purpose:
- Generate the GMM object in reduced space using X_train
- Retain information on the classes in reduced and reconstructed space
- Assign labels and posterior probabilities to the full training dataset X
"""
import pickle
from sklearn import mixture
import numpy as np
import time
import ClassProperties
import Print
start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
def create(address, runIndex, n_comp, cov_type):
    """ Takes the training dataset and creates the GMM object """
    print("GMM.create")
    # load col_reduced
    col_reduced = None
    col_reduced = Print.readColreduced(address, runIndex)
    col_reduced_array = np.arange(col_reduced)
    # load training data in reduced pca space
    lon_train, lat_train, dynHeight_train, X_train_array, varTime_train = \
        None, None, None, None, None
    lon_train, lat_train, dynHeight_train, X_train_array, varTime_train = \
        Print.readPCAFromFile_Train(address, runIndex, col_reduced)
    # calculate GMM object
    gmm, gmm_weights, gmm_means, gmm_covariances = \
        None, None, None, None
    gmm, gmm_weights, gmm_means, gmm_covariances = \
        GaussianMixtureModel(address, runIndex, n_comp, X_train_array, cov_type)
    """ Print the information on the classes to a file """
    class_number_array = np.arange(0, n_comp).reshape(-1, 1)
    Print.printGMMclasses(address, runIndex, class_number_array, gmm_weights, \
                          gmm_means, gmm_covariances, col_reduced_array, 'reduced')
###############################################################################
def apply(address, runIndex, n_comp):
    print("GMM.apply")
    # load col_reduced value
    col_reduced = None
    col_reduced = Print.readColreduced(address, runIndex)
    # load full data array - X
    lon, lat, dynHeight, X_array, varTime = None, None, None, None, None
    lon, lat, dynHeight, X_array, varTime = Print.readPCAFromFile(address, \
                                                runIndex, col_reduced)
    # load GMM object
    gmm = None
    with open(address+'Objects/GMM_Object.pkl', 'rb') as infile:
        gmm = pickle.load(infile)
    # calculate the labels and probabilities of the profiles
    sortedLabels, labels, post_prob = None, None, None
    # predict classes based on the fitted GMM, output labels
    labels = gmm.predict(X_array)  # X_array shape (n_profiles, col_reduced); labels shape (n_profiles,)
    Print.printLabelsUnsorted(address, runIndex, lon, lat, dynHeight, varTime, labels)
    # sort labels by mean SST of each class
    # (this is done during the "plot" stage now)
    # allDF, sortedLabels, old2new = ClassProperties.main(address, runIndex, n_comp)
    # Print.printLabels(address, runIndex, lon, lat, dynHeight, varTime, sortedLabels)
    # calculate posterior probabilities
    post_prob = gmm.predict_proba(X_array)  # shape (n_profiles, n_comp)
    # class_number_array is needed as input to printPosteriorProb
    class_number_array = np.arange(0, n_comp).reshape(-1, 1)
    # print labels and probabilities to file
    Print.printPosteriorProb(address, runIndex, lon, lat, dynHeight, \
                             varTime, post_prob, class_number_array)
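    # note: the hard labels above are the argmax of the posterior probabilities,
    # i.e. labels[i] == np.argmax(post_prob[i]) for every profile i; post_prob
    # additionally records how confidently each profile is assigned to its class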
def GaussianMixtureModel(address, runIndex, n_comp, X_train, cov_type):
    print("GMM.GaussianMixtureModel")
    gmm = None
    gmm = mixture.GaussianMixture(n_components=n_comp, \
                                  covariance_type=cov_type)
#    gmm = mixture.BayesianGaussianMixture(n_components=n_comp, \
#                                          covariance_type=cov_type)
    # use training dataset to "fit" the Gaussian mixture model
    gmm.fit(X_train)
    # store the GMM object
    gmm_store = address+"Objects/GMM_Object.pkl"
    with open(gmm_store, 'wb') as output:
        gmmObject = gmm
        pickle.dump(gmmObject, output, pickle.HIGHEST_PROTOCOL)
    del gmmObject
    # weights, means and covariances of the fitted mixture
    weights, means, covariances = None, None, None
    weights = np.squeeze(gmm.weights_)  # shape (n_components,)
    # note: "weights" is the same for each col_red
    means = np.squeeze(gmm.means_)  # shape (n_components, n_features)
    covariances = abs(np.squeeze(gmm.covariances_))  # shape (n_components, n_features) for 'diag' covariance
    return gmm, weights, means, covariances
print('GMM runtime = ', time.perf_counter() - start_time, ' s')
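
# -----------------------------------------------------------------------------
# Minimal self-contained sketch of the technique used above, for illustration
# only: it fits a GaussianMixture to synthetic stand-in data instead of the
# PCA-reduced profiles that the real pipeline reads via Print.readPCAFromFile,
# and none of the names below (X_demo, n_comp_demo, ...) belong to the pipeline.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    # pretend "reduced space" data: 300 profiles, 3 retained PCA components,
    # drawn from two well-separated clusters
    X_demo = np.vstack([rng.normal(-2.0, 0.5, size=(150, 3)),
                        rng.normal(+2.0, 0.5, size=(150, 3))])
    n_comp_demo = 2
    gmm_demo = mixture.GaussianMixture(n_components=n_comp_demo,
                                       covariance_type='diag')
    gmm_demo.fit(X_demo)                              # analogous to GaussianMixtureModel()
    labels_demo = gmm_demo.predict(X_demo)            # analogous to the labels in apply()
    post_prob_demo = gmm_demo.predict_proba(X_demo)   # shape (n_profiles, n_comp_demo)
    print('demo class weights:', np.round(gmm_demo.weights_, 3))
    print('demo label counts:', np.bincount(labels_demo))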