forked from dmckee5/BigVidGAN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinception_utils.py
executable file
·356 lines (321 loc) · 13.9 KB
/
inception_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
''' Inception utilities
This file contains methods for calculating IS and FID, using either
the original numpy code or an accelerated fully-pytorch version that
uses a fast newton-schulz approximation for the matrix sqrt. There are also
methods for acquiring a desired number of samples from the Generator,
and parallelizing the inbuilt PyTorch inception network.
NOTE that Inception Scores and FIDs calculated using these methods will
*not* be directly comparable to values calculated using the original TF
IS/FID code. You *must* use the TF model if you wish to report and compare
numbers. This code tends to produce IS values that are 5-10% lower than
those obtained through TF.
'''
import numpy as np
from scipy import linalg # For numpy FID
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter as P
from torchvision.models.inception import inception_v3
from torchvision.models.video import r2plus1d_18
# Module that wraps the inception network to enable use with dataparallel and
# returning pool features and logits.
class WrapInception(nn.Module):
    """Wrap an Inception-v3 style network so it returns pooled features and logits.

    The wrapped network is driven layer by layer (rather than through its own
    ``forward``) so that the spatially averaged activations of the last mixed
    block can be returned next to the classifier logits. Being a plain
    ``nn.Module``, the wrapper can be placed inside ``nn.DataParallel``.

    Args:
        net: network exposing the attributes used in ``forward``
            (``Conv2d_1a_3x3`` ... ``Mixed_7c`` and ``fc``).
    """

    def __init__(self, net):
        super().__init__()
        self.net = net
        # Per-channel normalisation statistics, stored as frozen parameters
        # shaped (1, C, 1, 1) so they broadcast over (B, C, H, W) inputs.
        channel_mean = torch.tensor([0.485, 0.456, 0.406])
        channel_std = torch.tensor([0.229, 0.224, 0.225])
        self.mean = P(channel_mean.view(1, -1, 1, 1), requires_grad=False)
        self.std = P(channel_std.view(1, -1, 1, 1), requires_grad=False)

    def forward(self, x):
        """Return ``(pool, logits)`` for a batch of images.

        Args:
            x: image batch indexed as (B, C, H, W); the ``(x + 1) / 2``
                rescaling suggests values in [-1, 1] -- TODO confirm with
                the caller.

        Returns:
            pool: activations of the final mixed block averaged over all
                spatial positions, shape (B, channels).
            logits: output of ``net.fc`` applied to ``pool``.
        """
        # Rescale, then standardise each channel.
        x = (x + 1.) / 2.0
        x = (x - self.mean) / self.std

        # Resize to the 299 x 299 resolution the layers below are fed with.
        if (x.shape[2], x.shape[3]) != (299, 299):
            x = F.interpolate(x, size=(299, 299), mode='bilinear',
                              align_corners=True)

        backbone = self.net

        # Stem, first part (original annotation: 299x299x3 -> 147x147x64).
        for layer_name in ('Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3'):
            x = getattr(backbone, layer_name)(x)
        x = F.max_pool2d(x, kernel_size=3, stride=2)

        # Stem, second part (original annotation: 73x73x64 -> 71x71x192).
        for layer_name in ('Conv2d_3b_1x1', 'Conv2d_4a_3x3'):
            x = getattr(backbone, layer_name)(x)
        x = F.max_pool2d(x, kernel_size=3, stride=2)

        # Mixed blocks (original annotation: 35x35x192 -> 8x8x2048).
        for layer_name in ('Mixed_5b', 'Mixed_5c', 'Mixed_5d',
                           'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
                           'Mixed_6d', 'Mixed_6e',
                           'Mixed_7a', 'Mixed_7b', 'Mixed_7c'):
            x = getattr(backbone, layer_name)(x)

        # Average over every spatial position: (B, C, H, W) -> (B, C).
        batch, channels = x.size(0), x.size(1)
        pool = torch.mean(x.view(batch, channels, -1), 2)

        # Dropout is called with training=False, i.e. it is the identity here.
        features = F.dropout(pool, training=False).view(pool.size(0), -1)
        logits = backbone.fc(features)
        return pool, logits
#xiaodan: added by xiaodan
class WrapR2plus1d_18(nn.Module):
    """Wrap a video classification network to return pooled features and logits.

    The wrapped network's children are split into "everything but the last
    child" (run as ``poolModel`` to obtain pooled features) and the last child
    (``removed``, applied to those features to obtain logits).

    Args:
        net: network whose last child maps pooled features to class logits
            (e.g. torchvision's ``r2plus1d_18``).
    """

    def __init__(self, net):
        super(WrapR2plus1d_18, self).__init__()
        self.net = net
        children = list(self.net.children())
        self.removed = children[-1]      # final child, produces the logits
        self.remained = children[:-1]    # everything before it
        self.poolModel = torch.nn.Sequential(*self.remained)
        # xiaodan: mean and std stats from
        # https://pytorch.org/docs/stable/torchvision/models.html#video-classification
        # Shaped (1, C, 1, 1) so they broadcast against the trailing
        # (T, C, H, W) axes of the (B, T, C, H, W) input.
        self.mean = P(torch.tensor([0.43216, 0.394666, 0.37645]).view(1, -1, 1, 1),
                      requires_grad=False)
        self.std = P(torch.tensor([0.22803, 0.22145, 0.216989]).view(1, -1, 1, 1),
                     requires_grad=False)

    def forward(self, x):
        """Return ``(pool, logits)`` for a batch of videos.

        Args:
            x: video batch of shape (B, T, C, H, W); the ``(x + 1) / 2``
                rescaling suggests values in [-1, 1] -- TODO confirm with
                the caller.

        Returns:
            pool: output of ``poolModel`` flattened to (B, features); the batch
                axis is always kept, including for B == 1.
            logits: output of the wrapped network's last child on ``pool``.
        """
        # Rescale, then standardise each channel.
        x = (x + 1.) / 2.0
        x = (x - self.mean) / self.std

        # Resize every frame to 112 x 112 if necessary.
        # xiaodan: size (112,112) for R (2+1)D, not sure if 'area' will be
        # better than bilinear
        if x.shape[3] != 112 or x.shape[4] != 112:
            batch, frames, channels, height, width = x.shape
            # Fold time into the batch axis so that all frames are resized
            # with a single call; every frame is still resized independently.
            flat = x.reshape(batch * frames, channels, height, width)
            flat = F.interpolate(flat, size=(112, 112), mode='bilinear',
                                 align_corners=True)
            x = flat.reshape(batch, frames, channels, 112, 112)

        # (B, T, C, H, W) -> (B, C, T, H, W)
        x = x.permute(0, 2, 1, 3, 4).contiguous()

        # flatten(1) instead of squeeze(): squeeze() would also remove the
        # batch axis for a batch of one and break softmax/cat downstream.
        pool = self.poolModel(x).flatten(1)
        logits = self.removed(pool)
        return pool, logits
# A pytorch implementation of cov, from Modar M. Alfadly
# https://discuss.pytorch.org/t/covariance-and-gradient-support/16217/2
def torch_cov(m, rowvar=False):
    '''Estimate a covariance matrix given data.

    Covariance indicates the level to which two variables vary together.
    If we examine N-dimensional samples, `X = [x_1, x_2, ... x_N]^T`,
    then the covariance matrix element `C_{ij}` is the covariance of
    `x_i` and `x_j`. The element `C_{ii}` is the variance of `x_i`.

    The input tensor is left untouched: centring is done on a new tensor.

    Args:
        m: A 1-D or 2-D tensor containing multiple variables and observations.
            A 1-D tensor is treated as a single variable.
        rowvar: If `rowvar` is True, each row represents a variable, with
            observations in the columns. Otherwise (the default) each column
            represents a variable, while the rows contain observations.

    Returns:
        The covariance matrix of the variables, normalised by
        (number of observations - 1) and squeezed.

    Raises:
        ValueError: if `m` has more than 2 dimensions.
    '''
    if m.dim() > 2:
        raise ValueError('m has more than 2 dimensions')
    if m.dim() < 2:
        m = m.view(1, -1)
    if not rowvar and m.size(0) != 1:
        m = m.t()
    # m = m.type(torch.double)  # uncomment this line if desired
    fact = 1.0 / (m.size(1) - 1)
    # Out-of-place subtraction: `m` may be the caller's tensor or a transposed
    # view of it, so an in-place `-=` would silently modify the caller's data.
    centered = m - torch.mean(m, dim=1, keepdim=True)
    centered_t = centered.t()  # if complex: centered.t().conj()
    return fact * centered.matmul(centered_t).squeeze()
# Pytorch implementation of matrix sqrt, from Tsung-Yu Lin, and Subhransu Maji
# https://github.com/msubhransu/matrix-sqrt
def sqrt_newton_schulz(A, numIters, dtype=None):
    """Approximate the square root of each matrix in a batch (Newton-Schulz).

    Every matrix is first divided by its Frobenius norm, the coupled
    Newton-Schulz iteration is run `numIters` times, and the result is scaled
    back by the square root of that norm. No gradients are tracked.

    Args:
        A: batch of square matrices, indexed as (batch, dim, dim).
        numIters: number of iterations to run.
        dtype: tensor type passed to `Tensor.type` for the identity matrices;
            defaults to `A.type()`.

    Returns:
        Tensor shaped like `A` holding the approximate square roots.
    """
    with torch.no_grad():
        tensor_type = A.type() if dtype is None else dtype
        n_batch, n = A.shape[0], A.shape[1]

        # Frobenius norm of every matrix in the batch.
        frob_norm = A.mul(A).sum(dim=1).sum(dim=1).sqrt()
        Y = A.div(frob_norm.view(n_batch, 1, 1).expand_as(A))

        identity = torch.eye(n, n).view(1, n, n).repeat(n_batch, 1, 1).type(tensor_type)
        Z = identity.clone()

        for _ in range(numIters):
            T = 0.5 * (3.0 * identity - Z.bmm(Y))
            Y = Y.bmm(T)
            Z = T.bmm(Z)

        # Undo the initial normalisation.
        root = Y * torch.sqrt(frob_norm).view(n_batch, 1, 1).expand_as(A)
    return root
# FID calculator from TTUR--consider replacing this with GPU-accelerated cov
# calculations using torch?
def numpy_calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Frechet distance between two Gaussians, computed with numpy/scipy.

    Taken from https://github.com/bioinf-jku/TTUR
    (stable version by Dougal J. Sutherland).

    For X_1 ~ N(mu_1, C_1) and X_2 ~ N(mu_2, C_2) the distance is
        d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).

    Params:
    -- mu1   : Mean of the activations for generated samples.
    -- sigma1: Covariance matrix of the activations for generated samples.
    -- mu2   : Mean of the activations, precalculated on a representative
               data set.
    -- sigma2: Covariance matrix of the activations, precalculated on a
               representative data set.
    -- eps   : Value added to the diagonal of both covariances when the
               matrix square root of their product is not finite.

    Returns:
    --   : The Frechet distance.

    Raises:
    -- ValueError if the matrix square root has a diagonal whose imaginary
       part is not close to zero (atol=1e-3).
    """
    mu1, mu2 = np.atleast_1d(mu1), np.atleast_1d(mu2)
    sigma1, sigma2 = np.atleast_2d(sigma1), np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'

    mean_delta = mu1 - mu2

    # The product might be almost singular; retry with a regularised diagonal.
    covmean, _ = linalg.sqrtm(np.dot(sigma1, sigma2), disp=False)
    if not np.all(np.isfinite(covmean)):
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        jitter = eps * np.eye(sigma1.shape[0])
        covmean = linalg.sqrtm(np.dot(sigma1 + jitter, sigma2 + jitter))

    # Numerical error might leave a slight imaginary component.
    if np.iscomplexobj(covmean):
        print('wat')
        diag_imag = np.diagonal(covmean).imag
        if not np.allclose(diag_imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)
    squared_mean_dist = mean_delta.dot(mean_delta)
    return squared_mean_dist + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean
def torch_calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Frechet distance between two Gaussians, computed with PyTorch.

    Adapted from https://github.com/bioinf-jku/TTUR; the matrix square root
    is approximated with 50 Newton-Schulz iterations instead of scipy.

    For X_1 ~ N(mu_1, C_1) and X_2 ~ N(mu_2, C_2) the distance is
        d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).

    Params:
    -- mu1   : 1-D tensor, mean of the activations for generated samples.
    -- sigma1: 2-D tensor, covariance of the activations for generated
               samples.
    -- mu2   : 1-D tensor, mean of the activations precalculated on a
               representative data set.
    -- sigma2: 2-D tensor, covariance of the activations precalculated on a
               representative data set.
    -- eps   : Not used by this implementation; present so the signature
               mirrors the numpy version.

    Returns:
    --   : The Frechet distance as a tensor.
    """
    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'

    mean_delta = mu1 - mu2

    # sqrt_newton_schulz works on batches, so add and then drop a batch axis.
    cov_product = sigma1.mm(sigma2).unsqueeze(0)
    covmean = sqrt_newton_schulz(cov_product, 50).squeeze()

    squared_mean_dist = mean_delta.dot(mean_delta)
    return (squared_mean_dist + torch.trace(sigma1) + torch.trace(sigma2)
            - 2 * torch.trace(covmean))
# Calculate Inception Score mean + std given softmax'd logits and number of splits
def calculate_inception_score(pred, num_splits=10):
    """Compute the Inception Score from class probabilities.

    `pred` is cut into `num_splits` consecutive chunks of
    `pred.shape[0] // num_splits` rows (trailing rows that do not fill a chunk
    are ignored). For each chunk the score is
    `exp(mean_i KL(p(y|x_i) || p(y)))`, where `p(y)` is the chunk's mean
    prediction.

    Entries with probability exactly 0 contribute 0 to the KL sum (the usual
    `0 * log 0 = 0` convention) instead of turning the whole score into NaN.

    Args:
        pred: 2-D numpy array of softmax outputs, one row per sample.
        num_splits: number of chunks to evaluate.

    Returns:
        Tuple `(mean, std)` of the per-chunk scores.
    """
    chunk_size = pred.shape[0] // num_splits
    scores = []
    for index in range(num_splits):
        pred_chunk = pred[index * chunk_size:(index + 1) * chunk_size, :]
        marginal = np.expand_dims(np.mean(pred_chunk, 0), 0)
        # log(0) = -inf and 0 * -inf = nan are expected for zero
        # probabilities; silence the warnings and mask those terms below.
        with np.errstate(divide='ignore', invalid='ignore'):
            pointwise = pred_chunk * (np.log(pred_chunk) - np.log(marginal))
        pointwise = np.where(pred_chunk > 0, pointwise, 0.0)
        kl_inception = np.mean(np.sum(pointwise, 1))
        scores.append(np.exp(kl_inception))
    return np.mean(scores), np.std(scores)
# Loop and run the sampler and the net until it accumulates num_inception_images
# activations. Return the pool, the logits, and the labels (if one wants
# Inception Accuracy the labels of the generated class will be needed)
def accumulate_inception_activations(sample, net, num_inception_images=50000):
    """Run `sample` and `net` until enough activations have been collected.

    Whole batches are accumulated, so the number of returned rows is the
    first multiple of the batch size that reaches `num_inception_images`.

    Args:
        sample: callable returning `(videos, labels)` for one batch.
        net: callable mapping `videos.float()` to `(pool, logits)`.
        num_inception_images: minimum number of samples to accumulate.

    Returns:
        Tuple `(pool, probabilities, labels)`, each concatenated along
        dim 0; `probabilities` is the softmax of the logits over dim 1.
    """
    pool, logits, labels = [], [], []
    # Track the running count instead of re-concatenating every collected
    # batch just to read its length on each loop iteration.
    num_collected = 0
    while num_collected < num_inception_images:
        with torch.no_grad():
            videos, labels_val = sample()
            pool_val, logits_val = net(videos.float())
            pool.append(pool_val)
            logits.append(F.softmax(logits_val, 1))
            labels.append(labels_val)
        num_collected += logits_val.shape[0]
    return torch.cat(pool, 0), torch.cat(logits, 0), torch.cat(labels, 0)
# Load and wrap the Inception model
def load_inception_net(parallel=False):
    """Build the pretrained Inception-v3 wrapper used for IS/FID.

    The torchvision model is put in eval mode, wrapped in `WrapInception`
    and moved to the GPU.

    Args:
        parallel: if True, additionally wrap the model in `nn.DataParallel`.

    Returns:
        The wrapped (and optionally data-parallel) model.
    """
    backbone = inception_v3(pretrained=True, transform_input=False)
    backbone.eval()
    wrapped = WrapInception(backbone).cuda()
    if not parallel:
        return wrapped
    print('Parallelizing Inception module...')
    return nn.DataParallel(wrapped)
#xiaodan: added by xiaodan to use R(2+1)D model
# Load and wrap the R(2+1)D model
def load_r2plus1d_18_net(parallel=False):
    """Build the pretrained R(2+1)D-18 wrapper used for video IS/FID.

    The torchvision model is put in eval mode, wrapped in `WrapR2plus1d_18`
    and moved to the GPU.

    Args:
        parallel: if True, additionally wrap the model in `nn.DataParallel`.

    Returns:
        The wrapped (and optionally data-parallel) model.
    """
    r2plus1d_18_model = r2plus1d_18(pretrained=True)
    r2plus1d_18_model = WrapR2plus1d_18(r2plus1d_18_model.eval()).cuda()
    if parallel:
        # The message used to say "Inception", copied from load_inception_net.
        print('Parallelizing R(2+1)D module...')
        r2plus1d_18_model = nn.DataParallel(r2plus1d_18_model)
    return r2plus1d_18_model
# This produces a function which takes in an iterator which returns a set number of samples
# and iterates until it accumulates config['num_inception_images'] images.
# The iterator can return samples with a different batch size than used in
# training, using the setting config['inception_batchsize']
def prepare_inception_metrics(dataset, parallel, no_fid=False):
    """Create a function that computes IS and FID for a sampler.

    The precomputed reference moments are read from
    `<dataset>_inception_moments.npz` (keys `mu` and `sigma`) and the
    R(2+1)D feature network is loaded once; both are captured by the
    returned closure.

    Args:
        dataset: dataset name or path prefix; a trailing `_hdf5` is removed
            before the moments file name is built.
        parallel: forwarded to `load_r2plus1d_18_net`.
        no_fid: if True, the FID computation is skipped and 9999.0 is
            reported in its place.

    Returns:
        `get_inception_metrics(sample, num_inception_images, num_splits=10,
        prints=True, use_torch=True)`, which returns `(IS_mean, IS_std, FID)`.
    """
    # Load metrics; this is intentionally not in a try-except loop so that
    # the script will crash here if it cannot find the Inception moments.
    # Remove only a trailing "_hdf5". str.strip('_hdf5') would instead strip
    # any of the characters "_", "h", "d", "f", "5" from BOTH ends of the name
    # (e.g. 'hmdb51_hdf5' -> 'mdb51').
    suffix = '_hdf5'
    if dataset.endswith(suffix):
        dataset = dataset[:-len(suffix)]
    # Open the archive once and read both arrays from it.
    with np.load(dataset + '_inception_moments.npz') as moments:
        data_mu = moments['mu']
        data_sigma = moments['sigma']
    # Load network
    net = load_r2plus1d_18_net(parallel)

    def get_inception_metrics(sample, num_inception_images, num_splits=10,
                              prints=True, use_torch=True):
        """Return `(IS_mean, IS_std, FID)` for samples drawn from `sample`.

        Args:
            sample: callable returning one batch `(videos, labels)`.
            num_inception_images: minimum number of samples to evaluate.
            num_splits: number of chunks used for the Inception Score.
            prints: if True, print progress messages.
            use_torch: if True, compute moments and FID with the PyTorch
                implementations; otherwise with numpy/scipy.
        """
        if prints:
            print('Gathering activations...')
        pool, logits, labels = accumulate_inception_activations(
            sample, net, num_inception_images)
        if prints:
            print('Calculating Inception Score...')
        IS_mean, IS_std = calculate_inception_score(logits.cpu().numpy(), num_splits)
        if no_fid:
            FID = 9999.0
        else:
            if prints:
                print('Calculating means and covariances...')
            if use_torch:
                mu, sigma = torch.mean(pool, 0), torch_cov(pool, rowvar=False)  # [512], [512,512]
            else:
                pool_np = pool.cpu().numpy()
                mu, sigma = np.mean(pool_np, axis=0), np.cov(pool_np, rowvar=False)
            if prints:
                print('Covariances calculated, getting FID...')
            if use_torch:
                # Put the reference moments on the same device as the
                # activations rather than assuming the current CUDA device.
                ref_mu = torch.tensor(data_mu).float().to(mu.device)
                ref_sigma = torch.tensor(data_sigma).float().to(sigma.device)
                FID = torch_calculate_frechet_distance(mu, sigma, ref_mu, ref_sigma)
                FID = float(FID.cpu().numpy())
            else:
                # mu and sigma are already numpy arrays on this path.
                FID = numpy_calculate_frechet_distance(mu, sigma, data_mu, data_sigma)
            # mu and sigma only exist when FID was computed.
            del mu, sigma
        # Delete pool, logits, and labels, just in case
        del pool, logits, labels
        return IS_mean, IS_std, FID

    return get_inception_metrics