forked from CompVis/adaptive-style-transfer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprepare_dataset.py
159 lines (134 loc) · 7.57 KB
/
prepare_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Copyright (C) 2018 Artsiom Sanakoyeu and Dmytro Kotovenko
#
# This file is part of Adaptive Style Transfer
#
# Adaptive Style Transfer is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Adaptive Style Transfer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import print_function
import pandas as pd
import numpy as np
import os
import time
from tqdm import tqdm
import scipy.misc
import utils
import random
class ArtDataset(object):
    """Random-access pool of style (artwork) images.

    Every entry of the directory given to the constructor is treated as an
    image path; entries are not filtered by extension or file type.
    """

    def __init__(self, path_to_art_dataset):
        """
        Collect the paths of all entries found directly inside a directory.
        Args:
            path_to_art_dataset: directory whose entries are the style images
        """
        self.dataset = [os.path.join(path_to_art_dataset, x) for x in os.listdir(path_to_art_dataset)]
        print("Art dataset contains %d images." % len(self.dataset))

    @staticmethod
    def _fit_to_size_limits(image):
        """
        Bring an image of shape (height, width, channels) into the working size range.
        - longest side above 1800px: shrink so the longest side is about 1800px;
        - both sides below 800px: enlarge so the shortest side is about 800px,
          unless that needs a factor of 4 or more, in which case the image is
          resized to exactly 800x800 (aspect ratio is not kept);
        - otherwise the image is returned untouched.
        Args:
            image: array as returned by scipy.misc.imread(..., mode='RGB')
        Returns:
            the (possibly resized) image, still three-dimensional
        """
        # Only the spatial axes matter here. Taking min() over the full shape
        # would pick up the channel count (3) and make the scale factor
        # meaningless.
        height, width = image.shape[:2]
        longest = max(height, width)
        shortest = min(height, width)

        if longest > 1800:
            # A float `size` is interpreted by imresize as a fraction of the current size.
            return scipy.misc.imresize(image, size=1800. / longest)
        if longest < 800:
            # Resize the smallest side of the image to 800px
            alpha = 800. / float(shortest)
            if alpha < 4.:
                return scipy.misc.imresize(image, size=alpha)
            return scipy.misc.imresize(image, size=(800, 800))
        return image

    def get_batch(self, augmentor, batch_size=1):
        """
        Sample images uniformly at random (with replacement) and return them as one batch.
        Args:
            augmentor: callable applied to every image before it is added to the batch;
                       if falsy, images are used as they are after resizing
            batch_size: size of batch
        Returns:
            dictionary with fields: image (np.asarray over the list of float32 images)
        """
        batch_image = []
        for _ in range(batch_size):
            image = scipy.misc.imread(name=random.choice(self.dataset), mode='RGB')
            image = self._fit_to_size_limits(image)
            if augmentor:
                image = augmentor(image)
            batch_image.append(image.astype(np.float32))
        # Now return a batch in correct form
        return {"image": np.asarray(batch_image)}

    def initialize_batch_worker(self, queue, augmentor, batch_size=1, seed=228):
        """
        Produce batches forever and push each one onto `queue`. Never returns.
        Args:
            queue: object exposing put(batch)
            augmentor: forwarded to get_batch
            batch_size: forwarded to get_batch
            seed: seed for NumPy's global random generator
        """
        # Only NumPy's generator is seeded. The image choice in get_batch goes
        # through the `random` module, which is not seeded here.
        np.random.seed(seed)
        while True:
            batch = self.get_batch(augmentor=augmentor, batch_size=batch_size)
            queue.put(batch)
class PlacesDataset(object):
    """Random-access pool of content images taken from a fixed list of scene categories.

    The constructor expects one sub-directory per category below the dataset
    root and silently skips (with a message) categories that are missing.
    """

    # NOTE(review): '/a/apartment-building/outdoor' is spelled with a hyphen
    # while every other multi-word name uses underscores - confirm against the
    # directory names of the dataset on disk.
    categories_names = \
        ['/a/abbey', '/a/arch', '/a/amphitheater', '/a/aqueduct', '/a/arena/rodeo', '/a/athletic_field/outdoor',
         '/b/badlands', '/b/balcony/exterior', '/b/bamboo_forest', '/b/barn', '/b/barndoor', '/b/baseball_field',
         '/b/basilica', '/b/bayou', '/b/beach', '/b/beach_house', '/b/beer_garden', '/b/boardwalk', '/b/boathouse',
         '/b/botanical_garden', '/b/bullring', '/b/butte', '/c/cabin/outdoor', '/c/campsite', '/c/campus',
         '/c/canal/natural', '/c/canal/urban', '/c/canyon', '/c/castle', '/c/church/outdoor', '/c/chalet',
         '/c/cliff', '/c/coast', '/c/corn_field', '/c/corral', '/c/cottage', '/c/courtyard', '/c/crevasse',
         '/d/dam', '/d/desert/vegetation', '/d/desert_road', '/d/doorway/outdoor', '/f/farm', '/f/fairway',
         '/f/field/cultivated', '/f/field/wild', '/f/field_road', '/f/fishpond', '/f/florist_shop/indoor',
         '/f/forest/broadleaf', '/f/forest_path', '/f/forest_road', '/f/formal_garden', '/g/gazebo/exterior',
         '/g/glacier', '/g/golf_course', '/g/greenhouse/indoor', '/g/greenhouse/outdoor', '/g/grotto', '/g/gorge',
         '/h/hayfield', '/h/herb_garden', '/h/hot_spring', '/h/house', '/h/hunting_lodge/outdoor', '/i/ice_floe',
         '/i/ice_shelf', '/i/iceberg', '/i/inn/outdoor', '/i/islet', '/j/japanese_garden', '/k/kasbah',
         '/k/kennel/outdoor', '/l/lagoon', '/l/lake/natural', '/l/lawn', '/l/library/outdoor', '/l/lighthouse',
         '/m/mansion', '/m/marsh', '/m/mausoleum', '/m/moat/water', '/m/mosque/outdoor', '/m/mountain',
         '/m/mountain_path', '/m/mountain_snowy', '/o/oast_house', '/o/ocean', '/o/orchard', '/p/park',
         '/p/pasture', '/p/pavilion', '/p/picnic_area', '/p/pier', '/p/pond', '/r/raft', '/r/railroad_track',
         '/r/rainforest', '/r/rice_paddy', '/r/river', '/r/rock_arch', '/r/roof_garden', '/r/rope_bridge',
         '/r/ruin', '/s/schoolhouse', '/s/sky', '/s/snowfield', '/s/swamp', '/s/swimming_hole',
         '/s/synagogue/outdoor', '/t/temple/asia', '/t/topiary_garden', '/t/tree_farm', '/t/tree_house',
         '/u/underwater/ocean_deep', '/u/utility_room', '/v/valley', '/v/vegetable_garden', '/v/viaduct',
         '/v/village', '/v/vineyard', '/v/volcano', '/w/waterfall', '/w/watering_hole', '/w/wave',
         '/w/wheat_field', '/z/zen_garden', '/a/alcove', '/a/apartment-building/outdoor', '/a/artists_loft',
         '/b/building_facade', '/c/cemetery']
    # Drop the leading '/' so that os.path.join treats each name as relative
    # to the dataset root instead of as an absolute path.
    categories_names = [x[1:] for x in categories_names]

    def __init__(self, path_to_dataset):
        """
        Collect the paths of all entries found in the known category directories.
        Args:
            path_to_dataset: root directory containing one sub-directory per category
        """
        self.dataset = []
        for category_idx, category_name in enumerate(tqdm(self.categories_names)):
            print(category_name, category_idx)
            category_dir = os.path.join(path_to_dataset, category_name)
            if os.path.exists(category_dir):
                for file_name in tqdm(os.listdir(category_dir)):
                    self.dataset.append(os.path.join(category_dir, file_name))
            else:
                print("Category %s can't be found in path %s. Skip it." %
                      (category_name, category_dir))
        print("Finished. Constructed Places2 dataset of %d images." % len(self.dataset))

    @staticmethod
    def _fit_to_size_limits(image):
        """
        Bring an image of shape (height, width, channels) into the working size range.
        - longest side above 1800px: shrink so the longest side is about 1800px;
        - both sides below 800px: enlarge so the shortest side is about 800px,
          unless that needs a factor of 4 or more, in which case the image is
          resized to exactly 800x800 (aspect ratio is not kept);
        - otherwise the image is returned untouched.
        Args:
            image: array as returned by scipy.misc.imread(..., mode='RGB')
        Returns:
            the (possibly resized) image, still three-dimensional
        """
        # Only the spatial axes matter here. Taking min() over the full shape
        # would pick up the channel count (3) and make the scale factor
        # meaningless.
        height, width = image.shape[:2]
        longest = max(height, width)
        shortest = min(height, width)

        if longest > 1800:
            # A float `size` is interpreted by imresize as a fraction of the current size.
            return scipy.misc.imresize(image, size=1800. / longest)
        if longest < 800:
            # Resize the smallest side of the image to 800px
            alpha = 800. / float(shortest)
            if alpha < 4.:
                return scipy.misc.imresize(image, size=alpha)
            return scipy.misc.imresize(image, size=(800, 800))
        return image

    def get_batch(self, augmentor, batch_size=1):
        """
        Generate a batch of images sampled uniformly at random (with replacement).
        No category labels are returned.
        Args:
            augmentor: callable applied to every image before it is added to the batch;
                       if falsy, images are used as they are after resizing
            batch_size: size of batch we return
        Returns:
            dictionary with fields: image (np.asarray over the list of float32 images)
        """
        batch_image = []
        for _ in range(batch_size):
            image = scipy.misc.imread(name=random.choice(self.dataset), mode='RGB')
            # Double both sides first (float size = fraction of current size);
            # presumably because the source images are small - confirm.
            image = scipy.misc.imresize(image, size=2.)
            image = self._fit_to_size_limits(image)
            if augmentor:
                image = augmentor(image)
            batch_image.append(image.astype(np.float32))
        return {"image": np.asarray(batch_image)}

    def initialize_batch_worker(self, queue, augmentor, batch_size=1, seed=228):
        """
        Produce batches forever and push each one onto `queue`. Never returns.
        Args:
            queue: object exposing put(batch)
            augmentor: forwarded to get_batch
            batch_size: forwarded to get_batch
            seed: seed for NumPy's global random generator
        """
        # Only NumPy's generator is seeded. The image choice in get_batch goes
        # through the `random` module, which is not seeded here.
        np.random.seed(seed)
        while True:
            batch = self.get_batch(augmentor=augmentor, batch_size=batch_size)
            queue.put(batch)