resolve README conflict
valentin.kozlov committed Nov 26, 2019
2 parents 28c18dd + e2cacd8 commit 6c912c8
Showing 8 changed files with 289 additions and 172 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -1,8 +1,10 @@
2D semantic segmentation
==============================

[![Build Status](https://jenkins.indigo-datacloud.eu/buildStatus/icon?job=Pipeline-as-code%2FDEEP-OC-org%2Fsemseg_vaihingen%2Fmaster)](https://jenkins.indigo-datacloud.eu/job/Pipeline-as-code/job/DEEP-OC-org/job/semseg_vaihingen/job/master/)
----


2D semantic segmentation (Vaihingen dataset)


37 changes: 30 additions & 7 deletions semseg_vaihingen/config.py
@@ -9,14 +9,20 @@
BASE_DIR = path.dirname(path.normpath(path.dirname(__file__)))

DATA_DIR = path.join(BASE_DIR,'data') # Location of model data and output files
MODEL_PATH = path.join(BASE_DIR,'models','resnet50_fcn_weights.hdf5') # Location + name of the output model
MODEL_WEIGHTS_FILE = 'resnet50_fcn_weights.hdf5'
MODEL_DIR = path.join(BASE_DIR,'models')   # Location of model files
MODEL_REMOTE_PUBLIC = 'https://nc.deep-hybrid-datacloud.eu/s/eTqJexZ5PcBxXR6/download?path='
REMOTE_STORAGE = 'rshare:/deep-oc-apps/semseg_vaihingen'
REMOTE_MODELS_UPLOAD = path.join(REMOTE_STORAGE, 'models')
NUM_LABELS = 6 # max number of labels
PATCH_SIZE = 256
TRAINING_DATA = 'vaihingen_train.hdf5'
VALIDATION_DATA = 'vaihingen_val.hdf5'
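
For orientation, a short sketch of how the new constants could be combined to locate the weights file locally and remotely; the URL composition is an assumption, not something this commit shows.

# Illustrative only; assumes the public download URL is MODEL_REMOTE_PUBLIC + filename.
from os import path
import semseg_vaihingen.config as cfg

local_weights = path.join(cfg.MODEL_DIR, cfg.MODEL_WEIGHTS_FILE)    # .../models/resnet50_fcn_weights.hdf5
remote_weights = cfg.MODEL_REMOTE_PUBLIC + cfg.MODEL_WEIGHTS_FILE   # assumed composition of the share link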

train_args = { 'augmentation': {'default': False,
'choices': [False, True],
'help': 'Apply augmentation',
'required': False
},
'transfer_learning': {'default': False,
'choices': [False, True],
@@ -34,14 +40,31 @@
'batch_size': {'default': 16,
'help': 'Number of samples per batch',
'required': False
},
'upload_back': {'default': False,
'choices': [False, True],
'help': 'Whether to upload the trained model back to remote storage (True) or not (False, default)',
'required': False
},
'model_weights_save': {'default': MODEL_WEIGHTS_FILE,
'help': 'Filename for the model weights',
'required': False
},
}

predict_args = {'model_weights_load': {'default': MODEL_WEIGHTS_FILE,
'help': 'Filename for the model weights (default: resnet50_fcn_weights.hdf5)',
'required': False
},
'model_retrieve': {'default': False,
'choices': [False, True],
'help': 'Force model update from the remote repository',
'required': False
},

#'convert_grayscale': {'default': True,
# 'choices': [False, True],
# 'help': 'Convert color image to grayscale before processing (default)',
# 'required': False
# },
}
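
The train_args / predict_args dictionaries only declare option metadata; how they are consumed is outside this diff. A minimal sketch, assuming each entry is simply mapped onto an argparse option (the build_parser helper is hypothetical, not part of the repository; the boolean 'choices' entries would need explicit type handling and are skipped here):

import argparse
import semseg_vaihingen.config as cfg

def build_parser(arg_specs):
    # one --option per dictionary key, using only default/help/required
    parser = argparse.ArgumentParser(description='semseg_vaihingen training')
    for name, spec in arg_specs.items():
        parser.add_argument('--' + name,
                            default=spec['default'],
                            help=spec.get('help', ''),
                            required=spec.get('required', False))
    return parser

print(vars(build_parser(cfg.train_args).parse_args([])))   # prints the defaults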

3 changes: 2 additions & 1 deletion semseg_vaihingen/models/augmentation.py
@@ -1,5 +1,6 @@
# imports
import numpy as np
import semseg_vaihingen.config as cfg
from sklearn.utils import shuffle


@@ -47,7 +48,7 @@ def choose_augmentation(z, num):

# apply one random augmentation to every image in the dataset:
def every_element_randomly_once(x, y):
rands = np.random.randint(1, 6, x.shape[0])
rands = np.random.randint(1, cfg.NUM_LABELS, x.shape[0])
x_aug = np.zeros(x.shape, dtype=x.dtype)
y_aug = np.zeros(y.shape, dtype=y.dtype)
for i, r in enumerate(rands):
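
The body of choose_augmentation() is not shown in this diff; a purely hypothetical sketch of what such a dispatcher could look like (simple flips and rotations), for readability only:

import numpy as np

def choose_augmentation_sketch(z, num):
    # num selects one of five simple geometric augmentations (hypothetical)
    if num == 1:
        return np.fliplr(z)
    elif num == 2:
        return np.flipud(z)
    elif num == 3:
        return np.rot90(z, 1)
    elif num == 4:
        return np.rot90(z, 2)
    return np.rot90(z, 3)

Note that np.random.randint has an exclusive upper bound, so np.random.randint(1, cfg.NUM_LABELS, ...) draws values 1..cfg.NUM_LABELS-1 (1..5 with NUM_LABELS = 6).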
90 changes: 76 additions & 14 deletions semseg_vaihingen/models/data_io.py
@@ -5,41 +5,101 @@

import sys
import argparse
import semseg_vaihingen.config as cfg

from PIL import Image
# load jpeg or png image:
# use standard tools of Keras (skip cv2)
from keras import backend
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array


# obsolete, i.e. not used
def rgb2gray(rgb):
'''
Function to convert RGB to gray using formula in
https://pillow.readthedocs.io/en/3.2.x/reference/Image.html#PIL.Image.Image.convert
'''
return np.dot(rgb[...,:3], [0.2989, 0.5870, 0.1140])
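
A quick worked example of the luminance weights used above (illustrative only):

import numpy as np

pixel = np.array([200, 100, 50])                   # R, G, B
gray = np.dot(pixel, [0.2989, 0.5870, 0.1140])     # 59.78 + 58.70 + 5.70 = 124.18
print(gray)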


def image_to_gray(data_in):
'''
Function to convert image data into grayscale.
:param data_in: input array, must be (u)int8 type (0..255)!
'''
debug = False
img_bw = Image.fromarray(data_in, 'RGB').convert('L')
data_bw = img_to_array(img_bw, dtype='int')
data = np.concatenate((data_bw, data_bw, data_bw), axis=2)
print("[INFO] data_in.shape: {}, data_bw.shape: {}, data.shape: {}".format(
data_in.shape, data_bw.shape, data.shape))
print("[INFO] data_in.type: {}, data_bw.type: {}, data.type: {}".format(
data_in.dtype, data_bw.dtype, data.dtype))

if debug:
print("[DEBUG] data_in {}".format(data_in[:5,:5,]))
print("[DEBUG] data_bw {}".format(data_bw[:5,:5,]))
print("[DEBUG] data {}".format(data[:5,:5,]))

return data

def load_image_jpg(file_path, convert_gray=False):
debug = False
# set default dimension ordering as for TensorFlow
backend.set_image_dim_ordering('tf')
# load the image
img = load_img(file_path)
# convert to numpy array
data = img_to_array(img, dtype='int')
data_raw = img_to_array(img, dtype='uint8')

if debug:
print("[DEBUG] data_raw {}".format(data_raw[:5,:5,]))

if convert_gray:
print("[INFO] Use conversation to grayscale!")
data = image_to_gray(data_raw)
if debug:
print("[DEBUG] data {}".format(data[:5,:5,]))
else:
data = data_raw


#print("[DEBUG] data shape: {}".format(data.shape))
#print("[DEBUG] data {}".format(data[:10,:10,]))

return data
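
A hypothetical call (the filename is a placeholder, not from this repository):

# data = load_image_jpg('some_image.jpg', convert_gray=True)   # (H, W, 3) integer array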


# load one of the vaihingen images, specified by image_number; by default only the first 3 channels are taken:
def load_vaihingen_image(filename, image_number, only_three_channels=True, show_properties=False):
# load one of the vaihingen images, specified by image_number;
# by default only the first 3 channels are taken:
def load_vaihingen_image(filename, image_number,
only_three_channels=True, show_properties=False,
convert_gray=False):
debug = False
# load the data and ground truth:
f = h5py.File(filename)
ground_truth = np.array(f['y_{}'.format(image_number)])
ground_truth = np.transpose(ground_truth)
data = np.array(f['x_{}'.format(image_number)])
data = np.transpose(data)
data_raw = np.array(f['x_{}'.format(image_number)])
data_raw = np.transpose(data_raw)
f.close()

# only use the first three channels:
if only_three_channels:
data = data[:, :, :3]
data_raw = data_raw[:, :, :3]

if debug:
print("[DEBUG] data shape: {}".format(data_raw.shape))
print("[DEBUG] data {}".format(data_raw[:5,:5,]))

if convert_gray:
print("[INFO] Use conversation to grayscale!")
data = image_to_gray(data_raw)
if debug:
print("[DEBUG] data {}".format(data[:5,:5,]))
else:
data = data_raw

# show properties of the data and ground truth:
if show_properties:
print('- Ground truth:')
@@ -77,10 +137,11 @@ def image_to_dataset(filename, image_number, window_shape, overlap_factor):
return input_list, output_list


# generate dataset consisting of 256x256 sized image patches (defined in config.py)
# spatial overlap of the patches can be specified
def generate_dataset(data_path, image_numbers, overlap_factor):
# input / output size of the FCN:
size = 256
size = cfg.PATCH_SIZE

# specify filename and directory:
file_directory = data_path
@@ -93,7 +154,7 @@ def generate_dataset(data_path, image_numbers, overlap_factor):
y_list = []
for i in image_numbers:
file = path.join(file_directory, filename + str(i) + file_extension)
print file
print(file)
x, y = image_to_dataset(file, i, [size, size], overlap_factor)
x_list.extend(x)
y_list.extend(y)
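
image_to_dataset() itself is not shown in this diff; a purely illustrative sketch of overlapping patch extraction, assuming overlap_factor shrinks the sliding-window step (the real implementation may differ):

import numpy as np

def extract_patches_sketch(image, patch_size, overlap_factor):
    # overlap_factor = 1 means non-overlapping patches; larger values increase overlap
    step = max(1, int(patch_size / overlap_factor))
    patches = []
    for row in range(0, image.shape[0] - patch_size + 1, step):
        for col in range(0, image.shape[1] - patch_size + 1, step):
            patches.append(image[row:row + patch_size, col:col + patch_size])
    return patches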
@@ -123,6 +184,7 @@ def load_data(name):
f = h5py.File(name)
x = np.array(f['x'], dtype=np.float32)
y = np.array(f['y'], dtype=np.uint8)
print("[DEBUG] load_data, x.shape: {}".format(x.shape))
return x, y
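
Example usage with the filenames now taken from config.py (the data/ location is an assumption):

# x_train, y_train = load_data(path.join(cfg.DATA_DIR, cfg.TRAINING_DATA))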


@@ -142,8 +204,8 @@ def main():
x_train, y_train = generate_dataset(data_path, training_nums, overlap)
x_val, y_val = generate_dataset(data_path, validation_nums, overlap)

save_dataset(path.join(output_path,'vaihingen_train.hdf5'), x_train, y_train)
save_dataset(path.join(output_path,'vaihingen_val.hdf5'), x_val, y_val)
save_dataset(path.join(output_path, cfg.TRAINING_DATA), x_train, y_train)
save_dataset(path.join(output_path, cfg.VALIDATION_DATA), x_val, y_val)


if __name__ == '__main__':