-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_processing_eo_1.py
133 lines (112 loc) · 4.96 KB
/
data_processing_eo_1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# -*- coding: utf-8 -*-
"""Data_processing_EO_1.ipynb
Automatically generated by Colaboratory.
# Installing and Loading Libraries required
"""
# Commented out IPython magic to ensure Python compatibility.
!pip install sklearn
!pip install folium
!pip install gdal
!pip install matplotlib
!pip install rasterio
!pip install branca
!pip install seaborn
import gdal
from sklearn import cluster
from sklearn import decomposition
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from branca import colormap as cm
import rasterio as rio
import numpy as np
import folium
# %matplotlib inline
"""# Create stacked image of all 242 bands using gdal"""
dir /b /s *.tif > listoftifs.txt
gdal_merge.py -of GTIFF -seperate -o EO1_PZ.tif --optfile /Hyperion-EO1/EO1H1480472016328110PZ_1GST/EO1H1480472016328110PZ/listoftifs.txt
"""# Read merged/stacked hyperspectral image as array & reshape array for input preparation"""
img_ds = gdal.Open('EO1_PZ.lan')
combarr = img_ds.ReadAsArray()
print("Image array shape is ", combarr.shape)
transarr = combarr.transpose(1,2,0)
new_shape = (transarr.shape[0] * transarr.shape[1], transarr.shape[2])
X = transarr[:, :, :242].reshape(new_shape)
print("Transposed and reshaped array shape is ",X.shape)
"""# Using Scikit-learn PCA for dimensionality reduction and visualising based on elbow plot"""
pca = decomposition.PCA()
pca.fit(X)
#Elbow plot for explained variance of 242 components
plt.plot(range(1, 11), pca.explained_variance_ratio_[:10],marker='o')
plt.title('Elbow method')
plt.xlabel('No of Principal Component')
plt.ylabel('Explained varaince')
plt.show()
"""# Reducing number of component to 3 and transforming array and visualizing variance using Seaborn barplot"""
pca = decomposition.PCA(n_components = 3)
pca.fit(X)
print("Explained variance for first 3 components" , pca.explained_variance_ratio_)
X_reduced = pca.transform(X)
print("Reduced array shape " ,X_reduced.shape)
df = pd.DataFrame({'Percentage of variance explained':pca.explained_variance_ratio_, 'Principal Components':['PC1','PC2','PC3']})
sns.barplot(x='Principal Components',y="Percentage of variance explained", data=df, color="c");
"""# Classifying reduced data based on pca using k-means algorithm (Scikit-learn library)"""
k_means = cluster.KMeans(n_clusters=7)
k_fit = k_means.fit(X_reduced)
X_labels = k_means.labels_
X_cluster = X_labels.reshape(transarr[:, :, 0].shape)
"""# Plot classified array using matplotlib"""
plt.figure(figsize=(20,20))
plt.imshow(X_cluster, cmap="hsv")
plt.show()
"""# Elbow plot for distortions according to number of clusters"""
distortions =[]
for i in range(1, 11):
k_means = cluster.KMeans(n_clusters = i).fit(X_reduced)
distortions.append(k_means.inertia_)
plt.plot(range(1, 11), distortions,marker='o')
plt.title('Elbow method')
plt.xlabel('No of clusters')
plt.ylabel('Distortions')
plt.show()
"""## Exporting as an classified GEOTIFF image and reprojecting to WGS84 spatial reference system"""
ds = gdal.Open("EO1_PZ.tif")
band = ds.GetRasterBand(1)
arr = band.ReadAsArray()
[cols, rows] = arr.shape
driver_format = "GTiff"
driver = gdal.GetDriverByName(driver_format)
outDataRaster = driver.Create("pca_kmeans_output.tif", rows, cols, 1, gdal.GDT_Byte)
outDataRaster.SetGeoTransform(ds.GetGeoTransform())##sets same geotransform as input
outDataRaster.SetProjection(ds.GetProjection())##sets same projection as input
outDataRaster.GetRasterBand(1).WriteArray(X_cluster)
#outDataRaster.GetRasterBand(1).SetNoDataValue(1)
outDataRaster.FlushCache() ## remove from memory
del outDataRaster ## delete the data (not the actual geotiff)
gdal.Warp("projected_output.tif","pca_kmeans_output.tif",dstSRS='EPSG:4326')
"""# Visualize classified image on map using Folium"""
with rio.open("projected_output.tif") as src:
dataset = src.read()
meta = src.meta
ymin = src.bounds[1]
xmin = src.bounds[0]
ymax = src.bounds[3]
xmax = src.bounds[2]
dataset = dataset[0,:,]
dataset[np.isnan(dataset)] = 0 ### Handling nan values
dataimage=dataset
step = cm.StepColormap([(255,255,255,1), (56,168,0), (0, 38, 115), (38, 115, 0), (255,211,127), (230, 0, 169),(0, 0, 0)], vmin=0, vmax=6, index=[0,0.1, 1, 2, 3,4,5,6], caption="step")
step.caption = '1-Low Vegetation,2-Waterbodies, 3-Roads, 4-Land surface, 5-Urban,6-Dense Vegetation '
token = "pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw" # your mapbox token
tileurl = 'https://api.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}@2x.png?access_token=' + str(token)
m = folium.Map([(ymax+ymin)/2,(xmax+xmin)/2],
zoom_start=10)
folium.TileLayer(tiles=tileurl,attr='Mapbox',name="Mapbox Satellite Imagery").add_to(m)
folium.raster_layers.ImageOverlay(name="Classified Hyperspectral Image",image=dataimage,
bounds=[[ymin, xmin], [ymax, xmax]],
colormap=lambda x: step.rgba_bytes_tuple(x) ,
opacity = 1,).add_to(m)
m.add_child(step)
folium.LayerControl().add_to(m)
m.save("map.html")
m