-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path5.classify_new_images.py
executable file
·48 lines (30 loc) · 1.2 KB
/
5.classify_new_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python
#
# Classify images using a trained RandomForest classifier
#
# (c) 2021 Jean-Olivier Irisson, GNU General Public License v3
import pickle
import pandas as pd
import numpy as np
print('Set options') ## ----
# This step has no tunable options.

print('Read test images features') ## ----
# Two feature tables are read, each indexed by its `id` column, in the same
# order as before: the first file first, then the second.
hand_feat, deep_feat = (
    pd.read_csv(feat_path, index_col='id')
    for feat_path in (
        'io/test_features.csv.gz',
        'io/test_deep_features.csv.gz',
    )
)
# Index-on-index join: rows of the first table are kept and the columns of
# the second table are appended to them.
features = hand_feat.join(other=deep_feat)
# in EcoTaxa, this would be extracted from the database
print('Load and apply classifier') ## -----
# NOTE(security): unpickling can execute arbitrary code. Only load a model
# file that was produced by a trusted training run.
with open('io/classifer.pickle', 'rb') as rf_file:
    RF = pickle.load(rf_file)
# in EcoTaxa, this is not necessary since the model will not be saved

# Class membership probabilities for every row of `features`;
# columns are ordered like `RF.classes_`.
probs = RF.predict_proba(features)
# get the list of classes, defined at the time the model is fitted
classes = RF.classes_

# extract highest score and corresponding label
predicted_scores = np.max(probs, axis=1)
# argmax gives the column of the best class, which indexes into `classes`
predicted_labels = np.array(classes)[np.argmax(probs, axis=1)]
# compare with reality, just for fun
df = pd.read_csv('io/test_labels.csv', index_col='id')
from sklearn import metrics

# Predictions follow the row order of `features`, so pick the true labels by
# `id` in that same order instead of relying on the CSV row order. A missing
# id raises KeyError rather than silently mis-scoring.
true_labels = df.loc[features.index, 'label']

accuracy = metrics.accuracy_score(y_true=true_labels, y_pred=predicted_labels)
# Pass the model's classes explicitly: the columns of `probs` follow
# `classes`, and the test labels may not contain every class the model knows.
loss = metrics.log_loss(y_true=true_labels, y_pred=probs, labels=classes)

# Report the metrics; bare expressions print nothing when run as a script.
print('Accuracy:', accuracy)
print('Log loss:', loss)