diff --git a/data/output/GTB_2024-04-26_L7_confusion.csv b/data/output/GTB_2024-04-26_L7_confusion.csv new file mode 100644 index 0000000..6e6a34c --- /dev/null +++ b/data/output/GTB_2024-04-26_L7_confusion.csv @@ -0,0 +1,6 @@ +class,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,mission +cloud,122,6,11,0,19,Landsat 7 +openWater,0,94,0,0,33,Landsat 7 +lightNearShoreSediment,0,0,83,10,14,Landsat 7 +offShoreSediment,0,0,14,11,2,Landsat 7 +darkNearShoreSediment,0,1,5,4,33,Landsat 7 diff --git a/data/output/GTB_2024-04-26_L8_confusion.csv b/data/output/GTB_2024-04-26_L8_confusion.csv new file mode 100644 index 0000000..26d5845 --- /dev/null +++ b/data/output/GTB_2024-04-26_L8_confusion.csv @@ -0,0 +1,6 @@ +class,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,mission +cloud,80,0,0,0,0,Landsat 8 +openWater,0,27,0,11,2,Landsat 8 +lightNearShoreSediment,0,0,27,1,17,Landsat 8 +offShoreSediment,0,0,27,15,0,Landsat 8 +darkNearShoreSediment,0,2,2,0,49,Landsat 8 diff --git a/data/output/GTB_2024-04-26_Sen2_confusion.csv b/data/output/GTB_2024-04-26_Sen2_confusion.csv new file mode 100644 index 0000000..681f6ec --- /dev/null +++ b/data/output/GTB_2024-04-26_Sen2_confusion.csv @@ -0,0 +1,6 @@ +class,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,mission +cloud,114,0,0,0,0,Sentinel 2 +openWater,0,49,0,0,0,Sentinel 2 +lightNearShoreSediment,0,0,22,0,13,Sentinel 2 +offShoreSediment,0,6,8,8,4,Sentinel 2 +darkNearShoreSediment,0,7,2,0,27,Sentinel 2 diff --git a/data/output/GTB_2024-04-26_l5_confusion.csv b/data/output/GTB_2024-04-26_l5_confusion.csv new file mode 100644 index 0000000..cdf02b4 --- /dev/null +++ b/data/output/GTB_2024-04-26_l5_confusion.csv @@ -0,0 +1,6 @@ +class,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,mission +cloud,142,9,1,0,4,Landsat 5 +openWater,0,83,0,0,1,Landsat 5 +lightNearShoreSediment,1,0,37,4,0,Landsat 5 
+offShoreSediment,0,2,9,12,0,Landsat 5 +darkNearShoreSediment,0,13,26,9,46,Landsat 5 diff --git a/data/output/GTB_2024-04-26_l5_training_confusion.csv b/data/output/GTB_2024-04-26_l5_training_confusion.csv new file mode 100644 index 0000000..c293734 --- /dev/null +++ b/data/output/GTB_2024-04-26_l5_training_confusion.csv @@ -0,0 +1,6 @@ +class,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,mission +cloud,242,0,0,0,0,Landsat 5 +openWater,0,301,0,0,2,Landsat 5 +lightNearShoreSediment,1,0,198,6,10,Landsat 5 +offShoreSediment,0,0,5,107,0,Landsat 5 +darkNearShoreSediment,0,4,6,1,168,Landsat 5 diff --git a/data/output/GTB_2024-04-26_l7_training_confusion.csv b/data/output/GTB_2024-04-26_l7_training_confusion.csv new file mode 100644 index 0000000..fe8ffd8 --- /dev/null +++ b/data/output/GTB_2024-04-26_l7_training_confusion.csv @@ -0,0 +1,6 @@ +class,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,mission +cloud,286,0,0,2,0,Landsat 7 +openWater,0,173,0,0,2,Landsat 7 +lightNearShoreSediment,2,0,181,0,2,Landsat 7 +offShoreSediment,0,2,6,36,1,Landsat 7 +darkNearShoreSediment,1,2,3,0,246,Landsat 7 diff --git a/data/output/GTB_2024-04-26_l8_training_confusion.csv b/data/output/GTB_2024-04-26_l8_training_confusion.csv new file mode 100644 index 0000000..c6bc37f --- /dev/null +++ b/data/output/GTB_2024-04-26_l8_training_confusion.csv @@ -0,0 +1,6 @@ +class,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,mission +cloud,240,0,0,1,0,Landsat 8 +openWater,0,25,0,0,0,Landsat 8 +lightNearShoreSediment,0,0,92,4,1,Landsat 8 +offShoreSediment,0,0,1,54,1,Landsat 8 +darkNearShoreSediment,0,0,2,0,81,Landsat 8 diff --git a/data/output/GTB_2024-04-26_l9_confusion.csv b/data/output/GTB_2024-04-26_l9_confusion.csv new file mode 100644 index 0000000..3e899d4 --- /dev/null +++ b/data/output/GTB_2024-04-26_l9_confusion.csv @@ -0,0 +1,6 @@ 
+class,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,mission +cloud,44,0,0,0,0,Landsat 9 +openWater,0,0,0,0,5,Landsat 9 +lightNearShoreSediment,0,0,24,5,0,Landsat 9 +offShoreSediment,0,0,4,10,1,Landsat 9 +darkNearShoreSediment,0,0,9,1,23,Landsat 9 diff --git a/data/output/GTB_2024-04-26_l9_training_confusion.csv b/data/output/GTB_2024-04-26_l9_training_confusion.csv new file mode 100644 index 0000000..0cc993c --- /dev/null +++ b/data/output/GTB_2024-04-26_l9_training_confusion.csv @@ -0,0 +1,6 @@ +class,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,mission +cloud,146,1,0,0,1,Landsat 9 +openWater,0,14,0,0,0,Landsat 9 +lightNearShoreSediment,3,0,29,2,1,Landsat 9 +offShoreSediment,0,0,1,11,2,Landsat 9 +darkNearShoreSediment,0,0,1,0,31,Landsat 9 diff --git a/data/output/GTB_2024-04-26_sen2_training_confusion.csv b/data/output/GTB_2024-04-26_sen2_training_confusion.csv new file mode 100644 index 0000000..1ff20a9 --- /dev/null +++ b/data/output/GTB_2024-04-26_sen2_training_confusion.csv @@ -0,0 +1,6 @@ +class,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,mission +cloud,311,0,0,0,1,Sentinel 2 +openWater,0,94,0,0,2,Sentinel 2 +lightNearShoreSediment,1,0,64,0,2,Sentinel 2 +offShoreSediment,0,0,3,35,0,Sentinel 2 +darkNearShoreSediment,0,1,1,0,79,Sentinel 2 diff --git a/data/output/GTB_3class_2024-04-26_L8_confusion.csv b/data/output/GTB_3class_2024-04-26_L8_confusion.csv index 4715edc..e125661 100644 --- a/data/output/GTB_3class_2024-04-26_L8_confusion.csv +++ b/data/output/GTB_3class_2024-04-26_L8_confusion.csv @@ -1,4 +1,4 @@ class,cloud,openWater,sediment,mission cloud,80,0,0,Landsat 8 -openWater,0,27,13,Landsat 8 -sediment,0,2,138,Landsat 8 +openWater,0,28,12,Landsat 8 +sediment,0,3,137,Landsat 8 diff --git a/data/output/GTB_3class_2024-04-26_l8_training_confusion.csv b/data/output/GTB_3class_2024-04-26_l8_training_confusion.csv index 7be5320..48facf3 100644 --- 
a/data/output/GTB_3class_2024-04-26_l8_training_confusion.csv +++ b/data/output/GTB_3class_2024-04-26_l8_training_confusion.csv @@ -1,4 +1,4 @@ class,cloud,openWater,sediment,mission cloud,240,0,1,Landsat 8 openWater,0,25,0,Landsat 8 -sediment,0,0,236,Landsat 8 +sediment,0,1,235,Landsat 8 diff --git a/data/output/GTB_3class_2024-04-26_l9_confusion.csv b/data/output/GTB_3class_2024-04-26_l9_confusion.csv new file mode 100644 index 0000000..7dea7d9 --- /dev/null +++ b/data/output/GTB_3class_2024-04-26_l9_confusion.csv @@ -0,0 +1,4 @@ +class,cloud,openWater,sediment,mission +cloud,44,0,0,Landsat 9 +openWater,0,0,5,Landsat 9 +sediment,1,0,76,Landsat 9 diff --git a/data/output/GTB_3class_2024-04-26_l9_training_confusion.csv b/data/output/GTB_3class_2024-04-26_l9_training_confusion.csv new file mode 100644 index 0000000..670e93c --- /dev/null +++ b/data/output/GTB_3class_2024-04-26_l9_training_confusion.csv @@ -0,0 +1,4 @@ +class,cloud,openWater,sediment,mission +cloud,147,0,1,Landsat 9 +openWater,0,14,0,Landsat 9 +sediment,4,0,77,Landsat 9 diff --git a/data/output/GTB_3class_2024-04-26_sen2_training_confusion.csv b/data/output/GTB_3class_2024-04-26_sen2_training_confusion.csv index 6354fe4..f28a36a 100644 --- a/data/output/GTB_3class_2024-04-26_sen2_training_confusion.csv +++ b/data/output/GTB_3class_2024-04-26_sen2_training_confusion.csv @@ -1,4 +1,4 @@ class,cloud,openWater,sediment,mission -cloud,309,0,3,Sentinel 2 +cloud,310,0,2,Sentinel 2 openWater,0,94,2,Sentinel 2 -sediment,2,1,183,Sentinel 2 +sediment,3,1,182,Sentinel 2 diff --git a/data/output/GTB_LS5_3class_2024-04-26_performance_stats.csv b/data/output/GTB_3class_LS5_2024-04-26_performance_stats.csv similarity index 100% rename from data/output/GTB_LS5_3class_2024-04-26_performance_stats.csv rename to data/output/GTB_3class_LS5_2024-04-26_performance_stats.csv diff --git a/data/output/GTB_3class_LS5_variable_importance_2024-04-26.csv b/data/output/GTB_3class_LS5_variable_importance_2024-04-26.csv new file 
mode 100644 index 0000000..f52b7aa --- /dev/null +++ b/data/output/GTB_3class_LS5_variable_importance_2024-04-26.csv @@ -0,0 +1,7 @@ +Band,Feature_Importance +SR_B1,18812.471306527856 +SR_B2,9549.443494193567 +SR_B5,6868.694465861253 +SR_B7,4148.460863420916 +SR_B3,4078.7299725148127 +SR_B4,3319.503212954148 diff --git a/data/output/GTB_LS7_3class_2024-04-26_performance_stats.csv b/data/output/GTB_3class_LS7_2024-04-26_performance_stats.csv similarity index 100% rename from data/output/GTB_LS7_3class_2024-04-26_performance_stats.csv rename to data/output/GTB_3class_LS7_2024-04-26_performance_stats.csv diff --git a/data/output/GTB_3class_LS7_variable_importance_2024-04-26.csv b/data/output/GTB_3class_LS7_variable_importance_2024-04-26.csv new file mode 100644 index 0000000..0d762cd --- /dev/null +++ b/data/output/GTB_3class_LS7_variable_importance_2024-04-26.csv @@ -0,0 +1,7 @@ +Band,Feature_Importance +SR_B1,21802.88908072159 +SR_B2,13762.525768628366 +SR_B7,6239.8749385403235 +SR_B3,3904.7766719194096 +SR_B4,2879.977733354244 +SR_B5,2662.4585224998414 diff --git a/data/output/GTB_3class_LS8_2024-04-26_performance_stats.csv b/data/output/GTB_3class_LS8_2024-04-26_performance_stats.csv new file mode 100644 index 0000000..b874375 --- /dev/null +++ b/data/output/GTB_3class_LS8_2024-04-26_performance_stats.csv @@ -0,0 +1,2 @@ +satellite,cloud,openWater,sediment,GTB_accuracy,GTB_kappa +Landsat 8,1,0.7887323943661972,0.9480968858131488,0.9423076923076923,0.9002557544757033 diff --git a/data/output/GTB_3class_LS8_variable_importance_2024-04-26.csv b/data/output/GTB_3class_LS8_variable_importance_2024-04-26.csv new file mode 100644 index 0000000..dabd1d3 --- /dev/null +++ b/data/output/GTB_3class_LS8_variable_importance_2024-04-26.csv @@ -0,0 +1,7 @@ +Band,Feature_Importance +SR_B2,14643.103396913908 +SR_B6,3395.6127470544643 +SR_B5,3273.5132376628235 +SR_B4,2699.139266260013 +SR_B3,2612.927276291597 +SR_B7,922.7486231628105 diff --git 
a/data/output/GTB_3class_LS9_2024-04-26_performance_stats.csv b/data/output/GTB_3class_LS9_2024-04-26_performance_stats.csv new file mode 100644 index 0000000..478f09f --- /dev/null +++ b/data/output/GTB_3class_LS9_2024-04-26_performance_stats.csv @@ -0,0 +1,2 @@ +satellite,cloud,openWater,sediment,GTB_accuracy,GTB_kappa +Landsat 9,0.9887640449438202,NaN,0.9620253164556961,0.9523809523809523,0.9012925969447707 diff --git a/data/output/GTB_3class_LS9_variable_importance_2024-04-26.csv b/data/output/GTB_3class_LS9_variable_importance_2024-04-26.csv new file mode 100644 index 0000000..cbac14b --- /dev/null +++ b/data/output/GTB_3class_LS9_variable_importance_2024-04-26.csv @@ -0,0 +1,7 @@ +Band,Feature_Importance +SR_B2,2675.1332504067195 +SR_B7,1973.5913909387793 +SR_B3,1226.253456754587 +SR_B6,781.4176513484896 +SR_B5,475.3995434976706 +SR_B4,282.5587996890195 diff --git a/data/output/GTB_Sen2_3_class2024-04-26_performance_stats.csv b/data/output/GTB_3class_Sen2_2024-04-26_performance_stats.csv similarity index 100% rename from data/output/GTB_Sen2_3_class2024-04-26_performance_stats.csv rename to data/output/GTB_3class_Sen2_2024-04-26_performance_stats.csv diff --git a/data/output/GTB_3class_Sen2_variable_importance_2024-04-26.csv b/data/output/GTB_3class_Sen2_variable_importance_2024-04-26.csv new file mode 100644 index 0000000..3c7d17e --- /dev/null +++ b/data/output/GTB_3class_Sen2_variable_importance_2024-04-26.csv @@ -0,0 +1,11 @@ +Band,Feature_Importance +SR_B2,15374.212867173124 +SR_B3,3835.764715482057 +SR_B12,2392.937485376106 +SR_B11,1666.6628157976288 +SR_B7,1348.6881043711717 +SR_B4,1313.6587690502586 +SR_B5,897.8635137483344 +SR_B8,896.5671572165742 +SR_B6,757.3922887400425 +SR_B8A,235.32077933368356 diff --git a/data/output/GTB_LS5_2024-04-26_performance_stats.csv b/data/output/GTB_LS5_2024-04-26_performance_stats.csv new file mode 100644 index 0000000..a2dc7bb --- /dev/null +++ b/data/output/GTB_LS5_2024-04-26_performance_stats.csv @@ -0,0 +1,2 @@ 
+satellite,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,GTB_accuracy,GTB_kappa +Landsat 5,0.9498327759197325,0.8691099476439791,0.6434782608695652,0.4999999999999999,0.6344827586206897,0.8020050125313283,0.7361597053653636 diff --git a/data/output/GTB_LS5_variable_importance_2024-04-26.csv b/data/output/GTB_LS5_variable_importance_2024-04-26.csv new file mode 100644 index 0000000..43477e7 --- /dev/null +++ b/data/output/GTB_LS5_variable_importance_2024-04-26.csv @@ -0,0 +1,7 @@ +Band,Feature_Importance +SR_B1,15977.468584189084 +SR_B2,6577.103019372671 +SR_B3,4814.905637503011 +SR_B4,4296.879249594861 +SR_B5,3482.6942676700833 +SR_B7,2753.9285719747536 diff --git a/data/output/GTB_LS7_2024-04-26_performance_stats.csv b/data/output/GTB_LS7_2024-04-26_performance_stats.csv new file mode 100644 index 0000000..c716929 --- /dev/null +++ b/data/output/GTB_LS7_2024-04-26_performance_stats.csv @@ -0,0 +1,2 @@ +satellite,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,GTB_accuracy,GTB_kappa +Landsat 7,0.8714285714285713,0.824561403508772,0.7545454545454546,0.4230769230769231,0.45833333333333337,0.7424242424242424,0.6652418529884554 diff --git a/data/output/GTB_LS7_variable_importance_2024-04-26.csv b/data/output/GTB_LS7_variable_importance_2024-04-26.csv new file mode 100644 index 0000000..732ed20 --- /dev/null +++ b/data/output/GTB_LS7_variable_importance_2024-04-26.csv @@ -0,0 +1,7 @@ +Band,Feature_Importance +SR_B1,21377.131195358965 +SR_B2,10257.691105788164 +SR_B3,7681.288615380383 +SR_B4,3735.37359322693 +SR_B5,3699.5887365909834 +SR_B7,2651.703533056513 diff --git a/data/output/GTB_LS8_2024-04-26_performance_stats.csv b/data/output/GTB_LS8_2024-04-26_performance_stats.csv new file mode 100644 index 0000000..52765f2 --- /dev/null +++ b/data/output/GTB_LS8_2024-04-26_performance_stats.csv @@ -0,0 +1,2 @@ 
+satellite,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,GTB_accuracy,GTB_kappa +Landsat 8,1,0.7826086956521738,0.5346534653465347,0.43478260869565216,0.8099173553719008,0.7615384615384615,0.6945928536243415 diff --git a/data/output/GTB_LS8_3class_2024-04-26_performance_stats.csv b/data/output/GTB_LS8_3class_2024-04-26_performance_stats.csv deleted file mode 100644 index 5385c18..0000000 --- a/data/output/GTB_LS8_3class_2024-04-26_performance_stats.csv +++ /dev/null @@ -1,2 +0,0 @@ -satellite,cloud,openWater,sediment,GTB_accuracy,GTB_kappa -Landsat 8,1,0.7826086956521738,0.9484536082474228,0.9423076923076923,0.8997429305912596 diff --git a/data/output/GTB_LS8_variable_importance_2024-04-26.csv b/data/output/GTB_LS8_variable_importance_2024-04-26.csv new file mode 100644 index 0000000..edd7bc8 --- /dev/null +++ b/data/output/GTB_LS8_variable_importance_2024-04-26.csv @@ -0,0 +1,7 @@ +Band,Feature_Importance +SR_B2,10685.64885088815 +SR_B5,3121.8708464928204 +SR_B4,2844.5746121895495 +SR_B3,2505.41890742741 +SR_B6,1993.4923745719534 +SR_B7,648.9104588281326 diff --git a/data/output/GTB_LS9_variable_importance_2024-04-26.csv b/data/output/GTB_LS9_variable_importance_2024-04-26.csv new file mode 100644 index 0000000..c5c8a5d --- /dev/null +++ b/data/output/GTB_LS9_variable_importance_2024-04-26.csv @@ -0,0 +1,7 @@ +Band,Feature_Importance +SR_B2,2344.0278640639085 +SR_B7,2343.3450731800185 +SR_B3,714.1547206507968 +SR_B6,697.8956703759942 +SR_B5,543.5723855116364 +SR_B4,467.1840635651406 diff --git a/data/output/GTB_Sen2_2024-04-26_performance_stats.csv b/data/output/GTB_Sen2_2024-04-26_performance_stats.csv new file mode 100644 index 0000000..4024694 --- /dev/null +++ b/data/output/GTB_Sen2_2024-04-26_performance_stats.csv @@ -0,0 +1,2 @@ +satellite,cloud,openWater,lightNearShoreSediment,offShoreSediment,darkNearShoreSediment,GTB_accuracy,GTB_kappa +Sentinel 
2,1,0.8828828828828829,0.6567164179104478,0.47058823529411764,0.6749999999999999,0.8461538461538461,0.7862457351913513 diff --git a/data/output/GTB_Sen2_variable_importance_2024-04-26.csv b/data/output/GTB_Sen2_variable_importance_2024-04-26.csv new file mode 100644 index 0000000..abb3207 --- /dev/null +++ b/data/output/GTB_Sen2_variable_importance_2024-04-26.csv @@ -0,0 +1,11 @@ +Band,Feature_Importance +SR_B2,20035.862197400995 +SR_B3,3293.9311659905447 +SR_B12,2152.76554448701 +SR_B4,2031.7475500568055 +SR_B5,1676.7103180926922 +SR_B11,958.2319762831871 +SR_B7,742.8803428052754 +SR_B8,735.7737422974362 +SR_B6,354.91550566303636 +SR_B8A,236.09013653665116 diff --git a/data/output/GTB_2024-01-08_L8_confusion.csv b/data/output/archive/GTB_2024-01-08_L8_confusion.csv similarity index 100% rename from data/output/GTB_2024-01-08_L8_confusion.csv rename to data/output/archive/GTB_2024-01-08_L8_confusion.csv diff --git a/data/output/GTB_2024-01-08_Sen2_confusion.csv b/data/output/archive/GTB_2024-01-08_Sen2_confusion.csv similarity index 100% rename from data/output/GTB_2024-01-08_Sen2_confusion.csv rename to data/output/archive/GTB_2024-01-08_Sen2_confusion.csv diff --git a/data/output/GTB_2024-01-08_l5_confusion.csv b/data/output/archive/GTB_2024-01-08_l5_confusion.csv similarity index 100% rename from data/output/GTB_2024-01-08_l5_confusion.csv rename to data/output/archive/GTB_2024-01-08_l5_confusion.csv diff --git a/data/output/GTB_2024-01-08_l5_training_confusion.csv b/data/output/archive/GTB_2024-01-08_l5_training_confusion.csv similarity index 100% rename from data/output/GTB_2024-01-08_l5_training_confusion.csv rename to data/output/archive/GTB_2024-01-08_l5_training_confusion.csv diff --git a/data/output/GTB_2024-01-08_l7_confusion.csv b/data/output/archive/GTB_2024-01-08_l7_confusion.csv similarity index 100% rename from data/output/GTB_2024-01-08_l7_confusion.csv rename to data/output/archive/GTB_2024-01-08_l7_confusion.csv diff --git 
a/data/output/GTB_2024-01-08_l7_training_confusion.csv b/data/output/archive/GTB_2024-01-08_l7_training_confusion.csv similarity index 100% rename from data/output/GTB_2024-01-08_l7_training_confusion.csv rename to data/output/archive/GTB_2024-01-08_l7_training_confusion.csv diff --git a/data/output/GTB_2024-01-08_l8_training_confusion.csv b/data/output/archive/GTB_2024-01-08_l8_training_confusion.csv similarity index 100% rename from data/output/GTB_2024-01-08_l8_training_confusion.csv rename to data/output/archive/GTB_2024-01-08_l8_training_confusion.csv diff --git a/data/output/GTB_2024-01-08_l9_confusion.csv b/data/output/archive/GTB_2024-01-08_l9_confusion.csv similarity index 100% rename from data/output/GTB_2024-01-08_l9_confusion.csv rename to data/output/archive/GTB_2024-01-08_l9_confusion.csv diff --git a/data/output/GTB_2024-01-08_l9_training_confusion.csv b/data/output/archive/GTB_2024-01-08_l9_training_confusion.csv similarity index 100% rename from data/output/GTB_2024-01-08_l9_training_confusion.csv rename to data/output/archive/GTB_2024-01-08_l9_training_confusion.csv diff --git a/data/output/GTB_2024-01-08_sen2_training_confusion.csv b/data/output/archive/GTB_2024-01-08_sen2_training_confusion.csv similarity index 100% rename from data/output/GTB_2024-01-08_sen2_training_confusion.csv rename to data/output/archive/GTB_2024-01-08_sen2_training_confusion.csv diff --git a/data/output/GTB_LS5_2024-01-08_performance_stats.csv b/data/output/archive/GTB_LS5_2024-01-08_performance_stats.csv similarity index 100% rename from data/output/GTB_LS5_2024-01-08_performance_stats.csv rename to data/output/archive/GTB_LS5_2024-01-08_performance_stats.csv diff --git a/data/output/GTB_LS7_2024-01-08_performance_stats.csv b/data/output/archive/GTB_LS7_2024-01-08_performance_stats.csv similarity index 100% rename from data/output/GTB_LS7_2024-01-08_performance_stats.csv rename to data/output/archive/GTB_LS7_2024-01-08_performance_stats.csv diff --git 
a/data/output/GTB_LS8_2024-01-08_performance_stats.csv b/data/output/archive/GTB_LS8_2024-01-08_performance_stats.csv similarity index 100% rename from data/output/GTB_LS8_2024-01-08_performance_stats.csv rename to data/output/archive/GTB_LS8_2024-01-08_performance_stats.csv diff --git a/data/output/GTB_LS9_2024-01-08_performance_stats.csv b/data/output/archive/GTB_LS9_2024-01-08_performance_stats.csv similarity index 100% rename from data/output/GTB_LS9_2024-01-08_performance_stats.csv rename to data/output/archive/GTB_LS9_2024-01-08_performance_stats.csv diff --git a/data/output/GTB_Sen2_2024-01-08_performance_stats.csv b/data/output/archive/GTB_Sen2_2024-01-08_performance_stats.csv similarity index 100% rename from data/output/GTB_Sen2_2024-01-08_performance_stats.csv rename to data/output/archive/GTB_Sen2_2024-01-08_performance_stats.csv diff --git a/modeling/08_Train_Test_Split.Rmd b/modeling/08_Train_Test_Split.Rmd new file mode 100644 index 0000000..a375918 --- /dev/null +++ b/modeling/08_Train_Test_Split.Rmd @@ -0,0 +1,503 @@ +--- +title: "eePlumB Train-Test Set" +author: "ROSSyndicate" +date: "2024-04-26" +output: html_document +editor_options: + markdown: + wrap: 80 +--- + +```{r setup, echo = F} +libs = c('reticulate', 'tidyverse') + +package_loader <- function(x) { + if (x %in% installed.packages()) { + library(x, character.only = TRUE) + } else { + install.packages(x) + library(x, character.only = TRUE) + } +} + +lapply(libs, package_loader) +``` + +# Purpose + +This script processes the filtered eePlumB labels from the outlier and class analysis +and creates a train-test-split for model development. + +# Activate conda environment + +Check for virtual environment and activate, otherwise, set up virtual +environment. 
+ +```{r, conda env} +if (!dir.exists("env")) { + source("pySetup.R") +} else { + use_condaenv(file.path(getwd(), "env")) +} +``` + +## Settings/modules + +Indicate the label version and set the seed for random processes + +```{r} +training_set_version = "2024-04-25" + +set.seed(12) +``` + +And then import the needed modules + +```{python} +import ee +import os +import time +import pandas as pd + +v_date = '2024-04-26' +``` + +## GEE Setup + +```{python} +ee.Authenticate() +``` + +When your browser states 'You are now authenticated with the gcloud CLI', the +authentication is complete. This authentication is valid for 7 days. + +Now, we need to initialize our GEE session. You may need to change the project +name to one you own if you do not have write access. + +```{python} +ee.Initialize(project = 'ee-ross-superior') +``` + +# Import assets + +## Load the labels file into the environment + +Read these into R since I saved them as RDS files + +```{r} +ls5_labels = read_rds("data/labels/LS5_labels_for_tvt_2024-04-25.RDS") +ls7_labels = read_rds("data/labels/LS7_labels_for_tvt_2024-04-25.RDS") +ls8_labels = read_rds("data/labels/LS8_labels_for_tvt_2024-04-25.RDS") +ls9_labels = read_rds("data/labels/LS9_labels_for_tvt_2024-04-25.RDS") +sen2_labels = read_rds("data/labels/S2_labels_for_tvt_2024-04-25.RDS") +``` + +List the classes we care about: + +```{r} +class_list = c("cloud", "openWater", "lightNearShoreSediment", + "darkNearShoreSediment", "offShoreSediment") + +``` + +### Check for complete obs + +```{r} +ls5_filt <- ls5_labels %>% + drop_na(SR_B1:SR_B7) +ls7_filt <- ls7_labels %>% + drop_na(SR_B1:SR_B7) +ls8_filt <- ls8_labels %>% + drop_na(SR_B1:SR_B7) +ls9_filt <- ls9_labels %>% + drop_na(SR_B1:SR_B7) +sen_filt <- sen2_labels %>% + drop_na(SR_B1:SR_B9) +``` + +These are all the same, so good-to-go! 
+ +## Devise train-val-test splits + +First we need to see how many unique scenes are in each of these datsets, as +we will want to split the train-val-tests by image-date, not randomly: + +```{r} +length(unique(ls5_filt$date)) +length(unique(ls7_filt$date)) +length(unique(ls8_filt$date)) +length(unique(ls9_filt$date)) +length(unique(sen_filt$date)) +``` + +Oof on the LS9 images - for the time being, we'll just try to split by scene +with the acknowledgement that the val/train is going to be a single scene each. +We are likely going to have to come back and label more data for LS9 to be viable. + +Because GEE's GTB doesn't use the validation set for training (it uses cross-fold +validation), we only need train-validate. + +### Landsat 5 + +```{r} +# define the number of scenes that is ~ 70% of scenes +ls5_seventyperc <- round((length(unique(ls5_filt$date)))*0.7, 0) + +# get unique train-val dates +ls5_tv_dates <- ls5_filt %>% + pluck("date") %>% + unique() + +# sample 60% of the dates remaining +train_dates <- sample(ls5_tv_dates, ls5_seventyperc) + +# and split the t-v dataset +ls5_train <- ls5_filt %>% + filter(date %in% train_dates) +ls5_validate <- anti_join(ls5_filt, ls5_train) +``` + +### Landsat 7 + +```{r} +# define the number of scenes that is ~ 70% of scenes +ls7_seventyperc <- round((length(unique(ls7_filt$date)))*0.7, 0) + +# get unique train-val dates +ls7_tv_dates <- ls7_filt %>% + pluck("date") %>% + unique() + +# sample 60% of the dates remaining +train_dates <- sample(ls7_tv_dates, ls7_seventyperc) + +# and split the t-v dataset +ls7_train <- ls7_filt %>% + filter(date %in% train_dates) +ls7_validate <- anti_join(ls7_filt, ls7_train) + +``` + + +### Landsat 8 + +```{r} +# define the number of scenes that is ~ 70% of scenes +ls8_seventyperc <- round((length(unique(ls8_filt$date)))*0.7, 0) + +# get unique train-val dates +ls8_tv_dates <- ls8_filt %>% + pluck("date") %>% + unique() + +# sample 60% of the dates remaining +train_dates <- 
sample(ls8_tv_dates, ls8_seventyperc) + +# and split the t-v dataset +ls8_train <- ls8_filt %>% + filter(date %in% train_dates) +ls8_validate <- anti_join(ls8_filt, ls8_train) + +``` + + +### Landsat 9 + +Before we do a random split, let's see what classes are represetned across the dates: + +```{r} +ls9_filt %>% + group_by(date) %>% + summarise(n = length(unique(class))) +``` + + date n + +1 2021-11-06 4 +2 2022-05-05 4 +3 2022-05-21 5 +4 2022-06-06 3 +5 2022-07-24 2 + +Eeep. only one image has all classes. + +```{r} +ls9_filt %>% + group_by(date, class) %>% + summarise(n = n()) %>% + arrange(class) +``` + + date class n + + 1 2021-11-06 cloud 37 + 2 2022-05-21 cloud 44 + 3 2022-06-06 cloud 39 + 4 2022-07-24 cloud 72 + 5 2021-11-06 darkNearShoreSediment 2 + 6 2022-05-05 darkNearShoreSediment 12 + 7 2022-05-21 darkNearShoreSediment 15 + 8 2021-11-06 lightNearShoreSediment 7 + 9 2022-05-05 lightNearShoreSediment 16 +10 2022-05-21 lightNearShoreSediment 29 +11 2022-06-06 lightNearShoreSediment 8 +12 2022-07-24 lightNearShoreSediment 4 +13 2021-11-06 offShoreSediment 19 +14 2022-05-05 offShoreSediment 4 +15 2022-05-21 offShoreSediment 33 +16 2022-06-06 offShoreSediment 9 +17 2022-05-05 openWater 14 +18 2022-05-21 openWater 5 + +Good news, all classes are represented in more than one image. This is not ideal, +but I think we can just run with it. Because there is only one image with all +classes represented, and there are only 5 image-dates, our test will have that +single image date with all classes represented. 
+ +```{r} +ls9_validate <- ls9_filt %>% + filter(date == "2022-05-21") +# and split the t-v dataset +ls9_train <- anti_join(ls9_filt, ls9_validate) +``` + +### Sentinel 2 + +```{r} +# define the number of scenes that is ~ 70% of scenes +sen_seventyperc <- round((length(unique(sen_filt$date)))*0.7, 0) + +# get unique train-val dates +sen_tv_dates <- sen_filt %>% + pluck("date") %>% + unique() + +# sample 60% of the dates remaining +train_dates <- sample(sen_tv_dates, sen_seventyperc) + +# and split the t-v dataset +sen_train <- sen_filt %>% + filter(date %in% train_dates) +sen_validate <- anti_join(sen_filt, sen_train) + +``` + + +### Make ee feature collections + +Transform each of the r dataframes to ee feature collections. + +First, define the functions: + +```{python} +def ls57_to_eeFeat(df): + features=[] + for i in range(df.shape[0]): + x,y = df.lon[i],df.lat[i] + latlong =[x,y] + loc_properties = ({'class': str(df['class'][i]), # note 'class' is a special word, must use different syntax + 'mission': str(df.mission[i]), + 'SR_B1': df.SR_B1[i], + 'SR_B2': df.SR_B2[i], + 'SR_B3': df.SR_B3[i], + 'SR_B4': df.SR_B4[i], + 'SR_B5': df.SR_B5[i], + 'SR_B7': df.SR_B7[i] + }) + g = ee.Geometry.Point(latlong, 'EPSG:4326') + feature = ee.Feature(g, loc_properties) + features.append(feature) + ee_object = ee.FeatureCollection(features) + return ee_object + + +def ls89_to_eeFeat(df): + features=[] + for i in range(df.shape[0]): + x,y = df.lon[i],df.lat[i] + latlong =[x,y] + loc_properties = ({'class': str(df['class'][i]), # note 'class' is a special word, must use different syntax + 'mission': str(df.mission[i]), + 'SR_B1': df.SR_B1[i], + 'SR_B2': df.SR_B2[i], + 'SR_B3': df.SR_B3[i], + 'SR_B4': df.SR_B4[i], + 'SR_B5': df.SR_B5[i], + 'SR_B6': df.SR_B6[i], + 'SR_B7': df.SR_B7[i] + }) + g = ee.Geometry.Point(latlong, 'EPSG:4326') + feature = ee.Feature(g, loc_properties) + features.append(feature) + ee_object = ee.FeatureCollection(features) + return ee_object + + +def 
sen_to_eeFeat(df): + features=[] + for i in range(df.shape[0]): + x,y = df.lon[i],df.lat[i] + latlong =[x,y] + loc_properties = ({'class': str(df['class'][i]), # note 'class' is a special word, must use different syntax + 'mission': str(df.mission[i]), + 'SR_B1': df.SR_B1[i], + 'SR_B2': df.SR_B2[i], + 'SR_B3': df.SR_B3[i], + 'SR_B4': df.SR_B4[i], + 'SR_B5': df.SR_B5[i], + 'SR_B6': df.SR_B6[i], + 'SR_B7': df.SR_B7[i], + 'SR_B8': df.SR_B8[i], + 'SR_B8A': df.SR_B8A[i], + 'SR_B11': df.SR_B11[i], + 'SR_B12': df.SR_B12[i] + }) + g = ee.Geometry.Point(latlong, 'EPSG:4326') + feature = ee.Feature(g, loc_properties) + features.append(feature) + ee_object = ee.FeatureCollection(features) + return ee_object + +``` + +```{python} +ee_ls5_train = ls57_to_eeFeat(r.ls5_train) +ee_ls5_validate = ls57_to_eeFeat(r.ls5_validate) + +ee_ls7_train = ls57_to_eeFeat(r.ls7_train) +ee_ls7_validate = ls57_to_eeFeat(r.ls7_validate) + +ee_ls8_train = ls89_to_eeFeat(r.ls8_train) +ee_ls8_validate = ls89_to_eeFeat(r.ls8_validate) + +ee_ls9_train = ls89_to_eeFeat(r.ls9_train) +ee_ls9_validate = ls89_to_eeFeat(r.ls9_validate) + +ee_s2_train = sen_to_eeFeat(r.sen_train) +ee_s2_validate = sen_to_eeFeat(r.sen_validate) + +``` + + +## Build the train/validate sets + +Define the input features and output labels + +```{python} +output_label = "class" +class_values = r.class_list +``` + +Remap the label values to a 0-based sequential series. + +First for training. 
+ +```{python} +remap_values = ee.List.sequence(0, 4) +labels_ls5_train = ee_ls5_train.remap(ee.List(class_values), remap_values, ee.String(output_label)) +labels_ls7_train = ee_ls7_train.remap(ee.List(class_values), remap_values, ee.String(output_label)) +labels_ls8_train = ee_ls8_train.remap(ee.List(class_values), remap_values, ee.String(output_label)) +labels_ls9_train = ee_ls9_train.remap(ee.List(class_values), remap_values, ee.String(output_label)) +labels_sen_train = ee_s2_train.remap(ee.List(class_values), remap_values, ee.String(output_label)) + +def class_to_byte(feature): + byte_value = ee.Number(feature.get(output_label)).toByte() + return feature.set('byte_property', byte_value) + +labels_ls5_train = labels_ls5_train.map(class_to_byte) +labels_ls7_train = labels_ls7_train.map(class_to_byte) +labels_ls8_train = labels_ls8_train.map(class_to_byte) +labels_ls9_train = labels_ls9_train.map(class_to_byte) +labels_sen_train = labels_sen_train.map(class_to_byte) + +def add_class_by_remap(feature): + class_no = ee.Number(feature.get(output_label)) + return feature.set('class', ee.List(class_values).get(class_no)) + +labels_ls5_train = labels_ls5_train.map(add_class_by_remap) +labels_ls7_train = labels_ls7_train.map(add_class_by_remap) +labels_ls8_train = labels_ls8_train.map(add_class_by_remap) +labels_ls9_train = labels_ls9_train.map(add_class_by_remap) +labels_sen_train = labels_sen_train.map(add_class_by_remap) +``` + + +For validation: + +```{python} +labels_ls5_validate = ee_ls5_validate.remap(ee.List(class_values), remap_values, ee.String(output_label)) +labels_ls7_validate = ee_ls7_validate.remap(ee.List(class_values), remap_values, ee.String(output_label)) +labels_ls8_validate = ee_ls8_validate.remap(ee.List(class_values), remap_values, ee.String(output_label)) +labels_ls9_validate = ee_ls9_validate.remap(ee.List(class_values), remap_values, ee.String(output_label)) +labels_sen_validate = ee_s2_validate.remap(ee.List(class_values), remap_values, 
ee.String(output_label)) + +labels_ls5_validate = labels_ls5_validate.map(class_to_byte) +labels_ls7_validate = labels_ls7_validate.map(class_to_byte) +labels_ls8_validate = labels_ls8_validate.map(class_to_byte) +labels_ls9_validate = labels_ls9_validate.map(class_to_byte) +labels_sen_validate = labels_sen_validate.map(class_to_byte) + +labels_ls5_validate = labels_ls5_validate.map(add_class_by_remap) +labels_ls7_validate = labels_ls7_validate.map(add_class_by_remap) +labels_ls8_validate = labels_ls8_validate.map(add_class_by_remap) +labels_ls9_validate = labels_ls9_validate.map(add_class_by_remap) +labels_sen_validate = labels_sen_validate.map(add_class_by_remap) +``` + +## Save Train-Validate to Assets + +And now we'll save the training and testing sets as ee objects for use later. + +Training: + +```{python} +ee.batch.Export.table.toAsset(labels_ls5_train, + "LS5 Training", + "projects/ee-ross-superior/assets/train-test/training_ls5_v2024").start() + +ee.batch.Export.table.toAsset(labels_ls7_train, + "LS7 Training", + "projects/ee-ross-superior/assets/train-test/training_ls7_v2024").start() + +ee.batch.Export.table.toAsset(labels_ls8_train, + "LS8 Training", + "projects/ee-ross-superior/assets/train-test/training_ls8_v2024").start() + +ee.batch.Export.table.toAsset(labels_ls9_train, + "LS9 Training", + "projects/ee-ross-superior/assets/train-test/training_ls9_v2024").start() + +ee.batch.Export.table.toAsset(labels_sen_train, + "Sen Training", + "projects/ee-ross-superior/assets/train-test/training_sen_v2024").start() +``` + +Validation: + +```{python} +ee.batch.Export.table.toAsset(labels_ls5_validate, + "LS5 Validation", + "projects/ee-ross-superior/assets/train-test/validation_ls5_v2024").start() + +ee.batch.Export.table.toAsset(labels_ls7_validate, + "LS7 Validation", + "projects/ee-ross-superior/assets/train-test/validation_ls7_v2024").start() + +ee.batch.Export.table.toAsset(labels_ls8_validate, + "LS8 Validation", + 
"projects/ee-ross-superior/assets/train-test/validation_ls8_v2024").start() + +ee.batch.Export.table.toAsset(labels_ls9_validate, + "LS9 Validation", + "projects/ee-ross-superior/assets/train-test/validation_ls9_v2024").start() + +ee.batch.Export.table.toAsset(labels_sen_validate, + "Sen Validation", + "projects/ee-ross-superior/assets/train-test/validation_sen_v2024").start() + +``` + + + + + diff --git a/modeling/08_Train_Test_Split_3class.Rmd b/modeling/08_Train_Test_Split_3class.Rmd index c93f9d9..6175165 100644 --- a/modeling/08_Train_Test_Split_3class.Rmd +++ b/modeling/08_Train_Test_Split_3class.Rmd @@ -108,25 +108,15 @@ sen2_labels = read_rds("data/labels/S2_labels_for_tvt_2024-04-25.RDS") %>% class)) ``` -Make some helper lists +List the classes we care about here: ```{r} class_list = c("cloud", "openWater", "sediment") - -ls57_band_list = c(expr(SR_B1), expr(SR_B2), expr(SR_B3), expr(SR_B4), expr(SR_B5), expr(SR_B7)) -ls89_band_list = c(expr(SR_B2), expr(SR_B3), expr(SR_B4), expr(SR_B5), expr(SR_B6), expr(SR_B7)) -sen_band_list = c(expr(SR_B1), expr(SR_B2), expr(SR_B3), expr(SR_B4), expr(SR_B5), expr(SR_B6), - expr(SR_B7), expr(SR_B8), expr(SR_B8A), expr(SR_B11), expr(SR_B12)) ``` ### Check for complete obs ```{r} -ls57_bands_text = c("SR_B1", "SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B7") -ls89_bands_text = c("SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7") -sen_bands_text = c("SR_B1", "SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", - "SR_B7", "SR_B8", "SR_B8A", "SR_B9", "SR_B11", "SR_B12") - ls5_filt <- ls5_labels %>% drop_na(SR_B1:SR_B7) ls7_filt <- ls7_labels %>% @@ -400,10 +390,6 @@ ee_s2_validate = sen_to_eeFeat(r.sen_validate) Define the input features and output labels ```{python} -ls57_input_feat = ["SR_B1", "SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B7"] -ls89_input_feat = ["SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7"] -sen_input_feat = (["SR_B1", "SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", - "SR_B7", "SR_B8", "SR_B8A","SR_B9", 'SR_B11', 
'SR_B12']) output_label = "class" class_values = r.class_list ``` diff --git a/modeling/09_Landsat_5_GTB.Rmd b/modeling/09_Landsat_5_GTB.Rmd new file mode 100644 index 0000000..949e628 --- /dev/null +++ b/modeling/09_Landsat_5_GTB.Rmd @@ -0,0 +1,407 @@ +--- +title: "eePlumB Develop and Apply GTB for Landsat 5" +author: "ROSSyndicate" +date: "2024-04-26" +output: html_document +editor_options: + markdown: + wrap: 80 +--- + +```{r setup, echo = F} +libs = c('reticulate', 'tidyverse') + +package_loader <- function(x) { + if (x %in% installed.packages()) { + library(x, character.only = TRUE) + } else { + install.packages(x) + library(x, character.only = TRUE) + } +} + +lapply(libs, package_loader) +``` + +# Purpose + +This script develops and applies Gradient Tree Boost Models to the Landsat 5 image +stack. + +## Activate conda environment + +Check for virtual environment and activate, otherwise, set up virtual +environment. + +```{r, conda env} +if (!dir.exists("env")) { + source("pySetup.R") +} else { + use_condaenv(file.path(getwd(), "env")) +} +``` + +### Settings/modules + +Import the needed modules and set model version date + +```{python} +import ee +import os +import time +import matplotlib.pyplot as plt +import pandas as pd + +v_date = '2024-04-26' +``` + +## GEE Setup + +```{python} +ee.Authenticate() +``` + +When your browser states 'Google Earth Engine authentication successful!' or the +console reads "TRUE", the +authentication is complete. + +Now, we need to initialize our GEE session. You may need to change the project +name to one you own if you do not have write access. 
+ +```{python} +ee.Initialize(project = 'ee-ross-superior') +``` + + +Import custom functions (these require ee.Authenticate()) +```{python} +import imp +imp.load_source("gee_funx", "modeling/gee_functions.py") +import gee_funx as gf +``` + +# Import assets + +These assets were created in the 03_Train_Test_Split.Rmd file + +```{python} +training_ls5 = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/training_ls5_v2024") +testing_ls5 = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/validation_ls5_v2024") +``` + +## Train the GTB model + +```{python} +ls_input_feat = ["SR_B1", "SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B7"] +output_label = "class" +class_values = (['cloud', + 'openWater', + 'lightNearShoreSediment', + 'offShoreSediment', + 'darkNearShoreSediment']) +``` + +### Landsat 5 + +```{python} +trainedGTB_ls5 = (ee.Classifier.smileGradientTreeBoost(numberOfTrees = 10, seed = 47).train( + features = training_ls5, + classProperty = 'byte_property', + inputProperties = ls_input_feat +)) + +print(trainedGTB_ls5.getInfo()) +``` + + +Unfortunately, there is no current mechanism to save the GTB object as an asset, +so we are relying on setting the seed here to take care of reproducibility. Let's +also take a look at the variable importance to make sure that this all makes sense. 
+ + +```{python} +# Variable Importance - Graph +GTB_ls5_dict = trainedGTB_ls5.explain() + +variable_importance = (ee.Dictionary(GTB_ls5_dict) + .get('importance') + .getInfo()) + +# Sort the dictionary by values in descending order +sorted_importance = dict(sorted(variable_importance.items(), key=lambda item: item[1], reverse=True)) + +# Extract keys and values +keys = list(sorted_importance.keys()) +values = list(sorted_importance.values()) + +# Plot the bar graph +plt.figure(figsize=(10, 6)) +plt.barh(keys, values, color='skyblue') + +# Adding titles and labels +plt.xlabel('Feature Importance') +plt.ylabel('Band') +plt.title('Feature importance for 5-class GTB model for Landsat 5') + +# Reverse the y-axis to show highest value at the top +plt.gca().invert_yaxis() + +# Display the plot +plt.tight_layout() +# Display the plot +plt.show() + +df = pd.DataFrame(list(sorted_importance.items()), columns=['Band', 'Feature_Importance']) + +# And save the variable importance for later use. +df.to_csv('data/output/GTB_LS5_variable_importance_'+v_date+'.csv', index = False) +``` + +## Evaluate the models + +### Landsat 5 + +```{python} +trainingMatrixGTB_ls5 = (trainedGTB_ls5 + .confusionMatrix()) + +#convert to pandas dataframe with class info +training_conf_l5 = (pd.DataFrame( + trainingMatrixGTB_ls5.getInfo(), + index=[class_values], + columns =[class_values] + )) +print('GTB Training Confusion Matrix for Landsat 5:') +print(training_conf_l5) + +#reformat and save +training_conf_l5['mission'] = 'Landsat 5' +training_conf_l5.reset_index(inplace = True) +training_conf_l5 = training_conf_l5.rename(columns = {'level_0': 'class'}) +training_conf_l5.to_csv('data/output/GTB_'+v_date+'_l5_training_confusion.csv', index = False) + +confusionMatrixGTB_ls5 = (testing_ls5 + .classify(trainedGTB_ls5) + .errorMatrix('byte_property', "classification")) + +#convert to pandas dataframe with class info +confusion_l5 = (pd.DataFrame( + confusionMatrixGTB_ls5.getInfo(), + 
index=[class_values], + columns =[class_values] + )) +print('GTB Confusion Matrix for Landsat 5:') +print(confusion_l5) + +#reformat and save +confusion_l5['mission'] = 'Landsat 5' +confusion_l5.reset_index(inplace = True) +confusion_l5 = confusion_l5.rename(columns = {'level_0': 'class'}) +confusion_l5.to_csv('data/output/GTB_'+v_date+'_l5_confusion.csv', index = False) + +acc_values_GTB_ls5 = (confusionMatrixGTB_ls5.accuracy().getInfo()) +print("GTB Confusion Overall Accuracy for Landsat 5: ", acc_values_GTB_ls5) +k_GTB_ls5 = (confusionMatrixGTB_ls5.kappa().getInfo()) +print("GTB kappa for LS5: ", k_GTB_ls5) +fs_GTB_ls5 = (confusionMatrixGTB_ls5.fscore().getInfo()) +print('GTB fScore for each class: ', fs_GTB_ls5) +``` + + +### Collate model stats, save to data folder + +First, we'll copy over some values and make a big pandas dataframe. Note that +the df.copy() function unlinks the original list from the new one. Silly python. + +```{python} +accuracy_heads = class_values.copy() +accuracy_heads.extend(['GTB_accuracy', 'GTB_kappa']) +landsat5_perf = fs_GTB_ls5.copy() +landsat5_perf.extend([acc_values_GTB_ls5, k_GTB_ls5]) + +performance_collation = pd.DataFrame( + [landsat5_perf], + index = [ + 'Landsat 5' + ], + columns = [accuracy_heads] + ) + +# reset the index +performance_collation.reset_index(inplace = True) +performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_LS5_'+v_date+'_performance_stats.csv', index = False) +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modeling/09_Landsat_5_GTB_3class.Rmd b/modeling/09_Landsat_5_GTB_3class.Rmd index 6def52f..eff51e4 100644 --- a/modeling/09_Landsat_5_GTB_3class.Rmd +++ 
b/modeling/09_Landsat_5_GTB_3class.Rmd @@ -49,6 +49,7 @@ Import the needed modules and set model version date import ee import os import time +import matplotlib.pyplot as plt import pandas as pd v_date = '2024-04-26' @@ -101,21 +102,59 @@ class_values = (['cloud', ### Landsat 5 ```{python} -trainedGTB_ls5 = (ee.Classifier.smileGradientTreeBoost(10).train( - features = training_ls5, - classProperty = 'byte_property', - inputProperties = ls_input_feat -)) +trainedGTB_ls5 = ( + ee.Classifier.smileGradientTreeBoost(numberOfTrees = 10, seed = 47) + .train(features = training_ls5, + classProperty = 'byte_property', + inputProperties = ls_input_feat)) print(trainedGTB_ls5.getInfo()) ``` -Unfortunately, there is no current mechanism to save the GTB object. This is a -bummer because you can't really set a seed for these either, however! GEE is a bit -more rudimentary and recognizes the inputs and therefore creates the same output -objects. I did a quick check of this by running the model here and then again -in the browser. Both have identical versions, so I feel confident that GEE is -making the 'same' model. +Unfortunately, there is no current mechanism to save the GTB object as an asset, +so we are relying on setting the seed here to take care of reproducibility. Let's +also take a look at the variable importance to make sure that this all makes sense. 
+ + +```{python} +# Variable Importance - Graph +GTB_ls5_dict = trainedGTB_ls5.explain() + +variable_importance = (ee.Dictionary(GTB_ls5_dict) + .get('importance') + .getInfo()) + +# Sort the dictionary by values in descending order +sorted_importance = dict(sorted(variable_importance.items(), key=lambda item: item[1], reverse=True)) + +# Extract keys and values +keys = list(sorted_importance.keys()) +values = list(sorted_importance.values()) + +# Plot the bar graph +plt.figure(figsize=(10, 6)) +plt.barh(keys, values, color='skyblue') + +# Adding titles and labels +plt.xlabel('Feature Importance') +plt.ylabel('Band') +plt.title('Feature importance for 3-class GTB model for Landsat 5') + +# Reverse the y-axis to show highest value at the top +plt.gca().invert_yaxis() + +# Display the plot +plt.tight_layout() +# Display the plot +plt.show() + +df = pd.DataFrame(list(sorted_importance.items()), columns=['Band', 'Feature_Importance']) + +# And save the variable importance for later use. +df.to_csv('data/output/GTB_3class_LS5_variable_importance_'+v_date+'.csv', index = False) + +``` + ## Evaluate the models @@ -189,7 +228,7 @@ performance_collation = pd.DataFrame( # reset the index performance_collation.reset_index(inplace = True) -performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_LS5_3class_'+v_date+'_performance_stats.csv', index = False) +performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_3class_LS5_'+v_date+'_performance_stats.csv', index = False) ``` ## Apply model to image stack for Landsat diff --git a/modeling/10_Landsat_7_GTB.Rmd b/modeling/10_Landsat_7_GTB.Rmd new file mode 100644 index 0000000..c0a6111 --- /dev/null +++ b/modeling/10_Landsat_7_GTB.Rmd @@ -0,0 +1,397 @@ +--- +title: "eePlumB Develop and Apply GTB for Landsat 7" +author: "ROSSyndicate" +date: "2024-04-26" +output: html_document +editor_options: + markdown: + wrap: 80 +--- + +```{r setup, echo = F} +libs = c('reticulate', 
'tidyverse') + +package_loader <- function(x) { + if (x %in% installed.packages()) { + library(x, character.only = TRUE) + } else { + install.packages(x) + library(x, character.only = TRUE) + } +} + +lapply(libs, package_loader) +``` + +# Purpose + +This script develops and applies Gradient Tree Boost Models to the Landsat 7 +image stack. + +## Activate conda environment + +Check for virtual environment and activate, otherwise, set up virtual +environment. + +```{r, conda env} +if (!dir.exists("env")) { + source("pySetup.R") +} else { + use_condaenv(file.path(getwd(), "env")) +} +``` + +### Settings/modules + +Import the needed modules and set model version date + +```{python} +import ee +import os +import time +import matplotlib.pyplot as plt +import pandas as pd + +v_date = '2024-04-26' +``` + +## GEE Setup + +```{python} +ee.Authenticate() +``` + +When your browser states 'Google Earth Engine authentication successful!' or the +console reads "TRUE", the +authentication is complete. + +Now, we need to initialize our GEE session. You may need to change the project +name to one you own if you do not have write access. 
+ +```{python} +ee.Initialize(project = 'ee-ross-superior') +``` + + +Import custom functions (these require ee.Authenticate()) +```{python} +import imp +imp.load_source("gee_funx", "modeling/gee_functions.py") +import gee_funx as gf +``` + +# Import assets + +These assets were created in the 03_Train_Test_Split.Rmd file + +```{python} +training_ls7 = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/training_ls7_v2024") +testing_ls7 = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/validation_ls7_v2024") +``` + +## Train the GTB model + +```{python} +ls_input_feat = ["SR_B1", "SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B7"] +output_label = "class" +class_values = (['cloud', + 'openWater', + 'lightNearShoreSediment', + 'offShoreSediment', + 'darkNearShoreSediment']) +``` + +### Landsat 7 + +```{python} +trainedGTB_ls7 = (ee.Classifier.smileGradientTreeBoost(numberOfTrees = 10, seed = 47).train( + features = training_ls7, + classProperty = 'byte_property', + inputProperties = ls_input_feat +)) + +print(trainedGTB_ls7.getInfo()) +``` + +Unfortunately, there is no current mechanism to save the GTB object as an asset, +so we are relying on setting the seed here to take care of reproducibility. Let's +also take a look at the variable importance to make sure that this all makes sense. 
+ +```{python} +# Variable Importance - Graph +GTB_ls7_dict = trainedGTB_ls7.explain() + +variable_importance = (ee.Dictionary(GTB_ls7_dict) + .get('importance') + .getInfo()) + +# Sort the dictionary by values in descending order +sorted_importance = dict(sorted(variable_importance.items(), key=lambda item: item[1], reverse=True)) + +# Extract keys and values +keys = list(sorted_importance.keys()) +values = list(sorted_importance.values()) + +# Plot the bar graph +plt.figure(figsize=(10, 6)) +plt.barh(keys, values, color='skyblue') + +# Adding titles and labels +plt.xlabel('Feature Importance') +plt.ylabel('Band') +plt.title('Feature importance for 5-class GTB model for Landsat 7') + +# Reverse the y-axis to show highest value at the top +plt.gca().invert_yaxis() + +# Display the plot +plt.tight_layout() +# Display the plot +plt.show() + +df = pd.DataFrame(list(sorted_importance.items()), columns=['Band', 'Feature_Importance']) + +# And save the variable importance for later use. +df.to_csv('data/output/GTB_LS7_variable_importance_'+v_date+'.csv', index = False) + +``` + +## Evaluate the models + +### Landsat 7 + +```{python} +trainingMatrixGTB_ls7 = (trainedGTB_ls7 + .confusionMatrix()) + +#convert to pandas dataframe with class info +training_conf_l7 = (pd.DataFrame( + trainingMatrixGTB_ls7.getInfo(), + index=[class_values], + columns =[class_values] + )) +print('GTB Training Confusion Matrix for Landsat 7:') +print(training_conf_l7) + +#reformat and save +training_conf_l7['mission'] = 'Landsat 7' +training_conf_l7.reset_index(inplace = True) +training_conf_l7 = training_conf_l7.rename(columns = {'level_0': 'class'}) +training_conf_l7.to_csv('data/output/GTB_'+v_date+'_l7_training_confusion.csv', index = False) + +confusionMatrixGTB_ls7 = (testing_ls7 + .classify(trainedGTB_ls7) + .errorMatrix('byte_property', "classification")) + +#convert to pandas dataframe with class info +confusion_l7 = (pd.DataFrame( + confusionMatrixGTB_ls7.getInfo(), + 
index=[class_values], + columns =[class_values] + )) +print('GTB Confusion Matrix for Landsat 7:') +print(confusion_l7) + +#reformat and save +confusion_l7['mission'] = 'Landsat 7' +confusion_l7.reset_index(inplace = True) +confusion_l7 = confusion_l7.rename(columns = {'level_0': 'class'}) +confusion_l7.to_csv('data/output/GTB_'+v_date+'_L7_confusion.csv', index = False) + +acc_values_GTB_ls7 = (confusionMatrixGTB_ls7.accuracy().getInfo()) +print("GTB Confusion Overall Accuracy for Landsat 7: ", acc_values_GTB_ls7) +k_GTB_ls7 = (confusionMatrixGTB_ls7.kappa().getInfo()) +print("GTB kappa for LS7: ", k_GTB_ls7) +fs_GTB_ls7 = (confusionMatrixGTB_ls7.fscore().getInfo()) +print('GTB fScore for each class: ', fs_GTB_ls7) +``` + + +### Collate model stats, save to data folder + +First, we'll copy over some values and make a big pandas dataframe. Note that +the df.copy() function unlinks the original list from the new one. Silly python. + +```{python} +accuracy_heads = class_values.copy() +accuracy_heads.extend(['GTB_accuracy', 'GTB_kappa']) +landsat7_perf = fs_GTB_ls7.copy() +landsat7_perf.extend([acc_values_GTB_ls7, k_GTB_ls7]) + +performance_collation = pd.DataFrame( + [landsat7_perf], + index = [ + 'Landsat 7' + ], + columns = [accuracy_heads] + ) + +# reset the index +performance_collation.reset_index(inplace = True) +performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_LS7_'+v_date+'_performance_stats.csv', index = False) +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modeling/10_Landsat_7_GTB_3class.Rmd b/modeling/10_Landsat_7_GTB_3class.Rmd index 21f3d66..bbddd40 100644 --- a/modeling/10_Landsat_7_GTB_3class.Rmd +++ 
b/modeling/10_Landsat_7_GTB_3class.Rmd @@ -49,6 +49,7 @@ Import the needed modules and set model version date import ee import os import time +import matplotlib.pyplot as plt import pandas as pd v_date = '2024-04-26' @@ -102,21 +103,57 @@ class_values = (['cloud', ### Landsat 7 ```{python} -trainedGTB_ls7 = (ee.Classifier.smileGradientTreeBoost(10).train( - features = training_ls7, - classProperty = 'byte_property', - inputProperties = ls_input_feat +trainedGTB_ls7 = (ee.Classifier.smileGradientTreeBoost(numberOfTrees = 10, seed = 47) + .train(features = training_ls7, + classProperty = 'byte_property', + inputProperties = ls_input_feat )) print(trainedGTB_ls7.getInfo()) ``` -Unfortunately, there is no current mechanism to save the GTB object. This is a -bummer because you can't really set a seed for these either, however! GEE is a bit -more rudimentary and recognizes the inputs and therefore creates the same output -objects. I did a quick check of this by running the model here and then again -in the browser. Both have identical versions, so I feel confident that GEE is -making the 'same' model. +Unfortunately, there is no current mechanism to save the GTB object as an asset, +so we are relying on setting the seed here to take care of reproducibility. Let's +also take a look at the variable importance to make sure that this all makes sense. 
+ +```{python} +# Variable Importance - Graph +GTB_ls7_dict = trainedGTB_ls7.explain() + +variable_importance = (ee.Dictionary(GTB_ls7_dict) + .get('importance') + .getInfo()) + +# Sort the dictionary by values in descending order +sorted_importance = dict(sorted(variable_importance.items(), key=lambda item: item[1], reverse=True)) + +# Extract keys and values +keys = list(sorted_importance.keys()) +values = list(sorted_importance.values()) + +# Plot the bar graph +plt.figure(figsize=(10, 6)) +plt.barh(keys, values, color='skyblue') + +# Adding titles and labels +plt.xlabel('Feature Importance') +plt.ylabel('Band') +plt.title('Feature importance for 3-class GTB model for Landsat 7') + +# Reverse the y-axis to show highest value at the top +plt.gca().invert_yaxis() + +# Display the plot +plt.tight_layout() +# Display the plot +plt.show() + +df = pd.DataFrame(list(sorted_importance.items()), columns=['Band', 'Feature_Importance']) + +# And save the variable importance for later use. +df.to_csv('data/output/GTB_3class_LS7_variable_importance_'+v_date+'.csv', index = False) + +``` ## Evaluate the models @@ -190,7 +227,7 @@ performance_collation = pd.DataFrame( # reset the index performance_collation.reset_index(inplace = True) -performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_LS7_3class_'+v_date+'_performance_stats.csv', index = False) +performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_3class_LS7_'+v_date+'_performance_stats.csv', index = False) ``` ## Apply model to image stack for Landsat diff --git a/modeling/11_Landsat_8_GTB.Rmd b/modeling/11_Landsat_8_GTB.Rmd new file mode 100644 index 0000000..f734109 --- /dev/null +++ b/modeling/11_Landsat_8_GTB.Rmd @@ -0,0 +1,403 @@ + --- +title: "eePlumB Develop and Apply GTB for Landsat 8" +author: "ROSSyndicate" +date: "2024-04-26" +output: html_document +editor_options: + markdown: + wrap: 80 +--- + +```{r setup, echo = F} +libs = c('reticulate', 
'tidyverse') + +package_loader <- function(x) { + if (x %in% installed.packages()) { + library(x, character.only = TRUE) + } else { + install.packages(x) + library(x, character.only = TRUE) + } +} + +lapply(libs, package_loader) +``` + +# Purpose + +This script develops and applies Gradient Tree Boost Models to the Landsat 8 +image stack. + +## Activate conda environment + +Check for virtual environment and activate, otherwise, set up virtual +environment. + +```{r, conda env} +if (!dir.exists("env")) { + source("pySetup.R") +} else { + use_condaenv(file.path(getwd(), "env")) +} +``` + +### Settings/modules + +Import the needed modules and set model version date + +```{python} +import ee +import os +import time +import matplotlib.pyplot as plt +import pandas as pd + +v_date = '2024-04-26' +``` + +## GEE Setup + +```{python} +ee.Authenticate() +``` + +When your browser states 'Google Earth Engine authentication successful!' or the +console reads "TRUE", the +authentication is complete. + +Now, we need to initialize our GEE session. You may need to change the project +name to one you own if you do not have write access. 
+ +```{python} +ee.Initialize(project = 'ee-ross-superior') +``` + + +Import custom functions (these require ee.Authenticate()) +```{python} +import imp +imp.load_source("gee_funx", "modeling/gee_functions.py") +import gee_funx as gf +``` + +# Import assets + +These assets were created in the 03_Train_Test_Split.Rmd file + +```{python} +training_ls8 = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/training_ls8_v2024") +testing_ls8 = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/validation_ls8_v2024") +``` + +## Train the GTB model + +```{python} +ls_input_feat = ["SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7"] +output_label = "class" +class_values = (['cloud', + 'openWater', + 'lightNearShoreSediment', + 'offShoreSediment', + 'darkNearShoreSediment']) +``` + +### Landsat 8 + +```{python} +trainedGTB_ls8 = (ee.Classifier.smileGradientTreeBoost(numberOfTrees = 10, seed = 47).train( + features = training_ls8, + classProperty = 'byte_property', + inputProperties = ls_input_feat +)) + +print(trainedGTB_ls8.getInfo()) +``` + +Unfortunately, there is no current mechanism to save the GTB object as an asset, +so we are relying on setting the seed here to take care of reproducibility. Let's +also take a look at the variable importance to make sure that this all makes sense. 
+ + +```{python} +# Variable Importance - Graph +GTB_ls8_dict = trainedGTB_ls8.explain() + +variable_importance = (ee.Dictionary(GTB_ls8_dict) + .get('importance') + .getInfo()) + +# Sort the dictionary by values in descending order +sorted_importance = dict(sorted(variable_importance.items(), key=lambda item: item[1], reverse=True)) + +# Extract keys and values +keys = list(sorted_importance.keys()) +values = list(sorted_importance.values()) + +# Plot the bar graph +plt.figure(figsize=(10, 6)) +plt.barh(keys, values, color='skyblue') + +# Adding titles and labels +plt.xlabel('Feature Importance') +plt.ylabel('Band') +plt.title('Feature importance for 5-class GTB model for Landsat 8') + +# Reverse the y-axis to show highest value at the top +plt.gca().invert_yaxis() + +# Display the plot +plt.tight_layout() +# Display the plot +plt.show() + +df = pd.DataFrame(list(sorted_importance.items()), columns=['Band', 'Feature_Importance']) + +# And save the variable importance for later use. +df.to_csv('data/output/GTB_LS8_variable_importance_'+v_date+'.csv', index = False) + +``` + +## Evaluate the models + +### Landsat 8 + +```{python} +trainingMatrixGTB_ls8 = (trainedGTB_ls8 + .confusionMatrix()) + +#convert to pandas dataframe with class info +training_conf_l8 = (pd.DataFrame( + trainingMatrixGTB_ls8.getInfo(), + index=[class_values], + columns =[class_values] + )) +print('GTB Training Confusion Matrix for Landsat 8:') +print(training_conf_l8) + +#reformat and save +training_conf_l8['mission'] = 'Landsat 8' +training_conf_l8.reset_index(inplace = True) +training_conf_l8 = training_conf_l8.rename(columns = {'level_0': 'class'}) +training_conf_l8.to_csv('data/output/GTB_'+v_date+'_l8_training_confusion.csv', index = False) + +confusionMatrixGTB_ls8 = (testing_ls8 + .classify(trainedGTB_ls8) + .errorMatrix('byte_property', "classification")) + +#convert to pandas dataframe with class info +confusion_l8 = (pd.DataFrame( + confusionMatrixGTB_ls8.getInfo(), + 
index=[class_values], + columns =[class_values] + )) +print('GTB Confusion Matrix for Landsat 8:') +print(confusion_l8) + +#reformat and save +confusion_l8['mission'] = 'Landsat 8' +confusion_l8.reset_index(inplace = True) +confusion_l8 = confusion_l8.rename(columns = {'level_0': 'class'}) +confusion_l8.to_csv('data/output/GTB_'+v_date+'_L8_confusion.csv', index = False) + +acc_values_GTB_ls8 = (confusionMatrixGTB_ls8.accuracy().getInfo()) +print("GTB Confusion Overall Accuracy for Landsat 8: ", acc_values_GTB_ls8) +k_GTB_ls8 = (confusionMatrixGTB_ls8.kappa().getInfo()) +print("GTB kappa for LS8: ", k_GTB_ls8) +fs_GTB_ls8 = (confusionMatrixGTB_ls8.fscore().getInfo()) +print('GTB fScore for each class: ', fs_GTB_ls8) +``` + + +### Collate model stats, save to data folder + +First, we'll copy over some values and make a big pandas dataframe. Note that +the df.copy() function unlinks the original list from the new one. Silly python. + +```{python} +accuracy_heads = class_values.copy() +accuracy_heads.extend(['GTB_accuracy', 'GTB_kappa']) +landsat8_perf = fs_GTB_ls8.copy() +landsat8_perf.extend([acc_values_GTB_ls8, k_GTB_ls8]) + +performance_collation = pd.DataFrame( + [landsat8_perf], + index = [ + 'Landsat 8' + ], + columns = [accuracy_heads] + ) + +# reset the index +performance_collation.reset_index(inplace = True) +performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_LS8_'+v_date+'_performance_stats.csv', index = False) +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modeling/11_Landsat_8_GTB_3class.Rmd b/modeling/11_Landsat_8_GTB_3class.Rmd index 5426065..d66d32f 100644 --- a/modeling/11_Landsat_8_GTB_3class.Rmd +++ 
b/modeling/11_Landsat_8_GTB_3class.Rmd @@ -49,6 +49,7 @@ Import the needed modules and set model version date import ee import os import time +import matplotlib.pyplot as plt import pandas as pd v_date = '2024-04-26' @@ -91,7 +92,7 @@ testing_ls8 = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/ ## Train the GTB model ```{python} -ls_input_feat = ["SR_B1", "SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7"] +ls_input_feat = ["SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7"] output_label = "class" class_values = (['cloud', 'openWater', @@ -101,7 +102,7 @@ class_values = (['cloud', ### Landsat 8 ```{python} -trainedGTB_ls8 = (ee.Classifier.smileGradientTreeBoost(10).train( +trainedGTB_ls8 = (ee.Classifier.smileGradientTreeBoost(numberOfTrees = 10, seed = 47).train( features = training_ls8, classProperty = 'byte_property', inputProperties = ls_input_feat @@ -110,12 +111,49 @@ trainedGTB_ls8 = (ee.Classifier.smileGradientTreeBoost(10).train( print(trainedGTB_ls8.getInfo()) ``` -Unfortunately, there is no current mechanism to save the GTB object. This is a -bummer because you can't really set a seed for these either, however! GEE is a bit -more rudimentary and recognizes the inputs and therefore creates the same output -objects. I did a quick check of this by running the model here and then again -in the browser. Both have identical versions, so I feel confident that GEE is -making the 'same' model. +Unfortunately, there is no current mechanism to save the GTB object as an asset, +so we are relying on setting the seed here to take care of reproducibility. Let's +also take a look at the variable importance to make sure that this all makes sense. 
+ + +```{python} +# Variable Importance - Graph +GTB_ls8_dict = trainedGTB_ls8.explain() + +variable_importance = (ee.Dictionary(GTB_ls8_dict) + .get('importance') + .getInfo()) + +# Sort the dictionary by values in descending order +sorted_importance = dict(sorted(variable_importance.items(), key=lambda item: item[1], reverse=True)) + +# Extract keys and values +keys = list(sorted_importance.keys()) +values = list(sorted_importance.values()) + +# Plot the bar graph +plt.figure(figsize=(10, 6)) +plt.barh(keys, values, color='skyblue') + +# Adding titles and labels +plt.xlabel('Feature Importance') +plt.ylabel('Band') +plt.title('Feature importance for 3-class GTB model for Landsat 8') + +# Reverse the y-axis to show highest value at the top +plt.gca().invert_yaxis() + +# Display the plot +plt.tight_layout() +# Display the plot +plt.show() + +df = pd.DataFrame(list(sorted_importance.items()), columns=['Band', 'Feature_Importance']) + +# And save the variable importance for later use. +df.to_csv('data/output/GTB_3class_LS8_variable_importance_'+v_date+'.csv', index = False) + +``` ## Evaluate the models @@ -189,7 +227,7 @@ performance_collation = pd.DataFrame( # reset the index performance_collation.reset_index(inplace = True) -performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_LS8_3class_'+v_date+'_performance_stats.csv', index = False) +performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_3class_LS8_'+v_date+'_performance_stats.csv', index = False) ``` ## Apply model to image stack for Landsat diff --git a/modeling/12_Landsat_9_GTB.Rmd b/modeling/12_Landsat_9_GTB.Rmd new file mode 100644 index 0000000..436bd40 --- /dev/null +++ b/modeling/12_Landsat_9_GTB.Rmd @@ -0,0 +1,408 @@ +--- +title: "eePlumB Develop and Apply GTB for Landsat 9" +author: "ROSSyndicate" +date: "2024-04-26" +output: html_document +editor_options: + markdown: + wrap: 80 +--- + +```{r setup, echo = F} +libs = c('reticulate', 
'tidyverse') + +package_loader <- function(x) { + if (x %in% installed.packages()) { + library(x, character.only = TRUE) + } else { + install.packages(x) + library(x, character.only = TRUE) + } +} + +lapply(libs, package_loader) +``` + +# Purpose + +This script develops and applies Gradient Tree Boost Models to the Landsat 9 image +stack. + +## Activate conda environment + +Check for virtual environment and activate, otherwise, set up virtual +environment. + +```{r, conda env} +if (!dir.exists("env")) { + source("pySetup.R") +} else { + use_condaenv(file.path(getwd(), "env")) +} +``` + +### Settings/modules + +Import the needed modules and set model version date + +```{python} +import ee +import os +import time +import matplotlib.pyplot as plt +import pandas as pd + +v_date = '2024-04-26' +``` + +## GEE Setup + +```{python} +ee.Authenticate() +``` + +When your browser states 'Google Earth Engine authentication successful!' or the +console reads "TRUE", the +authentication is complete. + +Now, we need to initialize our GEE session. You may need to change the project +name to one you own if you do not have write access. 
+ +```{python} +ee.Initialize(project = 'ee-ross-superior') +``` + + +Import custom functions (these require ee.Authenticate()) +```{python} +import imp +imp.load_source("gee_funx", "modeling/gee_functions.py") +import gee_funx as gf +``` + +# Import assets + +These assets were created in the 03_Train_Test_Split.Rmd file + +```{python} +training_ls9 = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/training_ls9_v2024") +testing_ls9 = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/validation_ls9_v2024") +``` + +## Train the GTB model + +```{python} +ls_input_feat = ["SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7"] +output_label = "class" +class_values = (['cloud', + 'openWater', + 'lightNearShoreSediment', + 'offShoreSediment', + 'darkNearShoreSediment']) +``` + +### Landsat 9 + +```{python} +trainedGTB_ls9 = (ee.Classifier.smileGradientTreeBoost(numberOfTrees = 10, seed = 47).train( + features = training_ls9, + classProperty = 'byte_property', + inputProperties = ls_input_feat +)) + +print(trainedGTB_ls9.getInfo()) +``` + +Unfortunately, there is no current mechanism to save the GTB object as an asset, +so we are relying on setting the seed here to take care of reproducibility. Let's +also take a look at the variable importance to make sure that this all makes sense. 
+ + +```{python} +# Variable Importance - Graph +GTB_ls9_dict = trainedGTB_ls9.explain() + +variable_importance = (ee.Dictionary(GTB_ls9_dict) + .get('importance') + .getInfo()) + +# Sort the dictionary by values in descending order +sorted_importance = dict(sorted(variable_importance.items(), key=lambda item: item[1], reverse=True)) + +# Extract keys and values +keys = list(sorted_importance.keys()) +values = list(sorted_importance.values()) + +# Plot the bar graph +plt.figure(figsize=(10, 6)) +plt.barh(keys, values, color='skyblue') + +# Adding titles and labels +plt.xlabel('Feature Importance') +plt.ylabel('Band') +plt.title('Feature importance for 5-class GTB model for Landsat 9') + +# Reverse the y-axis to show highest value at the top +plt.gca().invert_yaxis() + +# Display the plot +plt.tight_layout() +# Display the plot +plt.show() + +df = pd.DataFrame(list(sorted_importance.items()), columns=['Band', 'Feature_Importance']) + +# And save the variable importance for later use. +df.to_csv('data/output/GTB_LS9_variable_importance_'+v_date+'.csv', index = False) + +``` + +## Evaluate the models + +### Landsat 9 + +```{python} +trainingMatrixGTB_ls9 = (trainedGTB_ls9 + .confusionMatrix()) + +#convert to pandas dataframe with class info +training_conf_l9 = (pd.DataFrame( + trainingMatrixGTB_ls9.getInfo(), + index=[class_values], + columns =[class_values] + )) +print('GTB Training Confusion Matrix for Landsat 9:') +print(training_conf_l9) + +#reformat and save +training_conf_l9['mission'] = 'Landsat 9' +training_conf_l9.reset_index(inplace = True) +training_conf_l9 = training_conf_l9.rename(columns = {'level_0': 'class'}) +training_conf_l9.to_csv('data/output/GTB_'+v_date+'_l9_training_confusion.csv', index = False) + +confusionMatrixGTB_ls9 = (testing_ls9 + .classify(trainedGTB_ls9) + .errorMatrix('byte_property', "classification")) + +#convert to pandas dataframe with class info +confusion_l9 = (pd.DataFrame( + confusionMatrixGTB_ls9.getInfo(), + 
index=[class_values], + columns =[class_values] + )) +print('GTB Confusion Matrix for Landsat 9:') +print(confusion_l9) + +#reformat and save +confusion_l9['mission'] = 'Landsat 9' +confusion_l9.reset_index(inplace = True) +confusion_l9 = confusion_l9.rename(columns = {'level_0': 'class'}) +confusion_l9.to_csv('data/output/GTB_'+v_date+'_l9_confusion.csv', index = False) + +acc_values_GTB_ls9 = (confusionMatrixGTB_ls9.accuracy().getInfo()) +print("GTB Confusion Overall Accuracy for Landsat 9: ", acc_values_GTB_ls9) +k_GTB_ls9 = (confusionMatrixGTB_ls9.kappa().getInfo()) +print("GTB kappa for LS9: ", k_GTB_ls9) +fs_GTB_ls9 = (confusionMatrixGTB_ls9.fscore().getInfo()) +print('GTB fScore for each class: ', fs_GTB_ls9) +``` + +Well, that's not great - all open water were misclassified as darkNearShoreSediment. +Let's see if it's any better in the 3-class version. We'll export the performance +stats anyway. + +### Collate model stats, save to data folder + +First, we'll copy over some values and make a big pandas dataframe. Note that +the df.copy() function unlinks the original list from the new one. Silly python. 
+ +```{python} +accuracy_heads = class_values.copy() +accuracy_heads.extend(['GTB_accuracy', 'GTB_kappa']) +landsat9_perf = fs_GTB_ls9.copy() +landsat9_perf.extend([acc_values_GTB_ls9, k_GTB_ls9]) + +performance_collation = pd.DataFrame( + [landsat9_perf], + index = [ + 'Landsat 9' + ], + columns = [accuracy_heads] + ) + +# reset the index +performance_collation.reset_index(inplace = True) +performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_LS9_'+v_date+'_performance_stats.csv', index = False) +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modeling/12_Landsat_9_GTB_3class.Rmd b/modeling/12_Landsat_9_GTB_3class.Rmd index 4d3b382..a48a506 100644 --- a/modeling/12_Landsat_9_GTB_3class.Rmd +++ b/modeling/12_Landsat_9_GTB_3class.Rmd @@ -49,6 +49,7 @@ Import the needed modules and set model version date import ee import os import time +import matplotlib.pyplot as plt import pandas as pd v_date = '2024-04-26' @@ -91,7 +92,7 @@ testing_ls9 = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/ ## Train the GTB model ```{python} -ls_input_feat = ["SR_B1", "SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7"] +ls_input_feat = ["SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7"] output_label = "class" class_values = (['cloud', 'openWater', @@ -101,7 +102,7 @@ class_values = (['cloud', ### Landsat 9 ```{python} -trainedGTB_ls9 = (ee.Classifier.smileGradientTreeBoost(10).train( +trainedGTB_ls9 = (ee.Classifier.smileGradientTreeBoost(numberOfTrees = 10, seed = 47).train( features = training_ls9, classProperty = 'byte_property', inputProperties = ls_input_feat @@ -110,13 +111,50 @@ trainedGTB_ls9 = 
(ee.Classifier.smileGradientTreeBoost(10).train( print(trainedGTB_ls9.getInfo()) ``` -Unfortunately, there is no current mechanism to save the GTB object. This is a -bummer because you can't really set a seed for these either, however! GEE is a bit -more rudimentary and recognizes the inputs and therefore creates the same output -objects. I did a quick check of this by running the model here and then again -in the browser. Both have identical versions, so I feel confident that GEE is -making the 'same' model. +Unfortunately, there is no current mechanism to save the GTB object as an asset, +so we are relying on setting the seed here to take care of reproducibility. Let's +also take a look at the variable importance to make sure that this all makes sense. + + +```{python} +# Variable Importance - Graph +GTB_ls9_dict = trainedGTB_ls9.explain() + +variable_importance = (ee.Dictionary(GTB_ls9_dict) + .get('importance') + .getInfo()) + +# Sort the dictionary by values in descending order +sorted_importance = dict(sorted(variable_importance.items(), key=lambda item: item[1], reverse=True)) + +# Extract keys and values +keys = list(sorted_importance.keys()) +values = list(sorted_importance.values()) + +# Plot the bar graph +plt.figure(figsize=(10, 6)) +plt.barh(keys, values, color='skyblue') + +# Adding titles and labels +plt.xlabel('Feature Importance') +plt.ylabel('Band') +plt.title('Feature importance for 3-class GTB model for Landsat 9') + +# Reverse the y-axis to show highest value at the top +plt.gca().invert_yaxis() + +# Display the plot +plt.tight_layout() +# Display the plot +plt.show() + +df = pd.DataFrame(list(sorted_importance.items()), columns=['Band', 'Feature_Importance']) + +# And save the variable importance for later use. 
+df.to_csv('data/output/GTB_3class_LS9_variable_importance_'+v_date+'.csv', index = False) + +``` ## Evaluate the models ### Landsat 9 @@ -191,7 +229,7 @@ performance_collation = pd.DataFrame( # reset the index performance_collation.reset_index(inplace = True) -performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_LS9_3class_'+v_date+'_performance_stats.csv', index = False) +performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_3class_LS9_'+v_date+'_performance_stats.csv', index = False) ``` diff --git a/modeling/13_Sentinel2_GTB.Rmd b/modeling/13_Sentinel2_GTB.Rmd new file mode 100644 index 0000000..d93b199 --- /dev/null +++ b/modeling/13_Sentinel2_GTB.Rmd @@ -0,0 +1,628 @@ +--- +title: "eePlumB Develop and Apply GTB for Sentinel 2" +author: "ROSSyndicate" +date: "2023-04-26" +output: html_document +editor_options: + markdown: + wrap: 80 +--- + +```{r setup, echo = F} +libs = c('reticulate', 'tidyverse') + +package_loader <- function(x) { + if (x %in% installed.packages()) { + library(x, character.only = TRUE) + } else { + install.packages(x) + library(x, character.only = TRUE) + } +} + +lapply(libs, package_loader) +``` + +# Purpose + +This script develops and applies Gradient Tree Boost Models to the Sentinel 2 +image stack. + +## Activate conda environment + +Check for virtual environment and activate, otherwise, set up virtual +environment. + +```{r, conda env} +if (!dir.exists("env")) { + source("pySetup.R") +} else { + use_condaenv(file.path(getwd(), "env")) +} +``` + +### Settings/modules + +Import the needed modules and set model version date + +```{python} +import ee +import os +import time +import matplotlib.pyplot as plt +import pandas as pd + +v_date = '2024-04-26' +``` + +## GEE Setup + +```{python} +ee.Authenticate() +``` + +When your browser states 'Google Earth Engine authentication successful!' or the +console reads "TRUE", the +authentication is complete. 
+ +Now, we need to initialize our GEE session. You may need to change the project +name to one you own if you do not have write access. + +```{python} +ee.Initialize(project = 'ee-ross-superior') +``` + + +Import custom functions (these require ee.Authenticate()) +```{python} +import imp +imp.load_source("gee_funx", "modeling/gee_functions.py") +import gee_funx as gf +``` + +# Import assets + +These assets were created in the 03_Train_Test_Split.Rmd file + +```{python} +training_sen = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/training_sen_v2024") +testing_sen = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/validation_sen_v2024") +``` + + +## Train the GTB model + +```{python} +sen_input_feat = ["SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7", 'SR_B8', "SR_B8A", 'SR_B11', 'SR_B12'] +output_label = "class" +class_values = (['cloud', + 'openWater', + 'lightNearShoreSediment', + 'offShoreSediment', + 'darkNearShoreSediment']) +``` + +### Sentinel 2 + +```{python} +trainedGTB_sen = (ee.Classifier.smileGradientTreeBoost(numberOfTrees = 10, seed = 47).train( + features = training_sen, + classProperty = 'byte_property', + inputProperties = sen_input_feat +)) + +print(trainedGTB_sen.getInfo()) +``` + +Unfortunately, there is no current mechanism to save the GTB object as an asset, +so we are relying on setting the seed here to take care of reproducibility. Let's +also take a look at the variable importance to make sure that this all makes sense. 
+ + +```{python} +# Variable Importance - Graph +GTB_sen2_dict = trainedGTB_sen.explain() + +variable_importance = (ee.Dictionary(GTB_sen2_dict) + .get('importance') + .getInfo()) + +# Sort the dictionary by values in descending order +sorted_importance = dict(sorted(variable_importance.items(), key=lambda item: item[1], reverse=True)) + +# Extract keys and values +keys = list(sorted_importance.keys()) +values = list(sorted_importance.values()) + +# Plot the bar graph +plt.figure(figsize=(10, 6)) +plt.barh(keys, values, color='skyblue') + +# Adding titles and labels +plt.xlabel('Feature Importance') +plt.ylabel('Band') +plt.title('Feature importance for 5-class GTB model for Sentinel 2') + +# Reverse the y-axis to show highest value at the top +plt.gca().invert_yaxis() + +# Display the plot +plt.tight_layout() +# Display the plot +plt.show() + +df = pd.DataFrame(list(sorted_importance.items()), columns=['Band', 'Feature_Importance']) + +# And save the variable importance for later use. +df.to_csv('data/output/GTB_Sen2_variable_importance_'+v_date+'.csv', index = False) + +``` + +## Evaluate the models + +### Sentinel 2 + +```{python} +trainingMatrixGTB_sen = (trainedGTB_sen + .confusionMatrix()) + +#convert to pandas dataframe with class info +training_conf_sen = (pd.DataFrame( + trainingMatrixGTB_sen.getInfo(), + index=[class_values], + columns =[class_values] + )) +print('GTB Training Confusion Matrix for Sentinel 2:') +print(training_conf_sen) + +#reformat and save +training_conf_sen['mission'] = 'Sentinel 2' +training_conf_sen.reset_index(inplace = True) +training_conf_sen = training_conf_sen.rename(columns = {'level_0': 'class'}) +training_conf_sen.to_csv('data/output/GTB_'+v_date+'_sen2_training_confusion.csv', index = False) + +confusionMatrixGTB_sen = (testing_sen + .classify(trainedGTB_sen) + .errorMatrix('byte_property', "classification")) +#convert to pandas dataframe with class info +confusion_sen = (pd.DataFrame( + confusionMatrixGTB_sen.getInfo(), + 
index=[class_values], + columns =[class_values] + )) +print('GTB Confusion Matrix for Sentinel 2:') +print(confusion_sen) + +#reformat and save +confusion_sen['mission'] = 'Sentinel 2' +confusion_sen.reset_index(inplace = True) +confusion_sen = confusion_sen.rename(columns = {'level_0': 'class'}) +confusion_sen.to_csv('data/output/GTB_'+v_date+'_Sen2_confusion.csv', index = False) + +acc_values_GTB_sen = (confusionMatrixGTB_sen.accuracy().getInfo()) +print("GTB Confusion Overall Accuracy for Sentinel 2: ", acc_values_GTB_sen) +k_GTB_sen = (confusionMatrixGTB_sen.kappa().getInfo()) +print("GTB kappa for S2: ", k_GTB_sen) +fs_GTB_sen = (confusionMatrixGTB_sen.fscore().getInfo()) +print('GTB fScore for each class: ', fs_GTB_sen) + +``` + +Not great, let's save the stats, and run the 3-class model. + +### Collate model stats, save to data folder + +First, we'll copy over some values and make a big pandas dataframe. Note that the df.copy() function unlinks the original list from the new one. Silly python. 
+ +```{python} +accuracy_heads = class_values.copy() +accuracy_heads.extend(['GTB_accuracy', 'GTB_kappa']) +sentinel2_perf = fs_GTB_sen.copy() +sentinel2_perf.extend([acc_values_GTB_sen, k_GTB_sen]) + +performance_collation = pd.DataFrame( + [sentinel2_perf], + index = [ + 'Sentinel 2' + ], + columns = [accuracy_heads] + ) + +# reset the index +performance_collation.reset_index(inplace = True) +performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_Sen2_'+v_date+'_performance_stats.csv', index = False) +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/modeling/13_Sentinel2_GTB_3class.Rmd b/modeling/13_Sentinel2_GTB_3class.Rmd index 2f39d03..54b8359 100644 --- a/modeling/13_Sentinel2_GTB_3class.Rmd +++ b/modeling/13_Sentinel2_GTB_3class.Rmd @@ -49,6 +49,7 @@ Import the needed modules and set model version date import ee import os import time +import matplotlib.pyplot as plt import pandas as pd v_date = '2024-04-26' @@ -92,7 +93,7 @@ testing_sen = ee.FeatureCollection("projects/ee-ross-superior/assets/train-test/ ## Train the GTB model ```{python} -sen_input_feat = ["SR_B1", "SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7", 'SR_B8', "SR_B8A", 'SR_B11', 'SR_B12'] 
+sen_input_feat = ["SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7", 'SR_B8', "SR_B8A", 'SR_B11', 'SR_B12'] output_label = "class" class_values = (['cloud', 'openWater', @@ -102,7 +103,7 @@ class_values = (['cloud', ### Sentinel 2 ```{python} -trainedGTB_sen = (ee.Classifier.smileGradientTreeBoost(10).train( +trainedGTB_sen = (ee.Classifier.smileGradientTreeBoost(numberOfTrees = 10, seed = 47).train( features = training_sen, classProperty = 'byte_property', inputProperties = sen_input_feat @@ -111,12 +112,49 @@ trainedGTB_sen = (ee.Classifier.smileGradientTreeBoost(10).train( print(trainedGTB_sen.getInfo()) ``` -Unfortunately, there is no current mechanism to save the GTB object. This is a -bummer because you can't really set a seed for these either, however! GEE is a bit -more rudimentary and recognizes the inputs and therefore creates the same output -objects. I did a quick check of this by running the model here and then again -in the browser. Both have identical versions, so I feel confident that GEE is -making the 'same' model. +Unfortunately, there is no current mechanism to save the GTB object as an asset, +so we are relying on setting the seed here to take care of reproducibility. Let's +also take a look at the variable importance to make sure that this all makes sense. 
+ + +```{python} +# Variable Importance - Graph +GTB_sen_dict = trainedGTB_sen.explain() + +variable_importance = (ee.Dictionary(GTB_sen_dict) + .get('importance') + .getInfo()) + +# Sort the dictionary by values in descending order +sorted_importance = dict(sorted(variable_importance.items(), key=lambda item: item[1], reverse=True)) + +# Extract keys and values +keys = list(sorted_importance.keys()) +values = list(sorted_importance.values()) + +# Plot the bar graph +plt.figure(figsize=(10, 6)) +plt.barh(keys, values, color='skyblue') + +# Adding titles and labels +plt.xlabel('Feature Importance') +plt.ylabel('Band') +plt.title('Feature importance for 3-class GTB model for Sentinel 2') + +# Reverse the y-axis to show highest value at the top +plt.gca().invert_yaxis() + +# Display the plot +plt.tight_layout() +# Display the plot +plt.show() + +df = pd.DataFrame(list(sorted_importance.items()), columns=['Band', 'Feature_Importance']) + +# And save the variable importance for later use. +df.to_csv('data/output/GTB_3class_Sen2_variable_importance_'+v_date+'.csv', index = False) + +``` ## Evaluate the models @@ -190,7 +228,7 @@ performance_collation = pd.DataFrame( # reset the index performance_collation.reset_index(inplace = True) -performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_Sen2_3_class'+v_date+'_performance_stats.csv', index = False) +performance_collation.rename(columns = {'index':'satellite'}).to_csv('data/output/GTB_3class_Sen2_'+v_date+'_performance_stats.csv', index = False) ``` @@ -210,8 +248,8 @@ sen = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED') .filter(ee.Filter.eq('GENERAL_QUALITY', 'PASSED')) .filter(ee.Filter.eq('GEOMETRIC_QUALITY', 'PASSED')) .filter(ee.Filter.eq('SENSOR_QUALITY', 'PASSED')) - .filter(ee.Filter.eq('DEGRADED_MSI_DATA_PERCENTAGE', 0)) - .filter(ee.Filter.eq('SNOW_ICE_PERCENTAGE', 0)) + .filter(ee.Filter.lt('DEGRADED_MSI_DATA_PERCENTAGE', 10)) + .filter(ee.Filter.lt('SNOW_ICE_PERCENTAGE', 10)) 
# masks .map(gf.apply_sat_defect_mask) .map(gf.mask_SCL_qa)