# (extraction metadata removed: 514 lines, 19 KiB, Python)
# Reference only — do not modify; read-only. This original code produced highly accurate models.
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
import glob
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import warnings
|
|
import joblib
|
|
from scipy.signal import savgol_filter
|
|
from scipy.stats import linregress
|
|
from scipy.integrate import trapezoid
|
|
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
|
|
from sklearn.model_selection import StratifiedKFold, train_test_split, cross_val_score
|
|
from sklearn.metrics import classification_report, accuracy_score, f1_score, confusion_matrix, precision_score, recall_score, top_k_accuracy_score
|
|
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
|
|
from sklearn.svm import SVC
|
|
|
|
# Suppress warnings
|
|
# Silence known-noisy warning categories so the pipeline logs stay readable.
warnings.simplefilter(action='ignore', category=FutureWarning)
# pandas emits PerformanceWarning when many columns are inserted one at a time,
# which the feature-engineering steps below do deliberately.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
warnings.filterwarnings("ignore", category=UserWarning) # Helper for some sklearn versions
# Opt in to pandas' future behaviour: no silent downcasting in replace/fillna.
pd.set_option('future.no_silent_downcasting', True)
|
|
|
|
# Check/Install Libraries
|
|
def install_libs():
    """Install required third-party ML libraries if they are not importable.

    The pip distribution name can differ from the importable module name
    (e.g. 'imbalanced-learn' installs as 'imblearn'), so the mapping is
    explicit. The previous ``lib.replace("-", "_")`` heuristic produced
    'imbalanced_learn', which never imports, causing a reinstall on every run.
    """
    # pip distribution name -> importable module name
    libs = {
        "minisom": "minisom",
        "imbalanced-learn": "imblearn",
        "xgboost": "xgboost",
        "lightgbm": "lightgbm",
        "catboost": "catboost",
    }
    for pip_name, module_name in libs.items():
        try:
            __import__(module_name)
        except ImportError:
            print(f"Installing {pip_name}...")
            subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])


install_libs()
|
|
import xgboost as xgb
|
|
import lightgbm as lgb
|
|
from catboost import CatBoostClassifier
|
|
|
|
from google.colab import drive
|
|
|
|
# ==========================================
|
|
# 1. DATA LOADING
|
|
# ==========================================
|
|
def load_balanced_data(folder_path):
    """Mount Google Drive and load the balanced raw dataset.

    Returns the DataFrame on success, or None when the expected CSV is
    missing. The RAW (unscaled) dataset is required because the
    smoothing/phenology calculations need physical units.
    """
    print(f"Mounting Google Drive and searching in: {folder_path}")
    drive.mount('/content/drive')

    file_path = os.path.join(folder_path, "Zimbabwe_Crop_Balanced_Raw.csv")

    # Guard clause: bail out early when the balancing step has not been run.
    if not os.path.exists(file_path):
        print("Error: 'Zimbabwe_Crop_Balanced_Raw.csv' not found. Please run the balancing step first.")
        return None

    print(f"Loading Balanced Raw Data: {file_path}")
    df = pd.read_csv(file_path)
    print(f"Data Loaded. Shape: {df.shape}")
    return df
|
|
|
|
# ==========================================
|
|
# 2. FEATURE ENGINEERING: SMOOTHING
|
|
# ==========================================
|
|
def apply_smoothing(df, indices=['ndvi', 'ndre', 'evi', 'savi', 'ci_re', 'ndwi']):
    """Apply Savitzky-Golay smoothing to each vegetation-index time series.

    Zeros are treated as gap/fill values: they are replaced with NaN,
    linearly interpolated across time, and the gap-filled series is then
    smoothed. Smoothed values are stored in new ``<col>_smooth`` columns.

    Fix vs. original: the window length is adapted to the number of
    available timesteps (the fixed window_length=5 raised ValueError from
    ``savgol_filter`` whenever an index had fewer than 5 dates).

    Parameters
    ----------
    df : DataFrame with columns named ``YYYYMMDD_<index>``.
    indices : list of index suffixes to smooth.

    Returns
    -------
    (eng_df, dates) : the augmented DataFrame and the sorted list of
    YYYYMMDD date strings found in the column names.
    """
    print("\n[FE] Applying Savitzky-Golay Smoothing (with 0-interpolation)...")

    # Collect the unique YYYYMMDD stamps embedded in the column names.
    date_pattern = re.compile(r"(\d{8})_")
    matches = (date_pattern.search(col) for col in df.columns)
    dates = sorted({m.group(1) for m in matches if m})

    eng_df = df.copy()

    for idx in indices:
        cols = [f"{d}_{idx}" for d in dates if f"{d}_{idx}" in df.columns]
        if not cols:
            continue

        # 1. Replace 0 with NaN (assuming 0 indicates gap/fill).
        raw_df = df[cols].replace(0, np.nan)

        # 2. Interpolate linearly across time (axis=1) to fill gaps before smoothing.
        interp_values = raw_df.interpolate(method='linear', axis=1, limit_direction='both').fillna(0).values

        # 3. Apply smoothing row-wise. window_length must be odd, no larger
        #    than the series, and strictly greater than polyorder (2).
        window = min(5, len(cols))
        if window % 2 == 0:
            window -= 1
        if window > 2:
            smooth_values = savgol_filter(interp_values, window_length=window, polyorder=2, axis=1)
        else:
            # Too few timesteps to fit a quadratic: pass through gap-filled values.
            smooth_values = interp_values

        smooth_cols = [c + "_smooth" for c in cols]
        eng_df[smooth_cols] = smooth_values

    return eng_df, dates
|
|
|
|
# ==========================================
|
|
# 3. FEATURE ENGINEERING: PHENOLOGY
|
|
# ==========================================
|
|
def extract_phenology(df, dates, indices=['ndvi', 'ndre', 'evi']):
    """Extract per-row phenology metrics (magnitude, AUC, timing, slopes).

    Prefers the ``_smooth`` columns produced by apply_smoothing and falls
    back to raw columns otherwise.

    Fix vs. original: the raw fallback built column names without checking
    that they exist, so an index absent from ``df`` raised KeyError at
    ``df[cols]`` (the ``if not cols`` guard could never fire). Missing
    indices are now skipped. Slope metrics are also skipped when there is
    only one timestep (``np.diff`` would yield an empty array).

    Returns the DataFrame augmented with ``<idx>_max/min/mean/std/amplitude``,
    ``<idx>_auc``, ``<idx>_peak_timestep`` and slope columns.
    """
    print("[FE] Extracting Phenology Metrics (Green-up, Peak, Senescence)...")

    eng_df = df.copy()

    for idx in indices:
        # Use SMOOTHED columns if available.
        cols = [f"{d}_{idx}_smooth" for d in dates]
        if not all(c in df.columns for c in cols):
            # Fallback to raw, keeping only columns that actually exist.
            cols = [f"{d}_{idx}" for d in dates if f"{d}_{idx}" in df.columns]

        if not cols:
            continue

        values = df[cols].values

        # 1. Magnitude metrics
        eng_df[f'{idx}_max'] = np.max(values, axis=1)
        eng_df[f'{idx}_min'] = np.min(values, axis=1)
        eng_df[f'{idx}_mean'] = np.mean(values, axis=1)
        eng_df[f'{idx}_std'] = np.std(values, axis=1)
        eng_df[f'{idx}_amplitude'] = eng_df[f'{idx}_max'] - eng_df[f'{idx}_min']

        # 2. Area under curve (integral) with dx=10 (10-day intervals).
        eng_df[f'{idx}_auc'] = trapezoid(values, dx=10, axis=1)

        # 3. Timing metrics (vectorized approximation): argmax = peak index.
        peak_indices = np.argmax(values, axis=1)
        eng_df[f'{idx}_peak_timestep'] = peak_indices

        # Rates (slopes) — need at least two timesteps.
        if values.shape[1] > 1:
            slopes = np.diff(values, axis=1)
            eng_df[f'{idx}_max_slope_up'] = np.max(slopes, axis=1)
            eng_df[f'{idx}_max_slope_down'] = np.min(slopes, axis=1)

    return eng_df
|
|
|
|
# ==========================================
|
|
# 4. FEATURE ENGINEERING: HARMONICS
|
|
# ==========================================
|
|
def add_harmonics(df, dates, indices=['ndvi']):
    """Fit first- and second-order Fourier (harmonic) coefficients per row.

    Time is normalized to one full cycle (0..2*pi over the observed dates)
    and coefficients are normalized by the number of dates, making them
    comparable across seasons with different lengths.

    Fix vs. original: the raw fallback did not verify that the columns
    exist, so an index absent from ``df`` raised KeyError at ``df[cols]``
    (the ``if not cols`` guard could never fire). Such indices are now
    skipped. Note the dot products require one value per date, so partial
    coverage cannot be used — the index must be fully present.
    """
    print("[FE] Fitting Harmonic/Fourier Features (Normalized)...")

    eng_df = df.copy()

    # Convert dates to relative time (one full cycle, 0..2*pi).
    time_steps = np.arange(len(dates))
    t = 2 * np.pi * time_steps / len(dates)

    sin_t = np.sin(t)
    cos_t = np.cos(t)
    sin_2t = np.sin(2 * t)
    cos_2t = np.cos(2 * t)
    n_dates = len(dates)

    for idx in indices:
        # Prefer smoothed values for cleaner harmonics.
        cols = [f"{d}_{idx}_smooth" for d in dates]
        if not all(c in df.columns for c in cols):
            cols = [f"{d}_{idx}" for d in dates]
        if not all(c in df.columns for c in cols):
            # Index not (fully) present in either form — skip instead of KeyError.
            continue

        values = df[cols].values

        # Normalized coefficients.
        eng_df[f'{idx}_harmonic1_sin'] = np.dot(values, sin_t) / n_dates
        eng_df[f'{idx}_harmonic1_cos'] = np.dot(values, cos_t) / n_dates
        eng_df[f'{idx}_harmonic2_sin'] = np.dot(values, sin_2t) / n_dates
        eng_df[f'{idx}_harmonic2_cos'] = np.dot(values, cos_2t) / n_dates

    return eng_df
|
|
|
|
# ==========================================
|
|
# 5. FEATURE ENGINEERING: INTERACTIONS & WINDOWS
|
|
# ==========================================
|
|
def add_interactions_and_windows(df, dates):
    """Add multi-index interaction features and seasonal-window summaries.

    Interactions compare peak/mean levels across indices; window summaries
    aggregate each index over the early/peak/late parts of the Zimbabwe
    growing season (Oct-Jun), parsing dates to handle the year crossover.

    Fixes vs. original: removed the unused ``date_to_col_map`` local and
    added a guard so an empty ``dates`` list no longer raises IndexError
    at ``dates[0]``.
    """
    print("[FE] Computing Multi-Index Interactions & Seasonal Windows...")
    eng_df = df.copy()

    # Interactions
    if 'ndvi_max' in df.columns and 'ndre_max' in df.columns:
        eng_df['ndvi_ndre_peak_diff'] = df['ndvi_max'] - df['ndre_max']
    if 'evi_mean' in df.columns and 'ndvi_mean' in df.columns:
        # +0.001 guards against division by zero.
        eng_df['canopy_density_contrast'] = df['evi_mean'] / (df['ndvi_mean'] + 0.001)

    # Nothing to window over without any observation dates.
    if not dates:
        return eng_df

    # Windowed summaries (assuming Zimbabwe season: Oct-Jun).
    # Correctly parsing dates to handle year crossover.
    dt_dates = pd.to_datetime(dates, format='%Y%m%d')

    # Define windows (months).
    windows = {
        'early': [10, 11, 12],  # Oct-Dec
        'peak': [1, 2, 3],      # Jan-Mar
        'late': [4, 5, 6],      # Apr-Jun
    }

    for idx in ['ndvi', 'ndwi', 'ndre']:
        # Prefer smoothed series when available.
        suffix = "_smooth" if f"{dates[0]}_{idx}_smooth" in df.columns else ""

        for win_name, months in windows.items():
            # Identify columns belonging to this window.
            relevant_dates = [d_str for d_dt, d_str in zip(dt_dates, dates) if d_dt.month in months]
            relevant_cols = [f"{d}_{idx}{suffix}" for d in relevant_dates if f"{d}_{idx}{suffix}" in df.columns]

            if relevant_cols:
                eng_df[f'{idx}_{win_name}_mean'] = df[relevant_cols].mean(axis=1)
                eng_df[f'{idx}_{win_name}_max'] = df[relevant_cols].max(axis=1)

    return eng_df
|
|
|
|
# ==========================================
|
|
# 6. FEATURE SELECTION (Helper)
|
|
# ==========================================
|
|
def get_selected_feature_names(X_train, y_train):
    """Select features with a quick LightGBM 'scout' model.

    Trains a small classifier on the training split only (avoiding
    test-set leakage) and returns the names of all features that
    received non-zero importance.
    """
    header = "=" * 40
    print("\n" + header)
    print("FEATURE SELECTION (Inside Train Split)")
    print(header)

    # 1. Train scout LightGBM
    print("Training scout LightGBM model...")
    scout = lgb.LGBMClassifier(n_estimators=100, random_state=42, verbose=-1)
    scout.fit(X_train, y_train)

    # 2. Rank features by importance
    ranking = pd.DataFrame({
        'Feature': X_train.columns,
        'Importance': scout.feature_importances_,
    })
    ranking = ranking.sort_values('Importance', ascending=False)

    print("Top 10 Features:")
    print(ranking.head(10))

    # 3. Keep every feature the scout actually used (non-zero importance)
    keep_mask = ranking['Importance'] > 0
    selected_feats = ranking.loc[keep_mask, 'Feature'].tolist()
    dropped = len(X_train.columns) - len(selected_feats)
    print(f"Kept {len(selected_feats)} features (dropped {dropped} zero-importance).")

    return selected_feats
|
|
|
|
# ==========================================
|
|
# 7. MODEL EVALUATION VISUALIZATIONS
|
|
# ==========================================
|
|
def plot_confusion_matrix_custom(y_true, y_pred, classes, model_name, folder_path, dataset_suffix=""):
    """Render, save and display a confusion-matrix heatmap for one model."""
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(14, 12))

    # Cell count annotations only stay readable below ~25 classes.
    show_counts = len(classes) < 25
    sns.heatmap(
        cm,
        annot=show_counts,
        fmt='d',
        cmap='Blues',
        xticklabels=classes,
        yticklabels=classes,
        cbar=False,
    )

    plt.title(f'Confusion Matrix: {model_name} ({dataset_suffix})', fontsize=16)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.ylabel('True Label', fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()

    out_png = os.path.join(folder_path, f"ConfMat_{model_name}_{dataset_suffix}.png")
    plt.savefig(out_png)
    plt.show()
    plt.close()
|
|
|
|
def plot_feature_importance_custom(model, feature_names, model_name, folder_path, dataset_suffix="", top_n=20):
    """Plot and save the top-N feature importances of a fitted model.

    Silently returns for models that do not expose ``feature_importances_``.
    """
    importances = getattr(model, 'feature_importances_', None)
    if importances is None:
        return

    ranking = (
        pd.DataFrame({'feature': feature_names, 'importance': importances})
        .sort_values('importance', ascending=False)
        .head(top_n)
    )

    plt.figure(figsize=(10, 8))
    sns.barplot(x='importance', y='feature', data=ranking, palette='viridis')
    plt.title(f'Top {top_n} Features: {model_name} ({dataset_suffix})', fontsize=16)
    plt.xlabel('Importance Score')
    plt.tight_layout()
    plt.savefig(os.path.join(folder_path, f"FeatImp_{model_name}_{dataset_suffix}.png"))
    plt.show()
    plt.close()
|
|
|
|
def plot_per_class_f1(y_true, y_pred, classes, model_name, folder_path, dataset_suffix=""):
    """Plot per-class F1 scores and persist the full classification report as CSV."""
    # zero_division=0 suppresses UndefinedMetricWarning for empty classes.
    report_dict = classification_report(
        y_true, y_pred, target_names=classes, output_dict=True, zero_division=0
    )
    per_class = pd.DataFrame(report_dict).transpose()
    # Keep only per-class rows: drop the aggregate summary rows.
    per_class = per_class.drop(['accuracy', 'macro avg', 'weighted avg'], errors='ignore')

    plt.figure(figsize=(14, 6))
    sns.barplot(x=per_class.index, y=per_class['f1-score'], palette='coolwarm')
    plt.title(f'F1-Score per Class: {model_name} ({dataset_suffix})', fontsize=16)
    plt.xticks(rotation=45, ha='right')
    plt.ylim(0, 1.05)
    plt.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.savefig(os.path.join(folder_path, f"ClassF1_{model_name}_{dataset_suffix}.png"))
    plt.show()
    plt.close()

    # Save the numeric report alongside the figure.
    per_class.to_csv(os.path.join(folder_path, f"Report_{model_name}_{dataset_suffix}.csv"))
|
|
|
|
# ==========================================
|
|
# 8. TRAINING & COMPARISON LOOP
|
|
# ==========================================
|
|
def train_and_compare(df, folder_path, dataset_suffix="Raw"):
    """Train, evaluate, visualize and persist several classifiers plus a soft-voting ensemble.

    Parameters
    ----------
    df : DataFrame with engineered features and a 'label' column.
    folder_path : output directory for saved models, plots and reports.
    dataset_suffix : tag ("Raw"/"Scaled") used in filenames and log lines.

    Returns
    -------
    list of dicts — one metrics row per model, plus one for the ensemble.

    Fix vs. original: ``k`` (the Top-K parameter) was only assigned inside
    the ``predict_proba`` branch but used unconditionally afterwards, so a
    model without ``predict_proba`` caused a NameError. ``k`` is now set
    once before the training loop.
    """
    print("\n" + "="*40)
    print(f"MODEL TRAINING & COMPARISON ({dataset_suffix} Data)")
    print("="*40)

    # 1. Drop junk columns (spatial-leakage prevention).
    # Explicitly including 'is_syn' and 'system:index' as requested.
    junk_cols = ['.geo', 'system:index', 'latitude', 'longitude', 'lat', 'lon', 'ID', 'parent_id', 'batch_id', 'is_syn']
    # Filter only those present.
    cols_to_drop = [c for c in junk_cols if c in df.columns]
    print(f"Dropping junk/spatial columns: {cols_to_drop}")
    df_clean = df.drop(columns=cols_to_drop)

    X = df_clean.drop(columns=['label'])
    y = df_clean['label']

    le = LabelEncoder()
    y_enc = le.fit_transform(y)
    class_names = le.classes_

    # 2. Split data FIRST (prevents feature-selection leakage into the test set).
    X_train, X_test, y_train, y_test = train_test_split(X, y_enc, test_size=0.2, random_state=42, stratify=y_enc)

    # 3. Feature selection on X_train ONLY.
    selected_features = get_selected_feature_names(X_train, y_train)
    X_train = X_train[selected_features]
    X_test = X_test[selected_features]
    print(f"Final Feature Count: {X_train.shape[1]}")

    # Top-K parameter, defined up-front so it exists even for models
    # without predict_proba (previously a NameError in that branch).
    k = 3 if len(class_names) > 3 else 1

    # Define models with robust configs.
    models = {
        'RandomForest': RandomForestClassifier(n_estimators=200, n_jobs=-1, random_state=42, class_weight='balanced'),

        'XGBoost': xgb.XGBClassifier(
            n_estimators=300,
            learning_rate=0.05,
            max_depth=7,
            subsample=0.8,
            colsample_bytree=0.8,
            eval_metric='mlogloss',
            n_jobs=-1,
            random_state=42
        ),

        'LightGBM': lgb.LGBMClassifier(
            n_estimators=800,
            learning_rate=0.03,
            num_leaves=63,
            subsample=0.8,
            colsample_bytree=0.8,
            min_child_samples=30,
            class_weight='balanced',
            n_jobs=-1,
            random_state=42,
            verbose=-1
        ),

        'CatBoost': CatBoostClassifier(
            iterations=500,
            learning_rate=0.05,
            depth=6,
            verbose=0,
            random_seed=42,
            auto_class_weights='Balanced'
        )
    }

    results = []
    trained_models = {}

    for name, model in models.items():
        print(f"\n--- Training {name} ({dataset_suffix}) ---")
        model.fit(X_train, y_train)

        # Probabilities needed for Top-K accuracy.
        if hasattr(model, "predict_proba"):
            probs = model.predict_proba(X_test)
            preds = np.argmax(probs, axis=1)
            top_k_acc = top_k_accuracy_score(y_test, probs, k=k)
        else:
            preds = model.predict(X_test)
            top_k_acc = 0.0

        # Metrics
        acc = accuracy_score(y_test, preds)
        f1_macro = f1_score(y_test, preds, average='macro', zero_division=0)
        f1_weighted = f1_score(y_test, preds, average='weighted', zero_division=0)

        print(f"{name} -> Acc: {acc:.4f}, F1-Macro: {f1_macro:.4f}, F1-Weighted: {f1_weighted:.4f}, Top-{k}: {top_k_acc:.4f}")

        results.append({
            'Model': name,
            'Dataset': dataset_suffix,
            'Accuracy': acc,
            'F1-Macro': f1_macro,
            'F1-Weighted': f1_weighted,
            f'Top-{k} Acc': top_k_acc
        })
        trained_models[name] = model

        # Persist the fitted model.
        pkl_path = os.path.join(folder_path, f"Zimbabwe_{name}_{dataset_suffix}_Model.pkl")
        joblib.dump(model, pkl_path)

        # Visualizations
        plot_confusion_matrix_custom(y_test, preds, class_names, name, folder_path, dataset_suffix)
        plot_per_class_f1(y_test, preds, class_names, name, folder_path, dataset_suffix)
        plot_feature_importance_custom(model, selected_features, name, folder_path, dataset_suffix)

    # --- Voting Ensemble ---
    print(f"\n--- Training Voting Ensemble (Soft Vote) - {dataset_suffix} ---")
    ensemble = VotingClassifier(
        estimators=[(n, m) for n, m in trained_models.items()],
        voting='soft', n_jobs=-1
    )
    ensemble.fit(X_train, y_train)

    ens_probs = ensemble.predict_proba(X_test)
    ens_preds = np.argmax(ens_probs, axis=1)

    ens_acc = accuracy_score(y_test, ens_preds)
    ens_f1 = f1_score(y_test, ens_preds, average='macro', zero_division=0)
    ens_f1_w = f1_score(y_test, ens_preds, average='weighted', zero_division=0)
    ens_topk = top_k_accuracy_score(y_test, ens_probs, k=k)

    print(f"Ensemble -> Acc: {ens_acc:.4f}, F1-Macro: {ens_f1:.4f}, F1-Weighted: {ens_f1_w:.4f}, Top-{k}: {ens_topk:.4f}")

    # Save ensemble model and its plots.
    joblib.dump(ensemble, os.path.join(folder_path, f"Zimbabwe_Ensemble_{dataset_suffix}_Model.pkl"))
    plot_confusion_matrix_custom(y_test, ens_preds, class_names, "Ensemble", folder_path, dataset_suffix)
    plot_per_class_f1(y_test, ens_preds, class_names, "Ensemble", folder_path, dataset_suffix)

    results.append({
        'Model': 'Ensemble',
        'Dataset': dataset_suffix,
        'Accuracy': ens_acc,
        'F1-Macro': ens_f1,
        'F1-Weighted': ens_f1_w,
        f'Top-{k} Acc': ens_topk
    })

    return results
|
|
|
|
# ==========================================
|
|
# MAIN EXECUTION
|
|
# ==========================================
|
|
def main():
    """Run the full pipeline: load data, engineer features, train on raw and scaled variants."""
    print("\n--- STARTING UPDATED CROP MAPPING SCRIPT ---")
    folder_path = '/content/drive/MyDrive/GEE_Crop_Mapping_V2_Universal'

    # 1. Load Data
    df = load_balanced_data(folder_path)
    if df is None:
        return

    # 2. Feature Engineering Pipeline
    df_smooth, dates = apply_smoothing(df)
    df_pheno = extract_phenology(df_smooth, dates)
    df_harm = add_harmonics(df_pheno, dates)
    df_full = add_interactions_and_windows(df_harm, dates)

    # Save engineered dataset (all columns, before split/selection).
    eng_path = os.path.join(folder_path, "Zimbabwe_Crop_Engineered_Ready.csv")
    df_full.to_csv(eng_path, index=False)
    print(f"\nSaved Engineered Dataset (All Features): {eng_path}")

    # 3. Prepare two datasets: raw vs scaled.
    print("\nPreparing Datasets: Raw vs StandardScaled...")

    # Raw is just df_full.
    df_raw = df_full.copy()

    # Scaled: apply StandardScaler to features only (label untouched).
    df_scaled = df_full.copy()
    features = df_scaled.drop(columns=['label']).columns
    scaler = StandardScaler()
    df_scaled[features] = scaler.fit_transform(df_scaled[features])

    # 4. Train & compare both variants.
    all_results = []
    all_results.extend(train_and_compare(df_raw, folder_path, dataset_suffix="Raw"))
    all_results.extend(train_and_compare(df_scaled, folder_path, dataset_suffix="Scaled"))

    # 5. Final combined comparison.
    final_df = pd.DataFrame(all_results)
    print("\nFinal Comparative Results (Raw vs Scaled):")
    print(final_df[['Model', 'Dataset', 'Accuracy', 'F1-Macro']])

    # Combined plot.
    plt.figure(figsize=(14, 7))
    sns.barplot(data=final_df, x='Model', y='F1-Macro', hue='Dataset', palette='Paired')
    plt.title("Model Comparison: Raw vs StandardScaled Features (F1-Macro)", fontsize=16)
    plt.ylim(0, 1.05)
    plt.grid(axis='y', alpha=0.3)
    plt.legend(title='Dataset Version', loc='lower right')
    plt.tight_layout()
    plt.savefig(os.path.join(folder_path, "Final_Comparison_Raw_vs_Scaled.png"))
    plt.show()


if __name__ == "__main__":
    main()