# Gap-handling helpers reconstructed from the whitespace-collapsed patch
# "feat: add Dynamic World baseline stage with immediate upload and gap
# handling" (commit 609f9c5, apps/worker/feature_computation.py).

import numpy as np


# ==========================================
# Gap Handling for Missing Data
# ==========================================

def _interior_zeros_to_nan(series: np.ndarray) -> None:
    """Replace interior exact zeros of a 1D float array with NaN, in place.

    "Interior" means between the first and last entry that is not equal to
    zero. NaN compares unequal to zero, so NaN entries count as non-zero when
    locating those bounds. Leading and trailing zeros are left untouched.
    """
    nonzero_idx = np.flatnonzero(series != 0)
    if nonzero_idx.size == 0:
        return
    # Basic slicing returns a view, so the assignment below edits `series`.
    interior = series[nonzero_idx[0]:nonzero_idx[-1] + 1]
    interior[interior == 0] = np.nan


def _long_nan_runs(nan_mask: np.ndarray, min_length: int) -> np.ndarray:
    """Return a boolean mask flagging runs of True in *nan_mask* of length >= *min_length*."""
    flagged = np.zeros(nan_mask.shape, dtype=bool)
    if not nan_mask.any():
        return flagged

    # Pad with False on both sides so every run has a detectable start and
    # end; transitions then alternate start, stop, start, stop, ...
    padded = np.concatenate(([False], nan_mask, [False]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])
    for start, stop in zip(edges[::2], edges[1::2]):
        if stop - start >= min_length:
            flagged[start:stop] = True
    return flagged


def handle_temporal_gaps(
    y: np.ndarray,
    gap_threshold: int = 3,
    *,
    treat_zeros_as_missing: bool = True,
) -> np.ndarray:
    """Fill short NaN gaps in a 1D time series and keep long gaps as NaN.

    Runs of fewer than ``gap_threshold`` consecutive NaNs are filled by linear
    interpolation between the surrounding valid samples; short runs at the
    start or end take the nearest valid value (``np.interp`` end clamping).
    Runs of ``gap_threshold`` or more NaNs are left as NaN.

    Args:
        y: 1D time series (may contain NaN values). The input is not modified.
        gap_threshold: Minimum number of consecutive NaNs that counts as a
            significant gap and is therefore not filled.
        treat_zeros_as_missing: When True (the default), exact zeros located
            between the first and last non-zero entries are converted to NaN
            before gap detection. Leading and trailing zeros are kept as
            valid samples.

    Returns:
        A new float64 array. ``np.isnan(result)`` marks the retained gaps;
        when zeros are treated as missing this can differ from
        ``compute_gap_mask(y, gap_threshold)`` unless that call is also given
        ``treat_zeros_as_missing=True``.
    """
    series = np.array(y, dtype=np.float64)  # np.array copies, so y is untouched
    n = len(series)
    if n == 0:
        return series

    if treat_zeros_as_missing:
        _interior_zeros_to_nan(series)

    nan_mask = np.isnan(series)
    valid_mask = ~nan_mask
    if not valid_mask.any():
        return series  # All NaN: nothing to interpolate from

    large_gap_mask = _long_nan_runs(nan_mask, gap_threshold)

    # Interpolate every NaN first, then put the significant gaps back.
    positions = np.arange(n)
    filled = np.interp(positions, positions[valid_mask], series[valid_mask])
    filled[large_gap_mask] = np.nan
    return filled


def spatial_fill_nan(data_2d: np.ndarray, max_iterations: int = 3) -> np.ndarray:
    """Fill NaN values in a 2D raster from their 4-connected neighbours.

    Each pass fills every NaN pixel that has at least one non-NaN neighbour
    (up, down, left, right) with the median of those neighbours. All fills of
    a pass are computed from the state at the start of that pass, so a pass
    grows the valid area by one pixel in each direction and the result does
    not depend on scan order. NaNs still present after the last pass are
    replaced by the median of all non-NaN values.

    Args:
        data_2d: 2D array-like (H, W) with possible NaN values. Not modified.
        max_iterations: Maximum number of neighbour-fill passes.

    Returns:
        A new array with NaNs filled. An input that is entirely NaN is
        returned unchanged (as a copy), since there is nothing to fill from.

    Raises:
        ValueError: If ``data_2d`` is not two-dimensional.
    """
    data = np.array(data_2d, copy=True)
    if data.ndim != 2:
        raise ValueError(f"data_2d must be 2D, got {data.ndim}D")

    nan_mask = np.isnan(data)
    if not nan_mask.any() or nan_mask.all():
        return data

    for _ in range(max_iterations):
        nan_mask = np.isnan(data)
        if not nan_mask.any():
            break

        # NaN padding makes out-of-bounds neighbours count as missing.
        padded = np.pad(data, 1, mode="constant", constant_values=np.nan)
        neighbours = np.stack([
            padded[:-2, 1:-1],   # up
            padded[2:, 1:-1],    # down
            padded[1:-1, :-2],   # left
            padded[1:-1, 2:],    # right
        ])
        fillable = nan_mask & ~np.isnan(neighbours).all(axis=0)
        if not fillable.any():
            break  # Remaining NaNs have no valid neighbour

        # Every selected column has >= 1 valid value, so nanmedian is defined.
        data[fillable] = np.nanmedian(neighbours[:, fillable], axis=0)

    remaining = np.isnan(data)
    if remaining.any():
        data[remaining] = np.nanmedian(data)

    return data


def compute_gap_mask(
    y: np.ndarray,
    gap_threshold: int = 3,
    *,
    treat_zeros_as_missing: bool = False,
) -> np.ndarray:
    """Flag the samples of a 1D time series that belong to a significant gap.

    Args:
        y: 1D time series (may contain NaN values). The input is not modified.
        gap_threshold: Minimum number of consecutive NaNs that counts as a
            significant gap.
        treat_zeros_as_missing: When True, exact zeros between the first and
            last non-zero entries are treated as NaN before run detection,
            matching the default behaviour of ``handle_temporal_gaps``.
            Defaults to False (only NaNs count as missing).

    Returns:
        Boolean array, True where the sample lies in a run of at least
        ``gap_threshold`` consecutive missing values.
    """
    series = np.array(y, dtype=np.float64)  # copy; safe to edit in place
    if treat_zeros_as_missing:
        _interior_zeros_to_nan(series)
    return _long_nan_runs(np.isnan(series), gap_threshold)


if __name__ == "__main__":
    print("\n8. Testing gap handling functions...")

    # Create time series with gaps
    gap_series = np.array([0.5, 0.6, np.nan, np.nan, np.nan, 0.7, 0.8, np.nan, 0.9, 0.4])

    # Test handle_temporal_gaps with threshold=3
    filled_series = handle_temporal_gaps(gap_series, gap_threshold=3)
    print(f"   Original: {gap_series}")
    print(f"   After gap handling (threshold=3): {filled_series}")

    # Test compute_gap_mask
    gap_mask = compute_gap_mask(gap_series, gap_threshold=3)
    print(f"   Gap mask (threshold=3): {gap_mask}")

    # Test spatial_fill_nan
    spatial_arr = np.array([[0.5, 0.6, np.nan, 0.8],
                            [0.7, np.nan, 0.9, 0.4],
                            [np.nan, 0.3, 0.2, np.nan],
                            [0.1, 0.2, 0.3, 0.4]])
    filled_spatial = spatial_fill_nan(spatial_arr, max_iterations=2)
    print(f"   Original spatial (2D):\n{spatial_arr}")
    print(f"   After spatial fill:\n{filled_spatial}")