feat: add Dynamic World baseline stage with immediate upload and gap handling

Co-authored-by: aider (openrouter/minimax/minimax-m2.7) <aider@aider.chat>
This commit is contained in:
fchinembiri 2026-05-04 17:41:14 +02:00
parent a406a28a13
commit 609f9c5892
1 changed files with 214 additions and 1 deletions

View File

@ -8,6 +8,7 @@ This module provides:
- Harmonic/Fourier features
- Index computations (NDVI, NDRE, EVI, SAVI, CI_RE, NDWI)
- Per-pixel feature builder
- Gap handling for temporal and spatial missing data
NOTE: Seasonal window summaries come in Step 4B.
"""
@ -15,7 +16,7 @@ NOTE: Seasonal window summaries come in Step 4B.
from __future__ import annotations
import math
from typing import Dict, List
from typing import Dict, List, Tuple
import numpy as np
@ -140,6 +141,195 @@ def smooth_series(y: np.ndarray) -> np.ndarray:
return savgol_smooth_1d(y_filled, window=5, polyorder=2)
# ==========================================
# Gap Handling for Missing Data
# ==========================================
def _long_nan_runs(nan_mask: np.ndarray, min_length: int) -> np.ndarray:
    """Return a boolean mask flagging every run of True in *nan_mask* that is at least *min_length* long."""
    flagged = np.zeros(nan_mask.size, dtype=bool)
    # Pad with False on both sides so runs touching either end of the
    # series still produce a rising and a falling edge.
    padded = np.concatenate(([False], nan_mask, [False]))
    edges = np.diff(padded.astype(np.int8))
    run_starts = np.flatnonzero(edges == 1)
    run_ends = np.flatnonzero(edges == -1)  # exclusive end indices
    for start, end in zip(run_starts, run_ends):
        if end - start >= min_length:
            flagged[start:end] = True
    return flagged


def handle_temporal_gaps(y: np.ndarray, gap_threshold: int = 3) -> np.ndarray:
    """Fill short gaps in a 1D time series and keep long gaps as NaN.

    Processing steps:

    1. Zeros lying between the first and the last non-zero sample are
       treated as missing and converted to NaN. Leading and trailing zeros
       are left untouched. NaN samples count as "non-zero" when locating
       that first/last position (``NaN == 0`` is False).
    2. Runs of consecutive NaNs are measured. Runs with a length
       ``>= gap_threshold`` are "large" gaps.
    3. All NaNs are linearly interpolated with ``np.interp`` (NaNs before the
       first or after the last valid sample take the nearest valid value),
       then large gaps are reset to NaN.

    Args:
        y: 1D time series (may contain NaN values). The input is not modified.
        gap_threshold: Minimum number of consecutive NaNs that makes a gap
            "significant"; such gaps are not filled.

    Returns:
        A float64 array of the same length with small gaps interpolated and
        large gaps preserved as NaN. An empty input or a series with no valid
        sample is returned unfilled.
    """
    # np.array already copies, so the caller's data is never mutated.
    y = np.array(y, dtype=np.float64)
    n = len(y)
    if n == 0:
        return y

    # Interior zeros are assumed to be missing observations rather than
    # real measurements; mark them as NaN so they are handled like gaps.
    zero_mask = y == 0
    nonzero_idx = np.flatnonzero(~zero_mask)
    if nonzero_idx.size > 0:
        interior_zeros = zero_mask.copy()
        interior_zeros[: nonzero_idx[0]] = False
        interior_zeros[nonzero_idx[-1] + 1:] = False
        y[interior_zeros] = np.nan

    nan_mask = np.isnan(y)
    valid_mask = ~nan_mask
    if not np.any(valid_mask):
        return y  # Nothing to interpolate from

    large_gap_mask = _long_nan_runs(nan_mask, gap_threshold)

    # Interpolate across every NaN first, then put the large gaps back.
    x = np.arange(n)
    y_interp = np.interp(x, x[valid_mask], y[valid_mask])
    y_interp[large_gap_mask] = np.nan
    return y_interp
def spatial_fill_nan(data_2d: np.ndarray, max_iterations: int = 3) -> np.ndarray:
    """Fill NaN values in a 2D raster from their 4-connected neighbours.

    Each pass visits the cells that are still NaN in row-major order and
    replaces a cell with the median of its non-NaN up/down/left/right
    neighbours. The array is updated in place during a pass, so a value
    filled earlier in a pass can feed cells visited later in the same pass.
    Passes stop early once a pass fills nothing. Any NaN left after the
    passes is replaced by the median of all non-NaN values in the array.

    Args:
        data_2d: 2D numpy array (H, W) with possible NaN values. The input
            is not modified.
        max_iterations: Maximum number of passes over the raster.

    Returns:
        A copy of the array with NaN values filled. A raster that contains
        only NaN is returned unchanged because there is nothing to fill from.
    """
    data = data_2d.copy()
    height, width = data.shape

    nan_cells = np.isnan(data)
    # No NaN at all: nothing to do. Only NaN: no source values exist, and
    # calling np.nanmedian would just emit an "All-NaN slice" warning.
    if not nan_cells.any() or nan_cells.all():
        return data

    neighbour_offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))  # up, down, left, right

    for _ in range(max_iterations):
        # Only cells that are NaN at the start of the pass can change, so
        # visit just those (argwhere yields them in row-major order).
        pending = np.argwhere(np.isnan(data))
        changed = False
        for i, j in pending:
            neighbours = []
            for di, dj in neighbour_offsets:
                ni, nj = i + di, j + dj
                if 0 <= ni < height and 0 <= nj < width:
                    value = data[ni, nj]
                    if not np.isnan(value):
                        neighbours.append(value)
            if neighbours:
                data[i, j] = np.median(neighbours)
                changed = True
        if not changed:
            break  # No cell could be filled; further passes would not help

    # Fallback for cells no pass could reach: use the global median.
    remaining = np.isnan(data)
    if remaining.any():
        data[remaining] = np.nanmedian(data)

    return data
def compute_gap_mask(y: np.ndarray, gap_threshold: int = 3) -> np.ndarray:
    """Flag the samples of a 1D series that belong to a significant NaN gap.

    A gap is a run of consecutive NaN samples. It is significant when its
    length is at least ``gap_threshold``.

    Args:
        y: 1D time series (may contain NaN values)
        gap_threshold: Minimum run length of consecutive NaNs for a gap to
            be flagged

    Returns:
        Boolean array of the same length as ``y``; True marks samples that
        lie inside a significant gap, False everywhere else
    """
    series = np.array(y, dtype=np.float64)
    total = len(series)
    missing = np.isnan(series)
    flagged = np.zeros(total, dtype=bool)

    pos = 0
    while pos < total:
        if not missing[pos]:
            pos += 1
            continue
        # pos is the first sample of a NaN run; walk to one past its end.
        run_end = pos
        while run_end < total and missing[run_end]:
            run_end += 1
        if run_end - pos >= gap_threshold:
            flagged[pos:run_end] = True
        pos = run_end

    return flagged
# ==========================================
# Index Computations
# ==========================================
@ -838,6 +1028,29 @@ if __name__ == "__main__":
assert len(features) == 51, f"Expected 51 features in dict, got {len(features)}"
assert vector.shape == (51,), f"Expected shape (51,), got {vector.shape}"
print("\n8. Testing gap handling functions...")
# Create time series with gaps
gap_series = np.array([0.5, 0.6, np.nan, np.nan, np.nan, 0.7, 0.8, np.nan, 0.9, 0.4])
# Test handle_temporal_gaps with threshold=3
filled_series = handle_temporal_gaps(gap_series, gap_threshold=3)
print(f" Original: {gap_series}")
print(f" After gap handling (threshold=3): {filled_series}")
# Test compute_gap_mask
gap_mask = compute_gap_mask(gap_series, gap_threshold=3)
print(f" Gap mask (threshold=3): {gap_mask}")
# Test spatial_fill_nan
spatial_arr = np.array([[0.5, 0.6, np.nan, 0.8],
[0.7, np.nan, 0.9, 0.4],
[np.nan, 0.3, 0.2, np.nan],
[0.1, 0.2, 0.3, 0.4]])
filled_spatial = spatial_fill_nan(spatial_arr, max_iterations=2)
print(f" Original spatial (2D):\n{spatial_arr}")
print(f" After spatial fill:\n{filled_spatial}")
print("\n=== STEP 4B All Tests Passed ===")
print(f" Total features: {len(features)}")
print(f" Feature order length: {len(FEATURE_ORDER_V1)}")