feat: add Dynamic World baseline stage with immediate upload and gap handling
Co-authored-by: aider (openrouter/minimax/minimax-m2.7) <aider@aider.chat>
This commit is contained in:
parent
a406a28a13
commit
609f9c5892
|
|
@ -8,6 +8,7 @@ This module provides:
|
|||
- Harmonic/Fourier features
|
||||
- Index computations (NDVI, NDRE, EVI, SAVI, CI_RE, NDWI)
|
||||
- Per-pixel feature builder
|
||||
- Gap handling for temporal and spatial missing data
|
||||
|
||||
NOTE: Seasonal window summaries come in Step 4B.
|
||||
"""
|
||||
|
|
@ -15,7 +16,7 @@ NOTE: Seasonal window summaries come in Step 4B.
|
|||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from typing import Dict, List
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
|
@ -140,6 +141,195 @@ def smooth_series(y: np.ndarray) -> np.ndarray:
|
|||
return savgol_smooth_1d(y_filled, window=5, polyorder=2)
|
||||
|
||||
|
||||
# ==========================================
|
||||
# Gap Handling for Missing Data
|
||||
# ==========================================
|
||||
|
||||
def handle_temporal_gaps(y: np.ndarray, gap_threshold: int = 3) -> np.ndarray:
    """Interpolate short gaps in a 1D time series and keep long gaps as NaN.

    Processing steps:

    1. Zeros lying between the first and last non-zero sample are treated
       as missing and converted to NaN. Leading and trailing zeros are left
       untouched.
    2. Runs of consecutive NaNs are measured. Runs of at least
       ``gap_threshold`` samples are "significant gaps".
    3. All NaNs are linearly interpolated; NaNs before the first or after
       the last valid sample take the nearest valid value (``np.interp``
       clamps at the edges).
    4. Significant gaps are reset to NaN so the caller can flag them as
       NoData.

    Args:
        y: 1D time series (may contain NaN values). The input is not
            modified.
        gap_threshold: Minimum number of consecutive NaNs that makes a run
            a significant gap (left unfilled).

    Returns:
        A new float64 array of the same length. Short gaps are filled;
        significant gaps remain NaN. If the series has no valid sample at
        all, it is returned with every position NaN.
    """
    # np.array copies by default, so the caller's data is never mutated.
    y = np.array(y, dtype=np.float64)
    n = len(y)

    if n == 0:
        return y

    # Interior zeros are assumed to be missing observations. NaN != 0, so
    # existing NaNs count as "non-zero" when locating the interior span.
    zero_mask = y == 0
    nonzero_idx = np.where(~zero_mask)[0]
    if len(nonzero_idx) > 0:
        first_nz = nonzero_idx[0]
        last_nz = nonzero_idx[-1]
        interior = y[first_nz:last_nz + 1]  # view into y
        interior[zero_mask[first_nz:last_nz + 1]] = np.nan

    nan_mask = np.isnan(y)
    valid_mask = ~nan_mask
    if not np.any(valid_mask):
        return y  # Nothing to interpolate from

    # Single pass over the series (plus one sentinel step at i == n that
    # closes a trailing run) marking every NaN run of length >= threshold.
    large_gap_mask = np.zeros(n, dtype=bool)
    gap_start = None
    for i in range(n + 1):
        is_nan = i < n and nan_mask[i]
        if is_nan:
            if gap_start is None:
                gap_start = i
        elif gap_start is not None:
            if i - gap_start >= gap_threshold:
                large_gap_mask[gap_start:i] = True
            gap_start = None

    # Interpolate everything, then blank out the significant gaps again.
    x = np.arange(n)
    y_interp = np.interp(x, x[valid_mask], y[valid_mask])
    y_interp[large_gap_mask] = np.nan

    return y_interp
|
||||
|
||||
|
||||
def spatial_fill_nan(data_2d: np.ndarray, max_iterations: int = 3) -> np.ndarray:
|
||||
"""Fill NaN values in a 2D spatial raster using spatial interpolation.
|
||||
|
||||
This function iteratively fills NaN values using neighboring non-NaN values.
|
||||
Works from edges inward, progressively filling larger areas.
|
||||
|
||||
Args:
|
||||
data_2d: 2D numpy array (H, W) with possible NaN values
|
||||
max_iterations: Maximum number of passes (more iterations fill more NaNs)
|
||||
|
||||
Returns:
|
||||
Array with NaN values filled using spatial median
|
||||
"""
|
||||
data = data_2d.copy()
|
||||
H, W = data.shape
|
||||
|
||||
# Create mask of valid pixels
|
||||
valid_mask = ~np.isnan(data)
|
||||
|
||||
if np.all(valid_mask):
|
||||
return data # No NaNs
|
||||
|
||||
for iteration in range(max_iterations):
|
||||
changed = False
|
||||
|
||||
for i in range(H):
|
||||
for j in range(W):
|
||||
if np.isnan(data[i, j]):
|
||||
# Get 4-connected neighbors (up, down, left, right)
|
||||
neighbors = []
|
||||
|
||||
if i > 0 and not np.isnan(data[i-1, j]):
|
||||
neighbors.append(data[i-1, j])
|
||||
if i < H-1 and not np.isnan(data[i+1, j]):
|
||||
neighbors.append(data[i+1, j])
|
||||
if j > 0 and not np.isnan(data[i, j-1]):
|
||||
neighbors.append(data[i, j-1])
|
||||
if j < W-1 and not np.isnan(data[i, j+1]):
|
||||
neighbors.append(data[i, j+1])
|
||||
|
||||
if neighbors:
|
||||
# Fill with median of neighbors
|
||||
data[i, j] = np.median(neighbors)
|
||||
changed = True
|
||||
|
||||
if not changed:
|
||||
break # No more NaNs filled in this iteration
|
||||
|
||||
# If still NaNs remain, fill with global median
|
||||
if np.any(np.isnan(data)):
|
||||
global_median = np.nanmedian(data)
|
||||
data = np.where(np.isnan(data), global_median, data)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def compute_gap_mask(y: np.ndarray, gap_threshold: int = 3) -> np.ndarray:
    """Flag the time steps that belong to a long run of missing values.

    Args:
        y: 1D time series (may contain NaN values).
        gap_threshold: Minimum number of consecutive NaNs for a run to
            count as a significant gap.

    Returns:
        Boolean array of the same length as ``y``; True marks every sample
        inside a NaN run of at least ``gap_threshold`` samples.
    """
    series = np.array(y, dtype=np.float64)
    total = len(series)

    missing = np.isnan(series)
    flagged = np.zeros(total, dtype=bool)

    pos = 0
    while pos < total:
        if not missing[pos]:
            pos += 1
            continue

        # Scan ahead to the first valid sample (or the end of the series).
        run_end = pos
        while run_end < total and missing[run_end]:
            run_end += 1

        if run_end - pos >= gap_threshold:
            flagged[pos:run_end] = True

        pos = run_end

    return flagged
|
||||
|
||||
|
||||
# ==========================================
|
||||
# Index Computations
|
||||
# ==========================================
|
||||
|
|
@ -838,6 +1028,29 @@ if __name__ == "__main__":
|
|||
assert len(features) == 51, f"Expected 51 features in dict, got {len(features)}"
|
||||
assert vector.shape == (51,), f"Expected shape (51,), got {vector.shape}"
|
||||
|
||||
print("\n8. Testing gap handling functions...")
|
||||
|
||||
# Create time series with gaps
|
||||
gap_series = np.array([0.5, 0.6, np.nan, np.nan, np.nan, 0.7, 0.8, np.nan, 0.9, 0.4])
|
||||
|
||||
# Test handle_temporal_gaps with threshold=3
|
||||
filled_series = handle_temporal_gaps(gap_series, gap_threshold=3)
|
||||
print(f" Original: {gap_series}")
|
||||
print(f" After gap handling (threshold=3): {filled_series}")
|
||||
|
||||
# Test compute_gap_mask
|
||||
gap_mask = compute_gap_mask(gap_series, gap_threshold=3)
|
||||
print(f" Gap mask (threshold=3): {gap_mask}")
|
||||
|
||||
# Test spatial_fill_nan
|
||||
spatial_arr = np.array([[0.5, 0.6, np.nan, 0.8],
|
||||
[0.7, np.nan, 0.9, 0.4],
|
||||
[np.nan, 0.3, 0.2, np.nan],
|
||||
[0.1, 0.2, 0.3, 0.4]])
|
||||
filled_spatial = spatial_fill_nan(spatial_arr, max_iterations=2)
|
||||
print(f" Original spatial (2D):\n{spatial_arr}")
|
||||
print(f" After spatial fill:\n{filled_spatial}")
|
||||
|
||||
print("\n=== STEP 4B All Tests Passed ===")
|
||||
print(f" Total features: {len(features)}")
|
||||
print(f" Feature order length: {len(FEATURE_ORDER_V1)}")
|
||||
|
|
|
|||
Loading…
Reference in New Issue