Source
!pip install --upgrade seaborn
Requirement already satisfied: seaborn in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (0.13.2)
Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from seaborn) (1.26.4)
Requirement already satisfied: pandas>=1.2 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from seaborn) (2.2.3)
Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from seaborn) (3.10.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.2)
Requirement already satisfied: cycler>=0.10 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.58.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.8)
Requirement already satisfied: packaging>=20.0 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (25.0)
Requirement already satisfied: pillow>=8 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (11.2.1)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.3)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from pandas>=1.2->seaborn) (2025.2)
Requirement already satisfied: tzdata>=2022.7 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from pandas>=1.2->seaborn) (2025.2)
Requirement already satisfied: six>=1.5 in c:\users\danyl\anaconda3\envs\glq-book\lib\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.17.0)
Atelier 2 - Dégroupement¶
📘 Analyse du Dégroupement sur un Gisement Synthétique Spatialement Corrélé¶
Dans cet atelier, nous explorerons les effets de l’échantillonnage et du dégroupement sur un gisement spatialement corrélé suivant une distribution marginale log-normale. Nous analyserons comment la taille des cellules de dégroupement influence les estimations de la moyenne et de la variance des échantillons. Les participants apprendront à visualiser et interpréter les résultats à travers des graphiques interactifs.
🖼️ Descriptions des 4 figures¶
Carte du champ log-normal avec échantillons :
Cette figure montre la distribution spatiale du champ log-normal simulé, avec les emplacements des échantillons aléatoires (en noir) et ceux ciblant une zone à forte valeur (hotspot, en rouge).
Histogrammes et fonctions de répartition cumulée (CDF) :
On compare ici la distribution des valeurs du champ complet avec celles des échantillons, en incluant une version pondérée si le dégroupement est activé, pour visualiser les biais d’échantillonnage.
Effet de la taille de cellule sur la moyenne pondérée :
Cette courbe montre comment la moyenne des échantillons varie en fonction de la taille des cellules utilisées pour le dégroupement, comparée à la moyenne globale du champ.
Effet de la taille de cellule sur la variance pondérée :
De manière similaire, cette figure illustre l’impact de la taille de cellule sur la variance estimée des échantillons, en la comparant à la variance réelle du champ.
📝 Note : Après chaque modification des paramètres, cliquez sur le bouton “Tracer” (en bleu) pour actualiser les figures.
Source
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display, clear_output
# === Main functions ===
def spherical_cov(h, range_):
    """Spherical covariance model.

    Returns 1 - 1.5*(h/a) + 0.5*(h/a)**3 where h < a (a = `range_`),
    and 0 beyond the range. `h` is an array of lag distances.
    """
    out = np.zeros_like(h)
    inside = h < range_
    ratio = h[inside] / range_
    out[inside] = 1 - 1.5 * ratio + 0.5 * ratio**3
    return out
def fftma_spherical(n, range_, sigma=1.0, seed=None):
    """Simulate an n x n Gaussian field with spherical covariance via FFT-MA.

    The covariance is embedded on a doubled 2n x 2n periodic grid (circulant
    embedding) so the cropped n x n block is free of wrap-around artefacts.
    The result is rescaled to exact sample mean 0 and standard deviation
    `sigma`. Uses the legacy global NumPy RNG; `seed` makes it reproducible.
    """
    if seed is not None:
        np.random.seed(seed)
    size = 2 * n
    grid_x, grid_y = np.meshgrid(np.arange(size), np.arange(size))
    # Toroidal distances along each axis of the periodic embedding grid
    dx = np.minimum(grid_x, size - grid_x)
    dy = np.minimum(grid_y, size - grid_y)
    cov = spherical_cov(np.sqrt(dx**2 + dy**2), range_)
    cov = cov / cov.max()
    spectrum = np.fft.fft2(cov)
    noise = np.random.normal(0, 1, (size, size))
    # Shape white noise by the square root of the covariance spectrum
    shaped = np.sqrt(np.abs(spectrum)) * np.fft.fft2(noise)
    sim = np.fft.ifft2(shaped).real
    sim = sim[:n, :n]
    # Enforce exact zero mean / unit std on the crop, then scale
    sim = (sim - np.mean(sim)) / np.std(sim)
    sim = sim * sigma
    return sim
def generate_field(n, range_, mean=1.0, var=1.0, seed=None, distribution='lognormal'):
    """Generate a spatially correlated field with exact target mean and variance.

    Returns a tuple ``(gauss_field, field)``: the underlying standard Gaussian
    simulation and its transform to either a normal or log-normal marginal.
    Raises ValueError for an unknown `distribution`.
    """
    gauss_field = fftma_spherical(n, range_, sigma=1.0, seed=seed)
    if distribution == 'lognormal':
        # Moment matching: pick log-space parameters so the log-normal
        # field has arithmetic mean `mean` and variance `var`.
        sigma_ln = np.sqrt(np.log(1 + var / mean**2))
        mu_ln = np.log(mean) - 0.5 * sigma_ln**2
        field = np.exp(mu_ln + sigma_ln * gauss_field)
    elif distribution == 'normal':
        field = mean + np.sqrt(var) * gauss_field
    else:
        raise ValueError("distribution doit être 'normal' ou 'lognormal'")
    return gauss_field, field
def compute_cell_declustering_weights(samples, n_cells):
    """Cell-declustering weights.

    Partitions the plane into square cells of side `n_cells` (note: the
    parameter is the cell SIZE, not a count) and gives each sample the
    weight 1/(number of samples sharing its cell). `samples` is an (m, 2)
    array-like of (x, y) coordinates; returns an array of m raw weights
    (not normalised to sum to 1).
    """
    occupants = {}
    for idx, (x, y) in enumerate(samples):
        cell = (int(x / n_cells), int(y / n_cells))
        occupants.setdefault(cell, []).append(idx)
    weights = np.zeros(len(samples))
    for members in occupants.values():
        weights[members] = 1.0 / len(members)
    return weights
def find_specialspot_location(field, spot_size, spot_type='hotspot'):
    """Locate a square window of side `spot_size` around the field's extremum.

    Parameters
    ----------
    field : 2-D array (assumed square; only shape[0] is used for clipping)
    spot_size : int
        Desired side length of the window.
    spot_type : {'hotspot', 'coldspot'}
        Centre the window on the maximum or the minimum of `field`.

    Returns
    -------
    (x0, y0, x1, y1) : half-open index bounds of the window, clipped to the
        field. When the extremum lies near a border the window is shifted
        back inside the grid, so it keeps its full `spot_size` whenever
        spot_size <= n (the original version truncated it instead).

    Raises
    ------
    ValueError if `spot_type` is not 'hotspot' or 'coldspot'.
    """
    n = field.shape[0]
    if spot_type == 'hotspot':
        pos = np.unravel_index(np.argmax(field), field.shape)
    elif spot_type == 'coldspot':
        pos = np.unravel_index(np.argmin(field), field.shape)
    else:
        raise ValueError("spot_type doit être 'hotspot' ou 'coldspot'")
    # Shift the window inside the grid rather than clipping only the high
    # side, which silently shrank the spot when the extremum hugged a border.
    x0 = max(0, min(pos[0] - spot_size // 2, n - spot_size))
    y0 = max(0, min(pos[1] - spot_size // 2, n - spot_size))
    x1 = min(n, x0 + spot_size)
    y1 = min(n, y0 + spot_size)
    return x0, y0, x1, y1
def interactive_sampling_fixed_spot(
    n, range_, mean, var, n_samples_all,
    n_samples_spot, spot_size, declust_cells,
    show_declustering, seed, spot_type, distribution
):
    """Simulate a correlated field, sample it, and draw four diagnostic figures.

    Sampling combines `n_samples_all` uniform samples over the grid with
    `n_samples_spot` extra samples clustered inside a hotspot/coldspot window,
    producing a deliberately biased data set. The four panels show: (1) the
    field with sample locations, (2) histograms + CDFs of field vs. samples
    (with a declustered version if `show_declustering`), (3) the declustered
    mean vs. declustering cell size, (4) the declustered variance vs. cell size.

    `seed` drives both the field simulation and the sampling RNG. Renders
    matplotlib figures as a side effect; returns None.
    """
    gauss_field, field = generate_field(n, range_, mean=mean, var=var, seed=seed, distribution=distribution)
    # Window around the field's extremum (hotspot = max, coldspot = min)
    x0, y0, x1, y1 = find_specialspot_location(field, spot_size, spot_type=spot_type)
    # All (row, col) grid coordinates as an (n*n, 2) array
    indices_all = np.array([(i,j) for i in range(n) for j in range(n)])
    rng = np.random.default_rng(seed)
    # Uniform random samples over the WHOLE grid (NOTE(review): the original
    # comment said "hors spot"/outside the spot, but nothing excludes it)
    sampled_indices_all = rng.choice(len(indices_all), size=n_samples_all, replace=False)
    samples_all = indices_all[sampled_indices_all]
    samples_all_values = field[samples_all[:,0], samples_all[:,1]]
    # Extra, preferentially clustered samples restricted to the spot window
    indices_spot = np.array([(i,j) for i in range(x0, x1) for j in range(y0, y1)])
    if len(indices_spot) == 0:
        samples_spot = np.empty((0,2), dtype=int)
        samples_spot_values = np.array([])
    else:
        # Cannot draw more distinct points than the window contains
        n_samples_spot = min(n_samples_spot, len(indices_spot))
        sampled_indices_spot = rng.choice(len(indices_spot), size=n_samples_spot, replace=False)
        samples_spot = indices_spot[sampled_indices_spot]
        samples_spot_values = field[samples_spot[:,0], samples_spot[:,1]]
    # Pool uniform and spot samples into one (biased) data set
    combined_samples = np.vstack([samples_all, samples_spot])
    combined_values = np.concatenate([samples_all_values, samples_spot_values])
    # Declustering statistics are averaged over several random translations
    # of the cell grid, to reduce dependence on the grid origin
    n_shifts = 5
    def debiased_weights(samples, cell_size):
        # Shift-averaged declustered MEAN of combined_values (closure:
        # `samples` supplies coordinates only, values come from the closure)
        mean_list = []
        for _ in range(n_shifts):
            shift_x = rng.integers(0, cell_size)
            shift_y = rng.integers(0, cell_size)
            shifted = samples.copy()
            shifted[:,0] = (shifted[:,0] + shift_x) % n
            shifted[:,1] = (shifted[:,1] + shift_y) % n
            w = compute_cell_declustering_weights(shifted, cell_size)
            w /= w.sum()
            mean_list.append(np.sum(w*combined_values))
        return np.mean(mean_list)
    # Declustered mean/variance as a function of the cell size
    cell_sizes = np.arange(1, n//2)
    means = [debiased_weights(combined_samples, c) for c in cell_sizes]
    variances = []
    for c in cell_sizes:
        var_list = []
        for _ in range(n_shifts):
            shift_x = rng.integers(0, c)
            shift_y = rng.integers(0, c)
            shifted = combined_samples.copy()
            shifted[:,0] = (shifted[:,0] + shift_x) % n
            shifted[:,1] = (shifted[:,1] + shift_y) % n
            w = compute_cell_declustering_weights(shifted, c)
            w /= w.sum()
            # Weighted mean, then weighted variance about that mean
            mean_c = np.sum(w*combined_values)
            var_list.append(np.sum(w*(combined_values - mean_c)**2))
        variances.append(np.mean(var_list))
    # === Plotting ===
    fig, axs = plt.subplots(2,2, figsize=(10,8))
    # Panel 1: field map with sample locations
    ax = axs[0,0]
    im = ax.imshow(field, cmap='jet', origin='lower')
    ax.scatter(samples_all[:,1], samples_all[:,0], facecolors='none', edgecolors='black', s=40)
    ax.scatter(samples_spot[:,1], samples_spot[:,0], facecolors='none', edgecolors='black', s=40)
    ax.set_title("Gisement avec échantillons")
    ax.set_xlabel('X'); ax.set_ylabel('Y')
    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label=f'Valeur {distribution}')
    # Panel 2: histograms & CDFs (field vs. samples vs. declustered samples)
    ax = axs[0,1]
    # Clip bins at the 99.5th percentile so heavy log-normal tails
    # do not flatten the histogram
    bins = np.linspace(np.min(field), np.percentile(field,99.5),30)
    sns.histplot(field.flatten(), bins=bins, stat="density", color='gray', label='Champ complet', ax=ax, alpha=0.3)
    sns.histplot(combined_values, bins=bins, stat="density", color='blue', label='Échantillons', ax=ax, alpha=0.4)
    if show_declustering:
        w = compute_cell_declustering_weights(combined_samples, declust_cells)
        w /= w.sum()
        ax.hist(combined_values, bins=bins, weights=w, density=True, color='green', alpha=0.5, edgecolor='black', linewidth=0.5, label='Dégroupés')
    ax.set_xlabel("Valeur"); ax.set_ylabel("Densité")
    ax2 = ax.twinx()
    sorted_all = np.sort(field.flatten())
    sorted_non = np.sort(combined_values)
    sorted_idx = np.argsort(combined_values)
    cdf_all = np.linspace(0,1,len(sorted_all))
    cdf_non = np.linspace(0,1,len(sorted_non))
    # `w` only exists when show_declustering is True — hence the guard
    cdf_w = np.cumsum(w[sorted_idx]) if show_declustering else np.zeros_like(cdf_non)
    ax2.plot(sorted_all, cdf_all, 'k--', lw=2)
    ax2.plot(sorted_non, cdf_non, 'b-', lw=2)
    if show_declustering:
        ax2.plot(sorted_non, cdf_w, 'g-', lw=2)
    ax2.set_ylim(0,1); ax2.set_ylabel("CDF")
    # Panel 3: declustered mean vs. cell size
    ax = axs[1,0]
    ax.plot(cell_sizes, means, '-o', color='purple')
    #ax.axvline(declust_cells, color='red', linestyle='--', label=f'Taille cellule={declust_cells}')
    #ax.axhline(np.mean(combined_values), color='blue', linestyle='--', label='Moyenne non pondérée')
    #ax.axhline(np.mean(field), color='black', linestyle=':', label='Moyenne globale')
    ax.set_xlabel("Taille cellule"); ax.set_ylabel("Moyenne pondérée")
    ax.grid(True); #ax.legend()
    # Panel 4: declustered variance vs. cell size
    ax = axs[1,1]
    ax.plot(cell_sizes, variances, '-o', color='darkgreen')
    #ax.axvline(declust_cells, color='red', linestyle='--')
    #ax.axhline(np.var(combined_values), color='blue', linestyle='--', label='Variance non pondérée')
    #ax.axhline(np.var(field), color='black', linestyle=':', label='Variance globale')
    ax.set_xlabel("Taille cellule"); ax.set_ylabel("Variance pondérée")
    ax.grid(True); #ax.legend()
    plt.tight_layout(); plt.show()
# === Default parameters ===
_n = 100          # grid side length (field is _n x _n)
_range_ = 50      # spherical-covariance range, in grid cells
_sigma = 0.6      # target field VARIANCE (fed to the `var` argument despite the name)
_mean = 2.0       # target field mean
_seed = 42        # initial seed (drives both simulation and sampling)
_spot_size = 25   # side length of the hotspot/coldspot sampling window
# === Widgets (one control per simulation/sampling parameter) ===
range_w = widgets.IntText(value=_range_, description="Portée", layout=widgets.Layout(width='180px'))
# NOTE(review): despite the variable name, this widget sets the field VARIANCE
sigma_w = widgets.FloatText(value=_sigma, description="Variance", layout=widgets.Layout(width='180px'))
mean_w = widgets.FloatText(value=_mean, description="Moyenne", layout=widgets.Layout(width='180px'))
n_samples_all_w = widgets.IntText(value=100, description="N aléatoire", layout=widgets.Layout(width='180px'))
n_samples_spot_w = widgets.IntText(value=50, description="N spot", layout=widgets.Layout(width='180px'))
declust_cells_w = widgets.IntText(value=10, description="Taille cellule", layout=widgets.Layout(width='180px'))
show_declustering_w = widgets.Checkbox(value=False, description="Afficher dégroupement")
# Where the extra clustered samples are taken: around the max or the min
spot_type_w = widgets.ToggleButtons(
    options=[('Hotspot (fortes valeurs)', 'hotspot'), ('Coldspot (faibles valeurs)', 'coldspot')],
    description='Zone spéciale:',
    button_style='',
    layout=widgets.Layout(width='280px')
)
# Marginal distribution of the simulated field
distribution_w = widgets.Dropdown(
    options=[('Lognormal', 'lognormal'), ('Normal', 'normal')],
    value='lognormal',
    description='Distribution:',
    layout=widgets.Layout(width='180px')
)
# Mutable seed holder so the button callbacks can swap the seed without `global`
current_seed = {'val': _seed}
run_button = widgets.Button(description="Tracer", button_style='primary')
random_seed_button = widgets.Button(description="Tracer avec nouvelle graine", button_style='warning')
output = widgets.Output()  # the figures are rendered into this output area
def on_run_clicked(b):
    """Button callback: redraw the figures with the current widget values."""
    # Collect every parameter from the widgets (reading .value has no side
    # effects), then clear the previous figures and render the new ones.
    params = dict(
        n=_n,
        range_=range_w.value,
        mean=mean_w.value,
        var=sigma_w.value,
        n_samples_all=n_samples_all_w.value,
        n_samples_spot=n_samples_spot_w.value,
        spot_size=_spot_size,
        declust_cells=declust_cells_w.value,
        show_declustering=show_declustering_w.value,
        seed=current_seed['val'],
        spot_type=spot_type_w.value,
        distribution=distribution_w.value,
    )
    with output:
        clear_output()
        interactive_sampling_fixed_spot(**params)
def on_random_seed_clicked(b):
    """Button callback: draw a fresh random seed, store it, then redraw."""
    fresh = np.random.randint(0, 100000)
    current_seed['val'] = fresh
    on_run_clicked(b)
# Wire the callbacks to the buttons and lay out / display the UI
run_button.on_click(on_run_clicked)
random_seed_button.on_click(on_random_seed_clicked)
ui = widgets.VBox([
    widgets.HBox([range_w, sigma_w, mean_w]),
    widgets.HBox([n_samples_all_w, n_samples_spot_w, declust_cells_w]),
    widgets.HBox([distribution_w]),
    widgets.HBox([show_declustering_w, spot_type_w]),
    widgets.HBox([run_button, random_seed_button]),
    output
])
display(ui)