SpectroChemPy Case Study: PCA on CO Adsorption Data
This notebook demonstrates an end-to-end analysis on a real dataset: CO adsorption on Mo/Al2O3. We will:
- Load the example dataset `irdata/CO@Mo_Al2O3.SPG`.
- Preprocess the data (smoothing, baseline correction).
- Perform PCA to identify trends during adsorption.
- Inspect results to verify against the Workflow Bench.
In [ ]:
Copied!
import spectrochempy as scp

# 1. LOAD DATA
# Read the CO-on-Mo/Al2O3 infrared series that ships with the SpectroChemPy
# example data, then summarise its shape and its two coordinate axes.
dataset = scp.read("irdata/CO@Mo_Al2O3.SPG")

x_axis = dataset.x  # spectral coordinate
y_axis = dataset.y  # one entry per recorded spectrum

print(f"Loaded dataset: {dataset.shape}")
print(f"X-axis: {x_axis.title} ({x_axis[0].values:.1f} to {x_axis[-1].values:.1f} {x_axis.units})")
print(f"Y-axis: {y_axis.title} ({len(y_axis)} samples)")
import spectrochempy as scp

# 1. LOAD DATA
# Read the CO-on-Mo/Al2O3 infrared series that ships with the SpectroChemPy
# example data, then summarise its shape and its two coordinate axes.
dataset = scp.read("irdata/CO@Mo_Al2O3.SPG")

x_axis = dataset.x  # spectral coordinate
y_axis = dataset.y  # one entry per recorded spectrum

print(f"Loaded dataset: {dataset.shape}")
print(f"X-axis: {x_axis.title} ({x_axis[0].values:.1f} to {x_axis[-1].values:.1f} {x_axis.units})")
print(f"Y-axis: {y_axis.title} ({len(y_axis)} samples)")
In [ ]:
Copied!
# 2. PREPROCESSING
# SpectroChemPy processing methods return a new dataset rather than modifying
# the one they are called on, so every result is bound to a name here.
# Calling them on a copy and discarding the return value would leave the
# data unprocessed.

# Step 2a: Savitzky-Golay smoothing (5-point window, 2nd-order polynomial)
# `savgol` is the Savitzky-Golay filter; `smooth` is a moving-window filter
# for which the polynomial `order` has no meaning.
smoothed = dataset.savgol(size=5, order=2)
print(f"Smoothed data (sample 0, point 100): {smoothed.data[0, 100]:.5f}")

# Step 2b: Baseline correction (rubberband)
# Rubberband is often better for broad IR features.
# The baseline algorithm is chosen with the `model` option.
corrected = smoothed.basc(model="rubberband")
print(f"Baseline corrected (sample 0, point 100): {corrected.data[0, 100]:.5f}")
# 2. PREPROCESSING
# SpectroChemPy processing methods return a new dataset rather than modifying
# the one they are called on, so every result is bound to a name here.
# Calling them on a copy and discarding the return value would leave the
# data unprocessed.

# Step 2a: Savitzky-Golay smoothing (5-point window, 2nd-order polynomial)
# `savgol` is the Savitzky-Golay filter; `smooth` is a moving-window filter
# for which the polynomial `order` has no meaning.
smoothed = dataset.savgol(size=5, order=2)
print(f"Smoothed data (sample 0, point 100): {smoothed.data[0, 100]:.5f}")

# Step 2b: Baseline correction (rubberband)
# Rubberband is often better for broad IR features.
# The baseline algorithm is chosen with the `model` option.
corrected = smoothed.basc(model="rubberband")
print(f"Baseline corrected (sample 0, point 100): {corrected.data[0, 100]:.5f}")
In [ ]:
Copied!
# 3. PCA ANALYSIS
# Fit a three-component PCA model on the preprocessed spectra, then project
# the same spectra onto the fitted components to obtain the scores.
pca = scp.PCA(n_components=3)
pca.fit(corrected)
scores = pca.transform(corrected)

# Report the explained-variance ratio of the first three components.
explained_var = pca.explained_variance_ratio
print("Explained Variance Ratio:")
for i, val in enumerate(explained_var.data[:3]):
    print(f" PC{i+1}: {val:.4f}")

# The leading "\n" prints a blank line between the variance table and the score.
print(f"\nPC1 Score (Sample 0): {scores.data[0, 0]:.4f}")
# 3. PCA ANALYSIS
# Fit a three-component PCA model on the preprocessed spectra, then project
# the same spectra onto the fitted components to obtain the scores.
pca = scp.PCA(n_components=3)
pca.fit(corrected)
scores = pca.transform(corrected)

# Report the explained-variance ratio of the first three components.
explained_var = pca.explained_variance_ratio
print("Explained Variance Ratio:")
for i, val in enumerate(explained_var.data[:3]):
    print(f" PC{i+1}: {val:.4f}")

# The leading "\n" prints a blank line between the variance table and the score.
print(f"\nPC1 Score (Sample 0): {scores.data[0, 0]:.4f}")