Advanced Pipeline
This example demonstrates a complete end-to-end pipeline covering preprocessing, feature extraction, and per-participant normalization.
"""
Advanced Pipeline Example
=========================

This example demonstrates a complete end-to-end pipeline:

1. **Preprocessing**: Smooth raw gazes with a Wiener filter, then convert them
   to fixations using IDT.
2. **Feature Extraction**: Calculate stats and measures using Extractor.
3. **Normalization**: Normalize features per participant using IndividualNormalization.
4. **Drop Metadata**: Drop metadata columns.
5. **ML Model**: Add a machine learning model (optional).
"""

import pandas as pd
from sklearn.pipeline import Pipeline

from eyefeatures.features.extractor import Extractor
from eyefeatures.features.measures import HurstExponent, SpectralEntropy
from eyefeatures.features.shift import IndividualNormalization
from eyefeatures.features.stats import (
    FixationFeatures,
    MicroSaccadeFeatures,
    RegressionFeatures,
    SaccadeFeatures,
)
from eyefeatures.preprocessing.fixation_extraction import IDT
from eyefeatures.preprocessing.smoothing import WienerFilter
from eyefeatures.utils import ColumnDropper

# Column Names in Dataset
X = "norm_pos_x"  # x-coordinate
Y = "norm_pos_y"  # y-coordinate
T_GAZE = "gaze_timestamp"  # timestamp for gazes
T_FIX = "start_time"  # timestamp for fixations
PK = ["Participant", "tekst"]  # primary keys

# 1. Load Data.
# The path is relative to the current working directory.
gazes_df = pd.read_csv("data/gazes/gazes_subset.csv")
print("Loaded gazes data:", gazes_df.shape)

# 2. Preprocessing: Gazes -> Fixations.
# Both preprocessing steps operate on raw gazes, so they use the gaze
# timestamp column (T_GAZE) and the same primary keys.
smoother = WienerFilter(
    x=X,
    y=Y,
    t=T_GAZE,
    pk=PK,
)

# NOTE(review): the duration/dispersion thresholds are presumably expressed in
# the units of the timestamp and coordinate columns -- confirm against the
# dataset before reusing these values.
fixation_extractor = IDT(
    min_duration=0.08,
    max_duration=2.0,
    max_dispersion=0.20,
    x=X,
    y=Y,
    t=T_GAZE,
    pk=PK,
)

# 3. Feature Extraction.
# From here on the data are fixations, so the fixation timestamp column
# (T_FIX) is used instead of T_GAZE.
feature_extractor = Extractor(
    features=[
        FixationFeatures(),
        SaccadeFeatures(),
        MicroSaccadeFeatures(),
        RegressionFeatures(),
        HurstExponent(coordinate=X, n_iters=4),
        SpectralEntropy(),
    ],
    x=X,
    y=Y,
    t=T_FIX,
    duration="duration",
    dispersion="dispersion",
    pk=PK,
    leave_pk=True,
    return_df=True,
)

# 4. Normalization per-group (in this example, per-participant).
# Only the first primary key ("Participant") defines the normalization group.
normalizer = IndividualNormalization(pk=[PK[0]], inplace=False)

# 5. Drop metadata columns.
meta_columns = PK + [T_FIX, "end_time"]
dropper = ColumnDropper(columns=meta_columns)

# 6. Apply Pipeline
pipe = Pipeline(
    steps=[
        ("smoother", smoother),
        ("fixation_extractor", fixation_extractor),
        ("feature_extractor", feature_extractor),
        ("normalizer", normalizer),
        ("dropper", dropper),
        # ("model", LinearRegression()) <-- add ML model for supervised tasks
        # NOTE: with a final estimator that has no ``transform`` (such as
        # LinearRegression), call ``pipe.fit(data, y)`` / ``pipe.predict(data)``
        # instead of ``pipe.fit_transform(data)``.
    ]
)

processed_df = pipe.fit_transform(gazes_df)

print("\nPipeline Complete.")
print("Processed DataFrame Head:")
print(processed_df.head())
100print("Processed DataFrame Head:")
101print(processed_df.head())