Advanced Pipeline

This example demonstrates a complete end-to-end pipeline covering preprocessing, feature extraction, and per-participant normalization.

  1"""
  2Advanced Pipeline Example
  3=========================
  4
  5This example demonstrates a complete end-to-end pipeline:
  61.  **Preprocessing**: Convert raw gazes to fixations using a pipeline of Wiener/SavGol filters and IDT.
  72.  **Feature Extraction**: Calculate stats and measures using Extractor.
  83.  **Normalization**: Normalize features per participant using IndividualNormalization.
  94.  **Drop Metadata**: Drop metadata columns.
 105.  **ML Model**: Add a machine learning model (optional).
 11"""
 12
 13import pandas as pd
 14from sklearn.pipeline import Pipeline
 15
 16from eyefeatures.features.extractor import Extractor
 17from eyefeatures.features.measures import HurstExponent, SpectralEntropy
 18from eyefeatures.features.shift import IndividualNormalization
 19from eyefeatures.features.stats import (
 20    FixationFeatures,
 21    MicroSaccadeFeatures,
 22    RegressionFeatures,
 23    SaccadeFeatures,
 24)
 25from eyefeatures.preprocessing.fixation_extraction import IDT
 26from eyefeatures.preprocessing.smoothing import WienerFilter
 27from eyefeatures.utils import ColumnDropper
 28
 29# Column Names in Dataset
 30X = "norm_pos_x"  # x-coordinate
 31Y = "norm_pos_y"  # y-coordinate
 32T_GAZE = "gaze_timestamp"  # timestamp for gazes
 33T_FIX = "start_time"  # timestamp for fixations
 34PK = ["Participant", "tekst"]  # primary keys
 35
 36# 1. Load Data.
 37gazes_df = pd.read_csv("data/gazes/gazes_subset.csv")
 38print("Loaded gazes data:", gazes_df.shape)
 39
 40# 2. Preprocessing: Gazes -> Fixations.
 41smoother = WienerFilter(
 42    x=X,
 43    y=Y,
 44    t=T_GAZE,
 45    pk=PK,
 46)
 47
 48fixation_extractor = IDT(
 49    min_duration=0.08,
 50    max_duration=2.0,
 51    max_dispersion=0.20,
 52    x=X,
 53    y=Y,
 54    t=T_GAZE,
 55    pk=PK,
 56)
 57
 58# 3. Feature Extraction.
 59feature_extractor = Extractor(
 60    features=[
 61        FixationFeatures(),
 62        SaccadeFeatures(),
 63        MicroSaccadeFeatures(),
 64        RegressionFeatures(),
 65        HurstExponent(coordinate=X, n_iters=4),
 66        SpectralEntropy(),
 67    ],
 68    x=X,
 69    y=Y,
 70    t=T_FIX,
 71    duration="duration",
 72    dispersion="dispersion",
 73    pk=PK,
 74    leave_pk=True,
 75    return_df=True,
 76)
 77
 78# 4. Normalization per-group (in this example, per-participant).
 79normalizer = IndividualNormalization(pk=[PK[0]], inplace=False)
 80
 81# 5. Drop metadata columns.
 82meta_columns = PK + ["start_time", "end_time"]
 83dropper = ColumnDropper(columns=meta_columns)
 84
 85# 6. Apply Pipeline
 86pipe = Pipeline(
 87    steps=[
 88        ("smoother", smoother),
 89        ("fixation_extractor", fixation_extractor),
 90        ("feature_extractor", feature_extractor),
 91        ("normalizer", normalizer),
 92        ("dropper", dropper),
 93        # ("model", LinearRegression())  <-- add ML model for supervised tasks
 94    ]
 95)
 96
 97processed_df = pipe.fit_transform(gazes_df)
 98
 99print("\nPipeline Complete.")
100print("Processed DataFrame Head:")
101print(processed_df.head())