Source code for eyefeatures.features.pairwise

from collections.abc import Callable

import numpy as np
import pandas as pd
from numpy.typing import NDArray
from scipy.cluster.hierarchy import leaves_list, linkage, optimal_leaf_ordering
from scipy.linalg import sqrtm
from scipy.sparse.csgraph import laplacian
from scipy.sparse.linalg import eigsh
from sklearn.manifold import MDS
from tqdm import tqdm


# ======================== SIMILARITY MATRIX ========================

[docs]
def restore_matrix(matrix: NDArray, tol=1e-9):
    """Estimates A assuming 'matrix' equals :math:`A^T A`."""
    # Get eigenvectors and eigenvalues
    evals, evecs = np.linalg.eigh(matrix)

    # Sort by decrease of eigenvalue modulus
    sorted_evals_indexes = np.argsort(evals)[::-1]
    # sorted_evals_indexes = np.argsort(np.abs(evals))[::-1]
    evecs = evecs[:, sorted_evals_indexes]
    evals = evals[sorted_evals_indexes]

    # Rank is determined by positive eigenvalues
    A_rank = (evals > tol).sum()
    # A_rank = (np.abs(evals) > tol).sum()
    evals = np.diag(evals[:A_rank])
    evecs = evecs[:, :A_rank]

    # Restore matrix
    S = sqrtm(evals)
    U = np.identity(A_rank)

    US = U.dot(S)
    return US.dot(evecs.T).T, A_rank




[docs]
def get_sim_matrix(scanpaths: list[NDArray], sim_metric: Callable) -> np.ndarray:
    """Computes similarity matrix given non-trivial metric.

    Args:
        scanpaths: list of scanpaths, each being 2D-array of shape (2, n).
        sim_metric: similarity metric.

    Returns:
        scaled similarity matrix.
    """
    n = len(scanpaths)
    sim_matrix = np.ones(shape=(n, n))
    for i in range(len(scanpaths)):
        for j in range(i + 1, len(scanpaths)):
            p, q = scanpaths[i], scanpaths[j]
            sim_matrix[i, j] = sim_metric(p, q)

    sim_matrix += sim_matrix.T
    m = np.max(sim_matrix)
    m = m if m != 0 else 1
    return sim_matrix / m




[docs]
def get_dist_matrix(
    scanpaths: list[pd.DataFrame], dist_metric: Callable
) -> pd.DataFrame:
    """Computes pairwise distance matrix given distance metric.

    Args:
        scanpaths: List of scanpaths DataFrames of form (x, y)
        dist_metric: Metric used to calculate distance from features.dist
    """

    if len(scanpaths) == 0:
        raise ValueError("scanpaths list is empty")

    distances = []
    for i in tqdm(range(len(scanpaths))):
        for j in range(i + 1):
            distance = dist_metric(scanpaths[i], scanpaths[j])
            distances.append([i, j, distance])
            distances.append([j, i, distance])

    distances_df = pd.DataFrame(distances, columns=["p", "q", "dist"])
    return distances_df.reset_index().pivot_table(index="p", columns="q", values="dist")




[docs]
def hierarchical_clustering_order(
    sim_matrix: np.ndarray, metric: str = "euclidean"
) -> np.ndarray:
    """Reorder matrix using hierarchical clustering.

    Args:
        sim_matrix: similarity matrix to reorder.
        metric: metric used in building matrix.

    Returns:
        reordered matrix.
    """

    Z = linkage(sim_matrix, method="ward", metric=metric)
    ordered_leaves = leaves_list(Z)
    reordered_matrix = sim_matrix[ordered_leaves, :][:, ordered_leaves]
    return reordered_matrix




[docs]
def optimal_leaf_ordering_clustering(
    sim_matrix: np.ndarray, metric: str = "euclidean"
) -> np.ndarray:
    """Reorder matrix using optimal leaf ordering.

    Args:
        sim_matrix: similarity matrix to reorder.

    Returns:
        reordered matrix.
    """

    Z = linkage(sim_matrix, method="ward", metric=metric)
    Z = optimal_leaf_ordering(Z, sim_matrix)
    ordered_leaves = leaves_list(Z)
    reordered_matrix = sim_matrix[ordered_leaves, :][:, ordered_leaves]
    return reordered_matrix




[docs]
def dimensionality_reduction_order(sim_matrix: np.ndarray) -> np.ndarray:
    """Reorder matrix using Multi-Dimensional Scaling (MDS).

    Args:
        sim_matrix: similarity matrix to reorder.

    Returns:
        reordered matrix.
    """

    mds = MDS(n_components=2, dissimilarity="precomputed")
    coords = mds.fit_transform(1 - sim_matrix)
    # reorder the matrix based on the first MDS component
    order = np.argsort(coords[:, 0])
    reordered_matrix = sim_matrix[order, :][:, order]
    return reordered_matrix




[docs]
def spectral_order(sim_matrix: np.ndarray) -> np.ndarray:
    """Reorder matrix using spectral reordering.

    Args:
        sim_matrix: similarity matrix to reorder.

    Returns:
        reordered matrix.
    """

    L = laplacian(sim_matrix, normed=True)

    _, eigenvectors = eigsh(L, k=2, which="SM")

    # reorder based on the second smallest eigenvector (fiedler vector)
    fiedler_vector = eigenvectors[:, 1]
    order = np.argsort(fiedler_vector)
    reordered_matrix = sim_matrix[order, :][:, order]
    return reordered_matrix



# ======================== COMPROMISE MATRIX ========================
def get_center_matrix(weight_vector: np.ndarray) -> np.ndarray:
    """Calculates centering matrix Theta.

    Args:
        weight_vector: vector of weights.

    Returns:
        centering matrix.
    """
    assert np.sum(weight_vector) > 0, "Sum of weights must be greater than 0"
    weight_vector = weight_vector.astype(np.float32)
    weight_vector /= np.sum(weight_vector)
    E = np.eye(weight_vector.size)
    Theta = E - np.matmul(
        np.ones(weight_vector.size)[:, np.newaxis], weight_vector[:, np.newaxis].T
    )
    return Theta


def get_cross_product_matrix(
    D: np.ndarray, weight_vector: np.ndarray = None
) -> np.ndarray:
    """Calculates cross-product matrix.

    Args:
        D: distance matrix.
        weight_vector: vector of weights.

    Returns:
        cross-product matrix.
    """

    if weight_vector is None:
        weight_vector = np.ones(D.shape[0])

    Theta = get_center_matrix(weight_vector)
    return -0.5 * Theta @ D @ Theta.T


def compute_rv_coefficient(S1: np.ndarray, S2: np.ndarray) -> float:
    """Calculate the RV coefficient between two cross-product matrices.

    Args:
        S1: first cross-product matrix.
        S2: second cross-product matrix.

    Returns:
        RV coefficient.
    """
    numerator = np.trace(S1 @ S2.T)
    denominator = np.sqrt(np.trace(S1 @ S1.T) * np.trace(S2 @ S2.T))
    return numerator / denominator



[docs]
def get_compromise_matrix(distance_matrices: list[np.ndarray]) -> np.ndarray:
    """Compute the compromise matrix from a list of distance matrices.

    Args:
        distance_matrices: List of distance matrices (each a ndarray).

    Returns:
        compromise cross-product matrix.
    """
    assert len(distance_matrices) > 0, "At least one distance matrix is required"
    num_matrices = len(distance_matrices)

    # compute the cross-product matrices
    cross_product_matrices = [get_cross_product_matrix(D) for D in distance_matrices]

    # compute the similarity matrix using RV coefficients
    similarity_matrix = np.zeros((num_matrices, num_matrices))
    for i in range(num_matrices):
        for j in range(i, num_matrices):
            rv = compute_rv_coefficient(
                cross_product_matrices[i], cross_product_matrices[j]
            )
            similarity_matrix[i, j] = similarity_matrix[j, i] = rv

    # eigen-decomposition of the similarity matrix
    _, eigvecs = np.linalg.eigh(similarity_matrix)
    weights = eigvecs[:, -1]  # eigenvector corresponding to the largest eigenvalue
    # Get the compromise matrix as a weighted sum
    comp_mat = sum(w * G for w, G in zip(weights, cross_product_matrices, strict=False))

    return comp_mat