""" ========================================================== Enhancing QuickBundles with different metrics and features ========================================================== QuickBundles [Garyfallidis12]_ is a flexible algorithm that requires only a distance metric and an adjacency threshold to perform clustering. There is a wide variety of metrics that could be used to cluster streamlines. The purpose of this tutorial is to show how to easily create new ``Feature`` and new ``Metric`` classes that can be used by QuickBundles. .. _clustering-framework: Clustering framework ==================== DIPY_ provides a simple, flexible and fast framework to do clustering of sequential data (e.g. streamlines). A *sequential datum* in DIPY is represented as a numpy array of size :math:`(N \times D)`, where each row of the array represents a $D$ dimensional point of the sequence. A set of these sequences is represented as a list of numpy arrays of size :math:`(N_i \times D)` for :math:`i=1:M` where $M$ is the number of sequences in the set. This clustering framework is modular and divided in three parts: #. Feature extraction #. Distance computation #. Clustering algorithm The **feature extraction** part includes any preprocessing needed to be done on the data before computing distances between them (e.g. resampling the number of points of a streamline). To define a new way of extracting features, one has to subclass ``Feature`` (see below). The **distance computation** part includes any metric capable of evaluating a distance between two sets of features previously extracted from the data. To define a new way of extracting features, one has to subclass ``Metric`` (see below). The **clustering algorithm** part represents the clustering algorithm itself (e.g. QuickBundles, K-means, Hierarchical Clustering). More precisely, it includes any algorithms taking as input a list of sequential data and outputting a ``ClusterMap`` object. Extending `Feature` =================== This section will guide you through the creation of a new feature extraction method that can be used in the context of this clustering framework. For a list of available features in DIPY see :ref:`example_segment_clustering_features`. Assuming a set of streamlines, the type of features we want to extract is the arc length (i.e. the sum of the length of each segment for a given streamline). Let's start by importing the necessary modules. """ import numpy as np from dipy.data import get_fnames from dipy.io.streamline import load_tractogram from dipy.tracking.streamline import Streamlines from dipy.viz import window, actor, colormap from dipy.segment.clustering import QuickBundles from dipy.segment.featurespeed import Feature, VectorOfEndpointsFeature from dipy.segment.metric import Metric, SumPointwiseEuclideanMetric from dipy.tracking.streamline import length ############################################################################### # We now define the class ``ArcLengthFeature`` that will perform the desired # feature extraction. When subclassing ``Feature``, two methods have to be # redefined: ``infer_shape`` and ``extract``. # # Also, an important property about feature extraction is whether or not # its process is invariant to the order of the points within a streamline. # This is needed as there is no way one can tell which extremity of a # streamline is the beginning and which one is the end. class ArcLengthFeature(Feature): """ Computes the arc length of a streamline. """ def __init__(self): # The arc length stays the same even if the streamline is reversed. super(ArcLengthFeature, self).__init__(is_order_invariant=True) def infer_shape(self, streamline): """ Infers the shape of features extracted from `streamline`. """ # Arc length is a scalar return 1 def extract(self, streamline): """ Extracts features from `streamline`. """ # return np.sum(np.sqrt(np.sum((streamline[1:] - streamline[:-1]) ** 2))) # or use a DIPY's function that computes the arc length of a streamline. return length(streamline) ############################################################################### # The new feature extraction ``ArcLengthFeature`` is ready to be used. Let's use # it to cluster a set of streamlines by their arc length. For educational # purposes we will try to cluster a small streamline bundle known from # neuroanatomy as the fornix. # # We start by loading the fornix streamlines. fname = get_fnames('fornix') fornix = load_tractogram(fname, 'same', bbox_valid_check=False).streamlines streamlines = Streamlines(fornix) ############################################################################### # Perform QuickBundles clustering using the metric # ``SumPointwiseEuclideanMetric`` and our ``ArcLengthFeature``. metric = SumPointwiseEuclideanMetric(feature=ArcLengthFeature()) qb = QuickBundles(threshold=2., metric=metric) clusters = qb.cluster(streamlines) ############################################################################### # We will now visualize the clustering result. # Color each streamline according to the cluster they belong to. cmap = colormap.create_colormap(np.ravel(clusters.centroids)) colormap_full = np.ones((len(streamlines), 3)) for cluster, color in zip(clusters, cmap): colormap_full[cluster.indices] = color scene = window.Scene() scene.SetBackground(1, 1, 1) scene.add(actor.streamtube(streamlines, colormap_full)) window.record(scene, out_path='fornix_clusters_arclength.png', size=(600, 600)) # Enables/disables interactive visualization interactive = False if interactive: window.show(scene) ############################################################################### # .. figure:: fornix_clusters_arclength.png # :align: center # # Showing the different clusters obtained by using the arc length. # # # Extending `Metric` # ================== # This section will guide you through the creation of a new metric that can be # used in the context of this clustering framework. For a list of available # metrics in DIPY see :ref:`example_segment_clustering_metrics`. # # Assuming a set of streamlines, we want a metric that computes the cosine # distance giving the vector between endpoints of each streamline (i.e. one # minus the cosine of the angle between two vectors). For more information # about this distance check ``_. # # Let's start by importing the necessary modules. ############################################################################### # We now define the class ``CosineMetric`` that will perform the desired # distance computation. When subclassing ``Metric``, two methods have to be # redefined: ``are_compatible`` and ``dist``. Moreover, when implementing the # ``dist`` method, one needs to make sure the distance returned is symmetric # (i.e. `dist(A, B) == dist(B, A)`). class CosineMetric(Metric): """ Computes the cosine distance between two streamlines. """ def __init__(self): # For simplicity, features will be the vector between endpoints of a streamline. super(CosineMetric, self).__init__(feature=VectorOfEndpointsFeature()) def are_compatible(self, shape1, shape2): """ Checks if two features are vectors of same dimension. Basically this method exists so that we don't have to check inside the `dist` method (speedup). """ return shape1 == shape2 and shape1[0] == 1 def dist(self, v1, v2): """ Computes a the cosine distance between two vectors. """ norm = lambda x: np.sqrt(np.sum(x**2)) cos_theta = np.dot(v1, v2.T) / (norm(v1)*norm(v2)) # Make sure it's in [-1, 1], i.e. within domain of arccosine cos_theta = np.minimum(cos_theta, 1.) cos_theta = np.maximum(cos_theta, -1.) return np.arccos(cos_theta) / np.pi # Normalized cosine distance ############################################################################### # The new distance ``CosineMetric`` is ready to be used. Let's use # it to cluster a set of streamlines according to the cosine distance of the # vector between their endpoints. For educational purposes we will try to # cluster a small streamline bundle known from neuroanatomy as the fornix. # # We start by loading the fornix streamlines. fname = get_fnames('fornix') fornix = load_tractogram(fname, 'same', bbox_valid_check=False) streamlines = fornix.streamlines ############################################################################### # Perform QuickBundles clustering using our metric ``CosineMetric``. metric = CosineMetric() qb = QuickBundles(threshold=0.1, metric=metric) clusters = qb.cluster(streamlines) ############################################################################### # We will now visualize the clustering result. # Color each streamline according to the cluster they belong to. cmap = colormap.create_colormap(np.arange(len(clusters))) colormap_full = np.ones((len(streamlines), 3)) for cluster, color in zip(clusters, cmap): colormap_full[cluster.indices] = color scene = window.Scene() scene.SetBackground(1, 1, 1) scene.add(actor.streamtube(streamlines, colormap_full)) window.record(scene, out_path='fornix_clusters_cosine.png', size=(600, 600)) if interactive: window.show(scene) ############################################################################### # .. figure:: fornix_clusters_cosine.png # :align: center # # Showing the different clusters obtained by using the cosine metric. # # .. include:: ../links_names.inc # # References # ---------- # # .. [Garyfallidis12] Garyfallidis E. et al., QuickBundles a method for # tractography simplification, Frontiers in Neuroscience, vol 6, no 175, # 2012. #