Source code for obnb.feature.multifeat

"""Multi modality feature objects."""
from itertools import chain

import numpy as np

from obnb.feature.base import BaseFeature
from obnb.typing import Iterable, List, LogLevel, Optional, Tuple, Union
from obnb.util.idhandler import IDmap


[docs]class MultiFeatureVec(BaseFeature): """MultiFeatureVec object.""" def __init__( self, log_level: LogLevel = "INFO", verbose: bool = False, ): """Initialize MultiFeatureVec.""" super().__init__(log_level=log_level, verbose=verbose) self.indptr = np.array([], dtype=np.uint32) self.fset_idmap = IDmap() @property def feature_ids(self) -> Tuple[str, ...]: """Return feature IDs.""" return tuple(self.fset_idmap.lst)
[docs] def get_features_from_idx( self, idx: Union[Iterable[int], int], fset_idx: Union[Iterable[int], int], ) -> np.ndarray: """Return features given node index and feature set inde. Args: idx (int or sequence of int): node index of interest. fset_id (int or sequence of int): feature set index of interest. """ if isinstance(idx, int): # return as one 2-d array with one row idx = [idx] indptr = self.indptr if isinstance(fset_idx, int): fset_slice = slice(indptr[fset_idx], indptr[fset_idx + 1]) else: fset_slices = [list(range(indptr[i], indptr[i + 1])) for i in fset_idx] fset_slice = list(chain(*fset_slices)) # type: ignore return self.mat[idx][:, fset_slice]
[docs] def get_features( self, ids: Optional[Union[List[str], str]] = None, fset_ids: Optional[Union[List[str], str]] = None, ) -> np.ndarray: """Return features given node IDs and the selected feature set ID. Args: ids (str or list of str, optional): node ID(s) of interest, return a 1-d array if input a single id, otherwise return a 2-d array where each row is the feature vector with the corresponding node ID. If not specified, use all rows. fset_ids (str or list of str, optional): feature set ID(s) of interest. If not specified, use all columns. """ if ids is None: idx = list(range(len(self.idmap))) else: idx = self.idmap[ids] if fset_ids is None: fset_idx = list(range(len(self.fset_idmap))) else: fset_idx = self.fset_idmap[fset_ids] return self.get_features_from_idx(idx, fset_idx)
[docs] @classmethod def from_mat( cls, mat: np.ndarray, ids: Optional[Union[Iterable[str], IDmap]] = None, *, indptr: Optional[np.ndarray] = None, fset_ids: Optional[Union[Iterable[str], IDmap]] = None, **kwargs, ): """Construct MultiFeatureVec object. Args: mat (:obj:`numpy.ndarray`): concatenated feature vector matrix. ids (list of str or :obj:`IDmap`, optional): node IDs, if not specified, use the default ordering as node IDs. indptr (:obj:`numpy.ndarray`, optional): index pointers indicating the start and the end of each feature set (columns). If set to None, and the dimension of fset_ids matches the number of columns in the input matrix, then automatically set indptr to corresponding to all ones. fset_ids (list of str or :obj:`IDmap`, optional): feature set IDs, if not specified, use the default ordering as feature set IDs. """ if indptr is None: if fset_ids is None: raise ValueError("Cannot set both indptr and fset_ids to None.") if (num_fsets := len(fset_ids)) != mat.shape[1]: # type: ignore raise ValueError( "Cannot assign indptr automatically because the dimension " f"of fset_ids ({num_fsets}) does not match the number " f"of columns in the input matrix ({mat.shape[1]}). Please " "specify fset_ids", ) indptr = np.arange(mat.shape[1] + 1) # TODO: refactor the following block(s) if ids is None: ids = list(map(str, range(mat.shape[0]))) if isinstance(ids, IDmap): idmap = ids else: idmap = IDmap.from_list(ids) if fset_ids is None: fset_ids = list(map(str, range(indptr.size - 1))) if isinstance(fset_ids, IDmap): fset_idmap = fset_ids else: fset_idmap = IDmap.from_list(fset_ids) feat = super().from_mat(mat, ids, **kwargs) feat.indptr = indptr # TODO: check indptr feat.idmap = idmap feat.fset_idmap = fset_idmap return feat
[docs] @classmethod def from_mats( cls, mats: List[np.ndarray], ids: Optional[Union[List[str], IDmap]] = None, *, fset_ids: Optional[Union[List[str], IDmap]] = None, **kwargs, ): """Construct MultiFeatureVec object from list of matrices. Args: mats (list of :obj:`numpy.ndarray`): list of feature vector matrices. ids (list of str or :obj:`IDmap`, optional): node IDs, if not specified, use the default ordering as node IDs. fset_ids (list of str or :obj:`IDmap`, optional): feature set IDs, if not specified, use the default ordering as feature set IDs. """ dims = [mat.shape[1] for mat in mats] indptr = np.zeros(len(mats) + 1, dtype=np.uint32) indptr[1:] = np.cumsum(dims) mat = np.hstack(mats) return cls.from_mat(mat, ids, indptr=indptr, fset_ids=fset_ids, **kwargs)