Source code for pyg_lib.sampler

from typing import List, Tuple, Optional, Dict

import torch
from torch import Tensor

NodeType = str
RelType = str
EdgeType = Tuple[str, str, str]


[docs]def neighbor_sample(
    rowptr: Tensor,
    col: Tensor,
    seed: Tensor,
    num_neighbors: List[int],
    node_time: Optional[Tensor] = None,
    edge_time: Optional[Tensor] = None,
    seed_time: Optional[Tensor] = None,
    edge_weight: Optional[Tensor] = None,
    csc: bool = False,
    replace: bool = False,
    directed: bool = True,
    disjoint: bool = False,
    temporal_strategy: str = 'uniform',
    return_edge_id: bool = True,
) -> Tuple[Tensor, Tensor, Tensor, Optional[Tensor], List[int], List[int]]:
    r"""Recursively samples neighbors from all node indices in :obj:`seed`
    in the graph given by :obj:`(rowptr, col)`.

    .. note::

        For temporal sampling, the :obj:`col` vector needs to be sorted
        according to :obj:`time` within individual neighborhoods since we use
        binary search to find neighbors that fulfill temporal constraints.

    Args:
        rowptr (torch.Tensor): Compressed source node indices.
        col (torch.Tensor): Target node indices.
        seed (torch.Tensor): The seed node indices.
        num_neighbors (List[int]): The number of neighbors to sample for each
            node in each iteration. If an entry is set to :obj:`-1`, all
            neighbors will be included.
        node_time (torch.Tensor, optional): Timestamps for the nodes in the
            graph. If set, temporal sampling will be used such that neighbors
            are guaranteed to fulfill temporal constraints, *i.e.* sampled
            nodes have an earlier or equal timestamp than the seed node.
            If used, the :obj:`col` vector needs to be sorted according to time
            within individual neighborhoods. Requires :obj:`disjoint=True`.
            Only either :obj:`node_time` or :obj:`edge_time` can be specified.
            (default: :obj:`None`)
        edge_time (torch.Tensor, optional): Timestamps for the edges in the
            graph. If set, temporal sampling will be used such that neighbors
            are guaranteed to fulfill temporal constraints, *i.e.* sampled
            edges have an earlier or equal timestamp than the seed node.
            If used, the :obj:`col` vector needs to be sorted according to time
            within individual neighborhoods. Requires :obj:`disjoint=True`.
            Only either :obj:`node_time` or :obj:`edge_time` can be specified.
            (default: :obj:`None`)
        seed_time (torch.Tensor, optional): Optional values to override the
            timestamp for seed nodes. If not set, will use timestamps in
            :obj:`node_time` as default for seed nodes.
            Needs to be specified in case edge-level sampling is used via
            :obj:`edge_time`. (default: :obj:`None`)
        edge-weight (torch.Tensor, optional): If given, will perform biased
            sampling based on the weight of each edge. (default: :obj:`None`)
        csc (bool, optional): If set to :obj:`True`, assumes that the graph is
            given in CSC format :obj:`(colptr, row)`. (default: :obj:`False`)
        replace (bool, optional): If set to :obj:`True`, will sample with
            replacement. (default: :obj:`False`)
        directed (bool, optional): If set to :obj:`False`, will include all
            edges between all sampled nodes. (default: :obj:`True`)
        disjoint (bool, optional): If set to :obj:`True` , will create disjoint
            subgraphs for every seed node. (default: :obj:`False`)
        temporal_strategy (string, optional): The sampling strategy when using
            temporal sampling (:obj:`"uniform"`, :obj:`"last"`).
            (default: :obj:`"uniform"`)
        return_edge_id (bool, optional): If set to :obj:`False`, will not
            return the indices of edges of the original graph.
            (default: :obj: `True`)

    Returns:
        (torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor],
        List[int], List[int]):
        Row indices, col indices of the returned subtree/subgraph, as well as
        original node indices for all nodes sampled.
        In addition, may return the indices of edges of the original graph.
        Lastly, returns information about the sampled amount of nodes and edges
        per hop.
    """
    return torch.ops.pyg.neighbor_sample(  #
        rowptr, col, seed, num_neighbors, node_time, edge_time, seed_time,
        edge_weight, csc, replace, directed, disjoint, temporal_strategy,
        return_edge_id)


[docs]def hetero_neighbor_sample(
    rowptr_dict: Dict[EdgeType, Tensor],
    col_dict: Dict[EdgeType, Tensor],
    seed_dict: Dict[NodeType, Tensor],
    num_neighbors_dict: Dict[EdgeType, List[int]],
    node_time_dict: Optional[Dict[NodeType, Tensor]] = None,
    edge_time_dict: Optional[Dict[EdgeType, Tensor]] = None,
    seed_time_dict: Optional[Dict[NodeType, Tensor]] = None,
    edge_weight_dict: Optional[Dict[EdgeType, Tensor]] = None,
    csc: bool = False,
    replace: bool = False,
    directed: bool = True,
    disjoint: bool = False,
    temporal_strategy: str = 'uniform',
    return_edge_id: bool = True,
) -> Tuple[Dict[EdgeType, Tensor], Dict[EdgeType, Tensor], Dict[
        NodeType, Tensor], Optional[Dict[EdgeType, Tensor]], Dict[
            NodeType, List[int]], Dict[EdgeType, List[int]]]:
    r"""Recursively samples neighbors from all node indices in :obj:`seed_dict`
    in the heterogeneous graph given by :obj:`(rowptr_dict, col_dict)`.

    .. note ::
        Similar to :meth:`neighbor_sample`, but expects a dictionary of node
        types (:obj:`str`) and  edge types (:obj:`Tuple[str, str, str]`) for
        each non-boolean argument.

    Args:
        kwargs: Arguments of :meth:`neighbor_sample`.
    """
    src_node_types = {k[0] for k in rowptr_dict.keys()}
    dst_node_types = {k[-1] for k in rowptr_dict.keys()}
    node_types = list(src_node_types | dst_node_types)
    edge_types = list(rowptr_dict.keys())

    TO_REL_TYPE = {key: '__'.join(key) for key in edge_types}
    TO_EDGE_TYPE = {'__'.join(key): key for key in edge_types}

    rowptr_dict = {TO_REL_TYPE[k]: v for k, v in rowptr_dict.items()}
    col_dict = {TO_REL_TYPE[k]: v for k, v in col_dict.items()}
    num_neighbors_dict = {
        TO_REL_TYPE[k]: v
        for k, v in num_neighbors_dict.items()
    }
    if edge_time_dict is not None:
        edge_time_dict = {TO_REL_TYPE[k]: v for k, v in edge_time_dict.items()}
    if edge_weight_dict is not None:
        edge_weight_dict = {
            TO_REL_TYPE[k]: v
            for k, v in edge_weight_dict.items()
        }

    out = torch.ops.pyg.hetero_neighbor_sample(  #
        node_types, edge_types, rowptr_dict, col_dict, seed_dict,
        num_neighbors_dict, node_time_dict, edge_time_dict, seed_time_dict,
        edge_weight_dict, csc, replace, directed, disjoint, temporal_strategy,
        return_edge_id)

    (row_dict, col_dict, node_id_dict, edge_id_dict, num_nodes_per_hop_dict,
     num_edges_per_hop_dict) = out

    row_dict = {TO_EDGE_TYPE[k]: v for k, v in row_dict.items()}
    col_dict = {TO_EDGE_TYPE[k]: v for k, v in col_dict.items()}

    if edge_id_dict is not None:
        edge_id_dict = {TO_EDGE_TYPE[k]: v for k, v in edge_id_dict.items()}

    num_edges_per_hop_dict = {
        TO_EDGE_TYPE[k]: v
        for k, v in num_edges_per_hop_dict.items()
    }

    return (row_dict, col_dict, node_id_dict, edge_id_dict,
            num_nodes_per_hop_dict, num_edges_per_hop_dict)


[docs]def subgraph(
    rowptr: Tensor,
    col: Tensor,
    nodes: Tensor,
    return_edge_id: bool = True,
) -> Tuple[Tensor, Tensor, Optional[Tensor]]:
    r"""Returns the induced subgraph of the graph given by
    :obj:`(rowptr, col)`, containing only the nodes in :obj:`nodes`.

    Args:
        rowptr (torch.Tensor): Compressed source node indices.
        col (torch.Tensor): Target node indices.
        nodes (torch.Tensor): Node indices of the induced subgraph.
        return_edge_id (bool, optional): If set to :obj:`False`, will not
            return the indices of edges of the original graph contained in the
            induced subgraph. (default: :obj:`True`)

    Returns:
        (torch.Tensor, torch.Tensor, Optional[torch.Tensor]): Compressed source
        node indices and target node indices of the induced subgraph.
        In addition, may return the indices of edges of the original graph.
    """
    return torch.ops.pyg.subgraph(rowptr, col, nodes, return_edge_id)


[docs]def random_walk(rowptr: Tensor, col: Tensor, seed: Tensor, walk_length: int,
                p: float = 1.0, q: float = 1.0) -> Tensor:
    r"""Samples random walks of length :obj:`walk_length` from all node
    indices in :obj:`seed` in the graph given by :obj:`(rowptr, col)`, as
    described in the `"node2vec: Scalable Feature Learning for Networks"
    <https://arxiv.org/abs/1607.00653>`_ paper.

    Args:
        rowptr (torch.Tensor): Compressed source node indices.
        col (torch.Tensor): Target node indices.
        seed (torch.Tensor): Seed node indices from where random walks start.
        walk_length (int): The walk length of a random walk.
        p (float, optional): Likelihood of immediately revisiting a node in the
            walk. (default: :obj:`1.0`)
        q (float, optional): Control parameter to interpolate between
            breadth-first strategy and depth-first strategy.
            (default: :obj:`1.0`)

    Returns:
        torch.Tensor: A tensor of shape :obj:`[seed.size(0), walk_length + 1]`,
        holding the nodes indices of each walk for each seed node.
    """
    return torch.ops.pyg.random_walk(rowptr, col, seed, walk_length, p, q)


__all__ = [
    'neighbor_sample',
    'hetero_neighbor_sample',
    'subgraph',
    'random_walk',
]