Source code for torchdrug.layers.common

import inspect
import warnings
from collections.abc import Sequence

import torch
from torch import nn
from torch.nn import functional as F
from torch_scatter import scatter_mean

from torchdrug.layers import functional


[docs]class MultiLayerPerceptron(nn.Module): """ Multi-layer Perceptron. Note there is no batch normalization, activation or dropout in the last layer. Parameters: input_dim (int): input dimension hidden_dim (list of int): hidden dimensions short_cut (bool, optional): use short cut or not batch_norm (bool, optional): apply batch normalization or not activation (str or function, optional): activation function dropout (float, optional): dropout rate """ def __init__(self, input_dim, hidden_dims, short_cut=False, batch_norm=False, activation="relu", dropout=0): super(MultiLayerPerceptron, self).__init__() if not isinstance(hidden_dims, Sequence): hidden_dims = [hidden_dims] self.dims = [input_dim] + hidden_dims self.short_cut = short_cut if isinstance(activation, str): self.activation = getattr(F, activation) else: self.activation = activation if dropout: self.dropout = nn.Dropout(dropout) else: self.dropout = None self.layers = nn.ModuleList() for i in range(len(self.dims) - 1): self.layers.append(nn.Linear(self.dims[i], self.dims[i + 1])) if batch_norm: self.batch_norms = nn.ModuleList() for i in range(len(self.dims) - 2): self.batch_norms.append(nn.BatchNorm1d(self.dims[i + 1])) else: self.batch_norms = None def forward(self, input): """""" layer_input = input for i, layer in enumerate(self.layers): hidden = layer(layer_input) if i < len(self.layers) - 1: if self.batch_norms: x = hidden.flatten(0, -2) hidden = self.batch_norms[i](x).view_as(hidden) hidden = self.activation(hidden) if self.dropout: hidden = self.dropout(hidden) if self.short_cut and hidden.shape == layer_input.shape: hidden = hidden + layer_input layer_input = hidden return hidden
[docs]class GaussianSmearing(nn.Module): r""" Gaussian smearing from `SchNet: A continuous-filter convolutional neural network for modeling quantum interactions`_. There are two modes for Gaussian smearing. Non-centered mode: .. math:: \mu = [0, 1, ..., n], \sigma = [1, 1, ..., 1] Centered mode: .. math:: \mu = [0, 0, ..., 0], \sigma = [0, 1, ..., n] .. _SchNet\: A continuous-filter convolutional neural network for modeling quantum interactions: https://arxiv.org/pdf/1706.08566.pdf Parameters: start (int, optional): minimal input value stop (int, optional): maximal input value num_kernel (int, optional): number of RBF kernels centered (bool, optional): centered mode or not learnable (bool, optional): learnable gaussian parameters or not """ def __init__(self, start=0, stop=5, num_kernel=100, centered=False, learnable=False): super(GaussianSmearing, self).__init__() if centered: mu = torch.zeros(num_kernel) sigma = torch.linspace(start, stop, num_kernel) else: mu = torch.linspace(start, stop, num_kernel) sigma = torch.ones(num_kernel) * (mu[1] - mu[0]) if learnable: self.mu = nn.Parameter(mu) self.sigma = nn.Parameter(sigma) else: self.register_buffer("mu", mu) self.register_buffer("sigma", sigma)
[docs] def forward(self, x, y): """ Compute smeared gaussian features between data. Parameters: x (Tensor): data of shape :math:`(..., d)` y (Tensor): data of shape :math:`(..., d)` Returns: Tensor: features of shape :math:`(..., num\_kernel)` """ distance = (x - y).norm(2, dim=-1, keepdim=True) z = (distance - self.mu) / self.sigma prob = torch.exp(-0.5 * z * z) return prob
[docs]class PairNorm(nn.Module): """ Pair normalization layer proposed in `PairNorm: Tackling Oversmoothing in GNNs`_. .. _PairNorm\: Tackling Oversmoothing in GNNs: https://openreview.net/pdf?id=rkecl1rtwB Parameters: scale_individual (bool, optional): additionally normalize each node representation to have the same L2-norm """ eps = 1e-8 def __init__(self, scale_individual=False): super(PairNorm, self).__init__() self.scale_individual = scale_individual def forward(self, graph, input): """""" if graph.batch_size > 1: warnings.warn("PairNorm is proposed for a single graph, but now applied to a batch of graphs.") x = input.flatten(1) x = x - x.mean(dim=0) if self.scale_individual: output = x / (x.norm(dim=-1, keepdim=True) + self.eps) else: output = x * x.shape[0] ** 0.5 / (x.norm() + self.eps) return output.view_as(input)
class InstanceNorm(nn.modules.instancenorm._InstanceNorm): """ Instance normalization for graphs. This layer follows the definition in `GraphNorm: A Principled Approach to Accelerating Graph Neural Network Training`_. .. _GraphNorm\: A Principled Approach to Accelerating Graph Neural Network Training: https://arxiv.org/pdf/2009.03294.pdf Parameters: input_dim (int): input dimension eps (float, optional): epsilon added to the denominator affine (bool, optional): use learnable affine parameters or not """ def __init__(self, input_dim, eps=1e-5, affine=False): super(InstanceNorm, self).__init__(input_dim, eps, affine=affine) def forward(self, graph, input): """""" assert (graph.num_nodes >= 1).all() mean = scatter_mean(input, graph.node2graph, dim=0, dim_size=graph.batch_size) centered = input - mean[graph.node2graph] var = scatter_mean(centered ** 2, graph.node2graph, dim=0, dim_size=graph.batch_size) std = (var + self.eps).sqrt() output = centered / std[graph.node2graph] if self.affine: output = torch.addcmul(self.bias, self.weight, output) return output
[docs]class MutualInformation(nn.Module): """ Mutual information estimator from `Learning deep representations by mutual information estimation and maximization`_. .. _Learning deep representations by mutual information estimation and maximization: https://arxiv.org/pdf/1808.06670.pdf Parameters: input_dim (int): input dimension num_mlp_layer (int, optional): number of MLP layers activation (str or function, optional): activation function """ def __init__(self, input_dim, num_mlp_layer=2, activation="relu"): super(MutualInformation, self).__init__() self.x_mlp = MultiLayerPerceptron(input_dim, [input_dim] * num_mlp_layer, activation=activation) self.y_mlp = MultiLayerPerceptron(input_dim, [input_dim] * num_mlp_layer, activation=activation) def forward(self, x, y, pair_index=None): """""" x = self.x_mlp(x) y = self.y_mlp(y) score = x @ y.t() score = score.flatten() if pair_index is None: assert len(x) == len(y) pair_index = torch.arange(len(x), device=x.device).unsqueeze(-1).expand(-1, 2) index = pair_index[:, 0] * len(y) + pair_index[:, 1] positive = torch.zeros_like(score, dtype=torch.bool) positive[index] = 1 negative = ~positive mutual_info = - functional.shifted_softplus(-score[positive]).mean() \ - functional.shifted_softplus(score[negative]).mean() return mutual_info
[docs]class Sequential(nn.Sequential): """ Improved sequential container. Modules will be called in the order they are passed to the constructor. Compared to the vanilla nn.Sequential, this layer additionally supports the following features. 1. Multiple input / output arguments. >>> # layer1 signature: (...) -> (a, b) >>> # layer2 signature: (a, b) -> (...) >>> layer = layers.Sequential(layer1, layer2) 2. Global arguments. >>> # layer1 signature: (graph, a) -> b >>> # layer2 signature: (graph, b) -> c >>> layer = layers.Sequential(layer1, layer2, global_args=("graph",)) Note the global arguments don't need to be present in every layer. >>> # layer1 signature: (graph, a) -> b >>> # layer2 signature: b -> c >>> # layer3 signature: (graph, c) -> d >>> layer = layers.Sequential(layer1, layer2, global_args=("graph",)) 3. Dict outputs. >>> # layer1 signature: a -> {"b": b, "c": c} >>> # layer2 signature: b -> d >>> layer = layers.Sequential(layer1, layer2, allow_unused=True) When dict outputs are used with global arguments, the global arguments can be explicitly overwritten by any layer outputs. >>> # layer1 signature: (graph, a) -> {"graph": graph, "b": b} >>> # layer2 signature: (graph, b) -> c >>> # layer2 takes in the graph output by layer1 >>> layer = layers.Sequential(layer1, layer2, global_args=("graph",)) """ def __init__(self, *args, global_args=None, allow_unused=False): super(Sequential, self).__init__(*args) if global_args is not None: self.global_args = set(global_args) else: self.global_args = {} self.allow_unused = allow_unused def forward(self, *args, **kwargs): """""" global_kwargs = {} for i, module in enumerate(self._modules.values()): sig = inspect.signature(module.forward) parameters = list(sig.parameters.values()) param_names = [param.name for param in parameters] j = 0 for name in param_names: if j == len(args): break if name in kwargs: continue if name in global_kwargs and name not in kwargs: kwargs[name] = global_kwargs[name] continue kwargs[name] = args[j] j += 1 if self.allow_unused: param_names = set(param_names) # pop unused kwargs kwargs = {k: v for k, v in kwargs.items() if k in param_names} if j < len(args): raise TypeError("too many positional arguments") output = module(**kwargs) global_kwargs.update({k: v for k, v in kwargs.items() if k in self.global_args}) args = [] kwargs = {} if isinstance(output, dict): kwargs.update(output) elif isinstance(output, Sequence): args += list(output) else: args.append(output) return output
[docs]class SinusoidalPositionEmbedding(nn.Module): """ Positional embedding based on sine and cosine functions, proposed in `Attention Is All You Need`_. .. _Attention Is All You Need: https://arxiv.org/pdf/1706.03762.pdf Parameters: output_dim (int): output dimension """ def __init__(self, output_dim): super(SinusoidalPositionEmbedding, self).__init__() inverse_frequency = 1 / (10000 ** (torch.arange(0.0, output_dim, 2.0) / output_dim)) self.register_buffer("inverse_frequency", inverse_frequency) def forward(self, input): """""" # input: [B, L, ...] positions = torch.arange(input.shape[1] - 1, -1, -1.0, dtype=input.dtype, device=input.device) sinusoidal_input = torch.outer(positions, self.inverse_frequency) position_embedding = torch.cat([sinusoidal_input.sin(), sinusoidal_input.cos()], -1) return position_embedding