Source code for zae_engine.nn_night.layers.involution

import torch.nn as nn
import torch.nn.functional as F


class Inv1d(nn.Module):
    """The involution layer for 1D input [1]_.

    Unlike a convolution, the kernel is generated dynamically from the
    input itself (see ``kernel_generator``) and is shared across the
    ``ch // num_groups`` channels of each group.

    Parameters
    ----------
    ch : int
        Number of channels in the input tensor (signal or 1D arr).
        Must be divisible by ``num_groups`` and by ``reduction_ratio``.
    num_groups : int
        Number of kernel groups; all channels in a group share one kernel.
    kernel_size : int
        Size of the involution kernel. The padding ``(kernel_size - 1) // 2``
        preserves the sequence length only for odd kernel sizes.
    stride : int
        Stride of the involution. Must divide the input length. Default: 1
    reduction_ratio : int
        Ratio of channel reduction inside the kernel generator.
        This value must be divisor of ch.

    Returns
    -------
    tensor : tensor
        Output of shape ``(batch, ch, length // stride)``.

    References
    ----------
    .. [1] Li, Duo, et al. "Involution: Inverting the inherence of convolution
       for visual recognition." Proceedings of the IEEE/CVF conference on
       computer vision and pattern recognition. 2021.
    """

    def __init__(self, ch: int, num_groups: int, kernel_size: int, stride: int, reduction_ratio: int):
        super(Inv1d, self).__init__()
        # Validate before deriving values via integer division so bad
        # arguments fail loudly (and the checks survive `python -O`,
        # unlike the original trailing `assert`).
        if ch % num_groups:
            raise ValueError(f"'ch' ({ch}) must be divisible by 'num_groups' ({num_groups}).")
        if ch % reduction_ratio:
            raise ValueError(f"'reduction_ratio' ({reduction_ratio}) must be a divisor of 'ch' ({ch}).")
        self.ch = ch
        self.num_groups = num_groups
        self.group = ch // num_groups  # channels per group sharing one kernel
        self.kernel_size = kernel_size
        self.stride = stride
        self.reduction_ratio = reduction_ratio
        self.k_gen = self.kernel_generator()
        # Treat the 1D signal as a (1, dim) image so nn.Unfold can extract
        # sliding windows; padding keeps the length for odd kernel sizes.
        self.unfold = nn.Unfold(kernel_size=(1, kernel_size), padding=(0, (kernel_size - 1) // 2), stride=(1, stride))

    def kernel_generator(self) -> nn.Sequential:
        """Build the bottleneck net mapping the (pooled) input to per-position
        kernels with ``kernel_size * num_groups`` output channels."""
        conv1 = nn.Conv1d(self.ch, self.ch // self.reduction_ratio, kernel_size=(1,))
        conv2 = nn.Conv1d(self.ch // self.reduction_ratio, self.kernel_size * self.num_groups, kernel_size=(1,))
        return nn.Sequential(conv1, nn.ReLU(), nn.BatchNorm1d(self.ch // self.reduction_ratio), conv2)

    def forward(self, x):
        """Apply involution to ``x``.

        Parameters
        ----------
        x : torch.Tensor
            Input of shape ``(batch, ch, dim)``; ``dim`` must be divisible
            by ``stride``.

        Returns
        -------
        torch.Tensor
            Output of shape ``(batch, ch, dim // stride)``.
        """
        b, ch, dim = x.shape
        if ch != self.ch:
            raise ValueError(f"Expected {self.ch} input channels, got {ch}.")
        if dim % self.stride:
            raise ValueError(f"Input length {dim} is not divisible by stride {self.stride}.")
        out_dim = dim // self.stride
        # (b, ch, dim) -> (b, ch * kernel_size, out_dim), then split into
        # (groups, channels-per-group, kernel taps, positions).
        unfolded = self.unfold(x.unsqueeze(2))
        unfolded = unfolded.view(b, self.num_groups, self.group, self.kernel_size, out_dim)
        # Generate one kernel per group and output position from the
        # stride-downsampled input; unsqueeze broadcasts it over the
        # channels of each group.
        pooled = F.adaptive_max_pool1d(x, out_dim)
        kernel = self.k_gen(pooled).view(b, self.num_groups, self.kernel_size, out_dim).unsqueeze(2)
        out = kernel * unfolded
        return out.sum(dim=3).view(b, ch, out_dim)  # reduce over kernel taps