Skip to content

odak.learn.models

odak.learn.models

Provides necessary definitions for components used in machine learning and deep learning.

channel_gate

Bases: Module

Channel attention module with various pooling strategies. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class channel_gate(torch.nn.Module):
    """
    Channel attention module with various pooling strategies.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=None):
        """
        Initializes the channel gate module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the intermediate layer.
        pool_types      : list
                          List of pooling operations to apply ("avg" and/or "max").
                          Defaults to ["avg", "max"] when None.
        """
        super().__init__()
        if pool_types is None:
            # Avoid a shared mutable default argument across instances.
            pool_types = ["avg", "max"]
        self.gate_channels = gate_channels
        hidden_channels = gate_channels // reduction_ratio
        if hidden_channels == 0:
            # Guard against a zero-sized hidden layer when reduction_ratio > gate_channels.
            hidden_channels = 1
        self.mlp = torch.nn.Sequential(
            convolutional_block_attention.Flatten(),
            torch.nn.Linear(gate_channels, hidden_channels),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_channels, gate_channels),
        )
        self.pool_types = pool_types

    def forward(self, x):
        """
        Forward pass of the ChannelGate module.

        Applies channel-wise attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the ChannelGate module (a 4D NCHW tensor).

        Returns
        -------
        output       : torch.tensor
                       Output tensor after applying channel attention.

        Raises
        ------
        ValueError
                       If an unsupported pooling type is configured or pool_types is empty.
        """
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type == "avg":
                pool = torch.nn.functional.avg_pool2d(
                    x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3))
                )
            elif pool_type == "max":
                pool = torch.nn.functional.max_pool2d(
                    x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3))
                )
            else:
                # Previously an unknown pool type silently reused a stale pooling
                # result (or raised a confusing NameError); fail loudly instead.
                raise ValueError("Unsupported pool type: {}".format(pool_type))
            channel_att_raw = self.mlp(pool)
            channel_att_sum = (
                channel_att_raw
                if channel_att_sum is None
                else channel_att_sum + channel_att_raw
            )
        if channel_att_sum is None:
            # An empty pool_types list previously crashed inside torch.sigmoid(None).
            raise ValueError("pool_types must contain at least one pooling operation.")
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        output = x * scale
        return output

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'])

Initializes the channel gate module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the intermediate layer.
    
  • pool_types
              List of pooling operations to apply.
    
Source code in odak/learn/models/components.py
def __init__(self, gate_channels, reduction_ratio=16, pool_types=None):
    """
    Initializes the channel gate module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the intermediate layer.
    pool_types      : list
                      List of pooling operations to apply ("avg" and/or "max").
                      Defaults to ["avg", "max"] when None.
    """
    super().__init__()
    if pool_types is None:
        # Avoid a shared mutable default argument across instances.
        pool_types = ["avg", "max"]
    self.gate_channels = gate_channels
    hidden_channels = gate_channels // reduction_ratio
    if hidden_channels == 0:
        # Guard against a zero-sized hidden layer when reduction_ratio > gate_channels.
        hidden_channels = 1
    self.mlp = torch.nn.Sequential(
        convolutional_block_attention.Flatten(),
        torch.nn.Linear(gate_channels, hidden_channels),
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_channels, gate_channels),
    )
    self.pool_types = pool_types

forward(x)

Forward pass of the ChannelGate module.

Applies channel-wise attention to the input tensor.

Parameters:

  • x
           Input tensor to the ChannelGate module.
    

Returns:

  • output ( tensor ) –

    Output tensor after applying channel attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the ChannelGate module.

    Applies channel-wise attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the ChannelGate module (a 4D NCHW tensor).

    Returns
    -------
    output       : torch.tensor
                   Output tensor after applying channel attention.

    Raises
    ------
    ValueError
                   If an unsupported pooling type is configured or pool_types is empty.
    """
    channel_att_sum = None
    for pool_type in self.pool_types:
        if pool_type == "avg":
            pool = torch.nn.functional.avg_pool2d(
                x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3))
            )
        elif pool_type == "max":
            pool = torch.nn.functional.max_pool2d(
                x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3))
            )
        else:
            # Previously an unknown pool type silently reused a stale pooling
            # result (or raised a confusing NameError); fail loudly instead.
            raise ValueError("Unsupported pool type: {}".format(pool_type))
        channel_att_raw = self.mlp(pool)
        channel_att_sum = (
            channel_att_raw
            if channel_att_sum is None
            else channel_att_sum + channel_att_raw
        )
    if channel_att_sum is None:
        # An empty pool_types list previously crashed inside torch.sigmoid(None).
        raise ValueError("pool_types must contain at least one pooling operation.")
    scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
    output = x * scale
    return output

convolution_layer

Bases: Module

A convolution layer.

Source code in odak/learn/models/components.py
class convolution_layer(torch.nn.Module):
    """
    A convolution layer.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        bias=False,
        stride=1,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        stride          : int
                          Stride of the convolution.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None (or any falsy
                          value), no activation layer is added.
        """
        super().__init__()
        # Padding of kernel_size // 2 preserves spatial size for odd kernels at stride 1.
        layers = [
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=kernel_size // 2,
                bias=bias,
            )
        ]
        if normalization:
            layers.append(torch.nn.BatchNorm2d(output_channels))
        if activation:
            layers.append(activation)
        self.model = torch.nn.Sequential(*layers)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, output_channels=2, kernel_size=3, bias=False, stride=1, normalization=False, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    bias=False,
    stride=1,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    stride          : int
                      Stride of the convolution.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None (or any falsy
                      value), no activation layer is added.
    """
    super().__init__()
    # Padding of kernel_size // 2 preserves spatial size for odd kernels at stride 1.
    layers = [
        torch.nn.Conv2d(
            input_channels,
            output_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            bias=bias,
        )
    ]
    if normalization:
        layers.append(torch.nn.BatchNorm2d(output_channels))
    if activation:
        layers.append(activation)
    self.model = torch.nn.Sequential(*layers)

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Propagate the input through the stored sequential model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    return self.model(x)

convolutional_block_attention

Bases: Module

Convolutional Block Attention Module (CBAM) class. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class convolutional_block_attention(torch.nn.Module):
    """
    Convolutional Block Attention Module (CBAM) class.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """

    def __init__(
        self,
        gate_channels,
        reduction_ratio=16,
        pool_types=None,
        no_spatial=False,
    ):
        """
        Initializes the convolutional block attention module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the channel attention.
        pool_types      : list
                          List of pooling operations to apply for channel attention.
                          Defaults to ["avg", "max"] when None.
        no_spatial      : bool
                          If True, spatial attention is not applied.
        """
        super(convolutional_block_attention, self).__init__()
        if pool_types is None:
            # Avoid a shared mutable default argument across instances.
            pool_types = ["avg", "max"]
        self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.spatial_gate = spatial_gate()

    class Flatten(torch.nn.Module):
        """
        Flattens the input tensor to a 2D matrix.
        """

        def forward(self, x):
            return x.view(x.size(0), -1)

    def forward(self, x):
        """
        Forward pass of the convolutional block attention module.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the CBAM module.

        Returns
        -------
        x_out        : torch.tensor
                       Output tensor after applying channel and spatial attention.
        """
        x_out = self.channel_gate(x)
        if not self.no_spatial:
            x_out = self.spatial_gate(x_out)
        return x_out

Flatten

Bases: Module

Flattens the input tensor to a 2D matrix.

Source code in odak/learn/models/components.py
class Flatten(torch.nn.Module):
    """
    Collapse every non-batch dimension of the input into one, yielding a 2D tensor.
    """

    def forward(self, x):
        batch_size = x.size(0)
        return x.view(batch_size, -1)

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False)

Initializes the convolutional block attention module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the channel attention.
    
  • pool_types
              List of pooling operations to apply for channel attention.
    
  • no_spatial
              If True, spatial attention is not applied.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    gate_channels,
    reduction_ratio=16,
    pool_types=None,
    no_spatial=False,
):
    """
    Initializes the convolutional block attention module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the channel attention.
    pool_types      : list
                      List of pooling operations to apply for channel attention.
                      Defaults to ["avg", "max"] when None.
    no_spatial      : bool
                      If True, spatial attention is not applied.
    """
    super(convolutional_block_attention, self).__init__()
    if pool_types is None:
        # Avoid a shared mutable default argument across instances.
        pool_types = ["avg", "max"]
    self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
    self.no_spatial = no_spatial
    if not no_spatial:
        self.spatial_gate = spatial_gate()

forward(x)

Forward pass of the convolutional block attention module.

Parameters:

  • x
           Input tensor to the CBAM module.
    

Returns:

  • x_out ( tensor ) –

    Output tensor after applying channel and spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Apply channel attention, then (unless disabled) spatial attention.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the CBAM module.

    Returns
    -------
    x_out        : torch.tensor
                   Output tensor after applying channel and spatial attention.
    """
    x_out = self.channel_gate(x)
    if self.no_spatial:
        return x_out
    return self.spatial_gate(x_out)

double_convolution

Bases: Module

A double convolution layer.

Source code in odak/learn/models/components.py
class double_convolution(torch.nn.Module):
    """
    A double convolution layer.
    """

    def __init__(
        self,
        input_channels=2,
        mid_channels=None,
        output_channels=2,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        Double convolution model.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of channels in the hidden layer between two convolutions.
                          Defaults to output_channels when None.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, no activation
                          layer is added by the underlying convolution layers.
        """
        super().__init__()
        if mid_channels is None:
            # Idiomatic None check (was isinstance(mid_channels, type(None))).
            mid_channels = output_channels
        self.activation = activation
        self.model = torch.nn.Sequential(
            convolution_layer(
                input_channels=input_channels,
                output_channels=mid_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=self.activation,
            ),
            convolution_layer(
                input_channels=mid_channels,
                output_channels=output_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=self.activation,
            ),
        )

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, mid_channels=None, output_channels=2, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU())

Double convolution model.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of channels in the hidden layer between two convolutions.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    mid_channels=None,
    output_channels=2,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    Double convolution model.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of channels in the hidden layer between two convolutions.
                      Defaults to output_channels when None.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, no activation
                      layer is added by the underlying convolution layers.
    """
    super().__init__()
    if mid_channels is None:
        # Idiomatic None check (was isinstance(mid_channels, type(None))).
        mid_channels = output_channels
    self.activation = activation
    self.model = torch.nn.Sequential(
        convolution_layer(
            input_channels=input_channels,
            output_channels=mid_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=self.activation,
        ),
        convolution_layer(
            input_channels=mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=self.activation,
        ),
    )

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Run the input through both convolution layers in sequence.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    return self.model(x)

downsample_layer

Bases: Module

A downscaling component followed by a double convolution.

Source code in odak/learn/models/components.py
class downsample_layer(torch.nn.Module):
    """
    A downscaling component followed by a double convolution.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A downscaling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used.
        """
        super().__init__()
        # Halve the spatial resolution first, then refine features.
        pooling = torch.nn.MaxPool2d(2)
        convolutions = double_convolution(
            input_channels=input_channels,
            mid_channels=output_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.maxpool_conv = torch.nn.Sequential(pooling, convolutions)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x              : torch.tensor
                         First input data.



        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        return self.maxpool_conv(x)

__init__(input_channels, output_channels, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU())

A downscaling component with a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A downscaling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used.
    """
    super().__init__()
    # Halve the spatial resolution first, then refine features.
    pooling = torch.nn.MaxPool2d(2)
    convolutions = double_convolution(
        input_channels=input_channels,
        mid_channels=output_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.maxpool_conv = torch.nn.Sequential(pooling, convolutions)

forward(x)

Forward model.

Parameters:

  • x
             First input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Downscale the input and apply the double convolution.

    Parameters
    ----------
    x              : torch.tensor
                     First input data.



    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    return self.maxpool_conv(x)

gaussian_2d

Bases: Module

2D Gaussian model for learning image representations using 2D Gaussian primitives.

This model represents an image as a weighted sum of 2D Gaussians, each defined by: - widths (std_x, std_y): Standard deviations along x and y axes - offsets (offset_x, offset_y): Center positions in normalized coordinates - rotations: Rotation angles for each Gaussian - alphas: Opacity/weight coefficients

Parameters:

  • number_of_elements (int, default: 10 ) –
                Number of 2D Gaussian elements to use. Default is 10.
    

Attributes:

  • widths ((Parameter, shape(2, 1, N))) –

    Standard deviations for x and y dimensions.

  • offsets ((Parameter, shape(2, 1, N))) –

    Center offsets in x and y directions.

  • rotations ((Parameter, shape(1, N))) –

    Rotation angles in radians for each Gaussian.

  • alphas ((Parameter, shape(1, N))) –

    Opacity/weight coefficients blended with tanh activation.

Examples:

>>> model = gaussian_2d(number_of_elements=50)
>>> x = torch.linspace(-1, 1, 256)
>>> y = torch.linspace(-1, 1, 256)
>>> X, Y = torch.meshgrid(x, y, indexing='ij')
>>> output = model(X, Y)
Notes
  • All parameters are initialized on CPU by default. For GPU acceleration, call .to(device) after initializing this model.
  • Input coordinates x and y should typically be normalized to [-1, 1].
  • Output is the sum of weighted Gaussians passed through tanh().
Source code in odak/learn/models/gaussians.py
class gaussian_2d(torch.nn.Module):
    """
    2D Gaussian model for learning image representations using 2D Gaussian primitives.

    This model represents an image with a set of 2D Gaussians, each defined by:
    - widths (std_x, std_y): Standard deviations along x and y axes
    - offsets (offset_x, offset_y): Center positions in normalized coordinates
    - rotations: Rotation angles for each Gaussian
    - alphas: Opacity/weight coefficients

    Parameters
    ----------
    number_of_elements : int, optional
                        Number of 2D Gaussian elements to use. Default is 10.

    Attributes
    ----------
    widths      : torch.nn.Parameter, shape (2, 1, N)
                  Standard deviations for x and y dimensions.
    offsets     : torch.nn.Parameter, shape (2, 1, N)
                  Center offsets in x and y directions.
    rotations   : torch.nn.Parameter, shape (1, N)
                  Rotation angles in radians for each Gaussian.
    alphas      : torch.nn.Parameter, shape (1, N)
                  Opacity/weight coefficients blended with tanh activation.

    Examples
    --------
    >>> model = gaussian_2d(number_of_elements=50)
    >>> x = torch.linspace(-1, 1, 256)
    >>> y = torch.linspace(-1, 1, 256)
    >>> X, Y = torch.meshgrid(x, y, indexing='ij')
    >>> output = model(X, Y)

    Notes
    -----
    - All parameters are initialized on CPU by default. For GPU acceleration,
      call .to(device) after initializing this model.
    - Input coordinates x and y should typically be normalized to [-1, 1].
    - The forward pass returns each Gaussian's tanh-activated contribution along
      the trailing dimension; sum over that dimension to composite a single image.
    """

    def __init__(self, number_of_elements=10):
        """
        Initialize the 2D Gaussian model.

        Parameters
        ----------
        number_of_elements : int
                            Number of Gaussian elements (default: 10).

        Raises
        ------
        ValueError
                            If number_of_elements is not a positive integer.
        """
        super(gaussian_2d, self).__init__()

        if not isinstance(number_of_elements, int) or number_of_elements <= 0:
            # Report the offending value, not just its type, so that e.g. -5
            # produces an actionable message instead of just "got int".
            raise ValueError(
                "number_of_elements must be a positive integer, got {!r} of type {}".format(
                    number_of_elements, type(number_of_elements).__name__
                )
            )

        self.number_of_elements = number_of_elements

        # Learnable parameters: widths/offsets are (2, 1, N); rotations/alphas are (1, N).
        self.widths = torch.nn.Parameter(torch.rand(2, 1, self.number_of_elements))
        self.offsets = torch.nn.Parameter(
            torch.randn(2, 1, self.number_of_elements)
        )
        self.rotations = torch.nn.Parameter(torch.randn(1, self.number_of_elements))
        self.alphas = torch.nn.Parameter(torch.randn(1, self.number_of_elements))

        # Re-sample parameters into sensible default ranges.
        self.initialize_parameters_uniformly()

    def initialize_parameters_uniformly(self, ranges=None):
        """
        Initialize parameters using uniform-like distributions within specified ranges.

        Despite the name, this method re-samples the model parameters from normal
        distributions whose mean and standard deviation are derived from the
        provided ranges. For a range [a, b], it uses:
            mean = (a + b) / 2
            std  = (b - a) / 4

        Parameters
        ----------
        ranges : dict or None, optional
                Dictionary specifying custom initialization ranges. Keys can include:
                - 'widths': tuple of (min, max) for Gaussian widths
                - 'offsets': tuple of (min, max) for center offsets
                - 'rotations': tuple of (min, max) for rotation angles in radians
                - 'alphas': tuple of (min, max) for opacity values

                If None, default ranges are used:
                {
                    "widths": (0.1, 0.5),
                    "offsets": (-1.0, 1.0),
                    "rotations": (0.0, 2*pi),
                    "alphas": (0.1, 0.2)
                }

        Notes
        -----
        - Uses torch.no_grad() to avoid tracking gradients during initialization.
        - Parameters are initialized in-place using normal_() method.
        """
        with torch.no_grad():
            default_ranges = {
                "widths": (0.1, 0.5),
                "offsets": (-1.0, 1.0),
                "rotations": (0.0, 2 * torch.pi),
                "alphas": (0.1, 0.2),
            }

            if ranges is None:
                ranges = default_ranges

            # Initialize widths (std_x and std_y).
            if "widths" in ranges:
                self.widths.normal_(
                    mean=(ranges["widths"][0] + ranges["widths"][1]) / 2,
                    std=(ranges["widths"][1] - ranges["widths"][0]) / 4,
                )
            else:
                self.widths.normal_(mean=0.3, std=0.1)

            # Initialize offsets (offset_x and offset_y).
            if "offsets" in ranges:
                self.offsets.normal_(
                    mean=(ranges["offsets"][0] + ranges["offsets"][1]) / 2,
                    std=(ranges["offsets"][1] - ranges["offsets"][0]) / 4,
                )
            else:
                self.offsets.normal_(mean=0.0, std=0.5)

            # Initialize rotations.
            if "rotations" in ranges:
                self.rotations.normal_(
                    mean=(ranges["rotations"][0] + ranges["rotations"][1]) / 2,
                    std=(ranges["rotations"][1] - ranges["rotations"][0]) / 4,
                )
            else:
                self.rotations.normal_(mean=torch.pi, std=torch.pi / 2)

            # Initialize alphas (opacity coefficients).
            if "alphas" in ranges:
                self.alphas.normal_(
                    mean=(ranges["alphas"][0] + ranges["alphas"][1]) / 2,
                    std=(ranges["alphas"][1] - ranges["alphas"][0]) / 4,
                )
            else:
                self.alphas.normal_(mean=0.15, std=0.05)

    def forward(self, x, y, residual=1e-6):
        """
        Forward pass: evaluate the 2D Gaussian model at given coordinates.

        Evaluates every 2D Gaussian at the input grid coordinates (x, y). Each
        Gaussian is rotated and translated according to its learned parameters.

        Parameters
        ----------
        x : torch.Tensor
            X-coordinates of the evaluation grid. Shape should broadcast with y.
        y : torch.Tensor
            Y-coordinates of the evaluation grid. Shape should broadcast with x.
        residual : float, optional
                   Small constant added to the width magnitudes to avoid division
                   by zero (default: 1e-6).

        Returns
        -------
        results : torch.Tensor
                  The per-Gaussian contributions at the input coordinates, shaped
                  by broadcasting x, y with the parameter shapes (trailing dim N).
                  Values are alphas * exp(-r) passed through tanh().

        Notes
        -----
        - Coordinates are first rotated using learned rotation angles, then
          translated by learned offsets for each Gaussian.
        - Supports multiple input shapes via PyTorch broadcasting: grid inputs
          (H, W) broadcast to (H, W, N); flattened inputs (N, 1) broadcast
          directly with the parameters.
        """
        # Rotate coordinates according to each Gaussian's rotation angle.
        cos_rot = torch.cos(self.rotations)  # Shape: (1, N)
        sin_rot = torch.sin(self.rotations)  # Shape: (1, N)
        x_r = x * cos_rot - y * sin_rot
        y_r = x * sin_rot + y * cos_rot

        # Translate by learned offsets (broadcasts from (2, 1, N) to input shape x (N,)).
        x_n = x_r + self.offsets[0]
        y_n = y_r + self.offsets[1]

        # The widths are squared below, so only their magnitude matters; adding
        # `residual` to the absolute value guards the division against zero-width
        # Gaussians. (Previously `residual` was accepted but never used.)
        std_x = self.widths[0].abs() + residual
        std_y = self.widths[1].abs() + residual
        r = (x_n / std_x) ** 2 + (y_n / std_y) ** 2
        gaussians = torch.exp(-r)

        # Apply alpha weights and tanh activation per Gaussian element.
        results = self.alphas * gaussians
        results = torch.tanh(results)

        return results

__init__(number_of_elements=10)

Initialize the 2D Gaussian model.

Parameters:

  • number_of_elements (int, default: 10 ) –
                Number of Gaussian elements (default: 10).
    
Source code in odak/learn/models/gaussians.py
def __init__(self, number_of_elements=10):
    """
    Initialize the 2D Gaussian model.

    Parameters
    ----------
    number_of_elements : int
                        Number of Gaussian elements (default: 10).
    """
    super(gaussian_2d, self).__init__()

    if not isinstance(number_of_elements, int) or number_of_elements <= 0:
        raise ValueError(
            "number_of_elements must be a positive integer, got {}".format(
                type(number_of_elements).__name__
            )
        )

    self.number_of_elements = number_of_elements

    # Initialize parameters as learnable tensors
    self.widths = torch.nn.Parameter(torch.rand(2, 1, self.number_of_elements))
    self.offsets = torch.nn.Parameter(
        torch.randn(2, 1, self.number_of_elements)
    )
    self.rotations = torch.nn.Parameter(torch.randn(1, self.number_of_elements))
    self.alphas = torch.nn.Parameter(torch.randn(1, self.number_of_elements))

    # Apply uniform initialization
    self.initialize_parameters_uniformly()

forward(x, y, residual=1e-06)

Forward pass: evaluate the 2D Gaussian model at given coordinates.

Computes a weighted sum of 2D Gaussians evaluated at the input grid
coordinates (x, y). Each Gaussian is rotated and translated according
to its learned parameters.
Parameters
x : torch.Tensor
    X-coordinates of the evaluation grid. Shape should broadcast with y.
y : torch.Tensor
    Y-coordinates of the evaluation grid. Shape should broadcast with x.
residual : float, optional
           Small constant to avoid numerical issues (default: 1e-6).
Returns
results : torch.Tensor
          The evaluated Gaussian field at input coordinates. The output
          shape is determined by broadcasting x, y with the parameter shapes.
          Values are passed through tanh() activation and multiplied by alphas.
Notes
- Coordinates are first rotated using learned rotation angles.
- Then translated by learned offsets for each Gaussian.
- The 2D Gaussian function is evaluated as exp(-(x^2 + y^2)) scaled by widths.
- Final output: tanh(alphas * gaussians), kept per element (no summation over elements is applied).
Notes
  • Supports multiple input shapes via PyTorch broadcasting
  • For grid inputs (H, W): automatically broadcasts to (H, W, N_elements)
  • For flattened inputs (N, 1): broadcasts directly with parameters
Source code in odak/learn/models/gaussians.py
def forward(self, x, y, residual=1e-6):
    """
    Forward pass: evaluate the 2D Gaussian model at given coordinates.

    Computes a weighted sum of 2D Gaussians evaluated at the input grid
    coordinates (x, y). Each Gaussian is rotated and translated according
    to its learned parameters.

    Parameters
    ----------
    x : torch.Tensor
        X-coordinates of the evaluation grid. Shape should broadcast with y.
    y : torch.Tensor
        Y-coordinates of the evaluation grid. Shape should broadcast with x.
    residual : float, optional
               Small constant to avoid numerical issues (default: 1e-6).

    Returns
    -------
    results : torch.Tensor
              The evaluated Gaussian field at input coordinates. The output
              shape is determined by broadcasting x, y with the parameter shapes.
              Values are passed through tanh() activation and multiplied by alphas.

    Notes
    -----
    - Coordinates are first rotated using learned rotation angles.
    - Then translated by learned offsets for each Gaussian.
    - The 2D Gaussian function is evaluated as exp(-(x^2 + y^2)) scaled by widths.
    - Final output: tanh(alphas * gaussians) summed over all elements.

Notes
-----
- Supports multiple input shapes via PyTorch broadcasting
- For grid inputs (H, W): automatically broadcasts to (H, W, N_elements)
- For flattened inputs (N, 1): broadcasts directly with parameters
"""
    # PyTorch broadcasting handles shape alignment automatically
    # Input shapes: x, y can be (H, W), (H*W,), or (-1, 1)
    # Parameters are stored as (2, 1, N) for offsets/widths and (1, N) for rotations/alphas

    # Rotate coordinates according to each Gaussian's rotation angle
    cos_rot = torch.cos(self.rotations)  # Shape: (1, N)
    sin_rot = torch.sin(self.rotations)  # Shape: (1, N)

    # Broadcasting: x (*), y (*) automatically expand with cos_rot/sin_rot
    x_r = x * cos_rot - y * sin_rot
    y_r = x * sin_rot + y * cos_rot

    # Translate by learned offsets (broadcasts from (2, 1, N) to input shape × (N,))
    x_n = x_r + self.offsets[0]  # Shape: (..., N)
    y_n = y_r + self.offsets[1]

    # Evaluate 2D Gaussian function with learned widths (standard deviations)
    r = (x_n / self.widths[0]) ** 2 + (y_n / self.widths[1]) ** 2
    gaussians = torch.exp(-r)

    # Apply alpha weights and tanh activation
    results = self.alphas * gaussians
    results = torch.tanh(results)

    return results

initialize_parameters_uniformly(ranges=None)

Initialize parameters using uniform-like distributions within specified ranges.

This method re-samples the model parameters from normal distributions whose mean and standard deviation are derived from the provided ranges. For a range [a, b], it uses: mean = (a + b) / 2 std = (b - a) / 4

Parameters:

  • ranges (dict or None, default: None ) –
    Dictionary specifying custom initialization ranges. Keys can include:
    - 'widths': tuple of (min, max) for Gaussian widths
    - 'offsets': tuple of (min, max) for center offsets
    - 'rotations': tuple of (min, max) for rotation angles in radians
    - 'alphas': tuple of (min, max) for opacity values
    
    If None, default ranges are used:
    {
        "widths": (0.1, 0.5),
        "offsets": (-1.0, 1.0),
        "rotations": (0.0, 2*pi),
        "alphas": (0.1, 0.2)
    }
    
Notes
  • Uses torch.no_grad() to avoid tracking gradients during initialization.
  • Parameters are initialized in-place using normal_() method.
Source code in odak/learn/models/gaussians.py
def initialize_parameters_uniformly(self, ranges=None):
    """
    Initialize parameters using normal distributions derived from ranges.

    This method re-samples the model parameters from normal distributions
    whose mean and standard deviation are derived from the provided ranges.
    For a range [a, b], it uses:
        mean = (a + b) / 2
        std  = (b - a) / 4

    Parameters
    ----------
    ranges : dict or None, optional
            Dictionary specifying custom initialization ranges. Keys can include:
            - 'widths': tuple of (min, max) for Gaussian widths
            - 'offsets': tuple of (min, max) for center offsets
            - 'rotations': tuple of (min, max) for rotation angles in radians
            - 'alphas': tuple of (min, max) for opacity values

            Missing keys fall back to the default range for that parameter,
            so partial dictionaries are allowed. The defaults are:
            {
                "widths": (0.1, 0.5),
                "offsets": (-1.0, 1.0),
                "rotations": (0.0, 2*pi),
                "alphas": (0.1, 0.2)
            }

    Notes
    -----
    - Uses torch.no_grad() to avoid tracking gradients during initialization.
    - Parameters are initialized in-place using normal_() method.
    - The previous implementation duplicated the defaults in per-key
      fallback branches, with an inconsistent std for alphas (0.05 instead
      of (0.2 - 0.1) / 4 = 0.025); merging user ranges over a single
      defaults table keeps all four parameters in sync.
    """
    default_ranges = {
        "widths": (0.1, 0.5),
        "offsets": (-1.0, 1.0),
        "rotations": (0.0, 2 * torch.pi),
        "alphas": (0.1, 0.2),
    }
    merged = dict(default_ranges)
    if ranges is not None:
        merged.update(ranges)
    with torch.no_grad():
        # Only iterate the known parameter names; unknown user keys are
        # ignored, matching the previous behavior.
        for name in default_ranges:
            low, high = merged[name]
            # mean/std chosen so roughly 95% of samples land in [low, high].
            getattr(self, name).normal_(mean=(low + high) / 2, std=(high - low) / 4)

gaussian_3d_volume

Bases: Module

Initialize the 3D Gaussian volume model. This model is useful for learning voxelized 3D volumes.

Parameters:

  • number_of_elements (int, default: 10 ) –
                 Number of Gaussian elements in the volume (default: 10).
    
  • initial_centers
                 Initial centers of the Gaussians (shape: [N, 3]). If not provided,
                 random initialization is used where N is `number_of_elements`.
    
  • initial_angles
                 Initial angles defining the orientation of each Gaussian. If not
                 provided, random initialization is used.
    
  • initial_scales
                 Initial scales controlling the spread (variance) of each Gaussian.
                 If not provided, random initialization is used.
    
  • initial_alphas
                 Initial alphas controlling the blending between Gaussians.
                 If not provided, random initialization is used.
    
Source code in odak/learn/models/gaussians.py
(Rendered line-number gutter for lines 399–824 of odak/learn/models/gaussians.py omitted — extraction artifact, the numbers carry no content.)
class gaussian_3d_volume(torch.nn.Module):
    """
    Initialize the 3D Gaussian volume model. This model is useful for learning voxelized 3D volumes.

    Parameters
    ----------
    number_of_elements : int
                         Number of Gaussian elements in the volume (default: 10).
    initial_centers    : torch.Tensor or None, optional
                         Initial centers of the Gaussians (shape: [N, 3]). If not provided,
                         random initialization is used where N is `number_of_elements`.
    initial_angles     : torch.Tensor or None, optional
                         Initial angles defining the orientation of each Gaussian. If not
                         provided, random initialization is used.
    initial_scales     : torch.Tensor or None, optional
                         Initial scales controlling the spread (variance) of each Gaussian.
                         If not provided, random initialization is used.
    initial_alphas     : torch.Tensor or None, optional
                         Initial alphas controlling the blending between Gaussians.
                         If not provided, random initialization is used.
    """

    def __init__(
        self,
        number_of_elements=10,
        initial_centers=None,
        initial_angles=None,
        initial_scales=None,
        initial_alphas=None,
    ):
        """
        Initialize the 3D Gaussian volume model.

        Parameters
        ----------
        number_of_elements : int
                            Number of Gaussian elements in the volume (default: 10).
        initial_centers    : torch.Tensor or None
                            Initial centers of the Gaussians (shape: [N, 3]).
        initial_angles     : torch.Tensor or None
                            Initial angles for orientation.
        initial_scales     : torch.Tensor or None
                            Initial scales for variance.
        initial_alphas     : torch.Tensor or None
                            Initial alphas for blending.

        Device Placement
        ----------------
        All parameters are initialized on CPU by default. For GPU acceleration,
        call .to(device) after initializing this model.
        Example:
            model = gaussian_3d_volume().cuda()  # or .to('cuda')
        """
        super(gaussian_3d_volume, self).__init__()
        self.number_of_elements = number_of_elements
        self.initialize_parameters(
            centers=initial_centers,
            angles=initial_angles,
            scales=initial_scales,
            alphas=initial_alphas,
        )
        self.l2_loss = torch.nn.MSELoss()
        self.l1_loss = torch.nn.L1Loss()

    def initialize_parameters(
        self,
        centers=None,
        angles=None,
        scales=None,
        alphas=None,
        device=torch.device("cpu"),
    ):
        """
        Initialize model parameters using PyTorch tensors.

        Parameters
        ----------
        centers : torch.Tensor, optional
                  If None (default), initializes as a tensor of shape
                  (number_of_elements, 3) with values sampled from standard normal distribution.
        angles  : torch.Tensor, optional
                  If None (default), initializes similarly to centers: shape (n,3).
        scales  : torch.Tensor, optional
                  If None (default), initializes as a tensor of shape
                  (number_of_elements, 3) with values uniformly distributed between 0 and 1.
        alphas  : torch.Tensor, optional
                  If None (default), initializes as a tensor of shape
                  (number_of_elements, 1) with values uniformly distributed between 0 and 1.
        device  : torch.device
                  Device to be used to define the parameters.
                  Make sure to pass the device you use with this model for proper manual parameter initialization.
        """
        if isinstance(centers, type(None)):
            centers = torch.randn(self.number_of_elements, 3, device=device)
        if isinstance(angles, type(None)):
            angles = torch.randn(self.number_of_elements, 3, device=device)
        if isinstance(scales, type(None)):
            scales = torch.rand(self.number_of_elements, 3, device=device)
        if isinstance(alphas, type(None)):
            alphas = torch.rand(self.number_of_elements, 1, device=device)
        self.centers = torch.nn.Parameter(centers)
        self.angles = torch.nn.Parameter(angles)
        self.scales = torch.nn.Parameter(scales)
        self.alphas = torch.nn.Parameter(alphas)

    def forward(self, points, test=False):
        """
        Forward pass: evaluate the 3D Gaussian volume at given points.

        Parameters
        ----------
        points            : torch.Tensor,  shape (N, 3)
                            Input points at which to evaluate the Gaussian volume, where each row is a 3D point.
        test              : bool, optional
                            If True, disables gradient computation (default: False).

        Returns
        -------
        total_intensities : torch.Tensor
                            Total intensities at the input points, weighted by alphas.
        """
        # A bare `torch.no_grad()` statement has no effect; a gradient
        # context keyed on `test` actually honors the documented contract.
        with torch.set_grad_enabled(not test):
            intensities = evaluate_3d_gaussians(
                points=points,
                centers=self.centers,
                scales=self.scales,
                angles=self.angles * 180,
                opacity=self.alphas,
            )
            total_intensities = torch.mean(intensities, dim=-1)
        return total_intensities

    def optimize(
        self,
        points,
        ground_truth,
        loss_weights,
        learning_rate=1e-2,
        number_of_epochs=10,
        scheduler_power=1,
        save_at_every=1,
        max_norm=None,
        weights_filename=None,
    ):
        """
        Optimize model parameters using AdamW and a polynomial learning rate scheduler.

        Parameters
        ----------
        points           : torch.Tensor
                           Input data points for the model.
        ground_truth     : torch.Tensor
                           Ground truth values corresponding to the input points.
        loss_weights     : dict
                           Dictionary of weights for each loss component.
        learning_rate    : float, optional
                           Learning rate for the optimizer. Default is 1e-2.
        number_of_epochs : int, optional
                           Number of training epochs. Default is 10.
        scheduler_power  : float, optional
                           Power parameter for the polynomial learning rate scheduler. Default is 1.
        save_at_every    : int
                           Save model weights every `save_at_every` epochs. Default is 1.
        max_norm         : float, optional
                           By default it is None, when set clips the gradient with the given threshold.
        weights_filename : str, optional
                           Filename for saving model weights. If None, weights are not saved.

        Notes
        -----
        - Uses AdamW optimizer and PolynomialLR scheduler.
        - Logs loss at each epoch and saves weights periodically when a
          `weights_filename` is provided.
        """
        optimizer = torch.optim.AdamW(self.parameters(), lr=learning_rate)
        scheduler = torch.optim.lr_scheduler.PolynomialLR(
            optimizer,
            total_iters=number_of_epochs,
            power=scheduler_power,
            last_epoch=-1,
        )
        # Pre-set the description so the final log call cannot raise a
        # NameError when number_of_epochs == 0 and the loop never runs.
        description = "gaussian_3d_volume model loss: n/a"
        t_epoch = tqdm(range(number_of_epochs), leave=False, dynamic_ncols=True)
        for epoch_id in t_epoch:
            optimizer.zero_grad()
            estimates = self.forward(points)
            loss = self.evaluate(
                estimates,
                ground_truth,
                epoch_id=epoch_id,
                epoch_count=number_of_epochs,
                weights=loss_weights,
            )
            # The graph is rebuilt by forward() every epoch, so retaining
            # it across backward passes is unnecessary.
            loss.backward()
            if not isinstance(max_norm, type(None)):
                torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm)
            optimizer.step()
            scheduler.step()
            description = "gaussian_3d_volume model loss:{:.4f}".format(loss.item())
            t_epoch.set_description(description)
            # Only save when a filename is given; save_weights() would
            # otherwise fail while validating a None path.
            if (
                not isinstance(weights_filename, type(None))
                and epoch_id % save_at_every == save_at_every - 1
            ):
                self.save_weights(weights_filename)
        logger.info(description)

    def _utilization_gap(self, low_indices, high_indices):
        """
        Sum of absolute std/mean gaps between the least and most opaque
        Gaussians, taken over each axis of centers and scales and over alphas.
        """
        gap = 0.0
        for tensor in (self.centers, self.scales):
            for statistic in (torch.std, torch.mean):
                for axis in range(3):
                    gap = gap + torch.abs(
                        statistic(tensor[low_indices, axis])
                        - statistic(tensor[high_indices, axis])
                    )
        for statistic in (torch.mean, torch.std):
            gap = gap + torch.abs(
                statistic(self.alphas[low_indices])
                - statistic(self.alphas[high_indices])
            )
        return gap

    def evaluate(
        self,
        estimate,
        ground_truth,
        epoch_id=0,
        epoch_count=1,
        weights={
            "content": {
                "l2": 1e0,
                "l1": 0e-0,
            },
            "alpha": {"smaller": 0e-0, "larger": 0e-0, "threshold": [0.0, 1.0]},
            "scale": {
                "smaller": 0e-0,
                "larger": 0e-0,
                "threshold": [0.0, 1.0],
            },
            "angle": 0e-0,
            "center": 0e-0,
            "utilization": {"l2": 0e0, "percentile": 0},
        },
    ):
        """
        Compute the weighted training loss.

        Parameters
        ----------
        estimate     : torch.Tensor
                       Model's output estimate.
        ground_truth : torch.Tensor
                       Ground truth values.
        epoch_id     : int, optional
                       ID of the starting epoch. Default: 0.
        epoch_count  : int, optional
                       Total number of epochs for training. Default: 1.
        weights      : dict, optional
                       Dictionary containing weights for various loss components:
                       - content: {'l2': float, 'l1': float}
                       - scale: {'smaller': float, 'larger': float, 'threshold': List[float]}
                       - alpha: {'smaller': float, 'larger': float, 'threshold': List[float]}
                       - angle : float
                       - center: float
                       - utilization: {'l2': float, 'percentile': int}
                       The default dictionary is never mutated; it previously
                       contained a duplicate 'alpha' key whose float value
                       replaced the 'alpha' sub-dictionary, making every call
                       with the default weights fail with a TypeError.

        Returns
        -------
        loss         : torch.Tensor
                       Scalar loss combining the enabled components.
        """
        loss = 0.0
        if weights["content"]["l2"] != 0.0:
            loss_l2_content = self.l2_loss(estimate, ground_truth)
            loss += weights["content"]["l2"] * loss_l2_content
        if weights["content"]["l1"] != 0.0:
            loss_l1_content = self.l1_loss(estimate, ground_truth)
            loss += weights["content"]["l1"] * loss_l1_content
        # Penalize scales/alphas straying outside their threshold bands.
        if weights["scale"]["smaller"] != 0.0:
            threshold = weights["scale"]["threshold"][0]
            loss_scales_smaller = torch.sum(
                torch.abs(self.scales[self.scales < threshold])
            )
            loss += loss_scales_smaller * weights["scale"]["smaller"]
        if weights["scale"]["larger"] != 0.0:
            threshold = weights["scale"]["threshold"][1]
            loss_scales_larger = torch.sum(self.scales[self.scales > threshold])
            loss += loss_scales_larger * weights["scale"]["larger"]
        if weights["alpha"]["smaller"] != 0.0:
            threshold = weights["alpha"]["threshold"][0]
            loss_alphas_smaller = torch.sum(
                torch.abs(self.alphas[self.alphas < threshold])
            )
            loss += loss_alphas_smaller * weights["alpha"]["smaller"]
        if weights["alpha"]["larger"] != 0.0:
            threshold = weights["alpha"]["threshold"][1]
            loss_alphas_larger = torch.sum(self.alphas[self.alphas > threshold])
            loss += loss_alphas_larger * weights["alpha"]["larger"]
        # Keep angles within [-1, 1] and centers within the unit cube.
        if weights["angle"] != 0.0:
            loss_angle = torch.sum(self.angles[self.angles > 1.0]) + torch.sum(
                torch.abs(self.angles[self.angles < -1.0])
            )
            loss += weights["angle"] * loss_angle
        if weights["center"] != 0.0:
            centers = torch.abs(self.centers)
            loss_center = torch.sum(centers[centers > 1.0])
            loss += weights["center"] * loss_center
        if weights["utilization"]["l2"] != 0:
            # Compare statistics of the bottom vs. top `percentile` of
            # Gaussians by |alpha| to encourage all elements to be used.
            n = self.alphas.numel()
            k = int(weights["utilization"]["percentile"] / 100.0 * n)
            _, low_indices = torch.topk(torch.abs(self.alphas), k, dim=0, largest=False)
            _, high_indices = torch.topk(torch.abs(self.alphas), k, dim=0, largest=True)
            loss_utilization = self._utilization_gap(low_indices, high_indices)
            loss_distribution = (
                torch.std(self.centers[:, 0])
                + torch.std(self.centers[:, 1])
                + torch.std(self.centers[:, 2])
                + torch.std(self.scales[:, 0])
                + torch.std(self.scales[:, 1])
                + torch.std(self.scales[:, 2])
                + torch.std(self.alphas)
            )
            # Decay ramps the utilization penalty up as training progresses.
            decay = 1.0 - ((epoch_count - epoch_id) / epoch_count)
            loss += (
                decay
                * weights["utilization"]["l2"]
                * (loss_distribution + loss_utilization)
            )
        return loss

    def save_weights(self, weights_filename):
        """
        Save the model weights to a specified file.

        Parameters
        ----------
        weights_filename : str
                            Path or filename where the weights will be saved. The path can include
                            relative paths and tilde notation (~), which will be expanded by `validate_path`.

        Example
        -------
        # Save model weights to current directory with filename 'model_weights.pth'
        save_weights('model_weights.pth')

        # Save model weights to home directory using ~ notation
        save_weights('~/.weights.pth')

        Raises
        ------
        ValueError : If path validation fails or extension is not allowed.
        """
        safe_path = validate_path(
            weights_filename, allowed_extensions=[".pth", ".pt", ".bin"]
        )
        torch.save(self.state_dict(), safe_path)
        logger.info("gaussian_3d_volume model weights saved: {}".format(safe_path))

    def load_weights(self, weights_filename=None, device=torch.device("cpu")):
        """
        Load model weights from a file.

        Parameters
        ----------
        weights_filename : str
                            Path to the weights file. If None, no weights are loaded.
        device           : torch.device, optional
                            Device to load the weights onto (default: 'cpu').

        Raises
        ------
        ValueError       : If path validation fails or extension is not allowed.
        FileNotFoundError: If file does not exist after validation.

        Notes
        -----
        - If `weights_filename` is a valid file, the model state is updated and set to eval mode.
        - The file path is validated for security (tilde expanded, path traversal blocked).
        - A log message is emitted upon successful loading.
        """
        if not isinstance(weights_filename, type(None)):
            safe_path = validate_path(
                weights_filename, allowed_extensions=[".pth", ".pt", ".bin"]
            )
            if os.path.isfile(safe_path):
                self.load_state_dict(
                    torch.load(safe_path, weights_only=True, map_location=device)
                )
                self.eval()
                logger.info(
                    "gaussian_3d_volume model weights loaded: {}".format(safe_path)
                )
__init__(number_of_elements=10, initial_centers=None, initial_angles=None, initial_scales=None, initial_alphas=None)

Initialize the 3D Gaussian volume model.

Parameters:

  • number_of_elements (int, default: 10 ) –
                Number of Gaussian elements in the volume (default: 10).
    
  • initial_centers
                Initial centers of the Gaussians (shape: [N, 3]).
    
  • initial_angles
                Initial angles for orientation.
    
  • initial_scales
                Initial scales for variance.
    
  • initial_alphas
                Initial alphas for blending.
    
Device Placement

All parameters are initialized on CPU by default. For GPU acceleration, call .to(device) after initializing this model. Example: model = gaussian_3d_volume().cuda() # or .to('cuda')

Source code in odak/learn/models/gaussians.py
def __init__(
    self,
    number_of_elements=10,
    initial_centers=None,
    initial_angles=None,
    initial_scales=None,
    initial_alphas=None,
):
    """
    Initialize the 3D Gaussian volume model.

    Parameters
    ----------
    number_of_elements : int
                        Number of Gaussian elements in the volume (default: 10).
    initial_centers    : torch.Tensor or None, optional
                        Initial centers of the Gaussians (shape: [N, 3]).
    initial_angles     : torch.Tensor or None, optional
                        Initial angles for orientation.
    initial_scales     : torch.Tensor or None, optional
                        Initial scales for variance.
    initial_alphas     : torch.Tensor or None, optional
                        Initial alphas for blending.

    Device Placement
    ----------------
    All parameters are initialized on CPU by default. For GPU acceleration,
    call .to(device) after initializing this model.
    Example:
        model = gaussian_3d_volume().cuda()  # or .to('cuda')
    """
    super(gaussian_3d_volume, self).__init__()
    self.number_of_elements = number_of_elements
    # Delegate parameter creation; any None falls back to random init there.
    self.initialize_parameters(
        centers=initial_centers,
        angles=initial_angles,
        scales=initial_scales,
        alphas=initial_alphas,
    )
    # Loss modules reused by evaluate() for the content terms.
    self.l2_loss = torch.nn.MSELoss()
    self.l1_loss = torch.nn.L1Loss()

evaluate(estimate, ground_truth, epoch_id=0, epoch_count=1, weights={'content': {'l2': 1.0, 'l1': 0.0}, 'alpha': {'smaller': 0.0, 'larger': 0.0, 'threshold': [0.0, 1.0]}, 'scale': {'smaller': 0.0, 'larger': 0.0, 'threshold': [0.0, 1.0]}, 'angle': 0.0, 'center': 0.0, 'utilization': {'l2': 0.0, 'percentile': 0}})

Parameters:

  • estimate
           Model's output estimate.
    
  • ground_truth (Tensor) –
           Ground truth values.
    
  • epoch_id
           ID of the starting epoch. Default: 0.
    
  • epoch_count
           Total number of epochs for training. Default: 1.
    
  • weights
           Dictionary containing weights for various loss components:
           - content: {'l2': float, 'l1': float}
           - scale: {'smaller': float, 'larger': float, 'threshold': List[float]}
           - alpha: {'smaller': float, 'larger': float, 'threshold': List[float]}
           - angle : float
           - center: float
           - utilization: {'l2': float, 'percentile': int}
    
Source code in odak/learn/models/gaussians.py
def evaluate(
    self,
    estimate,
    ground_truth,
    epoch_id=0,
    epoch_count=1,
    weights=None,
):
    """
    Compute the weighted training loss between an estimate and ground truth.

    Parameters
    ----------
    estimate     : torch.Tensor
                   Model's output estimate.
    ground_truth : torch.Tensor
                   Ground truth values.
    epoch_id     : int, optional
                   ID of the starting epoch. Default: 0.
    epoch_count  : int, optional
                   Total number of epochs for training. Default: 1.
    weights      : dict, optional
                   Dictionary containing weights for various loss components:
                   - content: {'l2': float, 'l1': float}
                   - scale: {'smaller': float, 'larger': float, 'threshold': List[float]}
                   - alpha: {'smaller': float, 'larger': float, 'threshold': List[float]}
                   - angle : float
                   - center: float
                   - utilization: {'l2': float, 'percentile': int}
                   When None (default), only the L2 content loss is enabled.

    Returns
    -------
    loss         : torch.Tensor
                   Scalar loss combining all enabled components.
    """
    # Build the default weights lazily. The previous default declared the
    # "alpha" key twice: the later scalar entry overwrote the nested dict,
    # so `weights["alpha"]["smaller"]` raised TypeError with default
    # arguments. A None sentinel also avoids the mutable-default pitfall.
    if weights is None:
        weights = {
            "content": {"l2": 1e0, "l1": 0e0},
            "alpha": {"smaller": 0e0, "larger": 0e0, "threshold": [0.0, 1.0]},
            "scale": {"smaller": 0e0, "larger": 0e0, "threshold": [0.0, 1.0]},
            "angle": 0e0,
            "center": 0e0,
            "utilization": {"l2": 0e0, "percentile": 0},
        }
    loss = 0.0
    if weights["content"]["l2"] != 0.0:
        loss += weights["content"]["l2"] * self.l2_loss(estimate, ground_truth)
    if weights["content"]["l1"] != 0.0:
        loss += weights["content"]["l1"] * self.l1_loss(estimate, ground_truth)
    # Penalize scales falling outside the configured [low, high] thresholds.
    if weights["scale"]["smaller"] != 0.0:
        threshold = weights["scale"]["threshold"][0]
        loss += weights["scale"]["smaller"] * torch.sum(
            torch.abs(self.scales[self.scales < threshold])
        )
    if weights["scale"]["larger"] != 0.0:
        threshold = weights["scale"]["threshold"][1]
        loss += weights["scale"]["larger"] * torch.sum(
            self.scales[self.scales > threshold]
        )
    # Penalize alphas falling outside the configured [low, high] thresholds.
    if weights["alpha"]["smaller"] != 0.0:
        threshold = weights["alpha"]["threshold"][0]
        loss += weights["alpha"]["smaller"] * torch.sum(
            torch.abs(self.alphas[self.alphas < threshold])
        )
    if weights["alpha"]["larger"] != 0.0:
        threshold = weights["alpha"]["threshold"][1]
        loss += weights["alpha"]["larger"] * torch.sum(
            self.alphas[self.alphas > threshold]
        )
    # Keep angles within the normalized [-1, 1] range.
    if weights["angle"] != 0.0:
        loss_angle = torch.sum(self.angles[self.angles > 1.0]) + torch.sum(
            torch.abs(self.angles[self.angles < -1.0])
        )
        loss += weights["angle"] * loss_angle
    # Keep centers within the normalized [-1, 1] cube.
    if weights["center"] != 0.0:
        centers = torch.abs(self.centers)
        loss += weights["center"] * torch.sum(centers[centers > 1.0])
    # Encourage the least-opaque Gaussians to match the statistics of the
    # most-opaque ones, so every element stays utilized.
    if weights["utilization"]["l2"] != 0:
        n = self.alphas.numel()
        k = int(weights["utilization"]["percentile"] / 100.0 * n)
        if k > 0:  # k == 0 would select empty tensors and yield NaN stds.
            _, low_indices = torch.topk(
                torch.abs(self.alphas), k, dim=0, largest=False
            )
            _, high_indices = torch.topk(
                torch.abs(self.alphas), k, dim=0, largest=True
            )
            loss_utilization = torch.abs(
                torch.mean(self.alphas[low_indices])
                - torch.mean(self.alphas[high_indices])
            ) + torch.abs(
                torch.std(self.alphas[low_indices])
                - torch.std(self.alphas[high_indices])
            )
            loss_distribution = torch.std(self.alphas)
            # Compare mean/std of each coordinate of centers and scales
            # between the low-alpha and high-alpha populations.
            for tensor in (self.centers, self.scales):
                for dim in range(3):
                    loss_utilization = (
                        loss_utilization
                        + torch.abs(
                            torch.std(tensor[low_indices, dim])
                            - torch.std(tensor[high_indices, dim])
                        )
                        + torch.abs(
                            torch.mean(tensor[low_indices, dim])
                            - torch.mean(tensor[high_indices, dim])
                        )
                    )
                    loss_distribution = loss_distribution + torch.std(tensor[:, dim])
            # Ramp the utilization term in linearly over the training run.
            decay = 1.0 - ((epoch_count - epoch_id) / epoch_count)
            loss += (
                decay
                * weights["utilization"]["l2"]
                * (loss_distribution + loss_utilization)
            )
    return loss

forward(points, test=False)

Forward pass: evaluate the 3D Gaussian volume at given points.

Parameters:

  • points
                Input points at which to evaluate the Gaussian volume, where each row is a 3D point.
    
  • test
                If True, disables gradient computation (default: False).
    

Returns:

  • total_intensities ( Tensor ) –

    Total intensities at the input points, weighted by alphas.

Source code in odak/learn/models/gaussians.py
def forward(self, points, test=False):
    """
    Forward pass: evaluate the 3D Gaussian volume at given points.

    Parameters
    ----------
    points            : torch.Tensor,  shape (N, 3)
                        Input points at which to evaluate the Gaussian volume, where each row is a 3D point.
    test              : bool, optional
                        If True, disables gradient computation (default: False).

    Returns
    -------
    total_intensities : torch.Tensor
                        Total intensities at the input points, weighted by alphas.
    """
    import contextlib

    # Bug fix: a bare `torch.no_grad()` statement creates and immediately
    # discards the context manager without disabling gradients; it must be
    # entered with `with` to take effect.
    guard = torch.no_grad() if test else contextlib.nullcontext()
    with guard:
        intensities = evaluate_3d_gaussians(
            points=points,
            centers=self.centers,
            scales=self.scales,
            # Angles are stored in a normalized range; presumably the
            # scaling by 180 converts them to degrees -- TODO confirm.
            angles=self.angles * 180,
            opacity=self.alphas,
        )
        total_intensities = torch.mean(intensities, dim=-1)
    return total_intensities

initialize_parameters(centers=None, angles=None, scales=None, alphas=None, device=torch.device('cpu'))

Initialize model parameters using PyTorch tensors.

Parameters:

  • centers (Tensor, default: None ) –
      If None (default), initializes as a tensor of shape
      (number_of_elements, 3) with values sampled from standard normal distribution.
    
  • angles
      If None (default), initializes similarly to centers: shape (n,3).
    
  • scales
      If None (default), initializes as a tensor of shape
      (number_of_elements, 3) with values uniformly distributed between 0 and 1.
    
  • alphas
      If None (default), initializes as a tensor of shape
      (number_of_elements, 1) with values uniformly distributed between 0 and 1.
    
  • device
      Device to be used to define the parameters.
      Make sure to pass the device you use with this model for proper manual parameter initialization.
    
Source code in odak/learn/models/gaussians.py
def initialize_parameters(
    self,
    centers=None,
    angles=None,
    scales=None,
    alphas=None,
    device=torch.device("cpu"),
):
    """
    Initialize model parameters using PyTorch tensors.

    Parameters
    ----------
    centers : torch.Tensor, optional
              If None (default), initializes as a tensor of shape
              (number_of_elements, 3) with values sampled from standard normal distribution.
    angles  : torch.Tensor, optional
              If None (default), initializes similarly to centers: shape (n,3).
    scales  : torch.Tensor, optional
              If None (default), initializes as a tensor of shape
              (number_of_elements, 3) with values uniformly distributed between 0 and 1.
    alphas  : torch.Tensor, optional
              If None (default), initializes as a tensor of shape
              (number_of_elements, 1) with values uniformly distributed between 0 and 1.
    device  : torch.device
              Device to be used to define the parameters.
              Make sure to pass the device you use with this model for proper manual parameter initialization.
    """
    # `x is None` is the idiomatic form of `isinstance(x, type(None))`.
    if centers is None:
        centers = torch.randn(self.number_of_elements, 3, device=device)
    if angles is None:
        angles = torch.randn(self.number_of_elements, 3, device=device)
    if scales is None:
        scales = torch.rand(self.number_of_elements, 3, device=device)
    if alphas is None:
        alphas = torch.rand(self.number_of_elements, 1, device=device)
    # Register everything as trainable parameters on the module.
    self.centers = torch.nn.Parameter(centers)
    self.angles = torch.nn.Parameter(angles)
    self.scales = torch.nn.Parameter(scales)
    self.alphas = torch.nn.Parameter(alphas)

load_weights(weights_filename=None, device=torch.device('cpu'))

Load model weights from a file.

Parameters:

  • weights_filename (str, default: None ) –
                Path to the weights file. If None, no weights are loaded.
    
  • device
                Device to load the weights onto (default: 'cpu').
    

Raises:

  • ValueError : If path validation fails or extension is not allowed.
  • FileNotFoundError: If file does not exist after validation.
Notes
  • If weights_filename is a valid file, the model state is updated and set to eval mode.
  • The file path is validated for security (tilde expanded, path traversal blocked).
  • A log message is emitted upon successful loading.
Source code in odak/learn/models/gaussians.py
def load_weights(self, weights_filename=None, device=torch.device("cpu")):
    """
    Load model weights from a file.

    Parameters
    ----------
    weights_filename : str
                        Path to the weights file. If None, no weights are loaded.
    device           : torch.device, optional
                        Device to load the weights onto (default: 'cpu').

    Raises
    ------
    ValueError       : If path validation fails or extension is not allowed.
    FileNotFoundError: If file does not exist after validation.

    Notes
    -----
    - If `weights_filename` is a valid file, the model state is updated and set to eval mode.
    - The file path is validated for security (tilde expanded, path traversal blocked).
    - A log message is emitted upon successful loading.
    """
    if weights_filename is None:
        return
    safe_path = validate_path(
        weights_filename, allowed_extensions=[".pth", ".pt", ".bin"]
    )
    # Bug fix: the documented FileNotFoundError was never raised; a missing
    # file was silently ignored. Raising matches the docstring and the
    # behavior of gaussians_2d.load_weights.
    if not os.path.isfile(safe_path):
        raise FileNotFoundError("Weights file not found: {}".format(safe_path))
    self.load_state_dict(
        torch.load(safe_path, weights_only=True, map_location=device)
    )
    self.eval()
    logger.info(
        "gaussian_3d_volume model weights loaded: {}".format(safe_path)
    )

optimize(points, ground_truth, loss_weights, learning_rate=0.01, number_of_epochs=10, scheduler_power=1, save_at_every=1, max_norm=None, weights_filename=None)

Optimize model parameters using AdamW and a polynomial learning rate scheduler.

Parameters:

  • points
               Input data points for the model.
    
  • ground_truth
               Ground truth values corresponding to the input points.
    
  • loss_weights
               Dictionary of weights for each loss component.
    
  • learning_rate
               Learning rate for the optimizer. Default is 1e-2.
    
  • number_of_epochs (int, default: 10 ) –
               Number of training epochs. Default is 10.
    
  • scheduler_power
               Power parameter for the polynomial learning rate scheduler. Default is 1.
    
  • save_at_every
               Save model weights every `save_at_every` epochs. Default is 1.
    
  • max_norm
               By default it is None, when set clips the gradient with the given threshold.
    
  • weights_filename (str, default: None ) –
               Filename for saving model weights. If None, weights are not saved.
    
Notes
  • Uses AdamW optimizer and PolynomialLR scheduler.
  • Logs loss at each epoch and saves weights periodically.
Source code in odak/learn/models/gaussians.py
def optimize(
    self,
    points,
    ground_truth,
    loss_weights,
    learning_rate=1e-2,
    number_of_epochs=10,
    scheduler_power=1,
    save_at_every=1,
    max_norm=None,
    weights_filename=None,
):
    """
    Optimize model parameters using AdamW and a polynomial learning rate scheduler.

    Parameters
    ----------
    points           : torch.Tensor
                       Input data points for the model.
    ground_truth     : torch.Tensor
                       Ground truth values corresponding to the input points.
    loss_weights     : dict
                       Dictionary of weights for each loss component.
    learning_rate    : float, optional
                       Learning rate for the optimizer. Default is 1e-2.
    number_of_epochs : int, optional
                       Number of training epochs. Default is 10.
    scheduler_power  : float, optional
                       Power parameter for the polynomial learning rate scheduler. Default is 1.
    save_at_every    : int
                       Save model weights every `save_at_every` epochs. Default is 1.
    max_norm         : float, optional
                       By default it is None, when set clips the gradient with the given threshold.
    weights_filename : str, optional
                       Filename for saving model weights. If None, weights are not saved.

    Notes
    -----
    - Uses AdamW optimizer and PolynomialLR scheduler.
    - Logs loss at each epoch and saves weights periodically.
    """
    optimizer = torch.optim.AdamW(self.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.PolynomialLR(
        optimizer,
        total_iters=number_of_epochs,
        power=scheduler_power,
        last_epoch=-1,
    )
    # Pre-seed the description so the final log line cannot raise NameError
    # when number_of_epochs == 0.
    description = "gaussian_3d_volume model loss: n/a"
    t_epoch = tqdm(range(number_of_epochs), leave=False, dynamic_ncols=True)
    for epoch_id in t_epoch:
        optimizer.zero_grad()
        estimates = self.forward(points)
        loss = self.evaluate(
            estimates,
            ground_truth,
            epoch_id=epoch_id,
            epoch_count=number_of_epochs,
            weights=loss_weights,
        )
        # retain_graph = True allows back-propagating through a graph that
        # may have been built outside this loop (e.g., `points` carrying
        # gradients from an upstream computation).
        loss.backward(retain_graph=True)
        if max_norm is not None:
            torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm)
        optimizer.step()
        scheduler.step()
        description = "gaussian_3d_volume model loss:{:.4f}".format(loss.item())
        t_epoch.set_description(description)
        # Bug fix: honor the documented contract that no weights are saved
        # when weights_filename is None (save_weights would fail on None).
        if weights_filename is not None and epoch_id % save_at_every == save_at_every - 1:
            self.save_weights(weights_filename)
    logger.info(description)

save_weights(weights_filename)

Save the model weights to a specified file.

Parameters:

  • weights_filename (str) –
                Path or filename where the weights will be saved. The path can include
                relative paths and tilde notation (~), which will be expanded by `validate_path`.
    
Example:

Save model weights to current directory with filename 'model_weights.pth'

save_weights('model_weights.pth')

Save model weights to home directory using ~ notation

save_weights('~/.weights.pth')

Raises:

  • ValueError : If path validation fails or extension is not allowed.
Source code in odak/learn/models/gaussians.py
def save_weights(self, weights_filename):
    """
    Persist the model state dictionary to disk.

    Parameters
    ----------
    weights_filename : str
                        Destination path for the weights. Relative paths and
                        tilde notation (~) are supported; both are expanded
                        and checked by `validate_path`.

    Example:
    --------
    # Save model weights to current directory with filename 'model_weights.pth'
    save_weights('model_weights.pth')

    # Save model weights to home directory using ~ notation
    save_weights('~/.weights.pth')

    Raises
    ------
    ValueError : If path validation fails or extension is not allowed.
    """
    permitted_extensions = [".pth", ".pt", ".bin"]
    destination = validate_path(
        weights_filename,
        allowed_extensions=permitted_extensions,
    )
    torch.save(self.state_dict(), destination)
    logger.info("gaussian_3d_volume model weights saved: {}".format(destination))

gaussians_2d

Bases: Module

Wrapper class for the 2D Gaussian model with loss computation and evaluation utilities.

This class wraps gaussian_2d and provides additional functionality: - Loss functions (L1, L2) pre-initialized - Weight saving/loading methods - Model parameter counting

Parameters:

  • number_of_elements (int, default: 10 ) –
                Number of 2D Gaussian primitives in the model (default: 10).
    
  • logger
                 Logger instance for tracking progress. If None, creates a new one.
    

Attributes:

  • model (gaussian_2d) –

    The underlying primitive Gaussian model.

  • l2_loss (MSELoss) –

    Mean squared error loss function.

  • l1_loss (L1Loss) –

    L1 absolute loss function.

  • logger (Logger) –

    Logger instance for info/debug messages.

Examples:

>>> model = gaussians_2d(number_of_elements=50)
>>> x = torch.linspace(-1, 1, 256)
>>> y = torch.linspace(-1, 1, 256)
>>> X, Y = torch.meshgrid(x, y, indexing='ij')
>>> output = model(X, Y, test=False)
Notes
  • The test flag in forward() controls gradient computation (not recommended use).
  • Use standard training loop with optimizer.zero_grad(), loss.backward(), optimizer.step().
Source code in odak/learn/models/gaussians.py
class gaussians_2d(torch.nn.Module):
    """
    Wrapper class for the 2D Gaussian model with loss computation and evaluation utilities.

    This class wraps `gaussian_2d` and adds:
    - Pre-initialized loss functions (L1, L2) on the underlying primitives
    - Weight saving/loading helpers
    - Trainable parameter counting

    Parameters
    ----------
    number_of_elements : int, optional
                        Number of 2D Gaussian primitives in the model (default: 10).
    logger             : logging.Logger or None, optional
                         Logger instance for tracking progress. If None, creates a new one.

    Attributes
    ----------
    model       : gaussian_2d
                  The underlying primitive Gaussian model.
    logger      : logging.Logger
                  Logger instance for info/debug messages.

    Examples
    --------
    >>> model = gaussians_2d(number_of_elements=50)
    >>> x = torch.linspace(-1, 1, 256)
    >>> y = torch.linspace(-1, 1, 256)
    >>> X, Y = torch.meshgrid(x, y, indexing='ij')
    >>> output = model(X, Y, test=False)

    Notes
    -----
    - The `test` flag in forward() controls gradient computation (not recommended use).
    - Use standard training loop with optimizer.zero_grad(), loss.backward(), optimizer.step().
    """

    def __init__(
        self,
        number_of_elements=10,
        logger=None,
    ):
        """
        Initialize the gaussians_2d wrapper model.

        Parameters
        ----------
        number_of_elements : int
                            Number of 2D Gaussian elements (default: 10).
        logger             : logging.Logger or None
                             Logger instance (default: creates new logger).
        """
        super(gaussians_2d, self).__init__()
        valid_count = isinstance(number_of_elements, int) and number_of_elements > 0
        if not valid_count:
            # Report the offending value for ints, the offending type otherwise.
            if isinstance(number_of_elements, int):
                detail = str(number_of_elements)
            else:
                detail = type(number_of_elements).__name__
            raise ValueError(
                "number_of_elements must be a positive integer, got {}".format(detail)
            )
        self.number_of_elements = number_of_elements
        self.model = gaussian_2d(number_of_elements=self.number_of_elements)
        # Total count of trainable parameters across all submodules.
        self.total_params = sum(
            parameter.numel()
            for parameter in self.parameters()
            if parameter.requires_grad
        )
        # Fall back to the package-wide logger when none is supplied.
        from ...log import logger as default_logger

        self.logger = default_logger if logger is None else logger

    def forward(
        self,
        x,
        y,
        test=False,
    ):
        """
        Forward pass through the Gaussian model.

        Parameters
        ----------
        x      : torch.Tensor
                 X-coordinates of evaluation grid.
        y      : torch.Tensor
                 Y-coordinates of evaluation grid.
        test   : bool, optional
                 If True, runs in no_grad mode (default: False).

        Returns
        -------
        result : torch.Tensor
                 The summed Gaussian field with shape matching x/y grids plus one dimension.

        Notes
        -----
        The `test` flag is deprecated. Use standard training pattern:
        ```python
        model.train()  # Enable gradients
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        ```
        """
        if test:
            with torch.no_grad():
                field = self.model(x=x, y=y)
        else:
            field = self.model(x=x, y=y)
        # Collapse the per-Gaussian axis, then append a singleton channel.
        summed = torch.sum(field, dim=-1)
        return summed.unsqueeze(-1)

    def save_weights(self, weights_filename):
        """
        Save model weights to a file.

        Parameters
        ----------
        weights_filename : str
                          Path to save weights (must end with .pt, .pth, or similar).
        """
        from ...tools.file import validate_path

        destination = validate_path(
            os.path.expanduser(weights_filename),
            allowed_extensions=[".pt", ".pth"],
        )
        torch.save(self.state_dict(), destination)
        self.logger.info("Model weights saved to: {}".format(destination))

    def load_weights(
        self,
        weights_filename=None,
        device=torch.device("cpu"),
    ):
        """
        Load model weights from a file.

        Parameters
        ----------
        weights_filename : str or None
                          Path to weights file. If None, skips loading.
        device           : torch.device, optional
                          Device to load weights onto (default: CPU).
        """
        if weights_filename is None:
            return
        from ...tools.file import validate_path

        source = validate_path(
            os.path.expanduser(weights_filename),
            allowed_extensions=[".pt", ".pth"],
        )
        if not os.path.isfile(source):
            raise FileNotFoundError("Weights file not found: {}".format(source))
        state = torch.load(source, weights_only=True, map_location=device)
        self.load_state_dict(state)
        self.eval()  # Switch to evaluation mode after loading.
        self.logger.info("Model weights loaded from: {}".format(source))

__init__(number_of_elements=10, logger=None)

Initialize the gaussians_2d wrapper model.

Parameters:

  • number_of_elements (int, default: 10 ) –
                Number of 2D Gaussian elements (default: 10).
    
  • logger
                 Logger instance (default: creates new logger).
    
Source code in odak/learn/models/gaussians.py
def __init__(
    self,
    number_of_elements=10,
    logger=None,
):
    """
    Initialize the gaussians_2d wrapper model.

    Parameters
    ----------
    number_of_elements : int
                        Number of 2D Gaussian elements (default: 10).
    logger             : logging.Logger or None
                         Logger instance (default: creates new logger).
    """
    super(gaussians_2d, self).__init__()
    valid_count = isinstance(number_of_elements, int) and number_of_elements > 0
    if not valid_count:
        # Report the offending value for ints, the offending type otherwise.
        if isinstance(number_of_elements, int):
            detail = str(number_of_elements)
        else:
            detail = type(number_of_elements).__name__
        raise ValueError(
            "number_of_elements must be a positive integer, got {}".format(detail)
        )
    self.number_of_elements = number_of_elements
    self.model = gaussian_2d(number_of_elements=self.number_of_elements)
    # Total count of trainable parameters across all submodules.
    self.total_params = sum(
        parameter.numel()
        for parameter in self.parameters()
        if parameter.requires_grad
    )
    # Fall back to the package-wide logger when none is supplied.
    from ...log import logger as default_logger

    self.logger = default_logger if logger is None else logger

forward(x, y, test=False)

Forward pass through the Gaussian model.

Parameters:

  • x
     X-coordinates of evaluation grid.
    
  • y
     Y-coordinates of evaluation grid.
    
  • test
     If True, runs in no_grad mode (default: False).
    

Returns:

  • result ( Tensor ) –

    The summed Gaussian field with shape matching x/y grids plus one dimension.

Notes

The test flag is deprecated. Use standard training pattern:

model.train()  # Enable gradients
loss = criterion(output, target)
loss.backward()
optimizer.step()

Source code in odak/learn/models/gaussians.py
def forward(
    self,
    x,
    y,
    test=False,
):
    """
    Forward pass through the Gaussian model.

    Parameters
    ----------
    x      : torch.Tensor
             X-coordinates of evaluation grid.
    y      : torch.Tensor
             Y-coordinates of evaluation grid.
    test   : bool, optional
             If True, runs in no_grad mode (default: False).

    Returns
    -------
    result : torch.Tensor
             The summed Gaussian field with shape matching x/y grids plus one dimension.

    Notes
    -----
    The `test` flag is deprecated. Use standard training pattern:
    ```python
    model.train()  # Enable gradients
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    ```
    """
    if test:
        with torch.no_grad():
            field = self.model(x=x, y=y)
    else:
        field = self.model(x=x, y=y)
    # Collapse the per-Gaussian axis, then append a singleton channel.
    summed = torch.sum(field, dim=-1)
    return summed.unsqueeze(-1)

load_weights(weights_filename=None, device=torch.device('cpu'))

Load model weights from a file.

Parameters:

  • weights_filename (str or None, default: None ) –
              Path to weights file. If None, skips loading.
    
  • device
              Device to load weights onto (default: CPU).
    
Source code in odak/learn/models/gaussians.py
def load_weights(
    self,
    weights_filename=None,
    device=torch.device("cpu"),
):
    """
    Load model weights from a file.

    Parameters
    ----------
    weights_filename : str or None
                      Path to weights file. If None, skips loading.
    device           : torch.device, optional
                      Device to load weights onto (default: CPU).
    """
    if weights_filename is None:
        return
    from ...tools.file import validate_path

    source = validate_path(
        os.path.expanduser(weights_filename),
        allowed_extensions=[".pt", ".pth"],
    )
    if not os.path.isfile(source):
        raise FileNotFoundError("Weights file not found: {}".format(source))
    state = torch.load(source, weights_only=True, map_location=device)
    self.load_state_dict(state)
    self.eval()  # Switch to evaluation mode after loading.
    self.logger.info("Model weights loaded from: {}".format(source))

save_weights(weights_filename)

Save model weights to a file.

Parameters:

  • weights_filename (str) –
              Path to save weights (must end with .pt, .pth, or similar).
    
Source code in odak/learn/models/gaussians.py
def save_weights(self, weights_filename):
    """
    Save model weights to a file.

    Parameters
    ----------
    weights_filename : str
                      Path to save weights (must end with .pt, .pth, or similar).
    """
    from ...tools.file import validate_path

    destination = validate_path(
        os.path.expanduser(weights_filename),
        allowed_extensions=[".pt", ".pth"],
    )
    torch.save(self.state_dict(), destination)
    self.logger.info("Model weights saved to: {}".format(destination))

global_feature_module

Bases: Module

A global feature layer that processes global features from input channels and applies them to another input tensor via learned transformations.

Source code in odak/learn/models/components.py
class global_feature_module(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies them to another input tensor via learned transformations.
    """

    def __init__(
        self,
        input_channels,
        mid_channels,
        output_channels,
        kernel_size,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels  : int
                          Number of mid channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        # Both double-convolution branches share one configuration.
        convolution_options = {
            "input_channels": input_channels,
            "mid_channels": mid_channels,
            "output_channels": output_channels,
            "kernel_size": kernel_size,
            "bias": bias,
            "normalization": normalization,
            "activation": activation,
        }
        self.transformations_1 = global_transformations(input_channels, output_channels)
        self.global_features_1 = double_convolution(**convolution_options)
        self.global_features_2 = double_convolution(**convolution_options)
        self.transformations_2 = global_transformations(input_channels, output_channels)

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        merged = self.transformations_1(x1, x2)
        features_first = self.global_features_1(merged)
        features_second = self.global_features_2(features_first)
        result = self.transformations_2(features_first, features_second)
        return result

__init__(input_channels, mid_channels, output_channels, kernel_size, bias=False, normalization=False, activation=torch.nn.ReLU())

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of mid channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    mid_channels,
    output_channels,
    kernel_size,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels  : int
                      Number of mid channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    # Global modulation stages placed before and after the two convolutional
    # refinement stages (see forward()).
    self.transformations_1 = global_transformations(input_channels, output_channels)
    self.global_features_1 = double_convolution(
        input_channels=input_channels,
        mid_channels=mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.global_features_2 = double_convolution(
        input_channels=input_channels,
        mid_channels=mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.transformations_2 = global_transformations(input_channels, output_channels)

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    # Globally modulate the inputs, refine with two double convolutions, then
    # apply a second global modulation between intermediate and refined maps.
    global_tensor_1 = self.transformations_1(x1, x2)
    y1 = self.global_features_1(global_tensor_1)
    y2 = self.global_features_2(y1)
    global_tensor_2 = self.transformations_2(y1, y2)
    return global_tensor_2

global_transformations

Bases: Module

A global feature layer that processes global features from input channels and applies learned transformations to another input tensor.

This implementation is adapted from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Reference: J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/components.py
class global_transformations(torch.nn.Module):
    """
    A global feature layer that derives per-channel scale and shift terms
    from one input tensor and uses them to modulate another input tensor.

    This implementation is adapted from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Reference:
    J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."
    """

    def __init__(self, input_channels, output_channels):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        """
        super().__init__()
        # First stage maps spatially pooled channel statistics to a
        # per-channel scale; the second derives a per-channel shift from it.
        self.global_feature_1 = torch.nn.Sequential(
            torch.nn.Linear(input_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace=True),
        )
        self.global_feature_2 = torch.nn.Sequential(
            torch.nn.Linear(output_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace=True),
        )

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         Tensor to be modulated.
        x2             : torch.tensor
                         Tensor supplying the global channel statistics.

        Returns
        ----------
        result         : torch.tensor
                         Estimated output (x1 scaled and shifted per channel).
        """
        # Global average pool over the spatial axes of x2.
        pooled = x2.mean(dim=(2, 3))
        scale = self.global_feature_1(pooled)
        shift = self.global_feature_2(scale)
        # Restore singleton spatial dimensions so both terms broadcast over x1.
        scale = scale[:, :, None, None]
        shift = shift[:, :, None, None]
        result = x1 * scale + shift
        return result

__init__(input_channels, output_channels)

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
Source code in odak/learn/models/components.py
def __init__(self, input_channels, output_channels):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    """
    super().__init__()
    # First stage maps pooled channel statistics to a per-channel scale.
    self.global_feature_1 = torch.nn.Sequential(
        torch.nn.Linear(input_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace=True),
    )
    # Second stage derives a per-channel shift from the scale features.
    self.global_feature_2 = torch.nn.Sequential(
        torch.nn.Linear(output_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace=True),
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    # Global average pool x2 over its spatial axes into per-channel stats.
    y = torch.mean(x2, dim=(2, 3))
    y1 = self.global_feature_1(y)
    y2 = self.global_feature_2(y1)
    # Restore singleton spatial dimensions so the terms broadcast over x1.
    y1 = y1.unsqueeze(2).unsqueeze(3)
    y2 = y2.unsqueeze(2).unsqueeze(3)
    # y1 acts as a per-channel scale and y2 as a per-channel shift.
    result = x1 * y1 + y2
    return result

multi_layer_perceptron

Bases: Module

A multi-layer perceptron model.

Source code in odak/learn/models/models.py
class multi_layer_perceptron(torch.nn.Module):
    """
    A multi-layer perceptron model.
    """

    def __init__(
        self,
        dimensions,
        activation=torch.nn.ReLU(),
        bias=False,
        model_type="conventional",
        siren_multiplier=1.0,
        input_multiplier=None,
    ):
        """
        Initialize the multi-layer perceptron.

        Parameters
        ----------
        dimensions : list of int
            List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel).
        activation : torch.nn.Module, optional
            Nonlinear activation function. Default is `torch.nn.ReLU()`.
        bias : bool, optional
            If set to True, linear layers will include biases. Default is False.
        siren_multiplier : float, optional
            When using `SIREN` model type, this parameter functions as a hyperparameter.
            The original SIREN work uses 30.
            You can bypass this parameter by providing input that are not normalized and larger than one. Default is 1.0.
        input_multiplier : float, optional
            Initial value of the input multiplier before the very first layer.
        model_type : str, optional
            Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
            `conventional` refers to a standard multi layer perceptron.
            For `SIREN`, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
            For `Swish`, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017).
            For `FILM SIREN`, see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
            For `Gaussian`, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
            Default is "conventional".
        """
        super(multi_layer_perceptron, self).__init__()
        self.activation = activation
        self.bias = bias
        self.model_type = model_type
        self.layers = torch.nn.ModuleList()
        self.siren_multiplier = siren_multiplier
        self.dimensions = dimensions
        logger.info(
            f"Initializing multi_layer_perceptron: model_type={model_type}, "
            f"dimensions={dimensions}, bias={bias}, "
            f"siren_multiplier={siren_multiplier}"
        )
        # One linear layer per consecutive pair of entries in dimensions.
        for i in range(len(self.dimensions) - 1):
            self.layers.append(
                torch.nn.Linear(
                    self.dimensions[i], self.dimensions[i + 1], bias=self.bias
                )
            )
        # Optional learnable elementwise scaling applied to the raw input.
        if not isinstance(input_multiplier, type(None)):
            self.input_multiplier = torch.nn.ParameterList()
            self.input_multiplier.append(
                torch.nn.Parameter(torch.ones(1, self.dimensions[0]) * input_multiplier)
            )
            logger.debug(f"Input multiplier initialized: {input_multiplier}")
        # FILM SIREN learns a per-layer scale (index 0) and shift (index 1)
        # for the sine activation; Gaussian learns one spread parameter.
        if self.model_type == "FILM SIREN":
            self.alpha = torch.nn.ParameterList()
            for j in self.dimensions[1::]:
                self.alpha.append(torch.nn.Parameter(torch.randn(2, 1, j)))
            logger.debug("FILM SIREN alpha parameters initialized")
        if self.model_type == "Gaussian":
            self.alpha = torch.nn.ParameterList()
            for j in self.dimensions[1::]:
                self.alpha.append(torch.nn.Parameter(torch.randn(1, 1, j)))
            logger.debug("Gaussian alpha parameters initialized")

    def forward(self, x):
        """
        Forward pass of the multi-layer perceptron.

        Parameters
        ----------
        x : torch.Tensor
            Input data.

        Returns
        -------
        result : torch.Tensor
            Estimated output.
        """
        if hasattr(self, "input_multiplier"):
            result = x * self.input_multiplier[0]
        else:
            result = x
        # The last layer stays linear (no activation) for every model type.
        for layer_id, layer in enumerate(self.layers):
            result = layer(result)
            if self.model_type == "conventional" and layer_id != len(self.layers) - 1:
                result = self.activation(result)
            elif self.model_type == "swish" and layer_id != len(self.layers) - 1:
                result = swish(result)
            elif self.model_type == "SIREN" and layer_id != len(self.layers) - 1:
                result = torch.sin(result * self.siren_multiplier)
            elif self.model_type == "FILM SIREN" and layer_id != len(self.layers) - 1:
                result = torch.sin(
                    self.alpha[layer_id][0] * result + self.alpha[layer_id][1]
                )
            elif self.model_type == "Gaussian" and layer_id != len(self.layers) - 1:
                result = gaussian(result, self.alpha[layer_id][0])
        return result

__init__(dimensions, activation=torch.nn.ReLU(), bias=False, model_type='conventional', siren_multiplier=1.0, input_multiplier=None)

Initialize the multi-layer perceptron.

Parameters:

  • dimensions (list of int) –

    List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel).

  • activation (Module, default: ReLU() ) –

    Nonlinear activation function. Default is torch.nn.ReLU().

  • bias (bool, default: False ) –

    If set to True, linear layers will include biases. Default is False.

  • siren_multiplier (float, default: 1.0 ) –

    When using SIREN model type, this parameter functions as a hyperparameter. The original SIREN work uses 30. You can bypass this parameter by providing input that are not normalized and larger than one. Default is 1.0.

  • input_multiplier (float, default: None ) –

    Initial value of the input multiplier before the very first layer.

  • model_type (str, default: 'conventional' ) –

    Model type: conventional, swish, SIREN, FILM SIREN, Gaussian. conventional refers to a standard multi layer perceptron. For SIREN, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473. For Swish, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017). For FILM SIREN, see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021. For Gaussian, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022. Default is "conventional".

Source code in odak/learn/models/models.py
def __init__(
    self,
    dimensions,
    activation=torch.nn.ReLU(),
    bias=False,
    model_type="conventional",
    siren_multiplier=1.0,
    input_multiplier=None,
):
    """
    Initialize the multi-layer perceptron.

    Parameters
    ----------
    dimensions : list of int
        List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel).
    activation : torch.nn.Module, optional
        Nonlinear activation function. Default is `torch.nn.ReLU()`.
    bias : bool, optional
        If set to True, linear layers will include biases. Default is False.
    siren_multiplier : float, optional
        When using `SIREN` model type, this parameter functions as a hyperparameter.
        The original SIREN work uses 30.
        You can bypass this parameter by providing input that are not normalized and larger than one. Default is 1.0.
    input_multiplier : float, optional
        Initial value of the input multiplier before the very first layer.
    model_type : str, optional
        Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
        `conventional` refers to a standard multi layer perceptron.
        For `SIREN`, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
        For `Swish`, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017).
        For `FILM SIREN`, see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
        For `Gaussian`, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
        Default is "conventional".
    """
    super(multi_layer_perceptron, self).__init__()
    self.activation = activation
    self.bias = bias
    self.model_type = model_type
    self.layers = torch.nn.ModuleList()
    self.siren_multiplier = siren_multiplier
    self.dimensions = dimensions
    logger.info(
        f"Initializing multi_layer_perceptron: model_type={model_type}, "
        f"dimensions={dimensions}, bias={bias}, "
        f"siren_multiplier={siren_multiplier}"
    )
    # One linear layer per consecutive pair of entries in dimensions.
    for i in range(len(self.dimensions) - 1):
        self.layers.append(
            torch.nn.Linear(
                self.dimensions[i], self.dimensions[i + 1], bias=self.bias
            )
        )
    # Optional learnable elementwise scaling applied to the raw input.
    if not isinstance(input_multiplier, type(None)):
        self.input_multiplier = torch.nn.ParameterList()
        self.input_multiplier.append(
            torch.nn.Parameter(torch.ones(1, self.dimensions[0]) * input_multiplier)
        )
        logger.debug(f"Input multiplier initialized: {input_multiplier}")
    # FILM SIREN learns a per-layer scale (index 0) and shift (index 1) for
    # the sine activation; Gaussian learns one spread parameter per layer.
    if self.model_type == "FILM SIREN":
        self.alpha = torch.nn.ParameterList()
        for j in self.dimensions[1::]:
            self.alpha.append(torch.nn.Parameter(torch.randn(2, 1, j)))
        logger.debug("FILM SIREN alpha parameters initialized")
    if self.model_type == "Gaussian":
        self.alpha = torch.nn.ParameterList()
        for j in self.dimensions[1::]:
            self.alpha.append(torch.nn.Parameter(torch.randn(1, 1, j)))
        logger.debug("Gaussian alpha parameters initialized")

forward(x)

Forward pass of the multi-layer perceptron.

Parameters:

  • x (Tensor) –

    Input data.

Returns:

  • result ( Tensor ) –

    Estimated output.

Source code in odak/learn/models/models.py
def forward(self, x):
    """
    Forward pass of the multi-layer perceptron.

    Parameters
    ----------
    x : torch.Tensor
        Input data.

    Returns
    -------
    result : torch.Tensor
        Estimated output.
    """
    # Optional learnable elementwise scaling applied before the first layer.
    if hasattr(self, "input_multiplier"):
        result = x * self.input_multiplier[0]
    else:
        result = x
    # The last layer stays linear (no activation) for every model type.
    for layer_id, layer in enumerate(self.layers):
        result = layer(result)
        if self.model_type == "conventional" and layer_id != len(self.layers) - 1:
            result = self.activation(result)
        elif self.model_type == "swish" and layer_id != len(self.layers) - 1:
            result = swish(result)
        elif self.model_type == "SIREN" and layer_id != len(self.layers) - 1:
            result = torch.sin(result * self.siren_multiplier)
        elif self.model_type == "FILM SIREN" and layer_id != len(self.layers) - 1:
            result = torch.sin(
                self.alpha[layer_id][0] * result + self.alpha[layer_id][1]
            )
        elif self.model_type == "Gaussian" and layer_id != len(self.layers) - 1:
            result = gaussian(result, self.alpha[layer_id][0])
    return result

non_local_layer

Bases: Module

Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)

Source code in odak/learn/models/components.py
class non_local_layer(torch.nn.Module):
    """
    Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)
    """

    def __init__(
        self,
        input_channels=1024,
        bottleneck_channels=512,
        kernel_size=1,
        bias=False,
    ):
        """

        Parameters
        ----------
        input_channels      : int
                              Number of input channels.
        bottleneck_channels : int
                              Number of middle channels.
        kernel_size         : int
                              Kernel size.
        bias                : bool
                              Set to True to let convolutional layers have bias term.
        """
        super(non_local_layer, self).__init__()
        self.input_channels = input_channels
        self.bottleneck_channels = bottleneck_channels
        # Projection of the input into the bottleneck space (1x1 by default).
        self.g = torch.nn.Conv2d(
            self.input_channels,
            self.bottleneck_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        )
        # W_z maps the attended bottleneck features back to input channels.
        self.W_z = torch.nn.Sequential(
            torch.nn.Conv2d(
                self.bottleneck_channels,
                self.input_channels,
                kernel_size=kernel_size,
                bias=bias,
                padding=kernel_size // 2,
            ),
            torch.nn.BatchNorm2d(self.input_channels),
        )
        # Zero-initialize the trailing BatchNorm so the block starts as an
        # identity mapping (W_y is zero at initialization, hence z = x).
        torch.nn.init.constant_(self.W_z[1].weight, 0)
        torch.nn.init.constant_(self.W_z[1].bias, 0)

    def forward(self, x):
        """
        Forward model [zi = Wzyi + xi]

        Parameters
        ----------
        x               : torch.tensor
                          First input data.


        Returns
        ----------
        z               : torch.tensor
                          Estimated output.
        """
        batch_size, channels, height, width = x.size()
        # NOTE(review): theta and phi are the raw input flattened over space
        # (no 1x1 embedding) — only g projects to the bottleneck channels.
        theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
        phi = x.view(batch_size, channels, -1).permute(0, 2, 1)
        g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
        # Pairwise affinities between spatial positions, scaled by the number
        # of positions before the softmax.
        attn = torch.bmm(theta, phi.transpose(1, 2)) / (height * width)
        attn = torch.nn.functional.softmax(attn, dim=-1)
        y = (
            torch.bmm(attn, g)
            .permute(0, 2, 1)
            .contiguous()
            .view(batch_size, self.bottleneck_channels, height, width)
        )
        W_y = self.W_z(y)
        # Residual connection keeps the output close to the input early on.
        z = W_y + x
        return z

__init__(input_channels=1024, bottleneck_channels=512, kernel_size=1, bias=False)

Parameters:

  • input_channels
                  Number of input channels.
    
  • bottleneck_channels (int, default: 512 ) –
                  Number of middle channels.
    
  • kernel_size
                  Kernel size.
    
  • bias
                  Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=1024,
    bottleneck_channels=512,
    kernel_size=1,
    bias=False,
):
    """

    Parameters
    ----------
    input_channels      : int
                          Number of input channels.
    bottleneck_channels : int
                          Number of middle channels.
    kernel_size         : int
                          Kernel size.
    bias                : bool
                          Set to True to let convolutional layers have bias term.
    """
    super(non_local_layer, self).__init__()
    self.input_channels = input_channels
    self.bottleneck_channels = bottleneck_channels
    # Projection of the input into the bottleneck space (1x1 by default).
    self.g = torch.nn.Conv2d(
        self.input_channels,
        self.bottleneck_channels,
        kernel_size=kernel_size,
        padding=kernel_size // 2,
        bias=bias,
    )
    # W_z maps the attended bottleneck features back to input channels.
    self.W_z = torch.nn.Sequential(
        torch.nn.Conv2d(
            self.bottleneck_channels,
            self.input_channels,
            kernel_size=kernel_size,
            bias=bias,
            padding=kernel_size // 2,
        ),
        torch.nn.BatchNorm2d(self.input_channels),
    )
    # Zero-initialize the trailing BatchNorm so the block starts as an
    # identity mapping (W_y is zero at initialization, hence z = x).
    torch.nn.init.constant_(self.W_z[1].weight, 0)
    torch.nn.init.constant_(self.W_z[1].bias, 0)

forward(x)

Forward model [zi = Wzyi + xi]

Parameters:

  • x
              First input data.
    

Returns:

  • z ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model [zi = Wzyi + xi]

    Parameters
    ----------
    x               : torch.tensor
                      First input data.


    Returns
    ----------
    z               : torch.tensor
                      Estimated output.
    """
    batch_size, channels, height, width = x.size()
    # NOTE(review): theta and phi are the raw input flattened over space (no
    # 1x1 embedding) — only g projects to the bottleneck channels.
    theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
    phi = x.view(batch_size, channels, -1).permute(0, 2, 1)
    g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
    # Pairwise affinities between spatial positions, scaled by the number of
    # positions before the softmax.
    attn = torch.bmm(theta, phi.transpose(1, 2)) / (height * width)
    attn = torch.nn.functional.softmax(attn, dim=-1)
    y = (
        torch.bmm(attn, g)
        .permute(0, 2, 1)
        .contiguous()
        .view(batch_size, self.bottleneck_channels, height, width)
    )
    W_y = self.W_z(y)
    # Residual connection keeps the output close to the input early on.
    z = W_y + x
    return z

normalization

Bases: Module

A normalization layer.

Source code in odak/learn/models/components.py
class normalization(torch.nn.Module):
    """
    A learnable channel normalization layer.

    Standardizes the input along the channel axis and rescales it with a
    learnable per-channel gain.
    """

    def __init__(
        self,
        dim=1,
    ):
        """
        Normalization layer.


        Parameters
        ----------
        dim             : int
                          Dimension (axis) to normalize.
        """
        super().__init__()
        # Learnable per-channel gain, broadcastable over (batch, dim, H, W).
        self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        # A looser epsilon is used for non-float32 (reduced precision) inputs.
        epsilon = 1e-5 if x.dtype == torch.float32 else 1e-3
        sigma_squared = x.var(dim=1, unbiased=False, keepdim=True)
        mu = x.mean(dim=1, keepdim=True)
        standardized = (x - mu) * torch.rsqrt(sigma_squared + epsilon)
        return standardized * self.k

__init__(dim=1)

Normalization layer.

Parameters:

  • dim
              Dimension (axis) to normalize.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    dim=1,
):
    """
    Normalization layer.


    Parameters
    ----------
    dim             : int
                      Dimension (axis) to normalize.
    """
    super().__init__()
    # Learnable per-channel gain, broadcastable over (batch, dim, H, W).
    self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    # A looser epsilon is used for non-float32 (reduced precision) inputs.
    eps = 1e-5 if x.dtype == torch.float32 else 1e-3
    var = torch.var(x, dim=1, unbiased=False, keepdim=True)
    mean = torch.mean(x, dim=1, keepdim=True)
    # Standardize along dim 1, then apply the learnable per-channel gain.
    result = (x - mean) * (var + eps).rsqrt() * self.k
    return result

positional_encoder

Bases: Module

A positional encoder module. This implementation follows this specific work: Martin-Brualla, Ricardo, Noha Radwan, Mehdi SM Sajjadi, Jonathan T. Barron, Alexey Dosovitskiy, and Daniel Duckworth. "Nerf in the wild: Neural radiance fields for unconstrained photo collections." In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pp. 7210-7219. 2021..

Source code in odak/learn/models/components.py
class positional_encoder(torch.nn.Module):
    """
    Sinusoidal positional encoder.

    This implementation follows this specific work: `Martin-Brualla, Ricardo, Noha Radwan, Mehdi SM Sajjadi, Jonathan T. Barron, Alexey Dosovitskiy, and Daniel Duckworth. "Nerf in the wild: Neural radiance fields for unconstrained photo collections." In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pp. 7210-7219. 2021.`.
    """

    def __init__(self, L):
        """
        A positional encoder module.

        Parameters
        ----------
        L                   : int
                              Positional encoding level (number of frequency octaves).
        """
        super().__init__()
        self.L = L

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x               : torch.tensor
                          Input data [b x n], where `b` is batch size, `n` is the feature size.

        Returns
        ----------
        results         : torch.tensor
                          Input concatenated with its cosine and sine encodings,
                          shaped [b x (n + 2 * n * L)].
        """
        # Frequencies 2^0, 2^1, ..., 2^(L-1), broadcast over every feature.
        octaves = torch.arange(self.L, device=x.device)
        frequencies = (2 ** octaves).reshape(1, 1, -1)
        scaled = x.unsqueeze(-1) * frequencies
        batch_size = x.shape[0]
        cosines = torch.cos(scaled).reshape(batch_size, -1)
        sines = torch.sin(scaled).reshape(batch_size, -1)
        return torch.cat((x, cosines, sines), dim=1)

__init__(L)

A positional encoder module.

Parameters:

  • L
                  Positional encoding level.
    
Source code in odak/learn/models/components.py
def __init__(self, L):
    """
    A positional encoder module.

    Parameters
    ----------
    L                   : int
                          Positional encoding level.
    """
    super(positional_encoder, self).__init__()
    # Number of frequency octaves used when encoding in forward().
    self.L = L

forward(x)

Forward model.

Parameters:

  • x
              Input data [b x n], where `b` is batch size, `n` is the feature size.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x               : torch.tensor
                      Input data [b x n], where `b` is batch size, `n` is the feature size.

    Returns
    ----------
    result          : torch.tensor
                      Result of the forward operation.
    """
    # Frequencies 2^0, 2^1, ..., 2^(L-1), broadcast over every feature.
    freqs = 2 ** torch.arange(self.L, device=x.device)
    freqs = freqs.view(1, 1, -1)
    results_cos = torch.cos(x.unsqueeze(-1) * freqs).reshape(x.shape[0], -1)
    results_sin = torch.sin(x.unsqueeze(-1) * freqs).reshape(x.shape[0], -1)
    # Output width is n * (1 + 2 * L): raw input plus cos and sin encodings.
    results = torch.cat((x, results_cos, results_sin), dim=1)
    return results

residual_attention_layer

Bases: Module

A residual block with an attention layer.

Source code in odak/learn/models/components.py
class residual_attention_layer(torch.nn.Module):
    """
    A residual block with an attention layer.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=1,
        bias=False,
        activation=torch.nn.ReLU(),
    ):
        """
        An attention layer class.


        Parameters
        ----------
        input_channels  : int or optional
                          Number of input channels.
        output_channels : int or optional
                          Number of middle channels.
        kernel_size     : int or optional
                          Kernel size.
        bias            : bool or optional
                          Set to True to let convolutional layers have bias term.
        activation      : torch.nn or optional
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        # Parallel convolution + batch-norm branches, one per input tensor.
        self.convolution0 = torch.nn.Sequential(
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            torch.nn.BatchNorm2d(output_channels),
        )
        self.convolution1 = torch.nn.Sequential(
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            torch.nn.BatchNorm2d(output_channels),
        )
        # Produces the attention map applied multiplicatively to x0 in forward().
        self.final_layer = torch.nn.Sequential(
            self.activation,
            torch.nn.Conv2d(
                output_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
        )

    def forward(self, x0, x1):
        """
        Forward model.

        Parameters
        ----------
        x0             : torch.tensor
                         First input data.

        x1             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        y0 = self.convolution0(x0)
        y1 = self.convolution1(x1)
        y2 = torch.add(y0, y1)
        # The fused features act as an attention map that gates the first input.
        result = self.final_layer(y2) * x0
        return result

__init__(input_channels=2, output_channels=2, kernel_size=1, bias=False, activation=torch.nn.ReLU())

An attention layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int or optional, default: 2 ) –
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=1,
    bias=False,
    activation=torch.nn.ReLU(),
):
    """
    An attention layer class.


    Parameters
    ----------
    input_channels  : int or optional
                      Number of input channels.
    output_channels : int or optional
                      Number of middle channels.
    kernel_size     : int or optional
                      Kernel size.
    bias            : bool or optional
                      Set to True to let convolutional layers have bias term.
    activation      : torch.nn or optional
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation

    def branch():
        # Builds one gating branch: same-padding convolution followed by
        # batch normalization. Each call returns an independent module with
        # its own freshly initialized weights.
        return torch.nn.Sequential(
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            torch.nn.BatchNorm2d(output_channels),
        )

    # Two parallel branches, one per input of forward().
    self.convolution0 = branch()
    self.convolution1 = branch()
    # Final mixing stage: nonlinearity followed by a same-padding convolution.
    self.final_layer = torch.nn.Sequential(
        self.activation,
        torch.nn.Conv2d(
            output_channels,
            output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        ),
    )

forward(x0, x1)

Forward model.

Parameters:

  • x0
             First input data.
    
  • x1
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x0, x1):
    """
    Forward model.

    Parameters
    ----------
    x0             : torch.tensor
                     First input data.

    x1             : torch.tensor
                     Second input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Each input goes through its own convolution + batch norm branch.
    fused = self.convolution0(x0) + self.convolution1(x1)
    # The fused features gate the first input multiplicatively.
    result = self.final_layer(fused) * x0
    return result

residual_layer

Bases: Module

A residual layer.

Source code in odak/learn/models/components.py
class residual_layer(torch.nn.Module):
    """
    A residual layer.
    """

    def __init__(
        self,
        input_channels=2,
        mid_channels=16,
        kernel_size=3,
        bias=False,
        normalization=True,
        activation=torch.nn.ReLU(),
    ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of middle channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        # The residual branch maps back to `input_channels` channels so the
        # skip connection in forward() can be added elementwise.
        branch_options = dict(
            mid_channels=mid_channels,
            output_channels=input_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )
        self.convolution = double_convolution(input_channels, **branch_options)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output (input plus the convolved features).
        """
        # Skip connection: add the residual branch output back onto the input.
        return x + self.convolution(x)

__init__(input_channels=2, mid_channels=16, kernel_size=3, bias=False, normalization=True, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    mid_channels=16,
    kernel_size=3,
    bias=False,
    normalization=True,
    activation=torch.nn.ReLU(),
):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of middle channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    # The residual branch returns to `input_channels` channels so that the
    # skip connection in forward() can be added to its output elementwise.
    branch_options = dict(
        mid_channels=mid_channels,
        output_channels=input_channels,
        kernel_size=kernel_size,
        normalization=normalization,
        bias=bias,
        activation=activation,
    )
    self.convolution = double_convolution(input_channels, **branch_options)

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output (the input plus its convolved features).
    """
    # Skip connection: add the residual branch output back onto the input.
    return x + self.convolution(x)

spatial_gate

Bases: Module

Spatial attention module that applies a convolution layer after channel pooling. This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.

Source code in odak/learn/models/components.py
class spatial_gate(torch.nn.Module):
    """
    Spatial attention module that applies a convolution layer after channel pooling.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.
    """

    def __init__(self):
        """
        Initializes the spatial gate module.
        """
        super().__init__()
        # A single 7x7 convolution without a nonlinearity turns the two
        # pooled channel maps into a one-channel attention map.
        self.spatial = convolution_layer(
            2, 1, 7, bias=False, activation=torch.nn.Identity()
        )

    def channel_pool(self, x):
        """
        Applies max and average pooling on the channels.

        Parameters
        ----------
        x             : torch.tensor
                        Input tensor.

        Returns
        -------
        output        : torch.tensor
                        Output tensor with two channels: channel-wise max
                        followed by channel-wise mean.
        """
        pooled = [
            torch.max(x, 1)[0].unsqueeze(1),
            torch.mean(x, 1).unsqueeze(1),
        ]
        return torch.cat(pooled, dim=1)

    def forward(self, x):
        """
        Forward pass of the SpatialGate module.

        Applies spatial attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the SpatialGate module.

        Returns
        -------
        scaled_x     : torch.tensor
                       Output tensor after applying spatial attention.
        """
        # Pool across channels, convolve to one map, squash to (0, 1), gate.
        attention = torch.sigmoid(self.spatial(self.channel_pool(x)))
        return x * attention

__init__()

Initializes the spatial gate module.

Source code in odak/learn/models/components.py
def __init__(self):
    """
    Initializes the spatial gate module.
    """
    super().__init__()
    # One 7x7 convolution without a nonlinearity maps the two pooled
    # channel maps produced by channel_pool() to a single attention map.
    self.spatial = convolution_layer(
        2, 1, 7, bias=False, activation=torch.nn.Identity()
    )

channel_pool(x)

Applies max and average pooling on the channels.

Parameters:

  • x
            Input tensor.
    

Returns:

  • output ( tensor ) –

    Output tensor.

Source code in odak/learn/models/components.py
def channel_pool(self, x):
    """
    Applies max and average pooling on the channels.

    Parameters
    ----------
    x             : torch.tensor
                    Input tensor.

    Returns
    -------
    output        : torch.tensor
                    Output tensor with two channels: channel-wise max
                    followed by channel-wise mean.
    """
    pooled = [
        torch.max(x, 1)[0].unsqueeze(1),
        torch.mean(x, 1).unsqueeze(1),
    ]
    return torch.cat(pooled, dim=1)

forward(x)

Forward pass of the SpatialGate module.

Applies spatial attention to the input tensor.

Parameters:

  • x
           Input tensor to the SpatialGate module.
    

Returns:

  • scaled_x ( tensor ) –

    Output tensor after applying spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the SpatialGate module.

    Applies spatial attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the SpatialGate module.

    Returns
    -------
    scaled_x     : torch.tensor
                   Output tensor after applying spatial attention.
    """
    # Pool across channels, convolve to an attention map, squash it to
    # (0, 1) with a sigmoid, and gate the input multiplicatively.
    attention_map = self.spatial(self.channel_pool(x))
    return x * torch.sigmoid(attention_map)

spatially_adaptive_convolution

Bases: Module

A spatially adaptive convolution layer.

References

C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions." C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation." C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."

Source code in odak/learn/models/components.py
class spatially_adaptive_convolution(torch.nn.Module):
    """
    A spatially adaptive convolution layer.

    References
    ----------

    C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions."
    C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation."
    C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initializes a spatially adaptive convolution layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn.Module
                          Activation function. NOTE(review): stored but not
                          applied anywhere in forward() — confirm whether
                          this is intentional.
        """
        super(spatially_adaptive_convolution, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        # The standard convolution only supplies an initialized weight
        # tensor; forward() uses `self.weight` directly (shared storage).
        self.standard_convolution = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=self.output_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        self.weight = torch.nn.Parameter(
            data=self.standard_convolution.weight, requires_grad=True
        )
        self.activation = activation

    def _pad_to_stride(self, tensor):
        """
        Symmetric zero padding so both spatial sizes divide the stride evenly.
        """
        rows = tensor.size(-2) % self.stride
        cols = tensor.size(-1) % self.stride
        return torch.nn.functional.pad(
            tensor, (cols // 2, cols - cols // 2, rows // 2, rows - rows // 2)
        )

    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive convolution layer.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        sa_output          : torch.tensor
                            Estimated output tensor.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad both tensors when the kernel-feature grid does not line up
        # with the input grid at the configured stride.
        height_mismatch = sv_kernel_feature.size(-2) * self.stride != x.size(-2)
        width_mismatch = sv_kernel_feature.size(-1) * self.stride != x.size(-1)
        if height_mismatch or width_mismatch:
            sv_kernel_feature = self._pad_to_stride(sv_kernel_feature)
            x = self._pad_to_stride(x)

        rows_out = x.size(-2) // self.stride
        cols_out = x.size(-1) // self.stride

        # Unfold the input into (1, C * k * k, rows_out * cols_out) columns.
        patches = torch.nn.functional.unfold(
            x,
            kernel_size=(self.kernel_size, self.kernel_size),
            stride=self.stride,
            padding=self.padding,
        )

        # One spatially varying kernel column per output location.
        sv_kernel = sv_kernel_feature.reshape(
            1,
            self.input_channels * self.kernel_size * self.kernel_size,
            rows_out * cols_out,
        )

        # Spatially invariant weights as a (C_out, C_in * k * k) matrix.
        si_kernel = self.weight.reshape(
            self.output_channels,
            self.input_channels * self.kernel_size * self.kernel_size,
        )

        # Modulate each patch by its location-specific kernel, then mix
        # channels with the shared weight matrix.
        modulated = patches * sv_kernel
        sa_output = torch.matmul(si_kernel, modulated).reshape(
            1,
            self.output_channels,
            rows_out,
            cols_out,
        )
        return sa_output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive convolution layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Activation function to apply. If None, no activation is applied.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initializes a spatially adaptive convolution layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn.Module
                      Activation function to apply. If None, no activation is applied.
                      NOTE(review): stored but not applied in this layer's
                      forward pass — confirm whether this is intentional.
    """
    super(spatially_adaptive_convolution, self).__init__()
    self.kernel_size = kernel_size
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.stride = stride
    self.padding = padding
    # The standard convolution is created to obtain a conventionally
    # initialized weight tensor; forward() uses `self.weight` directly.
    self.standard_convolution = torch.nn.Conv2d(
        in_channels=input_channels,
        out_channels=self.output_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    # Shares storage with standard_convolution.weight.
    self.weight = torch.nn.Parameter(
        data=self.standard_convolution.weight, requires_grad=True
    )
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive convolution layer.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • sa_output ( tensor ) –

    Estimated output tensor. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive convolution layer.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    sa_output          : torch.tensor
                        Estimated output tensor.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    def pad_to_stride(tensor):
        # Symmetric zero padding so spatial sizes divide the stride evenly.
        rows = tensor.size(-2) % self.stride
        cols = tensor.size(-1) % self.stride
        return torch.nn.functional.pad(
            tensor, (cols // 2, cols - cols // 2, rows // 2, rows - rows // 2)
        )

    # Pad both tensors when the kernel-feature grid does not line up with
    # the input grid at the configured stride.
    if (
        sv_kernel_feature.size(-1) * self.stride != x.size(-1)
        or sv_kernel_feature.size(-2) * self.stride != x.size(-2)
    ):
        sv_kernel_feature = pad_to_stride(sv_kernel_feature)
        x = pad_to_stride(x)

    rows_out = x.size(-2) // self.stride
    cols_out = x.size(-1) // self.stride

    # Unfold the input into (1, C * k * k, rows_out * cols_out) columns.
    patches = torch.nn.functional.unfold(
        x,
        kernel_size=(self.kernel_size, self.kernel_size),
        stride=self.stride,
        padding=self.padding,
    )

    # One spatially varying kernel column per output location.
    sv_kernel = sv_kernel_feature.reshape(
        1,
        self.input_channels * self.kernel_size * self.kernel_size,
        rows_out * cols_out,
    )

    # Spatially invariant weights as a (C_out, C_in * k * k) matrix.
    si_kernel = self.weight.reshape(
        self.output_channels,
        self.input_channels * self.kernel_size * self.kernel_size,
    )

    # Modulate each patch by its location-specific kernel, then mix
    # channels with the shared weight matrix.
    modulated = patches * sv_kernel
    return torch.matmul(si_kernel, modulated).reshape(
        1,
        self.output_channels,
        rows_out,
        cols_out,
    )

spatially_adaptive_module

Bases: Module

A spatially adaptive module that combines learned spatially adaptive convolutions.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/components.py
class spatially_adaptive_module(torch.nn.Module):
    """
    A spatially adaptive module that combines learned spatially adaptive convolutions.

    References
    ----------

    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initializes a spatially adaptive module.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super(spatially_adaptive_module, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        # One output channel comes from the plain spatially varying sum in
        # forward(); the learned weights produce the remaining channels.
        self.output_channels_for_weight = self.output_channels - 1
        # The standard convolution only supplies an initialized weight
        # tensor; forward() uses `self.weight` directly (shared storage).
        self.standard_convolution = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=self.output_channels_for_weight,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        self.weight = torch.nn.Parameter(
            data=self.standard_convolution.weight, requires_grad=True
        )
        self.activation = activation

    def _pad_to_stride(self, tensor):
        """
        Symmetric zero padding so both spatial sizes divide the stride evenly.
        """
        rows = tensor.size(-2) % self.stride
        cols = tensor.size(-1) % self.stride
        return torch.nn.functional.pad(
            tensor, (cols // 2, cols - cols // 2, rows // 2, rows - rows // 2)
        )

    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive module.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        output             : torch.tensor
                            Combined output tensor from standard and spatially adaptive convolutions.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad both tensors when the kernel-feature grid does not line up
        # with the input grid at the configured stride.
        height_mismatch = sv_kernel_feature.size(-2) * self.stride != x.size(-2)
        width_mismatch = sv_kernel_feature.size(-1) * self.stride != x.size(-1)
        if height_mismatch or width_mismatch:
            sv_kernel_feature = self._pad_to_stride(sv_kernel_feature)
            x = self._pad_to_stride(x)

        rows_out = x.size(-2) // self.stride
        cols_out = x.size(-1) // self.stride

        # Unfold the input into (1, C * k * k, rows_out * cols_out) columns.
        patches = torch.nn.functional.unfold(
            x,
            kernel_size=(self.kernel_size, self.kernel_size),
            stride=self.stride,
            padding=self.padding,
        )

        # One spatially varying kernel column per output location.
        sv_kernel = sv_kernel_feature.reshape(
            1,
            self.input_channels * self.kernel_size * self.kernel_size,
            rows_out * cols_out,
        )

        # Modulate each patch by its location-specific kernel.
        modulated = patches * sv_kernel

        # Channel 0: plain spatially varying convolution (per-location sum).
        sv_output = torch.sum(modulated, dim=1).reshape(
            1, 1, rows_out, cols_out
        )

        # Remaining channels: mix with the shared learned weight matrix.
        si_kernel = self.weight.reshape(
            self.output_channels_for_weight,
            self.input_channels * self.kernel_size * self.kernel_size,
        )
        sa_output = torch.matmul(si_kernel, modulated).reshape(
            1,
            self.output_channels_for_weight,
            rows_out,
            cols_out,
        )

        # Concatenate both results and apply the nonlinearity.
        return self.activation(torch.cat((sv_output, sa_output), dim=1))

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive module.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initializes a spatially adaptive module.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super(spatially_adaptive_module, self).__init__()
    self.kernel_size = kernel_size
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.stride = stride
    self.padding = padding
    # One output channel is produced by the plain spatially varying sum in
    # forward(); the learned weights cover the remaining channels.
    self.output_channels_for_weight = self.output_channels - 1
    # The standard convolution is created to obtain a conventionally
    # initialized weight tensor; forward() uses `self.weight` directly.
    self.standard_convolution = torch.nn.Conv2d(
        in_channels=input_channels,
        out_channels=self.output_channels_for_weight,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    # Shares storage with standard_convolution.weight.
    self.weight = torch.nn.Parameter(
        data=self.standard_convolution.weight, requires_grad=True
    )
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive module.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • output ( tensor ) –

    Combined output tensor from standard and spatially adaptive convolutions. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive module.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    output             : torch.tensor
                        Combined output tensor from standard and spatially adaptive convolutions.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    def pad_to_stride(tensor):
        # Symmetric zero padding so spatial sizes divide the stride evenly.
        rows = tensor.size(-2) % self.stride
        cols = tensor.size(-1) % self.stride
        return torch.nn.functional.pad(
            tensor, (cols // 2, cols - cols // 2, rows // 2, rows - rows // 2)
        )

    # Pad both tensors when the kernel-feature grid does not line up with
    # the input grid at the configured stride.
    if (
        sv_kernel_feature.size(-1) * self.stride != x.size(-1)
        or sv_kernel_feature.size(-2) * self.stride != x.size(-2)
    ):
        sv_kernel_feature = pad_to_stride(sv_kernel_feature)
        x = pad_to_stride(x)

    rows_out = x.size(-2) // self.stride
    cols_out = x.size(-1) // self.stride

    # Unfold the input into (1, C * k * k, rows_out * cols_out) columns.
    patches = torch.nn.functional.unfold(
        x,
        kernel_size=(self.kernel_size, self.kernel_size),
        stride=self.stride,
        padding=self.padding,
    )

    # One spatially varying kernel column per output location.
    sv_kernel = sv_kernel_feature.reshape(
        1,
        self.input_channels * self.kernel_size * self.kernel_size,
        rows_out * cols_out,
    )

    # Modulate each patch by its location-specific kernel.
    modulated = patches * sv_kernel

    # Channel 0: plain spatially varying convolution (per-location sum).
    sv_output = torch.sum(modulated, dim=1).reshape(1, 1, rows_out, cols_out)

    # Remaining channels: mix with the shared learned weight matrix.
    si_kernel = self.weight.reshape(
        self.output_channels_for_weight,
        self.input_channels * self.kernel_size * self.kernel_size,
    )
    sa_output = torch.matmul(si_kernel, modulated).reshape(
        1,
        self.output_channels_for_weight,
        rows_out,
        cols_out,
    )

    # Concatenate both results and apply the nonlinearity.
    return self.activation(torch.cat((sv_output, sa_output), dim=1))

spatially_adaptive_unet

Bases: Module

Spatially varying U-Net model based on spatially adaptive convolution.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/models.py
class spatially_adaptive_unet(torch.nn.Module):
    """
    Spatially varying U-Net model based on spatially adaptive convolution.

    References
    ----------
    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """

    def __init__(
        self,
        depth=3,
        dimensions=8,
        input_channels=6,
        out_channels=6,
        kernel_size=3,
        bias=True,
        normalization=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initialize the spatially adaptive U-Net model.

        Parameters
        ----------
        depth : int, optional
            Number of upsampling and downsampling layers. Default is 3.
        dimensions : int, optional
            Number of dimensions (base feature width; doubled per encoder stage). Default is 8.
        input_channels : int, optional
            Number of input channels. Default is 6.
        out_channels : int, optional
            Number of output channels. Default is 6.
        kernel_size : int, optional
            Kernel size for convolutional layers. Default is 3.
        bias : bool, optional
            Set to True to let convolutional layers learn a bias term. Default is True.
        normalization : bool, optional
            If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
        activation : torch.nn.Module, optional
            Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
        """
        super().__init__()
        self.depth = depth
        self.out_channels = out_channels
        logger.info(
            f"Initializing spatially_adaptive_unet: "
            f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
            f"out_channels={out_channels}, kernel_size={kernel_size}, "
            f"bias={bias}, normalization={normalization}"
        )
        # Stem: lift the input field from input_channels to the base feature width.
        self.inc = convolution_layer(
            input_channels=input_channels,
            output_channels=dimensions,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )

        # Encoder: each stage halves the spatial resolution (AvgPool2d(2)) and
        # doubles the channel width through a spatially adaptive module (SAM).
        # NOTE(review): one `activation` module instance is shared across every
        # layer; with inplace=True this is presumably intentional — confirm.
        self.encoder = torch.nn.ModuleList()
        for i in range(self.depth + 1):  # Downsampling layers
            down_in_channels = dimensions * (2**i)
            down_out_channels = 2 * down_in_channels
            pooling_layer = torch.nn.AvgPool2d(2)
            # Residual refinement block (in == out channels; added to its input in forward()).
            double_convolution_layer = double_convolution(
                input_channels=down_in_channels,
                mid_channels=down_in_channels,
                output_channels=down_in_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            sam = spatially_adaptive_module(
                input_channels=down_in_channels,
                output_channels=down_out_channels,
                kernel_size=kernel_size,
                bias=bias,
                activation=activation,
            )
            self.encoder.append(
                torch.nn.ModuleList([pooling_layer, double_convolution_layer, sam])
            )
            logger.debug(f"Added encoder block {i}: {down_in_channels} -> {down_out_channels}")
        # Bottleneck: refine the deepest features and fuse them with a learned
        # global descriptor (see global_feature_module).
        self.global_feature_module = torch.nn.ModuleList()
        double_convolution_layer = double_convolution(
            input_channels=dimensions * (2 ** (depth + 1)),
            mid_channels=dimensions * (2 ** (depth + 1)),
            output_channels=dimensions * (2 ** (depth + 1)),
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        global_feature_layer = global_feature_module(
            input_channels=dimensions * (2 ** (depth + 1)),
            mid_channels=dimensions * (2 ** (depth + 1)),
            output_channels=dimensions * (2 ** (depth + 1)),
            kernel_size=kernel_size,
            bias=bias,
            activation=torch.nn.LeakyReLU(0.2, inplace=True),
        )
        self.global_feature_module.append(
            torch.nn.ModuleList([double_convolution_layer, global_feature_layer])
        )
        logger.debug("Added global feature module")

        # Decoder: runs from the deepest scale (i == depth) up to the output
        # scale (i == 0); the final stage projects to out_channels through a
        # 1x1 convolution with no activation.
        self.decoder = torch.nn.ModuleList()
        for i in range(depth, -1, -1):
            up_in_channels = dimensions * (2 ** (i + 1))
            up_mid_channels = up_in_channels // 2
            if i == 0:
                up_out_channels = self.out_channels
                upsample_layer = upsample_convtranspose2d_layer(
                    input_channels=up_in_channels,
                    output_channels=up_mid_channels,
                    kernel_size=2,
                    stride=2,
                    bias=bias,
                )
                conv_layer = torch.nn.Sequential(
                    convolution_layer(
                        input_channels=up_mid_channels,
                        output_channels=up_mid_channels,
                        kernel_size=kernel_size,
                        bias=bias,
                        normalization=normalization,
                        activation=activation,
                    ),
                    convolution_layer(
                        input_channels=up_mid_channels,
                        output_channels=up_out_channels,
                        kernel_size=1,
                        bias=bias,
                        normalization=normalization,
                        activation=None,
                    ),
                )
                self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
                logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
            else:
                up_out_channels = up_in_channels // 2
                upsample_layer = upsample_convtranspose2d_layer(
                    input_channels=up_in_channels,
                    output_channels=up_mid_channels,
                    kernel_size=2,
                    stride=2,
                    bias=bias,
                )
                conv_layer = double_convolution(
                    input_channels=up_mid_channels,
                    mid_channels=up_mid_channels,
                    output_channels=up_out_channels,
                    kernel_size=kernel_size,
                    bias=bias,
                    normalization=normalization,
                    activation=activation,
                )
                self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
                logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
        logger.info("spatially_adaptive_unet initialization completed")

    def forward(self, sv_kernel, field):
        """
        Forward pass of the spatially adaptive U-Net.

        Parameters
        ----------
        sv_kernel : list of torch.Tensor
            Learned spatially varying kernels.
            Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
            where C_i, H_i, and W_i represent the channel, height, and width
            of each feature at a certain scale.

        field : torch.Tensor
            Input field data.
            Dimension: (1, 6, H, W)

        Returns
        -------
        target_field : torch.Tensor
            Estimated output.
            Dimension: (1, 6, H, W)
        """
        # NOTE(review): repeated AvgPool2d(2) stages suggest H and W should be
        # divisible by 2 ** (depth + 1) for skip shapes to line up — confirm.
        x = self.inc(field)
        downsampling_outputs = [x]
        for i, down_layer in enumerate(self.encoder):
            # down_layer = [pooling, double_convolution, sam]
            x_down = down_layer[0](downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
            # Residual connection around the double convolution, then SAM with
            # the kernel for this scale; indexing is reversed, so sv_kernel is
            # presumably ordered coarsest-first — confirm against the kernel
            # generation model's output order.
            sam_output = down_layer[2](
                x_down + down_layer[1](x_down), sv_kernel[self.depth - i]
            )
            downsampling_outputs.append(sam_output)
        global_feature = self.global_feature_module[0][0](downsampling_outputs[-1])
        global_feature = self.global_feature_module[0][1](
            downsampling_outputs[-1], global_feature
        )
        downsampling_outputs.append(global_feature)
        x_up = downsampling_outputs[-1]
        for i, up_layer in enumerate(self.decoder):
            # Even indices of downsampling_outputs hold the skip features
            # (index 0 is the stem output; 2, 4, ... are SAM outputs).
            x_up = up_layer[0](x_up, downsampling_outputs[2 * (self.depth - i)])
            x_up = up_layer[1](x_up)
        result = x_up
        return result

__init__(depth=3, dimensions=8, input_channels=6, out_channels=6, kernel_size=3, bias=True, normalization=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initialize the spatially adaptive U-Net model.

Parameters:

  • depth (int, default: 3 ) –

    Number of upsampling and downsampling layers. Default is 3.

  • dimensions (int, default: 8 ) –

    Number of dimensions. Default is 8.

  • input_channels (int, default: 6 ) –

    Number of input channels. Default is 6.

  • out_channels (int, default: 6 ) –

    Number of output channels. Default is 6.

  • kernel_size (int, default: 3 ) –

    Kernel size for convolutional layers. Default is 3.

  • bias (bool, default: True ) –

    Set to True to let convolutional layers learn a bias term. Default is True.

  • normalization (bool, default: False ) –

    If True, adds a Batch Normalization layer after the convolutional layer. Default is False.

  • activation (Module, default: LeakyReLU(0.2, inplace=True) ) –

    Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).

Source code in odak/learn/models/models.py
def __init__(
    self,
    depth=3,
    dimensions=8,
    input_channels=6,
    out_channels=6,
    kernel_size=3,
    bias=True,
    normalization=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initialize the spatially adaptive U-Net model.

    Parameters
    ----------
    depth : int, optional
        Number of upsampling and downsampling layers. Default is 3.
    dimensions : int, optional
        Number of dimensions (base feature width; doubled per encoder stage). Default is 8.
    input_channels : int, optional
        Number of input channels. Default is 6.
    out_channels : int, optional
        Number of output channels. Default is 6.
    kernel_size : int, optional
        Kernel size for convolutional layers. Default is 3.
    bias : bool, optional
        Set to True to let convolutional layers learn a bias term. Default is True.
    normalization : bool, optional
        If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
    activation : torch.nn.Module, optional
        Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
    """
    super().__init__()
    self.depth = depth
    self.out_channels = out_channels
    logger.info(
        f"Initializing spatially_adaptive_unet: "
        f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
        f"out_channels={out_channels}, kernel_size={kernel_size}, "
        f"bias={bias}, normalization={normalization}"
    )
    # Stem: lift the input field from input_channels to the base feature width.
    self.inc = convolution_layer(
        input_channels=input_channels,
        output_channels=dimensions,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )

    # Encoder: each stage halves the spatial resolution (AvgPool2d(2)) and
    # doubles the channel width through a spatially adaptive module (SAM).
    self.encoder = torch.nn.ModuleList()
    for i in range(self.depth + 1):  # Downsampling layers
        down_in_channels = dimensions * (2**i)
        down_out_channels = 2 * down_in_channels
        pooling_layer = torch.nn.AvgPool2d(2)
        # Residual refinement block (in == out channels; added to its input in forward()).
        double_convolution_layer = double_convolution(
            input_channels=down_in_channels,
            mid_channels=down_in_channels,
            output_channels=down_in_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        sam = spatially_adaptive_module(
            input_channels=down_in_channels,
            output_channels=down_out_channels,
            kernel_size=kernel_size,
            bias=bias,
            activation=activation,
        )
        self.encoder.append(
            torch.nn.ModuleList([pooling_layer, double_convolution_layer, sam])
        )
        logger.debug(f"Added encoder block {i}: {down_in_channels} -> {down_out_channels}")
    # Bottleneck: refine the deepest features and fuse them with a learned
    # global descriptor (see global_feature_module).
    self.global_feature_module = torch.nn.ModuleList()
    double_convolution_layer = double_convolution(
        input_channels=dimensions * (2 ** (depth + 1)),
        mid_channels=dimensions * (2 ** (depth + 1)),
        output_channels=dimensions * (2 ** (depth + 1)),
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    global_feature_layer = global_feature_module(
        input_channels=dimensions * (2 ** (depth + 1)),
        mid_channels=dimensions * (2 ** (depth + 1)),
        output_channels=dimensions * (2 ** (depth + 1)),
        kernel_size=kernel_size,
        bias=bias,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    )
    self.global_feature_module.append(
        torch.nn.ModuleList([double_convolution_layer, global_feature_layer])
    )
    logger.debug("Added global feature module")

    # Decoder: runs from the deepest scale (i == depth) up to the output
    # scale (i == 0); the final stage projects to out_channels through a
    # 1x1 convolution with no activation.
    self.decoder = torch.nn.ModuleList()
    for i in range(depth, -1, -1):
        up_in_channels = dimensions * (2 ** (i + 1))
        up_mid_channels = up_in_channels // 2
        if i == 0:
            up_out_channels = self.out_channels
            upsample_layer = upsample_convtranspose2d_layer(
                input_channels=up_in_channels,
                output_channels=up_mid_channels,
                kernel_size=2,
                stride=2,
                bias=bias,
            )
            conv_layer = torch.nn.Sequential(
                convolution_layer(
                    input_channels=up_mid_channels,
                    output_channels=up_mid_channels,
                    kernel_size=kernel_size,
                    bias=bias,
                    normalization=normalization,
                    activation=activation,
                ),
                convolution_layer(
                    input_channels=up_mid_channels,
                    output_channels=up_out_channels,
                    kernel_size=1,
                    bias=bias,
                    normalization=normalization,
                    activation=None,
                ),
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
        else:
            up_out_channels = up_in_channels // 2
            upsample_layer = upsample_convtranspose2d_layer(
                input_channels=up_in_channels,
                output_channels=up_mid_channels,
                kernel_size=2,
                stride=2,
                bias=bias,
            )
            conv_layer = double_convolution(
                input_channels=up_mid_channels,
                mid_channels=up_mid_channels,
                output_channels=up_out_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
    logger.info("spatially_adaptive_unet initialization completed")

forward(sv_kernel, field)

Forward pass of the spatially adaptive U-Net.

Parameters:

  • sv_kernel (list of torch.Tensor) –

    Learned spatially varying kernels. Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i), where C_i, H_i, and W_i represent the channel, height, and width of each feature at a certain scale.

  • field (Tensor) –

    Input field data. Dimension: (1, 6, H, W)

Returns:

  • target_field ( Tensor ) –

    Estimated output. Dimension: (1, 6, H, W)

Source code in odak/learn/models/models.py
def forward(self, sv_kernel, field):
    """
    Forward pass of the spatially adaptive U-Net.

    Parameters
    ----------
    sv_kernel : list of torch.Tensor
        Learned spatially varying kernels.
        Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
        where C_i, H_i, and W_i represent the channel, height, and width
        of each feature at a certain scale.

    field : torch.Tensor
        Input field data.
        Dimension: (1, 6, H, W)

    Returns
    -------
    target_field : torch.Tensor
        Estimated output.
        Dimension: (1, 6, H, W)
    """
    x = self.inc(field)
    downsampling_outputs = [x]
    for i, down_layer in enumerate(self.encoder):
        x_down = down_layer[0](downsampling_outputs[-1])
        downsampling_outputs.append(x_down)
        sam_output = down_layer[2](
            x_down + down_layer[1](x_down), sv_kernel[self.depth - i]
        )
        downsampling_outputs.append(sam_output)
    global_feature = self.global_feature_module[0][0](downsampling_outputs[-1])
    global_feature = self.global_feature_module[0][1](
        downsampling_outputs[-1], global_feature
    )
    downsampling_outputs.append(global_feature)
    x_up = downsampling_outputs[-1]
    for i, up_layer in enumerate(self.decoder):
        x_up = up_layer[0](x_up, downsampling_outputs[2 * (self.depth - i)])
        x_up = up_layer[1](x_up)
    result = x_up
    return result

spatially_varying_kernel_generation_model

Bases: Module

Spatially varying kernel generation model, revised from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Refer to: J. Huang, P. Zhu, M. Geng et al., "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/models.py
class spatially_varying_kernel_generation_model(torch.nn.Module):
    """
    Spatially_varying_kernel_generation_model revised from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Refer to:
    J. Huang, P. Zhu, M. Geng et al. Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices.
    """

    def __init__(
        self,
        depth=3,
        dimensions=8,
        input_channels=7,
        kernel_size=3,
        bias=True,
        normalization=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initialize the spatially varying kernel generation model.

        Parameters
        ----------
        depth : int, optional
            Number of upsampling and downsampling layers. Default is 3.
        dimensions : int, optional
            Number of dimensions (base feature width). Default is 8.
        input_channels : int, optional
            Number of input channels. Default is 7
            (1 focal-surface channel + 6 field channels; see forward()).
        kernel_size : int, optional
            Kernel size for convolutional layers. Default is 3.
        bias : bool, optional
            Set to True to let convolutional layers learn a bias term. Default is True.
        normalization : bool, optional
            If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
        activation : torch.nn.Module, optional
            Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
        """
        super().__init__()
        self.depth = depth
        logger.info(
            f"Initializing spatially_varying_kernel_generation_model: "
            f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
            f"kernel_size={kernel_size}, bias={bias}, normalization={normalization}"
        )
        # Stem: fuse focal surface and field into the base feature width.
        self.inc = convolution_layer(
            input_channels=input_channels,
            output_channels=dimensions,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )

        # Encoder: flat list alternating [AvgPool2d, double_convolution] per
        # stage; first and last stages keep the channel count, middle stages
        # double it.
        self.encoder = torch.nn.ModuleList()
        for i in range(depth + 1):  # downsampling layers
            if i == 0:
                in_channels = dimensions * (2**i)
                out_channels = dimensions * (2**i)
            elif i == depth:
                in_channels = dimensions * (2 ** (i - 1))
                out_channels = dimensions * (2 ** (i - 1))
            else:
                in_channels = dimensions * (2 ** (i - 1))
                out_channels = 2 * in_channels
            pooling_layer = torch.nn.AvgPool2d(2)
            double_convolution_layer = double_convolution(
                input_channels=in_channels,
                mid_channels=in_channels,
                output_channels=out_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            self.encoder.append(pooling_layer)
            self.encoder.append(double_convolution_layer)
            logger.debug(f"Added encoder block {i}: {in_channels} -> {out_channels}")

        # One kernel-prediction head per scale (coarsest-first). Each head is a
        # ModuleList of AvgPool2d layers (to bring the focal surface and skip
        # features down to the head's resolution) followed by a 3-conv block
        # that predicts kernel_size * kernel_size weights per channel.
        self.spatially_varying_feature = torch.nn.ModuleList()  # for kernel generation
        for i in range(depth, -1, -1):
            if i == 1:
                svf_in_channels = dimensions + 2 ** (self.depth + i) + 1
            else:
                svf_in_channels = 2 ** (self.depth + i) + 1
            svf_out_channels = (2 ** (self.depth + i)) * (kernel_size * kernel_size)
            svf_mid_channels = dimensions * (2 ** (self.depth - 1))
            spatially_varying_kernel_generation = torch.nn.ModuleList()
            for j in range(i, -1, -1):
                pooling_layer = torch.nn.AvgPool2d(2 ** (j + 1))
                spatially_varying_kernel_generation.append(pooling_layer)
            kernel_generation_block = torch.nn.Sequential(
                torch.nn.Conv2d(
                    in_channels=svf_in_channels,
                    out_channels=svf_mid_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
                activation,
                torch.nn.Conv2d(
                    in_channels=svf_mid_channels,
                    out_channels=svf_mid_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
                activation,
                torch.nn.Conv2d(
                    in_channels=svf_mid_channels,
                    out_channels=svf_out_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
            )
            spatially_varying_kernel_generation.append(kernel_generation_block)
            self.spatially_varying_feature.append(spatially_varying_kernel_generation)
            logger.debug(f"Added SVF block {i}: {svf_in_channels} -> {svf_out_channels}")

        # Decoder: first entry is the global feature layer applied at the
        # bottleneck, followed by [upsample, double_convolution] pairs.
        self.decoder = torch.nn.ModuleList()
        global_feature_layer = global_feature_module(  # global feature layer
            input_channels=dimensions * (2 ** (depth - 1)),
            mid_channels=dimensions * (2 ** (depth - 1)),
            output_channels=dimensions * (2 ** (depth - 1)),
            kernel_size=kernel_size,
            bias=bias,
            activation=torch.nn.LeakyReLU(0.2, inplace=True),
        )
        self.decoder.append(global_feature_layer)
        for i in range(depth, 0, -1):
            if i == 2:
                up_in_channels = (dimensions // 2) * (2**i)
                up_out_channels = up_in_channels
                up_mid_channels = up_in_channels
            elif i == 1:
                up_in_channels = dimensions * 2
                up_out_channels = dimensions
                up_mid_channels = up_out_channels
            else:
                up_in_channels = (dimensions // 2) * (2**i)
                up_out_channels = up_in_channels // 2
                up_mid_channels = up_in_channels
            upsample_layer = upsample_convtranspose2d_layer(
                input_channels=up_in_channels,
                output_channels=up_mid_channels,
                kernel_size=2,
                stride=2,
                bias=bias,
            )
            conv_layer = double_convolution(
                input_channels=up_mid_channels,
                output_channels=up_out_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
        logger.info("spatially_varying_kernel_generation_model initialization completed")

    def forward(self, focal_surface, field):
        """
        Forward pass of the spatially varying kernel generation model.

        Parameters
        ----------
        focal_surface : torch.Tensor
            Input focal surface data.
            Dimension: (1, 1, H, W)

        field : torch.Tensor
            Input field data.
            Dimension: (1, 6, H, W)

        Returns
        -------
        sv_kernel : list of torch.Tensor
            Learned spatially varying kernels.
            Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
            where C_i, H_i, and W_i represent the channel, height, and width
            of each feature at a certain scale.
        """
        x = self.inc(torch.cat((focal_surface, field), dim=1))
        # downsampling_outputs layout: [0] focal surface, [1] stem output, then
        # alternating pooled / convolved features per encoder stage (even
        # indices >= 2 hold the pooled features used below).
        downsampling_outputs = [focal_surface]
        downsampling_outputs.append(x)
        for i, down_layer in enumerate(self.encoder):
            x_down = down_layer(downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
        sv_kernels = []
        for i, (up_layer, svf_layer) in enumerate(
            zip(self.decoder, self.spatially_varying_feature)
        ):
            if i == 0:
                # Bottleneck stage: fuse with the global descriptor first.
                global_feature = up_layer(
                    downsampling_outputs[-2], downsampling_outputs[-1]
                )
                downsampling_outputs[-1] = global_feature
                sv_feature = [global_feature, downsampling_outputs[0]]
                # Repeatedly pool the focal surface down to this head's
                # resolution while collecting pooled encoder skips.
                for j in range(self.depth - i + 1):
                    sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                    if j > 0:
                        sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
                # NOTE(review): this explicit reordering hard-codes depth == 3
                # (indices 2..4 only exist then) — confirm for other depths.
                sv_feature = [
                    sv_feature[0],
                    sv_feature[1],
                    sv_feature[4],
                    sv_feature[2],
                    sv_feature[3],
                ]
                sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
                sv_kernels.append(sv_kernel)
            else:
                # Upsample and merge with the matching encoder skip feature.
                x_up = up_layer[0](
                    downsampling_outputs[-1],
                    downsampling_outputs[2 * (self.depth + 1 - i) + 1],
                )
                x_up = up_layer[1](x_up)
                downsampling_outputs[-1] = x_up
                sv_feature = [x_up, downsampling_outputs[0]]
                for j in range(self.depth - i + 1):
                    sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                    if j > 0:
                        sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
                if i == 1:
                    # NOTE(review): like the i == 0 branch, this ordering is
                    # presumably tuned for depth == 3 — confirm.
                    sv_feature = [
                        sv_feature[0],
                        sv_feature[1],
                        sv_feature[3],
                        sv_feature[2],
                    ]
                sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
                sv_kernels.append(sv_kernel)
        return sv_kernels

__init__(depth=3, dimensions=8, input_channels=7, kernel_size=3, bias=True, normalization=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initialize the spatially varying kernel generation model.

Parameters:

  • depth (int, default: 3 ) –

    Number of upsampling and downsampling layers. Default is 3.

  • dimensions (int, default: 8 ) –

    Number of dimensions. Default is 8.

  • input_channels (int, default: 7 ) –

    Number of input channels. Default is 7.

  • kernel_size (int, default: 3 ) –

    Kernel size for convolutional layers. Default is 3.

  • bias (bool, default: True ) –

    Set to True to let convolutional layers learn a bias term. Default is True.

  • normalization (bool, default: False ) –

    If True, adds a Batch Normalization layer after the convolutional layer. Default is False.

  • activation (Module, default: LeakyReLU(0.2, inplace=True) ) –

    Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).

Source code in odak/learn/models/models.py
def __init__(
    self,
    depth=3,
    dimensions=8,
    input_channels=7,
    kernel_size=3,
    bias=True,
    normalization=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initialize the spatially varying kernel generation model.

    Parameters
    ----------
    depth : int, optional
        Number of upsampling and downsampling layers. Default is 3.
    dimensions : int, optional
        Number of dimensions. Default is 8.
    input_channels : int, optional
        Number of input channels. Default is 7.
    kernel_size : int, optional
        Kernel size for convolutional layers. Default is 3.
    bias : bool, optional
        Set to True to let convolutional layers learn a bias term. Default is True.
    normalization : bool, optional
        If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
    activation : torch.nn.Module, optional
        Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
    """
    super().__init__()
    self.depth = depth
    logger.info(
        f"Initializing spatially_varying_kernel_generation_model: "
        f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
        f"kernel_size={kernel_size}, bias={bias}, normalization={normalization}"
    )
    # Stem: lifts the concatenated (focal surface + field) input to
    # `dimensions` channels.
    self.inc = convolution_layer(
        input_channels=input_channels,
        output_channels=dimensions,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )

    # Encoder: depth + 1 stages. Each stage appends TWO entries to the
    # ModuleList (an AvgPool2d and a double convolution), so `forward`
    # records two outputs per stage. Channel width doubles in the middle
    # stages and stays flat at the first and last stage.
    self.encoder = torch.nn.ModuleList()
    for i in range(depth + 1):  # downsampling layers
        if i == 0:
            in_channels = dimensions * (2**i)
            out_channels = dimensions * (2**i)
        elif i == depth:
            in_channels = dimensions * (2 ** (i - 1))
            out_channels = dimensions * (2 ** (i - 1))
        else:
            in_channels = dimensions * (2 ** (i - 1))
            out_channels = 2 * in_channels
        pooling_layer = torch.nn.AvgPool2d(2)
        double_convolution_layer = double_convolution(
            input_channels=in_channels,
            mid_channels=in_channels,
            output_channels=out_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.encoder.append(pooling_layer)
        self.encoder.append(double_convolution_layer)
        logger.debug(f"Added encoder block {i}: {in_channels} -> {out_channels}")

    # Kernel-generation branch: one ModuleList per scale, built coarse to
    # fine. Each list holds AvgPool2d layers (used in `forward` to bring
    # auxiliary features down to the current scale) followed by a
    # three-convolution block emitting the spatially varying kernel.
    self.spatially_varying_feature = torch.nn.ModuleList()  # for kernel generation
    for i in range(depth, -1, -1):
        # NOTE(review): the trailing `+ 1` in the input width presumably
        # accounts for the single-channel focal surface concatenated in
        # `forward`; confirm against models.py.
        if i == 1:
            svf_in_channels = dimensions + 2 ** (self.depth + i) + 1
        else:
            svf_in_channels = 2 ** (self.depth + i) + 1
        svf_out_channels = (2 ** (self.depth + i)) * (kernel_size * kernel_size)
        svf_mid_channels = dimensions * (2 ** (self.depth - 1))
        spatially_varying_kernel_generation = torch.nn.ModuleList()
        for j in range(i, -1, -1):
            pooling_layer = torch.nn.AvgPool2d(2 ** (j + 1))
            spatially_varying_kernel_generation.append(pooling_layer)
        # NOTE(review): the same `activation` module instance is reused in
        # several Sequential blocks; harmless for stateless activations.
        kernel_generation_block = torch.nn.Sequential(
            torch.nn.Conv2d(
                in_channels=svf_in_channels,
                out_channels=svf_mid_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            activation,
            torch.nn.Conv2d(
                in_channels=svf_mid_channels,
                out_channels=svf_mid_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            activation,
            torch.nn.Conv2d(
                in_channels=svf_mid_channels,
                out_channels=svf_out_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
        )
        spatially_varying_kernel_generation.append(kernel_generation_block)
        self.spatially_varying_feature.append(spatially_varying_kernel_generation)
        logger.debug(f"Added SVF block {i}: {svf_in_channels} -> {svf_out_channels}")

    # Decoder: first entry is a global feature module; the remaining
    # entries are [transposed-conv upsample, double convolution] pairs.
    self.decoder = torch.nn.ModuleList()
    global_feature_layer = global_feature_module(  # global feature layer
        input_channels=dimensions * (2 ** (depth - 1)),
        mid_channels=dimensions * (2 ** (depth - 1)),
        output_channels=dimensions * (2 ** (depth - 1)),
        kernel_size=kernel_size,
        bias=bias,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    )
    self.decoder.append(global_feature_layer)
    for i in range(depth, 0, -1):
        # NOTE(review): the i == 2 and i == 1 special cases mirror the
        # non-uniform channel widths produced by the encoder above;
        # confirm intended behavior for depth != 3.
        if i == 2:
            up_in_channels = (dimensions // 2) * (2**i)
            up_out_channels = up_in_channels
            up_mid_channels = up_in_channels
        elif i == 1:
            up_in_channels = dimensions * 2
            up_out_channels = dimensions
            up_mid_channels = up_out_channels
        else:
            up_in_channels = (dimensions // 2) * (2**i)
            up_out_channels = up_in_channels // 2
            up_mid_channels = up_in_channels
        upsample_layer = upsample_convtranspose2d_layer(
            input_channels=up_in_channels,
            output_channels=up_mid_channels,
            kernel_size=2,
            stride=2,
            bias=bias,
        )
        conv_layer = double_convolution(
            input_channels=up_mid_channels,
            output_channels=up_out_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
        logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
    logger.info("spatially_varying_kernel_generation_model initialization completed")

forward(focal_surface, field)

Forward pass of the spatially varying kernel generation model.

Parameters:

  • focal_surface (Tensor) –

    Input focal surface data. Dimension: (1, 1, H, W)

  • field (Tensor) –

    Input field data. Dimension: (1, 6, H, W)

Returns:

  • sv_kernel ( list of torch.Tensor ) –

    Learned spatially varying kernels. Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i), where C_i, H_i, and W_i represent the channel, height, and width of each feature at a certain scale.

Source code in odak/learn/models/models.py
def forward(self, focal_surface, field):
    """
    Forward pass of the spatially varying kernel generation model.

    Parameters
    ----------
    focal_surface : torch.Tensor
        Input focal surface data.
        Dimension: (1, 1, H, W)

    field : torch.Tensor
        Input field data.
        Dimension: (1, 6, H, W)

    Returns
    -------
    sv_kernel : list of torch.Tensor
        Learned spatially varying kernels.
        Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
        where C_i, H_i, and W_i represent the channel, height, and width
        of each feature at a certain scale.
    """
    # Stem: concatenate the 1-channel focal surface with the 6-channel field.
    x = self.inc(torch.cat((focal_surface, field), dim=1))
    # downsampling_outputs[0] is the raw focal surface; index 1 is the stem
    # output; afterwards every encoder entry appends its own output
    # (pooling and convolution are separate entries, two per stage).
    downsampling_outputs = [focal_surface]
    downsampling_outputs.append(x)
    for i, down_layer in enumerate(self.encoder):
        x_down = down_layer(downsampling_outputs[-1])
        downsampling_outputs.append(x_down)
    sv_kernels = []
    for i, (up_layer, svf_layer) in enumerate(
        zip(self.decoder, self.spatially_varying_feature)
    ):
        if i == 0:
            # Coarsest scale: `up_layer` is the global feature module,
            # fusing the two deepest encoder outputs.
            global_feature = up_layer(
                downsampling_outputs[-2], downsampling_outputs[-1]
            )
            downsampling_outputs[-1] = global_feature
            # sv_feature[1] is repeatedly average-pooled down to this scale;
            # extra entries collect pooled intermediate encoder features.
            sv_feature = [global_feature, downsampling_outputs[0]]
            for j in range(self.depth - i + 1):
                sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                if j > 0:
                    sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
            # Reorder before concatenation — presumably to match the channel
            # layout the kernel-generation block was constructed for;
            # TODO(review): confirm behavior for depth != 3.
            sv_feature = [
                sv_feature[0],
                sv_feature[1],
                sv_feature[4],
                sv_feature[2],
                sv_feature[3],
            ]
            sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
            sv_kernels.append(sv_kernel)
        else:
            # Finer scales: transposed-conv upsample fused with the matching
            # encoder output (two list entries per encoder stage, hence the
            # 2 * (...) + 1 indexing), then a double convolution.
            x_up = up_layer[0](
                downsampling_outputs[-1],
                downsampling_outputs[2 * (self.depth + 1 - i) + 1],
            )
            x_up = up_layer[1](x_up)
            downsampling_outputs[-1] = x_up
            sv_feature = [x_up, downsampling_outputs[0]]
            for j in range(self.depth - i + 1):
                sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                if j > 0:
                    sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
            if i == 1:
                # Same channel-layout reordering as above for this scale.
                sv_feature = [
                    sv_feature[0],
                    sv_feature[1],
                    sv_feature[3],
                    sv_feature[2],
                ]
            sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
            sv_kernels.append(sv_kernel)
    return sv_kernels

unet

Bases: Module

A U-Net model, heavily inspired from https://github.com/milesial/Pytorch-UNet/tree/master/unet and more can be read from Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. "U-net: Convolutional networks for biomedical image segmentation." Medical Image Computing and Computer-Assisted Intervention–MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18. Springer International Publishing, 2015.

Source code in odak/learn/models/models.py
class unet(torch.nn.Module):
    """
    A U-Net model, heavily inspired from `https://github.com/milesial/Pytorch-UNet/tree/master/unet` and more can be read from Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. "U-net: Convolutional networks for biomedical image segmentation." Medical Image Computing and Computer-Assisted Intervention–MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18. Springer International Publishing, 2015.
    """

    def __init__(
        self,
        depth=4,
        dimensions=64,
        input_channels=2,
        output_channels=1,
        bilinear=False,
        kernel_size=3,
        bias=False,
        activation=torch.nn.ReLU(inplace=True),
    ):
        """
        Initialize the U-Net model.

        Parameters
        ----------
        depth : int, optional
            Number of upsampling and downsampling layers. Default is 4.
        dimensions : int, optional
            Number of dimensions. Default is 64.
        input_channels : int, optional
            Number of input channels. Default is 2.
        output_channels : int, optional
            Number of output channels. Default is 1.
        bilinear : bool, optional
            Uses bilinear upsampling in upsampling layers when set True. Default is False.
        kernel_size : int, optional
            Kernel size for convolutional layers. Default is 3.
        bias : bool, optional
            Set True to let convolutional layers learn a bias term. Default is False.
        activation : torch.nn.Module, optional
            Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.ReLU(inplace=True).
        """
        super(unet, self).__init__()
        logger.info(
            f"Initializing U-Net: depth={depth}, dimensions={dimensions}, "
            f"input_channels={input_channels}, output_channels={output_channels}, "
            f"bilinear={bilinear}, kernel_size={kernel_size}"
        )
        # Stem block lifting the input to the base channel width.
        self.inc = double_convolution(
            input_channels=input_channels,
            mid_channels=dimensions,
            output_channels=dimensions,
            kernel_size=kernel_size,
            bias=bias,
            activation=activation,
        )
        self.downsampling_layers = torch.nn.ModuleList()
        self.upsampling_layers = torch.nn.ModuleList()
        # Channel widths at each scale: dimensions, 2 * dimensions, ...
        widths = [dimensions * (2 ** level) for level in range(depth + 1)]
        for i, (narrow, wide) in enumerate(zip(widths[:-1], widths[1:])):
            self.downsampling_layers.append(
                downsample_layer(
                    narrow,
                    wide,
                    kernel_size=kernel_size,
                    bias=bias,
                    activation=activation,
                )
            )
            logger.debug(f"Added downsampling layer {i}: {narrow} -> {wide}")
        # Decoder mirrors the encoder widths in reverse order.
        for wide, narrow in zip(widths[:0:-1], widths[-2::-1]):
            self.upsampling_layers.append(
                upsample_layer(
                    wide,
                    narrow,
                    kernel_size=kernel_size,
                    bias=bias,
                    activation=activation,
                    bilinear=bilinear,
                )
            )
            logger.debug(f"Added upsampling layer: {wide} -> {narrow}")
        self.outc = torch.nn.Conv2d(
            dimensions,
            output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        )
        logger.info("U-Net initialization completed")

    def forward(self, x):
        """
        Forward pass of the U-Net.

        Parameters
        ----------
        x : torch.Tensor
            Input data.

        Returns
        -------
        result : torch.Tensor
            Estimated output.
        """
        features = [self.inc(x)]
        for encoder_stage in self.downsampling_layers:
            features.append(encoder_stage(features[-1]))
        decoded = features[-1]
        # Pair each decoder stage with the encoder output at the same scale.
        for decoder_stage, skip in zip(
            self.upsampling_layers, reversed(features[:-1])
        ):
            decoded = decoder_stage(decoded, skip)
        return self.outc(decoded)

__init__(depth=4, dimensions=64, input_channels=2, output_channels=1, bilinear=False, kernel_size=3, bias=False, activation=torch.nn.ReLU(inplace=True))

Initialize the U-Net model.

Parameters:

  • depth (int, default: 4 ) –

    Number of upsampling and downsampling layers. Default is 4.

  • dimensions (int, default: 64 ) –

    Number of dimensions. Default is 64.

  • input_channels (int, default: 2 ) –

    Number of input channels. Default is 2.

  • output_channels (int, default: 1 ) –

    Number of output channels. Default is 1.

  • bilinear (bool, default: False ) –

    Uses bilinear upsampling in upsampling layers when set True. Default is False.

  • kernel_size (int, default: 3 ) –

    Kernel size for convolutional layers. Default is 3.

  • bias (bool, default: False ) –

    Set True to let convolutional layers learn a bias term. Default is False.

  • activation (Module, default: ReLU(inplace=True) ) –

    Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.ReLU(inplace=True).

Source code in odak/learn/models/models.py
def __init__(
    self,
    depth=4,
    dimensions=64,
    input_channels=2,
    output_channels=1,
    bilinear=False,
    kernel_size=3,
    bias=False,
    activation=torch.nn.ReLU(inplace=True),
):
    """
    Initialize the U-Net model.

    Parameters
    ----------
    depth : int, optional
        Number of upsampling and downsampling layers. Default is 4.
    dimensions : int, optional
        Number of dimensions. Default is 64.
    input_channels : int, optional
        Number of input channels. Default is 2.
    output_channels : int, optional
        Number of output channels. Default is 1.
    bilinear : bool, optional
        Uses bilinear upsampling in upsampling layers when set True. Default is False.
    kernel_size : int, optional
        Kernel size for convolutional layers. Default is 3.
    bias : bool, optional
        Set True to let convolutional layers learn a bias term. Default is False.
    activation : torch.nn.Module, optional
        Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.ReLU(inplace=True).
    """
    super(unet, self).__init__()
    logger.info(
        f"Initializing U-Net: depth={depth}, dimensions={dimensions}, "
        f"input_channels={input_channels}, output_channels={output_channels}, "
        f"bilinear={bilinear}, kernel_size={kernel_size}"
    )
    # Stem block lifting the input to the base channel width.
    self.inc = double_convolution(
        input_channels=input_channels,
        mid_channels=dimensions,
        output_channels=dimensions,
        kernel_size=kernel_size,
        bias=bias,
        activation=activation,
    )
    self.downsampling_layers = torch.nn.ModuleList()
    self.upsampling_layers = torch.nn.ModuleList()
    # Channel widths at each scale: dimensions, 2 * dimensions, ...
    widths = [dimensions * (2 ** level) for level in range(depth + 1)]
    for i, (narrow, wide) in enumerate(zip(widths[:-1], widths[1:])):
        self.downsampling_layers.append(
            downsample_layer(
                narrow,
                wide,
                kernel_size=kernel_size,
                bias=bias,
                activation=activation,
            )
        )
        logger.debug(f"Added downsampling layer {i}: {narrow} -> {wide}")
    # Decoder mirrors the encoder widths in reverse order.
    for wide, narrow in zip(widths[:0:-1], widths[-2::-1]):
        self.upsampling_layers.append(
            upsample_layer(
                wide,
                narrow,
                kernel_size=kernel_size,
                bias=bias,
                activation=activation,
                bilinear=bilinear,
            )
        )
        logger.debug(f"Added upsampling layer: {wide} -> {narrow}")
    self.outc = torch.nn.Conv2d(
        dimensions,
        output_channels,
        kernel_size=kernel_size,
        padding=kernel_size // 2,
        bias=bias,
    )
    logger.info("U-Net initialization completed")

forward(x)

Forward pass of the U-Net.

Parameters:

  • x (Tensor) –

    Input data.

Returns:

  • result ( Tensor ) –

    Estimated output.

Source code in odak/learn/models/models.py
def forward(self, x):
    """
    Forward pass of the U-Net.

    Parameters
    ----------
    x : torch.Tensor
        Input data.

    Returns
    -------
    result : torch.Tensor
        Estimated output.
    """
    features = [self.inc(x)]
    for encoder_stage in self.downsampling_layers:
        features.append(encoder_stage(features[-1]))
    decoded = features[-1]
    # Pair each decoder stage with the encoder output at the same scale.
    for decoder_stage, skip in zip(
        self.upsampling_layers, reversed(features[:-1])
    ):
        decoded = decoder_stage(decoded, skip)
    return self.outc(decoded)

upsample_convtranspose2d_layer

Bases: Module

An upsampling convtranspose2d layer.

Source code in odak/learn/models/components.py
class upsample_convtranspose2d_layer(torch.nn.Module):
    """
    An upsampling convtranspose2d layer.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=2,
        stride=2,
        bias=False,
    ):
        """
        An upsampling component backed by a single transposed convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size of the transposed convolution.
        stride          : int
                          Stride of the transposed convolution.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        """
        super().__init__()
        # A single transposed convolution performs the spatial upsampling.
        self.up = torch.nn.ConvTranspose2d(
            in_channels=input_channels,
            out_channels=output_channels,
            kernel_size=kernel_size,
            stride=stride,
            bias=bias,
        )

    def forward(self, x1, x2):
        """
        Upsample `x1`, zero-pad it to the spatial size of `x2`, and return
        their element-wise sum.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Result of the forward operation
        """
        upsampled = self.up(x1)
        height_gap = x2.size(2) - upsampled.size(2)
        width_gap = x2.size(3) - upsampled.size(3)
        upsampled = torch.nn.functional.pad(
            upsampled,
            [
                width_gap // 2,
                width_gap - width_gap // 2,
                height_gap // 2,
                height_gap - height_gap // 2,
            ],
        )
        return upsampled + x2

__init__(input_channels, output_channels, kernel_size=2, stride=2, bias=False)

An upsampling component based on a single transposed convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=2,
    stride=2,
    bias=False,
):
    """
    An upsampling component backed by a single transposed convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size of the transposed convolution.
    stride          : int
                      Stride of the transposed convolution.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    """
    super().__init__()
    # A single transposed convolution performs the spatial upsampling.
    self.up = torch.nn.ConvTranspose2d(
        in_channels=input_channels,
        out_channels=output_channels,
        kernel_size=kernel_size,
        stride=stride,
        bias=bias,
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Upsample `x1`, zero-pad it to the spatial size of `x2`, and return
    their element-wise sum.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """
    upsampled = self.up(x1)
    height_gap = x2.size(2) - upsampled.size(2)
    width_gap = x2.size(3) - upsampled.size(3)
    upsampled = torch.nn.functional.pad(
        upsampled,
        [
            width_gap // 2,
            width_gap - width_gap // 2,
            height_gap // 2,
            height_gap - height_gap // 2,
        ],
    )
    return upsampled + x2

upsample_layer

Bases: Module

An upsampling convolutional layer.

Source code in odak/learn/models/components.py
class upsample_layer(torch.nn.Module):
    """
    An upsampling convolutional layer.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
        bilinear=True,
    ):
        """
        An upsampling component followed by a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        bilinear        : bool
                          If set to True, bilinear sampling is used.
        """
        super(upsample_layer, self).__init__()
        if bilinear:
            # Parameter-free bilinear upsampling; the concatenated skip
            # connection widens the convolution input accordingly.
            self.up = torch.nn.Upsample(
                scale_factor=2, mode="bilinear", align_corners=True
            )
            conv_input_channels = input_channels + output_channels
            conv_mid_channels = input_channels // 2
        else:
            # Learned upsampling via a transposed convolution that halves
            # the channel count before the skip concatenation.
            self.up = torch.nn.ConvTranspose2d(
                input_channels, input_channels // 2, kernel_size=2, stride=2
            )
            conv_input_channels = input_channels
            conv_mid_channels = output_channels
        self.conv = double_convolution(
            input_channels=conv_input_channels,
            mid_channels=conv_mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )

    def forward(self, x1, x2):
        """
        Upsample `x1`, zero-pad it to the spatial size of `x2`, then run a
        double convolution on their channel-wise concatenation.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Result of the forward operation
        """
        upsampled = self.up(x1)
        height_gap = x2.size(2) - upsampled.size(2)
        width_gap = x2.size(3) - upsampled.size(3)
        upsampled = torch.nn.functional.pad(
            upsampled,
            [
                width_gap // 2,
                width_gap - width_gap // 2,
                height_gap // 2,
                height_gap - height_gap // 2,
            ],
        )
        merged = torch.cat([x2, upsampled], dim=1)
        return self.conv(merged)

__init__(input_channels, output_channels, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU(), bilinear=True)

An upsampling component followed by a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
  • bilinear
              If set to True, bilinear sampling is used.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
    bilinear=True,
):
    """
    An upsampling component followed by a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    bilinear        : bool
                      If set to True, bilinear sampling is used.
    """
    super(upsample_layer, self).__init__()
    if bilinear:
        # Parameter-free bilinear upsampling; the concatenated skip
        # connection widens the convolution input accordingly.
        self.up = torch.nn.Upsample(
            scale_factor=2, mode="bilinear", align_corners=True
        )
        conv_input_channels = input_channels + output_channels
        conv_mid_channels = input_channels // 2
    else:
        # Learned upsampling via a transposed convolution that halves the
        # channel count before the skip concatenation.
        self.up = torch.nn.ConvTranspose2d(
            input_channels, input_channels // 2, kernel_size=2, stride=2
        )
        conv_input_channels = input_channels
        conv_mid_channels = output_channels
    self.conv = double_convolution(
        input_channels=conv_input_channels,
        mid_channels=conv_mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        normalization=normalization,
        bias=bias,
        activation=activation,
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Upsample `x1`, zero-pad it to the spatial size of `x2`, then run a
    double convolution on their channel-wise concatenation.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """
    upsampled = self.up(x1)
    height_gap = x2.size(2) - upsampled.size(2)
    width_gap = x2.size(3) - upsampled.size(3)
    upsampled = torch.nn.functional.pad(
        upsampled,
        [
            width_gap // 2,
            width_gap - width_gap // 2,
            height_gap // 2,
            height_gap - height_gap // 2,
        ],
    )
    merged = torch.cat([x2, upsampled], dim=1)
    return self.conv(merged)

evaluate_3d_gaussians(points, centers=torch.zeros(1, 3), scales=torch.ones(1, 3), angles=torch.zeros(1, 3), opacity=torch.ones(1, 1))

Evaluate 3D Gaussian functions at given points, with optional rotation.

Parameters:

  • points
          The 3D points at which to evaluate the Gaussians.
    
  • centers
          The centers of the Gaussians.
    
  • scales
          The standard deviations (spread) of the Gaussians along each axis.
    
  • angles
          The rotation angles (in radians) for each Gaussian, applied to the points.
    
  • opacity
          Opacity of the Gaussians.
    

Returns:

  • intensities ( (Tensor, shape[n, 1]) ) –

    The evaluated Gaussian intensities at each point.

Source code in odak/learn/tools/function.py
def evaluate_3d_gaussians(
    points,
    centers=torch.zeros(1, 3),
    scales=torch.ones(1, 3),
    angles=torch.zeros(1, 3),
    opacity=torch.ones(1, 1),
) -> torch.Tensor:
    """
    Evaluate 3D Gaussian functions at given points, with optional rotation.

    Parameters
    ----------
    points      : torch.Tensor, shape [n, 3]
                  The 3D points at which to evaluate the Gaussians.
    centers     : torch.Tensor, shape [n, 3]
                  The centers of the Gaussians.
    scales      : torch.Tensor, shape [n, 3]
                  The standard deviations (spread) of the Gaussians along each axis.
    angles      : torch.Tensor, shape [n, 3]
                  The rotation angles (in radians) for each Gaussian, applied to the points.
    opacity     : torch.Tensor, shape [n, 1]
                  Opacity of the Gaussians.

    Returns
    -------
    intensities : torch.Tensor, shape [n, 1]
                  The evaluated Gaussian intensities at each point.
    """
    # Rotate the sample points about each Gaussian's center, then express
    # them relative to that center.
    rotated, _, _, _ = rotate_points(point=points, angles=angles, origin=centers)
    offsets = rotated - centers.unsqueeze(0)
    spreads = scales.unsqueeze(0)
    # Log of the unnormalized density: -0.5 * sum((offset / sigma) ** 2).
    log_density = torch.sum(-0.5 * (offsets / spreads) ** 2, dim=-1)
    # Normalization constant of a 3D Gaussian: (2 * pi) ** 1.5 * prod(sigma).
    normalizer = (spreads[:, :, 0] * spreads[:, :, 1] * spreads[:, :, 2]) * (
        2.0 * torch.pi
    ) ** (3.0 / 2.0)
    intensities = opacity.T * (torch.exp(log_density) / normalizer)
    return intensities

gaussian(x, multiplier=1.0)

A Gaussian non-linear activation. For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

Parameters:

  • x
           Input data.
    
  • multiplier
           Multiplier.
    

Returns:

  • result ( float or tensor ) –

    Output data.

Source code in odak/learn/models/components.py
def gaussian(x, multiplier=1.0):
    """
    A Gaussian non-linear activation.
    For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

    Parameters
    ----------
    x            : float or torch.tensor
                   Input data.
    multiplier   : float or torch.tensor
                   Multiplier controlling the width of the Gaussian bump.

    Returns
    -------
    result       : float or torch.tensor
                   Output data.
    """
    # exp(-(multiplier * x)^2): peaks at 1 for x == 0, decays smoothly to 0.
    result = torch.exp(-((multiplier * x) ** 2))
    return result

swish(x)

A swish non-linear activation. For more details: https://en.wikipedia.org/wiki/Swish_function

Parameters:

  • x
             Input.
    

Returns:

  • out ( float or tensor ) –

    Output.

Source code in odak/learn/models/components.py
def swish(x):
    """
    A swish non-linear activation.
    For more details: https://en.wikipedia.org/wiki/Swish_function

    Parameters
    ----------
    x              : float or torch.tensor
                     Input.

    Returns
    -------
    out            : float or torch.tensor
                     Output.
    """
    # swish(x) = x * sigmoid(x).
    gate = torch.sigmoid(x)
    return x * gate

validate_path(path, allowed_extensions=None)

Validates a file path to ensure it is safe to use.

Parameters:

  • path
              Path to validate.
    
  • allowed_extensions (list, default: None ) –
                  List of allowed extensions (e.g., ['.png', '.jpg']).
                  If None, all extensions are allowed.
    

Returns:

  • safe_path ( str ) –

    The validated and secured path (with tilde expanded).

Raises:

  • ValueError : If path traversal attempt detected or extension not allowed.
  • TypeError : If path is not a string.
Source code in odak/tools/file.py
def validate_path(path, allowed_extensions=None):
    """
    Validates a file path to ensure it is safe to use.

    Parameters
    ----------
    path               : str
                         Path to validate.
    allowed_extensions : list
                         List of allowed extensions (e.g., ['.png', '.jpg']).
                         If None, all extensions are allowed.

    Returns
    -------
    safe_path          : str
                         The validated and secured path (with tilde expanded).

    Raises
    ------
    ValueError         : If a path traversal attempt is detected or the extension is not allowed.
    TypeError          : If path is not a string.
    """
    if not isinstance(path, str):
        raise TypeError(f"Path must be a string, got {type(path).__name__}")

    # Null bytes can truncate paths in C-backed filesystem APIs.
    if "\x00" in path:
        raise ValueError("Null bytes not allowed in path")

    # Reject any '..' path component (either separator) BEFORE expansion, so a
    # traversal cannot hide behind '~' expansion. This single check replaces
    # the previous nested test, which missed backslash-delimited '..' on POSIX.
    if re.search(r"(^|[/\\])\.\.([/\\]|$)", path):
        raise ValueError("Path traversal detected: '..' not allowed in path")

    # Reject remote URLs masquerading as file paths.
    if re.search(r"https?://|ftp://", path.lower()):
        raise ValueError("URL protocols not allowed in file paths")

    path = os.path.expanduser(path)
    resolved_path = os.path.abspath(path)

    # Reject Windows UNC shares (\\server\share) and device paths (\\?\, \\.\).
    # The previous regex here was garbled (it matched the literal text '\\\|')
    # and never caught real UNC paths; a prefix check is correct and clearer.
    if path.startswith("\\\\") or path.startswith("//."):
        raise ValueError("UNC/device paths not allowed")

    if len(resolved_path) > 260:  # Windows MAX_PATH limit
        raise ValueError("Path exceeds maximum allowed length (260 characters)")

    if allowed_extensions is not None:
        _, file_ext = os.path.splitext(path)
        # Accept both '.png' and 'png' spellings in allowed_extensions.
        allowed_normalized = [
            ext.lower() if ext.startswith(".") else f".{ext}"
            for ext in allowed_extensions
        ]
        if file_ext.lower() not in allowed_normalized:
            raise ValueError(
                f"File extension '{file_ext}' is not allowed. "
                f"Allowed: {allowed_extensions}"
            )

    logger.debug(f"Path validated: {path}")
    return resolved_path

channel_gate

Bases: Module

Channel attention module with various pooling strategies. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class channel_gate(torch.nn.Module):
    """
    Channel attention module with various pooling strategies.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=("avg", "max")):
        """
        Initializes the channel gate module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the intermediate layer.
        pool_types      : list or tuple
                          Pooling operations to apply ("avg" and/or "max").
        """
        super().__init__()
        self.gate_channels = gate_channels
        # Floor the bottleneck width at 1 so large reduction ratios cannot
        # produce a zero-width Linear layer.
        hidden_channels = max(gate_channels // reduction_ratio, 1)
        # torch.nn.Flatten() (start_dim=1) is equivalent to the previous
        # custom Flatten (x.view(x.size(0), -1)) and removes the forward
        # reference to convolutional_block_attention.
        self.mlp = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(gate_channels, hidden_channels),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_channels, gate_channels),
        )
        self.pool_types = pool_types

    def forward(self, x):
        """
        Forward pass of the ChannelGate module.

        Applies channel-wise attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the ChannelGate module.

        Returns
        -------
        output       : torch.tensor
                       Output tensor after applying channel attention.
        """
        channel_att_sum = None
        # Pool the whole spatial extent down to a single value per channel.
        spatial = (x.size(2), x.size(3))
        for pool_type in self.pool_types:
            if pool_type == "avg":
                pooled = torch.nn.functional.avg_pool2d(x, spatial, stride=spatial)
            elif pool_type == "max":
                pooled = torch.nn.functional.max_pool2d(x, spatial, stride=spatial)
            else:
                # Previously an unknown entry silently reused the last pooled
                # tensor (or crashed on the first iteration); fail loudly instead.
                raise ValueError(f"Unsupported pool type: {pool_type}")
            channel_att_raw = self.mlp(pooled)
            channel_att_sum = (
                channel_att_raw
                if channel_att_sum is None
                else channel_att_sum + channel_att_raw
            )
        # Broadcast the per-channel gate over the spatial dimensions.
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        output = x * scale
        return output

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'])

Initializes the channel gate module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the intermediate layer.
    
  • pool_types
              List of pooling operations to apply.
    
Source code in odak/learn/models/components.py
def __init__(self, gate_channels, reduction_ratio=16, pool_types=["avg", "max"]):
    """
    Initializes the channel gate module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the intermediate layer.
    pool_types      : list
                      List of pooling operations to apply.
                      NOTE(review): mutable default argument — shared across
                      calls; safe here only because it is never mutated.
    """
    super().__init__()
    self.gate_channels = gate_channels
    # Bottleneck width of the shared MLP; floored at 1 so a large
    # reduction_ratio cannot produce a zero-width Linear layer.
    hidden_channels = gate_channels // reduction_ratio
    if hidden_channels == 0:
        hidden_channels = 1
    # Squeeze-style MLP: Flatten reshapes its input to [batch, features]
    # (see convolutional_block_attention.Flatten), then the two Linear
    # layers reduce to hidden_channels and project back to gate_channels.
    self.mlp = torch.nn.Sequential(
        convolutional_block_attention.Flatten(),
        torch.nn.Linear(gate_channels, hidden_channels),
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_channels, gate_channels),
    )
    self.pool_types = pool_types

forward(x)

Forward pass of the ChannelGate module.

Applies channel-wise attention to the input tensor.

Parameters:

  • x
           Input tensor to the ChannelGate module.
    

Returns:

  • output ( tensor ) –

    Output tensor after applying channel attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the ChannelGate module.

    Applies channel-wise attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the ChannelGate module.

    Returns
    -------
    output       : torch.tensor
                   Output tensor after applying channel attention.
    """
    # Pool the whole spatial extent down to one value per channel.
    spatial = (x.size(2), x.size(3))
    attention = None
    for pool_type in self.pool_types:
        if pool_type == "avg":
            pooled = torch.nn.functional.avg_pool2d(x, spatial, stride=spatial)
        elif pool_type == "max":
            pooled = torch.nn.functional.max_pool2d(x, spatial, stride=spatial)
        raw = self.mlp(pooled)
        # Accumulate the attention logits across pooling strategies.
        attention = raw if attention is None else attention + raw
    # Broadcast the per-channel gate back over the spatial dimensions.
    scale = torch.sigmoid(attention).unsqueeze(2).unsqueeze(3).expand_as(x)
    return x * scale

convolution_layer

Bases: Module

A convolution layer.

Source code in odak/learn/models/components.py
class convolution_layer(torch.nn.Module):
    """
    A convolution layer.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        bias=False,
        stride=1,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        stride          : int
                          Stride of the convolution.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used.
                          If None, no activation layer is added.
        """
        super().__init__()
        # kernel_size // 2 padding preserves the spatial size for odd kernels
        # when stride == 1.
        layers = [
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=kernel_size // 2,
                bias=bias,
            )
        ]
        if normalization:
            layers.append(torch.nn.BatchNorm2d(output_channels))
        if activation:
            # A falsy activation (e.g. None) is skipped on purpose.
            layers.append(activation)
        self.model = torch.nn.Sequential(*layers)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, output_channels=2, kernel_size=3, bias=False, stride=1, normalization=False, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
            Nonlinear activation layer to be used. If None, no activation layer is added.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    bias=False,
    stride=1,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    stride          : int
                      Stride of the convolution.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used.
                      If None, no activation layer is added.
    """
    super().__init__()
    # kernel_size // 2 padding preserves the spatial size for odd kernels
    # when stride == 1.
    layers = [
        torch.nn.Conv2d(
            input_channels,
            output_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            bias=bias,
        )
    ]
    if normalization:
        layers.append(torch.nn.BatchNorm2d(output_channels))
    if activation:
        # A falsy activation (e.g. None) is skipped on purpose.
        layers.append(activation)
    self.model = torch.nn.Sequential(*layers)

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Run the stored sequential model on the input.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    return self.model(x)

convolutional_block_attention

Bases: Module

Convolutional Block Attention Module (CBAM) class. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class convolutional_block_attention(torch.nn.Module):
    """
    Convolutional Block Attention Module (CBAM) class.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """

    def __init__(
        self,
        gate_channels,
        reduction_ratio=16,
        pool_types=("avg", "max"),
        no_spatial=False,
    ):
        """
        Initializes the convolutional block attention module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the channel attention.
        pool_types      : list or tuple
                          Pooling operations to apply for channel attention.
        no_spatial      : bool
                          If True, spatial attention is not applied.
        """
        # Zero-argument super() matches the style used elsewhere in this module.
        super().__init__()
        self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.spatial_gate = spatial_gate()

    class Flatten(torch.nn.Module):
        """
        Flattens the input tensor to a 2D [batch, features] matrix.
        """

        def forward(self, x):
            return x.view(x.size(0), -1)

    def forward(self, x):
        """
        Forward pass of the convolutional block attention module.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the CBAM module.

        Returns
        -------
        x_out        : torch.tensor
                       Output tensor after applying channel and spatial attention.
        """
        x_out = self.channel_gate(x)
        if not self.no_spatial:
            x_out = self.spatial_gate(x_out)
        return x_out

Flatten

Bases: Module

Flattens the input tensor to a 2D matrix.

Source code in odak/learn/models/components.py
class Flatten(torch.nn.Module):
    """
    Flattens an input tensor into a 2D [batch, features] matrix.
    """

    def forward(self, x):
        # Keep the batch dimension, collapse everything else.
        batch_size = x.size(0)
        return x.view(batch_size, -1)

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False)

Initializes the convolutional block attention module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the channel attention.
    
  • pool_types
              List of pooling operations to apply for channel attention.
    
  • no_spatial
              If True, spatial attention is not applied.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    gate_channels,
    reduction_ratio=16,
    pool_types=["avg", "max"],
    no_spatial=False,
):
    """
    Initializes the convolutional block attention module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the channel attention.
    pool_types      : list
                      List of pooling operations to apply for channel attention.
                      NOTE(review): mutable default argument — shared across
                      calls; safe here only because it is never mutated.
    no_spatial      : bool
                      If True, spatial attention is not applied.
    """
    super(convolutional_block_attention, self).__init__()
    # Channel attention is always applied; spatial attention is optional.
    self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
    self.no_spatial = no_spatial
    if not no_spatial:
        # spatial_gate is defined elsewhere in this module.
        self.spatial_gate = spatial_gate()

forward(x)

Forward pass of the convolutional block attention module.

Parameters:

  • x
           Input tensor to the CBAM module.
    

Returns:

  • x_out ( tensor ) –

    Output tensor after applying channel and spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the convolutional block attention module.

    Applies channel attention first, then (optionally) spatial attention.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the CBAM module.

    Returns
    -------
    x_out        : torch.tensor
                   Output tensor after applying channel and spatial attention.
    """
    attended = self.channel_gate(x)
    if self.no_spatial:
        return attended
    return self.spatial_gate(attended)

double_convolution

Bases: Module

A double convolution layer.

Source code in odak/learn/models/components.py
class double_convolution(torch.nn.Module):
    """
    A double convolution layer.
    """

    def __init__(
        self,
        input_channels=2,
        mid_channels=None,
        output_channels=2,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        Double convolution model.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of channels in the hidden layer between two convolutions.
                          If None, defaults to output_channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after each convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used.
                          If None, no activation layer is added.
        """
        super().__init__()
        # Idiomatic None check (was: isinstance(mid_channels, type(None))).
        if mid_channels is None:
            mid_channels = output_channels
        self.activation = activation
        self.model = torch.nn.Sequential(
            convolution_layer(
                input_channels=input_channels,
                output_channels=mid_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=self.activation,
            ),
            convolution_layer(
                input_channels=mid_channels,
                output_channels=output_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=self.activation,
            ),
        )

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, mid_channels=None, output_channels=2, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU())

Double convolution model.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of channels in the hidden layer between two convolutions.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    mid_channels=None,
    output_channels=2,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    Double convolution model.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of channels in the hidden layer between two convolutions.
                      If None, defaults to output_channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after each convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used.
                      If None, no activation layer is added.
    """
    super().__init__()
    # NOTE(review): `mid_channels is None` would be the idiomatic spelling.
    if isinstance(mid_channels, type(None)):
        mid_channels = output_channels
    self.activation = activation
    # Two stacked convolution layers: input -> mid -> output channels.
    self.model = torch.nn.Sequential(
        convolution_layer(
            input_channels=input_channels,
            output_channels=mid_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=self.activation,
        ),
        convolution_layer(
            input_channels=mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=self.activation,
        ),
    )

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Delegate to the two stacked convolution layers.
    output = self.model(x)
    return output

downsample_layer

Bases: Module

A downscaling component followed by a double convolution.

Source code in odak/learn/models/components.py
class downsample_layer(torch.nn.Module):
    """
    A downscaling component followed by a double convolution.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A downscaling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used.
        """
        super().__init__()
        # Halve the spatial resolution first, then refine the features with
        # a double convolution.
        downscale = torch.nn.MaxPool2d(2)
        refine = double_convolution(
            input_channels=input_channels,
            mid_channels=output_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.maxpool_conv = torch.nn.Sequential(downscale, refine)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x              : torch.tensor
                         First input data.

        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        return self.maxpool_conv(x)

__init__(input_channels, output_channels, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU())

A downscaling component with a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A downscaling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used.
    """
    super().__init__()
    # Halve the spatial resolution with max pooling, then refine features
    # with a double convolution.
    self.maxpool_conv = torch.nn.Sequential(
        torch.nn.MaxPool2d(2),
        double_convolution(
            input_channels=input_channels,
            mid_channels=output_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        ),
    )

forward(x)

Forward model.

Parameters:

  • x
             First input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x              : torch.tensor
                     First input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Max-pool then double-convolve, as assembled in __init__.
    return self.maxpool_conv(x)

global_feature_module

Bases: Module

A global feature layer that processes global features from input channels and applies them to another input tensor via learned transformations.

Source code in odak/learn/models/components.py
class global_feature_module(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies them to another input tensor via learned transformations.
    """

    def __init__(
        self,
        input_channels,
        mid_channels,
        output_channels,
        kernel_size,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of mid channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used.
                          If None, no activation layer is added.
        """
        super().__init__()
        # global_transformations is defined elsewhere in this module.
        self.transformations_1 = global_transformations(input_channels, output_channels)
        self.global_features_1 = double_convolution(
            input_channels=input_channels,
            mid_channels=mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        # NOTE(review): global_features_2 consumes y1 (which has
        # output_channels channels, see forward) yet is constructed with
        # input_channels. This only works when
        # input_channels == output_channels — confirm this is intended.
        self.global_features_2 = double_convolution(
            input_channels=input_channels,
            mid_channels=mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.transformations_2 = global_transformations(input_channels, output_channels)

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        global_tensor_1 = self.transformations_1(x1, x2)
        y1 = self.global_features_1(global_tensor_1)
        y2 = self.global_features_2(y1)
        # NOTE(review): transformations_2 was built with input_channels but
        # receives y1/y2 (output_channels) — see the note in __init__.
        global_tensor_2 = self.transformations_2(y1, y2)
        return global_tensor_2

__init__(input_channels, mid_channels, output_channels, kernel_size, bias=False, normalization=False, activation=torch.nn.ReLU())

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of mid channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    mid_channels,
    output_channels,
    kernel_size,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of mid channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    # Two global transformation stages sandwich two double-convolution
    # feature extractors; both extractors share the same configuration.
    self.transformations_1 = global_transformations(input_channels, output_channels)
    self.global_features_1 = double_convolution(
        input_channels=input_channels,
        mid_channels=mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.global_features_2 = double_convolution(
        input_channels=input_channels,
        mid_channels=mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.transformations_2 = global_transformations(input_channels, output_channels)

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Transform both inputs, extract features twice, then fuse the two
    # feature tensors with the second global transformation.
    global_tensor_1 = self.transformations_1(x1, x2)
    y1 = self.global_features_1(global_tensor_1)
    y2 = self.global_features_2(y1)
    global_tensor_2 = self.transformations_2(y1, y2)
    return global_tensor_2

global_transformations

Bases: Module

A global feature layer that processes global features from input channels and applies learned transformations to another input tensor.

This implementation is adapted from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Reference: J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/components.py
class global_transformations(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies learned transformations to another input tensor.

    This implementation is adapted from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Reference:
    J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."
    """

    def __init__(self, input_channels, output_channels):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        """
        super().__init__()
        # First branch projects pooled features, second branch refines them;
        # both use a linear layer followed by a leaky rectifier.
        self.global_feature_1 = torch.nn.Sequential(
            torch.nn.Linear(input_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace=True),
        )
        self.global_feature_2 = torch.nn.Sequential(
            torch.nn.Linear(output_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace=True),
        )

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        -------
        result         : torch.tensor
                        Estimated output.
        """
        # Global average pool x2 over its spatial axes, derive a per-channel
        # scale and shift, then apply both to x1.
        pooled = torch.mean(x2, dim=(2, 3))
        scale = self.global_feature_1(pooled)
        shift = self.global_feature_2(scale)
        scale = scale[:, :, None, None]
        shift = shift[:, :, None, None]
        result = x1 * scale + shift
        return result

__init__(input_channels, output_channels)

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
Source code in odak/learn/models/components.py
def __init__(self, input_channels, output_channels):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    """
    super().__init__()
    # Two small MLP branches: the first maps pooled features to the output
    # width, the second refines the result at constant width.
    self.global_feature_1 = torch.nn.Sequential(
        torch.nn.Linear(input_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace=True),
    )
    self.global_feature_2 = torch.nn.Sequential(
        torch.nn.Linear(output_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace=True),
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Global average pool x2 over its spatial axes, then derive a
    # per-channel scale (y1) and shift (y2) that are applied to x1.
    y = torch.mean(x2, dim=(2, 3))
    y1 = self.global_feature_1(y)
    y2 = self.global_feature_2(y1)
    y1 = y1.unsqueeze(2).unsqueeze(3)
    y2 = y2.unsqueeze(2).unsqueeze(3)
    result = x1 * y1 + y2
    return result

non_local_layer

Bases: Module

Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)

Source code in odak/learn/models/components.py
class non_local_layer(torch.nn.Module):
    """
    Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)
    """

    def __init__(
        self,
        input_channels=1024,
        bottleneck_channels=512,
        kernel_size=1,
        bias=False,
    ):
        """

        Parameters
        ----------
        input_channels      : int
                              Number of input channels.
        bottleneck_channels : int
                              Number of middle channels.
        kernel_size         : int
                              Kernel size.
        bias                : bool
                              Set to True to let convolutional layers have bias term.
        """
        super().__init__()
        self.input_channels = input_channels
        self.bottleneck_channels = bottleneck_channels
        # Value projection into the bottleneck width.
        self.g = torch.nn.Conv2d(
            self.input_channels,
            self.bottleneck_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        )
        # Output projection back to the input width.
        self.W_z = torch.nn.Sequential(
            torch.nn.Conv2d(
                self.bottleneck_channels,
                self.input_channels,
                kernel_size=kernel_size,
                bias=bias,
                padding=kernel_size // 2,
            ),
            torch.nn.BatchNorm2d(self.input_channels),
        )
        # Zero-initialize the batch-norm affine parameters so W_z outputs
        # zero at start; the residual connection then makes this block an
        # identity mapping until training moves the parameters.
        torch.nn.init.constant_(self.W_z[1].weight, 0)
        torch.nn.init.constant_(self.W_z[1].bias, 0)

    def forward(self, x):
        """
        Forward model [zi = Wzyi + xi]

        Parameters
        ----------
        x               : torch.tensor
                          First input data.


        Returns
        -------
        z               : torch.tensor
                          Estimated output.
        """
        batch_size, channels, height, width = x.size()
        theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
        # phi is consumed as (batch, channels, pixels); the original
        # permute-then-transpose round trip was redundant, so use the
        # flattened view directly.
        phi = x.view(batch_size, channels, -1)
        g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
        # Pairwise pixel affinities, normalized by pixel count before softmax.
        attn = torch.bmm(theta, phi) / (height * width)
        attn = torch.nn.functional.softmax(attn, dim=-1)
        y = (
            torch.bmm(attn, g)
            .permute(0, 2, 1)
            .contiguous()
            .view(batch_size, self.bottleneck_channels, height, width)
        )
        W_y = self.W_z(y)
        z = W_y + x
        return z

__init__(input_channels=1024, bottleneck_channels=512, kernel_size=1, bias=False)

Parameters:

  • input_channels
                  Number of input channels.
    
  • bottleneck_channels (int, default: 512 ) –
                  Number of middle channels.
    
  • kernel_size
                  Kernel size.
    
  • bias
                  Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=1024,
    bottleneck_channels=512,
    kernel_size=1,
    bias=False,
):
    """

    Parameters
    ----------
    input_channels      : int
                          Number of input channels.
    bottleneck_channels : int
                          Number of middle channels.
    kernel_size         : int
                          Kernel size.
    bias                : bool
                          Set to True to let convolutional layers have bias term.
    """
    super(non_local_layer, self).__init__()
    self.input_channels = input_channels
    self.bottleneck_channels = bottleneck_channels
    # Value projection into the bottleneck width.
    self.g = torch.nn.Conv2d(
        self.input_channels,
        self.bottleneck_channels,
        kernel_size=kernel_size,
        padding=kernel_size // 2,
        bias=bias,
    )
    # Output projection back to the input width.
    self.W_z = torch.nn.Sequential(
        torch.nn.Conv2d(
            self.bottleneck_channels,
            self.input_channels,
            kernel_size=kernel_size,
            bias=bias,
            padding=kernel_size // 2,
        ),
        torch.nn.BatchNorm2d(self.input_channels),
    )
    # Zero-initialize the batch-norm affine parameters so W_z outputs zero
    # at start; with the residual connection the block begins as identity.
    torch.nn.init.constant_(self.W_z[1].weight, 0)
    torch.nn.init.constant_(self.W_z[1].bias, 0)

forward(x)

Forward model [zi = Wzyi + xi]

Parameters:

  • x
              First input data.
    

Returns:

  • z ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model [zi = Wzyi + xi]

    Parameters
    ----------
    x               : torch.tensor
                      First input data.


    Returns
    -------
    z               : torch.tensor
                      Estimated output.
    """
    batch_size, channels, height, width = x.size()
    theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
    # NOTE(review): phi is permuted here and transposed straight back below;
    # it is equivalent to x.view(batch_size, channels, -1).
    phi = x.view(batch_size, channels, -1).permute(0, 2, 1)
    g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
    # Pairwise pixel affinities, normalized by pixel count before softmax.
    attn = torch.bmm(theta, phi.transpose(1, 2)) / (height * width)
    attn = torch.nn.functional.softmax(attn, dim=-1)
    y = (
        torch.bmm(attn, g)
        .permute(0, 2, 1)
        .contiguous()
        .view(batch_size, self.bottleneck_channels, height, width)
    )
    W_y = self.W_z(y)
    z = W_y + x
    return z

normalization

Bases: Module

A normalization layer.

Source code in odak/learn/models/components.py
class normalization(torch.nn.Module):
    """
    A normalization layer.
    """

    def __init__(
        self,
        dim=1,
    ):
        """
        Normalization layer.


        Parameters
        ----------
        dim             : int
                          Number of channels; sizes the learnable per-channel gain.
                          Statistics are always computed over axis 1.
        """
        super().__init__()
        # Learnable per-channel gain applied after standardization.
        self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))

    def forward(self, x):
        """
        Forward model.

        Standardizes `x` over its channel axis (axis 1) and scales the
        result by the learnable gain `k`.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        # Looser epsilon for reduced-precision inputs.
        epsilon = 1e-5 if x.dtype == torch.float32 else 1e-3
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_variance = x.var(dim=1, unbiased=False, keepdim=True)
        return (x - channel_mean) * torch.rsqrt(channel_variance + epsilon) * self.k

__init__(dim=1)

Normalization layer.

Parameters:

  • dim
              Dimension (axis) to normalize.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    dim=1,
):
    """
    Normalization layer.


    Parameters
    ----------
    dim             : int
                      Dimension (axis) to normalize.
    """
    super().__init__()
    # Learnable per-channel gain, applied after standardization.
    self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Looser epsilon for reduced-precision dtypes.
    eps = 1e-5 if x.dtype == torch.float32 else 1e-3
    # Statistics are always taken over axis 1 (channels).
    var = torch.var(x, dim=1, unbiased=False, keepdim=True)
    mean = torch.mean(x, dim=1, keepdim=True)
    result = (x - mean) * (var + eps).rsqrt() * self.k
    return result

positional_encoder

Bases: Module

A positional encoder module. This implementation follows this specific work: Martin-Brualla, Ricardo, Noha Radwan, Mehdi SM Sajjadi, Jonathan T. Barron, Alexey Dosovitskiy, and Daniel Duckworth. "Nerf in the wild: Neural radiance fields for unconstrained photo collections." In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pp. 7210-7219. 2021.

Source code in odak/learn/models/components.py
class positional_encoder(torch.nn.Module):
    """
    A positional encoder module.
    This implementation follows this specific work: `Martin-Brualla, Ricardo, Noha Radwan, Mehdi SM Sajjadi, Jonathan T. Barron, Alexey Dosovitskiy, and Daniel Duckworth. "Nerf in the wild: Neural radiance fields for unconstrained photo collections." In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pp. 7210-7219. 2021.`.
    """

    def __init__(self, L):
        """
        A positional encoder module.

        Parameters
        ----------
        L                   : int
                              Positional encoding level.
        """
        super().__init__()
        self.L = L

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x               : torch.tensor
                          Input data [b x n], where `b` is batch size, `n` is the feature size.

        Returns
        -------
        results         : torch.tensor
                          Encoded features [b x n * (1 + 2 * L)]: the input
                          followed by its cosine and sine encodings.
        """
        # Frequency bands 2^0 ... 2^(L-1), broadcastable against (b, n, 1).
        frequencies = (2 ** torch.arange(self.L, device=x.device)).reshape(1, 1, -1)
        scaled = x.unsqueeze(-1) * frequencies
        batch_size = x.shape[0]
        cosines = torch.cos(scaled).reshape(batch_size, -1)
        sines = torch.sin(scaled).reshape(batch_size, -1)
        return torch.cat((x, cosines, sines), dim=1)

__init__(L)

A positional encoder module.

Parameters:

  • L
                  Positional encoding level.
    
Source code in odak/learn/models/components.py
def __init__(self, L):
    """
    A positional encoder module.

    Parameters
    ----------
    L                   : int
                          Positional encoding level; number of frequency
                          bands generated per input feature.
    """
    super(positional_encoder, self).__init__()
    self.L = L

forward(x)

Forward model.

Parameters:

  • x
              Input data [b x n], where `b` is batch size, `n` is the feature size.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x               : torch.tensor
                      Input data [b x n], where `b` is batch size, `n` is the feature size.

    Returns
    -------
    results         : torch.tensor
                      Result of the forward operation, sized [b x n * (1 + 2 * L)].
    """
    # Frequency bands 2^0 ... 2^(L-1), broadcast against x's trailing axis.
    freqs = 2 ** torch.arange(self.L, device=x.device)
    freqs = freqs.view(1, 1, -1)
    results_cos = torch.cos(x.unsqueeze(-1) * freqs).reshape(x.shape[0], -1)
    results_sin = torch.sin(x.unsqueeze(-1) * freqs).reshape(x.shape[0], -1)
    # Keep the raw input alongside its cosine and sine encodings.
    results = torch.cat((x, results_cos, results_sin), dim=1)
    return results

residual_attention_layer

Bases: Module

A residual block with an attention layer.

Source code in odak/learn/models/components.py
class residual_attention_layer(torch.nn.Module):
    """
    A residual block with an attention layer.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=1,
        bias=False,
        activation=torch.nn.ReLU(),
    ):
        """
        An attention layer class.


        Parameters
        ----------
        input_channels  : int or optional
                          Number of input channels.
        output_channels : int or optional
                          Number of middle channels.
        kernel_size     : int or optional
                          Kernel size.
        bias            : bool or optional
                          Set to True to let convolutional layers have bias term.
        activation      : torch.nn or optional
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation

        def convolution_with_normalization():
            # Convolution followed by batch normalization; one branch per input.
            return torch.nn.Sequential(
                torch.nn.Conv2d(
                    input_channels,
                    output_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
                torch.nn.BatchNorm2d(output_channels),
            )

        self.convolution0 = convolution_with_normalization()
        self.convolution1 = convolution_with_normalization()
        # Activation followed by the attention-producing convolution.
        self.final_layer = torch.nn.Sequential(
            self.activation,
            torch.nn.Conv2d(
                output_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
        )

    def forward(self, x0, x1):
        """
        Forward model.

        Parameters
        ----------
        x0             : torch.tensor
                         First input data.

        x1             : torch.tensor
                         Second input data.


        Returns
        -------
        result         : torch.tensor
                         Estimated output.
        """
        # Sum the two convolved branches, derive an attention map, and use
        # it to modulate the first input.
        summed = self.convolution0(x0) + self.convolution1(x1)
        attention = self.final_layer(summed)
        result = attention * x0
        return result

__init__(input_channels=2, output_channels=2, kernel_size=1, bias=False, activation=torch.nn.ReLU())

An attention layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int or optional, default: 2 ) –
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=1,
    bias=False,
    activation=torch.nn.ReLU(),
):
    """
    An attention layer class.


    Parameters
    ----------
    input_channels  : int or optional
                      Number of input channels.
    output_channels : int or optional
                      Number of middle channels.
    kernel_size     : int or optional
                      Kernel size.
    bias            : bool or optional
                      Set to True to let convolutional layers have bias term.
    activation      : torch.nn or optional
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    # Two parallel convolution + batch-norm branches, one per input tensor.
    self.convolution0 = torch.nn.Sequential(
        torch.nn.Conv2d(
            input_channels,
            output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        ),
        torch.nn.BatchNorm2d(output_channels),
    )
    self.convolution1 = torch.nn.Sequential(
        torch.nn.Conv2d(
            input_channels,
            output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        ),
        torch.nn.BatchNorm2d(output_channels),
    )
    # Activation followed by the attention-producing convolution.
    self.final_layer = torch.nn.Sequential(
        self.activation,
        torch.nn.Conv2d(
            output_channels,
            output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        ),
    )

forward(x0, x1)

Forward model.

Parameters:

  • x0
             First input data.
    
  • x1
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x0, x1):
    """
    Forward model.

    Parameters
    ----------
    x0             : torch.tensor
                     First input data.

    x1             : torch.tensor
                     Second input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Sum the two convolved branches, derive an attention map, and use it
    # to modulate the first input.
    y0 = self.convolution0(x0)
    y1 = self.convolution1(x1)
    y2 = torch.add(y0, y1)
    result = self.final_layer(y2) * x0
    return result

residual_layer

Bases: Module

A residual layer.

Source code in odak/learn/models/components.py
class residual_layer(torch.nn.Module):
    """
    A residual layer.
    """

    def __init__(
        self,
        input_channels=2,
        mid_channels=16,
        kernel_size=3,
        bias=False,
        normalization=True,
        activation=torch.nn.ReLU(),
    ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of middle channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        # Output width matches the input so the skip connection can add.
        self.convolution = double_convolution(
            input_channels,
            mid_channels=mid_channels,
            output_channels=input_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        residual = self.convolution(x)
        return x + residual

__init__(input_channels=2, mid_channels=16, kernel_size=3, bias=False, normalization=True, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    mid_channels=16,
    kernel_size=3,
    bias=False,
    normalization=True,
    activation=torch.nn.ReLU(),
):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of middle channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    # Output width matches the input so the skip connection can add.
    self.convolution = double_convolution(
        input_channels,
        mid_channels=mid_channels,
        output_channels=input_channels,
        kernel_size=kernel_size,
        normalization=normalization,
        bias=bias,
        activation=activation,
    )

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Residual connection: input plus the double-convolution output.
    x0 = self.convolution(x)
    return x + x0

spatial_gate

Bases: Module

Spatial attention module that applies a convolution layer after channel pooling. This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.

Source code in odak/learn/models/components.py
class spatial_gate(torch.nn.Module):
    """
    Spatial attention module that applies a convolution layer after channel pooling.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.
    """

    def __init__(self):
        """
        Initializes the spatial gate module.
        """
        super().__init__()
        # 7x7 receptive field over the stacked (max, mean) pooled maps.
        kernel_size = 7
        self.spatial = convolution_layer(
            2, 1, kernel_size, bias=False, activation=torch.nn.Identity()
        )

    def channel_pool(self, x):
        """
        Applies max and average pooling on the channels.

        Parameters
        ----------
        x             : torch.tensor
                        Input tensor.

        Returns
        -------
        output        : torch.tensor
                        Output tensor.
        """
        pooled_max, _ = torch.max(x, dim=1, keepdim=True)
        pooled_mean = torch.mean(x, dim=1, keepdim=True)
        output = torch.cat((pooled_max, pooled_mean), dim=1)
        return output

    def forward(self, x):
        """
        Forward pass of the SpatialGate module.

        Applies spatial attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the SpatialGate module.

        Returns
        -------
        scaled_x     : torch.tensor
                       Output tensor after applying spatial attention.
        """
        pooled = self.channel_pool(x)
        attention_map = torch.sigmoid(self.spatial(pooled))
        scaled_x = x * attention_map
        return scaled_x

__init__()

Initializes the spatial gate module.

Source code in odak/learn/models/components.py
def __init__(self):
    """
    Initializes the spatial gate module.
    """
    super().__init__()
    # 7x7 receptive field over the stacked (max, mean) pooled maps.
    kernel_size = 7
    self.spatial = convolution_layer(
        2, 1, kernel_size, bias=False, activation=torch.nn.Identity()
    )

channel_pool(x)

Applies max and average pooling on the channels.

Parameters:

  • x
            Input tensor.
    

Returns:

  • output ( tensor ) –

    Output tensor.

Source code in odak/learn/models/components.py
def channel_pool(self, x):
    """
    Applies max and average pooling on the channels.

    Parameters
    ----------
    x             : torch.tensor
                    Input tensor.

    Returns
    -------
    output        : torch.tensor
                    Output tensor with two channels: the per-pixel channel
                    maximum followed by the per-pixel channel mean.
    """
    max_pool = torch.max(x, 1)[0].unsqueeze(1)
    avg_pool = torch.mean(x, 1).unsqueeze(1)
    output = torch.cat((max_pool, avg_pool), dim=1)
    return output

forward(x)

Forward pass of the SpatialGate module.

Applies spatial attention to the input tensor.

Parameters:

  • x
           Input tensor to the SpatialGate module.
    

Returns:

  • scaled_x ( tensor ) –

    Output tensor after applying spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the SpatialGate module.

    Applies spatial attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the SpatialGate module.

    Returns
    -------
    scaled_x     : torch.tensor
                   Output tensor after applying spatial attention.
    """
    # Pool over channels, convolve, and squash to a (0, 1) attention map.
    x_compress = self.channel_pool(x)
    x_out = self.spatial(x_compress)
    scale = torch.sigmoid(x_out)
    scaled_x = x * scale
    return scaled_x

spatially_adaptive_convolution

Bases: Module

A spatially adaptive convolution layer.

References

C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions." C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation." C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."

Source code in odak/learn/models/components.py
class spatially_adaptive_convolution(torch.nn.Module):
    """
    A spatially adaptive convolution layer.

    Combines a learned, spatially invariant kernel (`self.weight`) with a
    per-pixel, spatially varying kernel supplied at forward time.

    References
    ----------

    C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions."
    C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation."
    C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initializes a spatially adaptive convolution layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn.Module
                          Activation function to apply. If None, no activation is applied.
        """
        super(spatially_adaptive_convolution, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        # The standard Conv2d is only used to obtain a conventionally
        # initialized weight tensor of the right shape; its own forward
        # is never called in this class.
        self.standard_convolution = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=self.output_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        # Learnable spatially invariant kernel, initialized from the
        # standard convolution's weight.
        self.weight = torch.nn.Parameter(
            data=self.standard_convolution.weight, requires_grad=True
        )
        # NOTE(review): `self.activation` is stored but never applied in
        # forward() below — confirm whether an activation was intended here.
        self.activation = activation

    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive convolution layer.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        sa_output          : torch.tensor
                            Estimated output tensor.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary
        # (presumably so the spatial sizes become divisible by the stride
        # and the reshapes below line up — TODO confirm).
        if sv_kernel_feature.size(-1) * self.stride != x.size(
            -1
        ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(
                sv_kernel_feature,
                (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
            )
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(
                x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
            )

        # Unfold the input tensor for matrix multiplication
        # (each column of the result holds one flattened k x k patch of x).
        input_feature = torch.nn.functional.unfold(
            x,
            kernel_size=(self.kernel_size, self.kernel_size),
            stride=self.stride,
            padding=self.padding,
        )

        # Resize sv_kernel_feature to match the input feature
        sv_kernel = sv_kernel_feature.reshape(
            1,
            self.input_channels * self.kernel_size * self.kernel_size,
            (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
        )

        # Resize weight to match the input channels and kernel size
        si_kernel = self.weight.reshape(
            self.output_channels,
            self.input_channels * self.kernel_size * self.kernel_size,
        )

        # Apply spatially varying kernels
        # (element-wise modulation of every unfolded patch by its local kernel).
        sv_feature = input_feature * sv_kernel

        # Perform matrix multiplication
        # (spatially invariant kernel applied to the modulated patches).
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
            1,
            self.output_channels,
            (x.size(-2) // self.stride),
            (x.size(-1) // self.stride),
        )
        return sa_output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive convolution layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Activation function to apply. If None, no activation is applied.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Constructs a spatially adaptive convolution layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn.Module
                      Activation function to apply. If None, no activation is applied.
    """
    super(spatially_adaptive_convolution, self).__init__()
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    # A conventional convolution provides a well-initialized weight tensor.
    self.standard_convolution = torch.nn.Conv2d(
        in_channels=input_channels,
        out_channels=self.output_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    # Expose that weight as the learnable spatially invariant kernel.
    self.weight = torch.nn.Parameter(
        data=self.standard_convolution.weight, requires_grad=True
    )
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive convolution layer.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • sa_output ( tensor ) –

    Estimated output tensor. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive convolution layer.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    sa_output          : torch.tensor
                        Estimated output tensor.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary
    # (presumably so the spatial sizes become divisible by the stride and
    # the reshapes below line up — TODO confirm).
    if sv_kernel_feature.size(-1) * self.stride != x.size(
        -1
    ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(
            sv_kernel_feature,
            (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
        )
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(
            x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
        )

    # Unfold the input tensor for matrix multiplication
    # (each column of the result holds one flattened k x k patch of x).
    input_feature = torch.nn.functional.unfold(
        x,
        kernel_size=(self.kernel_size, self.kernel_size),
        stride=self.stride,
        padding=self.padding,
    )

    # Resize sv_kernel_feature to match the input feature
    sv_kernel = sv_kernel_feature.reshape(
        1,
        self.input_channels * self.kernel_size * self.kernel_size,
        (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
    )

    # Resize weight to match the input channels and kernel size
    si_kernel = self.weight.reshape(
        self.output_channels,
        self.input_channels * self.kernel_size * self.kernel_size,
    )

    # Apply spatially varying kernels
    # (element-wise modulation of every patch by its local kernel).
    sv_feature = input_feature * sv_kernel

    # Perform matrix multiplication
    # (spatially invariant kernel applied to the modulated patches).
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
        1,
        self.output_channels,
        (x.size(-2) // self.stride),
        (x.size(-1) // self.stride),
    )
    return sa_output

spatially_adaptive_module

Bases: Module

A spatially adaptive module that combines learned spatially adaptive convolutions.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/components.py
class spatially_adaptive_module(torch.nn.Module):
    """
    A spatially adaptive module that combines learned spatially adaptive convolutions.

    References
    ----------

    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initializes a spatially adaptive module.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super(spatially_adaptive_module, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        # One channel of the final output comes from the plain spatially
        # varying convolution (sv_output in forward), so the learned kernel
        # only needs to produce the remaining output_channels - 1 channels.
        self.output_channels_for_weight = self.output_channels - 1
        # The standard Conv2d only supplies a conventionally initialized
        # weight tensor; its own forward is never called.
        self.standard_convolution = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=self.output_channels_for_weight,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        # Learnable spatially invariant kernel.
        self.weight = torch.nn.Parameter(
            data=self.standard_convolution.weight, requires_grad=True
        )
        self.activation = activation

    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive module.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        output             : torch.tensor
                            Combined output tensor from standard and spatially adaptive convolutions.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary
        # (presumably so the spatial sizes become divisible by the stride
        # and the reshapes below line up — TODO confirm).
        if sv_kernel_feature.size(-1) * self.stride != x.size(
            -1
        ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(
                sv_kernel_feature,
                (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
            )
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(
                x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
            )

        # Unfold the input tensor for matrix multiplication
        # (each column of the result holds one flattened k x k patch of x).
        input_feature = torch.nn.functional.unfold(
            x,
            kernel_size=(self.kernel_size, self.kernel_size),
            stride=self.stride,
            padding=self.padding,
        )

        # Resize sv_kernel_feature to match the input feature
        sv_kernel = sv_kernel_feature.reshape(
            1,
            self.input_channels * self.kernel_size * self.kernel_size,
            (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
        )

        # Apply sv_kernel to the input_feature
        sv_feature = input_feature * sv_kernel

        # Original spatially varying convolution output
        # (summing the modulated patch entries yields a one-channel map).
        sv_output = torch.sum(sv_feature, dim=1).reshape(
            1, 1, (x.size(-2) // self.stride), (x.size(-1) // self.stride)
        )

        # Reshape weight for spatially adaptive convolution
        si_kernel = self.weight.reshape(
            self.output_channels_for_weight,
            self.input_channels * self.kernel_size * self.kernel_size,
        )

        # Apply si_kernel on sv convolution output
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
            1,
            self.output_channels_for_weight,
            (x.size(-2) // self.stride),
            (x.size(-1) // self.stride),
        )

        # Combine the outputs and apply activation function
        # (1 + (output_channels - 1) channels -> output_channels channels).
        output = self.activation(torch.cat((sv_output, sa_output), dim=1))
        return output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive module.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Constructs a spatially adaptive module.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super(spatially_adaptive_module, self).__init__()
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    # One output channel is produced by the plain spatially varying
    # convolution, so the learned kernel covers the remaining channels.
    self.output_channels_for_weight = self.output_channels - 1
    # A conventional convolution provides a well-initialized weight tensor.
    self.standard_convolution = torch.nn.Conv2d(
        in_channels=input_channels,
        out_channels=self.output_channels_for_weight,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    # Expose that weight as the learnable spatially invariant kernel.
    self.weight = torch.nn.Parameter(
        data=self.standard_convolution.weight, requires_grad=True
    )
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive module.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • output ( tensor ) –

    Combined output tensor from standard and spatially adaptive convolutions. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive module.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    output             : torch.tensor
                        Combined output tensor from standard and spatially adaptive convolutions.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary
    # (presumably so the spatial sizes become divisible by the stride and
    # the reshapes below line up — TODO confirm).
    if sv_kernel_feature.size(-1) * self.stride != x.size(
        -1
    ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(
            sv_kernel_feature,
            (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
        )
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(
            x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
        )

    # Unfold the input tensor for matrix multiplication
    # (each column of the result holds one flattened k x k patch of x).
    input_feature = torch.nn.functional.unfold(
        x,
        kernel_size=(self.kernel_size, self.kernel_size),
        stride=self.stride,
        padding=self.padding,
    )

    # Resize sv_kernel_feature to match the input feature
    sv_kernel = sv_kernel_feature.reshape(
        1,
        self.input_channels * self.kernel_size * self.kernel_size,
        (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
    )

    # Apply sv_kernel to the input_feature
    sv_feature = input_feature * sv_kernel

    # Original spatially varying convolution output
    # (summing the modulated patch entries yields a one-channel map).
    sv_output = torch.sum(sv_feature, dim=1).reshape(
        1, 1, (x.size(-2) // self.stride), (x.size(-1) // self.stride)
    )

    # Reshape weight for spatially adaptive convolution
    si_kernel = self.weight.reshape(
        self.output_channels_for_weight,
        self.input_channels * self.kernel_size * self.kernel_size,
    )

    # Apply si_kernel on sv convolution output
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
        1,
        self.output_channels_for_weight,
        (x.size(-2) // self.stride),
        (x.size(-1) // self.stride),
    )

    # Combine the outputs and apply activation function
    # (1 + (output_channels - 1) channels -> output_channels channels).
    output = self.activation(torch.cat((sv_output, sa_output), dim=1))
    return output

upsample_convtranspose2d_layer

Bases: Module

An upsampling convtranspose2d layer.

Source code in odak/learn/models/components.py
class upsample_convtranspose2d_layer(torch.nn.Module):
    """
    An upsampling convtranspose2d layer.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=2,
        stride=2,
        bias=False,
    ):
        """
        An upsampling component based on a transposed convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size of the transposed convolution.
        stride          : int
                          Stride of the transposed convolution.
        bias            : bool
                          Set to True to let the transposed convolution have a bias term.
        """
        super().__init__()
        self.up = torch.nn.ConvTranspose2d(
            in_channels=input_channels,
            out_channels=output_channels,
            bias=bias,
            kernel_size=kernel_size,
            stride=stride,
        )

    def forward(self, x1, x2):
        """
        Forward model.

        Upsamples `x1` with the transposed convolution, pads it to match the
        spatial size of `x2`, and returns their element-wise sum.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data (tensor to be upsampled).
        x2             : torch.tensor
                         Second input data (added to the upsampled result).


        Returns
        -------
        result        : torch.tensor
                        Result of the forward operation
        """
        x1 = self.up(x1)
        # Pad x1 so its spatial dimensions match x2 before the addition.
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = torch.nn.functional.pad(
            x1, [diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2]
        )
        result = x1 + x2
        return result

__init__(input_channels, output_channels, kernel_size=2, stride=2, bias=False)

An upsampling component based on a transposed convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=2,
    stride=2,
    bias=False,
):
    """
    An upsampling component based on a transposed convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size of the transposed convolution.
    stride          : int
                      Stride of the transposed convolution.
    bias            : bool
                      Set to True to let the transposed convolution have a bias term.
    """
    super().__init__()
    self.up = torch.nn.ConvTranspose2d(
        in_channels=input_channels,
        out_channels=output_channels,
        bias=bias,
        kernel_size=kernel_size,
        stride=stride,
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Upsamples the first input, pads it to the spatial size of the second
    input, and returns their element-wise sum.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """
    upsampled = self.up(x1)
    delta_h = x2.size()[2] - upsampled.size()[2]
    delta_w = x2.size()[3] - upsampled.size()[3]
    upsampled = torch.nn.functional.pad(
        upsampled,
        [delta_w // 2, delta_w - delta_w // 2, delta_h // 2, delta_h - delta_h // 2],
    )
    result = upsampled + x2
    return result

upsample_layer

Bases: Module

An upsampling convolutional layer.

Source code in odak/learn/models/components.py
class upsample_layer(torch.nn.Module):
    """
    An upsampling convolutional layer.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
        bilinear=True,
    ):
        """
        An upsampling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        bilinear        : bool
                          If set to True, bilinear sampling is used.
        """
        super(upsample_layer, self).__init__()
        if bilinear:
            # Bilinear upsampling keeps the channel count, so the double
            # convolution receives input_channels + output_channels channels
            # after the concatenation in forward().
            self.up = torch.nn.Upsample(
                scale_factor=2, mode="bilinear", align_corners=True
            )
            self.conv = double_convolution(
                input_channels=input_channels + output_channels,
                mid_channels=input_channels // 2,
                output_channels=output_channels,
                kernel_size=kernel_size,
                normalization=normalization,
                bias=bias,
                activation=activation,
            )
        else:
            # The transposed convolution halves the channel count while
            # upsampling, hence the different channel arithmetic below.
            self.up = torch.nn.ConvTranspose2d(
                input_channels, input_channels // 2, kernel_size=2, stride=2
            )
            self.conv = double_convolution(
                input_channels=input_channels,
                mid_channels=output_channels,
                output_channels=output_channels,
                kernel_size=kernel_size,
                normalization=normalization,
                bias=bias,
                activation=activation,
            )

    def forward(self, x1, x2):
        """
        Forward model.

        Upsamples `x1`, pads it to the spatial size of `x2`, concatenates the
        two along the channel dimension, and applies a double convolution.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data (tensor to be upsampled).
        x2             : torch.tensor
                         Second input data (e.g. a skip connection).


        Returns
        -------
        result        : torch.tensor
                        Result of the forward operation
        """
        x1 = self.up(x1)
        # Pad x1 so its spatial dimensions match x2 before concatenation.
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = torch.nn.functional.pad(
            x1, [diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2]
        )
        x = torch.cat([x2, x1], dim=1)
        result = self.conv(x)
        return result

__init__(input_channels, output_channels, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU(), bilinear=True)

An upsampling component with a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
  • bilinear
              If set to True, bilinear sampling is used.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
    bilinear=True,
):
    """
    An upsampling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    bilinear        : bool
                      If set to True, bilinear sampling is used.
    """
    super(upsample_layer, self).__init__()
    if bilinear:
        # Bilinear upsampling keeps the channel count, so the double
        # convolution receives the concatenated channels in forward().
        self.up = torch.nn.Upsample(
            scale_factor=2, mode="bilinear", align_corners=True
        )
        self.conv = double_convolution(
            input_channels=input_channels + output_channels,
            mid_channels=input_channels // 2,
            output_channels=output_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )
    else:
        # The transposed convolution halves the channel count while
        # upsampling, hence the different channel arithmetic below.
        self.up = torch.nn.ConvTranspose2d(
            input_channels, input_channels // 2, kernel_size=2, stride=2
        )
        self.conv = double_convolution(
            input_channels=input_channels,
            mid_channels=output_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Upsamples the first input, pads it to the spatial size of the second
    input, concatenates both along the channel dimension, and applies the
    double convolution.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """
    upsampled = self.up(x1)
    delta_h = x2.size()[2] - upsampled.size()[2]
    delta_w = x2.size()[3] - upsampled.size()[3]
    upsampled = torch.nn.functional.pad(
        upsampled,
        [delta_w // 2, delta_w - delta_w // 2, delta_h // 2, delta_h - delta_h // 2],
    )
    merged = torch.cat([x2, upsampled], dim=1)
    result = self.conv(merged)
    return result

gaussian(x, multiplier=1.0)

A Gaussian non-linear activation. For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

Parameters:

  • x
           Input data.
    
  • multiplier
           Multiplier.
    

Returns:

  • result ( float or tensor ) –

    Output data.

Source code in odak/learn/models/components.py
def gaussian(x, multiplier=1.0):
    """
    A Gaussian non-linear activation.
    For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

    Parameters
    ----------
    x            : float or torch.tensor
                   Input data.
    multiplier   : float or torch.tensor
                   Multiplier.

    Returns
    -------
    result       : float or torch.tensor
                   Output data.
    """
    scaled = multiplier * x
    result = torch.exp(-scaled * scaled)
    return result

swish(x)

A swish non-linear activation. For more details: https://en.wikipedia.org/wiki/Swish_function

Parameters:

  • x
             Input.
    

Returns:

  • out ( float or tensor ) –

    Output.

Source code in odak/learn/models/components.py
def swish(x):
    """
    A swish non-linear activation.
    For more details: https://en.wikipedia.org/wiki/Swish_function

    Parameters
    ----------
    x              : float or torch.tensor
                     Input.

    Returns
    -------
    out            : float or torch.tensor
                     Output.
    """
    return torch.sigmoid(x) * x

channel_gate

Bases: Module

Channel attention module with various pooling strategies. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class channel_gate(torch.nn.Module):
    """
    Channel attention module with various pooling strategies.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=None):
        """
        Initializes the channel gate module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the intermediate layer.
        pool_types      : list
                          List of pooling operations to apply ("avg" and/or "max").
                          Defaults to ["avg", "max"] when None.
        """
        super().__init__()
        # Avoid a mutable default argument; fall back to the historical default.
        if pool_types is None:
            pool_types = ["avg", "max"]
        self.gate_channels = gate_channels
        # Keep at least one hidden unit for very small channel counts.
        hidden_channels = max(gate_channels // reduction_ratio, 1)
        # torch.nn.Flatten() collapses everything after the batch dimension,
        # matching the previous project-local Flatten helper while removing the
        # dependency on convolutional_block_attention.
        self.mlp = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(gate_channels, hidden_channels),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_channels, gate_channels),
        )
        self.pool_types = pool_types

    def forward(self, x):
        """
        Forward pass of the ChannelGate module.

        Applies channel-wise attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the ChannelGate module.

        Returns
        -------
        output       : torch.tensor
                       Output tensor after applying channel attention.

        Raises
        ------
        ValueError
            If an entry of pool_types is neither "avg" nor "max".
        """
        channel_att_sum = None
        spatial_dims = (x.size(2), x.size(3))
        for pool_type in self.pool_types:
            if pool_type == "avg":
                pool = torch.nn.functional.avg_pool2d(
                    x, spatial_dims, stride=spatial_dims
                )
            elif pool_type == "max":
                pool = torch.nn.functional.max_pool2d(
                    x, spatial_dims, stride=spatial_dims
                )
            else:
                # Previously an unknown entry silently reused (or never defined)
                # the previous pooling result; fail loudly instead.
                raise ValueError("Unsupported pool type: {}".format(pool_type))
            channel_att_raw = self.mlp(pool)
            channel_att_sum = (
                channel_att_raw
                if channel_att_sum is None
                else channel_att_sum + channel_att_raw
            )
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        output = x * scale
        return output

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'])

Initializes the channel gate module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the intermediate layer.
    
  • pool_types
              List of pooling operations to apply.
    
Source code in odak/learn/models/components.py
def __init__(self, gate_channels, reduction_ratio=16, pool_types=["avg", "max"]):
    """
    Initializes the channel gate module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the intermediate layer.
    pool_types      : list
                      List of pooling operations to apply.
    """
    super().__init__()
    self.gate_channels = gate_channels
    self.pool_types = pool_types
    # Keep at least one hidden unit even for tiny channel counts.
    hidden_channels = max(gate_channels // reduction_ratio, 1)
    self.mlp = torch.nn.Sequential(
        convolutional_block_attention.Flatten(),
        torch.nn.Linear(gate_channels, hidden_channels),
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_channels, gate_channels),
    )

forward(x)

Forward pass of the ChannelGate module.

Applies channel-wise attention to the input tensor.

Parameters:

  • x
           Input tensor to the ChannelGate module.
    

Returns:

  • output ( tensor ) –

    Output tensor after applying channel attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the ChannelGate module.

    Applies channel-wise attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the ChannelGate module.

    Returns
    -------
    output       : torch.tensor
                   Output tensor after applying channel attention.
    """
    spatial_size = (x.size(2), x.size(3))
    attention = None
    for pool_type in self.pool_types:
        if pool_type == "avg":
            pooled = torch.nn.functional.avg_pool2d(x, spatial_size, stride=spatial_size)
        elif pool_type == "max":
            pooled = torch.nn.functional.max_pool2d(x, spatial_size, stride=spatial_size)
        raw_attention = self.mlp(pooled)
        if attention is None:
            attention = raw_attention
        else:
            attention = attention + raw_attention
    scale = torch.sigmoid(attention).unsqueeze(2).unsqueeze(3).expand_as(x)
    output = x * scale
    return output

convolution_layer

Bases: Module

A convolution layer.

Source code in odak/learn/models/components.py
class convolution_layer(torch.nn.Module):
    """
    A convolution layer.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        bias=False,
        stride=1,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size. Padding of kernel_size // 2 preserves the
                          spatial size for a stride of one.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        stride          : int
                          Stride of the convolution.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer appended after the convolution.
                          Defaults to torch.nn.ReLU(); pass None (or any falsy
                          value) to omit the activation entirely.
        """
        super().__init__()
        layers = [
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=kernel_size // 2,
                bias=bias,
            )
        ]
        if normalization:
            layers.append(torch.nn.BatchNorm2d(output_channels))
        if activation:
            layers.append(activation)
        self.model = torch.nn.Sequential(*layers)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.

        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, output_channels=2, kernel_size=3, bias=False, stride=1, normalization=False, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    bias=False,
    stride=1,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    convolution = torch.nn.Conv2d(
        input_channels,
        output_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=kernel_size // 2,
        bias=bias,
    )
    layers = [convolution]
    if normalization:
        layers.append(torch.nn.BatchNorm2d(output_channels))
    if activation:
        layers.append(activation)
    self.model = torch.nn.Sequential(*layers)

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Delegate to the composed sequential model.
    return self.model(x)

convolutional_block_attention

Bases: Module

Convolutional Block Attention Module (CBAM) class. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class convolutional_block_attention(torch.nn.Module):
    """
    Convolutional Block Attention Module (CBAM) class.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """

    def __init__(
        self,
        gate_channels,
        reduction_ratio=16,
        pool_types=None,
        no_spatial=False,
    ):
        """
        Initializes the convolutional block attention module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the channel attention.
        pool_types      : list
                          List of pooling operations to apply for channel attention.
                          Defaults to ["avg", "max"] when None.
        no_spatial      : bool
                          If True, spatial attention is not applied.
        """
        super().__init__()
        # Avoid a mutable default argument; fall back to the historical default.
        if pool_types is None:
            pool_types = ["avg", "max"]
        self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.spatial_gate = spatial_gate()

    class Flatten(torch.nn.Module):
        """
        Flattens the input tensor to a 2D matrix.
        """

        def forward(self, x):
            # Collapse every dimension after the batch dimension.
            return x.view(x.size(0), -1)

    def forward(self, x):
        """
        Forward pass of the convolutional block attention module.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the CBAM module.

        Returns
        -------
        x_out        : torch.tensor
                       Output tensor after applying channel and spatial attention.
        """
        x_out = self.channel_gate(x)
        if not self.no_spatial:
            x_out = self.spatial_gate(x_out)
        return x_out

Flatten

Bases: Module

Flattens the input tensor to a 2D matrix.

Source code in odak/learn/models/components.py
class Flatten(torch.nn.Module):
    """
    Flattens the input tensor to a 2D matrix.
    """

    def forward(self, x):
        # Collapse all dimensions after the batch dimension.
        batch_size = x.size(0)
        return x.view(batch_size, -1)

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False)

Initializes the convolutional block attention module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the channel attention.
    
  • pool_types
              List of pooling operations to apply for channel attention.
    
  • no_spatial
              If True, spatial attention is not applied.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    gate_channels,
    reduction_ratio=16,
    pool_types=["avg", "max"],
    no_spatial=False,
):
    """
    Initializes the convolutional block attention module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the channel attention.
    pool_types      : list
                      List of pooling operations to apply for channel attention.
    no_spatial      : bool
                      If True, spatial attention is not applied.
    """
    super(convolutional_block_attention, self).__init__()
    self.no_spatial = no_spatial
    self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
    if not self.no_spatial:
        self.spatial_gate = spatial_gate()

forward(x)

Forward pass of the convolutional block attention module.

Parameters:

  • x
           Input tensor to the CBAM module.
    

Returns:

  • x_out ( tensor ) –

    Output tensor after applying channel and spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the convolutional block attention module.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the CBAM module.

    Returns
    -------
    x_out        : torch.tensor
                   Output tensor after applying channel and spatial attention.
    """
    x_out = self.channel_gate(x)
    if self.no_spatial:
        return x_out
    return self.spatial_gate(x_out)

double_convolution

Bases: Module

A double convolution layer.

Source code in odak/learn/models/components.py
class double_convolution(torch.nn.Module):
    """
    A double convolution layer.
    """

    def __init__(
        self,
        input_channels=2,
        mid_channels=None,
        output_channels=2,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        Double convolution model.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of channels in the hidden layer between two convolutions.
                          Defaults to output_channels when None.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer appended after each convolution.
                          If None, no activation is applied.
        """
        super().__init__()
        # Idiomatic None check (was isinstance(mid_channels, type(None))).
        if mid_channels is None:
            mid_channels = output_channels
        self.activation = activation
        self.model = torch.nn.Sequential(
            convolution_layer(
                input_channels=input_channels,
                output_channels=mid_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=self.activation,
            ),
            convolution_layer(
                input_channels=mid_channels,
                output_channels=output_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=self.activation,
            ),
        )

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.

        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, mid_channels=None, output_channels=2, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU())

Double convolution model.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of channels in the hidden layer between two convolutions.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    mid_channels=None,
    output_channels=2,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    Double convolution model.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of channels in the hidden layer between two convolutions.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    if mid_channels is None:
        mid_channels = output_channels
    self.activation = activation
    # Options common to both convolution stages.
    shared_options = dict(
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=self.activation,
    )
    first = convolution_layer(
        input_channels=input_channels,
        output_channels=mid_channels,
        **shared_options,
    )
    second = convolution_layer(
        input_channels=mid_channels,
        output_channels=output_channels,
        **shared_options,
    )
    self.model = torch.nn.Sequential(first, second)

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Run both convolution stages in sequence.
    return self.model(x)

downsample_layer

Bases: Module

A downscaling component followed by a double convolution.

Source code in odak/learn/models/components.py
class downsample_layer(torch.nn.Module):
    """
    A downscaling component followed by a double convolution.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A downscaling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        # Halve the spatial resolution, then refine features.
        pooling = torch.nn.MaxPool2d(2)
        convolutions = double_convolution(
            input_channels=input_channels,
            mid_channels=output_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.maxpool_conv = torch.nn.Sequential(pooling, convolutions)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x              : torch.tensor
                         First input data.

        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        return self.maxpool_conv(x)

__init__(input_channels, output_channels, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU())

A downscaling component with a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A downscaling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    # Halve the spatial resolution, then refine features.
    pooling = torch.nn.MaxPool2d(2)
    convolutions = double_convolution(
        input_channels=input_channels,
        mid_channels=output_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.maxpool_conv = torch.nn.Sequential(pooling, convolutions)

forward(x)

Forward model.

Parameters:

  • x
             First input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x              : torch.tensor
                     First input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Max-pool then double convolution, as configured at construction time.
    return self.maxpool_conv(x)

global_feature_module

Bases: Module

A global feature layer that processes global features from input channels and applies them to another input tensor via learned transformations.

Source code in odak/learn/models/components.py
class global_feature_module(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies them to another input tensor via learned transformations.
    """

    def __init__(
        self,
        input_channels,
        mid_channels,
        output_channels,
        kernel_size,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of mid channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.transformations_1 = global_transformations(input_channels, output_channels)
        self.global_features_1 = double_convolution(
            input_channels=input_channels,
            mid_channels=mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        # NOTE(review): global_features_2 and transformations_2 are built with
        # input_channels, but in forward() they receive y1 / y2 which carry
        # output_channels channels. This only lines up when
        # input_channels == output_channels -- confirm intended usage.
        self.global_features_2 = double_convolution(
            input_channels=input_channels,
            mid_channels=mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.transformations_2 = global_transformations(input_channels, output_channels)

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        # Two transformation/convolution rounds; the second transformation
        # fuses the intermediate feature maps y1 and y2.
        global_tensor_1 = self.transformations_1(x1, x2)
        y1 = self.global_features_1(global_tensor_1)
        y2 = self.global_features_2(y1)
        global_tensor_2 = self.transformations_2(y1, y2)
        return global_tensor_2

__init__(input_channels, mid_channels, output_channels, kernel_size, bias=False, normalization=False, activation=torch.nn.ReLU())

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of mid channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    mid_channels,
    output_channels,
    kernel_size,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of mid channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.transformations_1 = global_transformations(input_channels, output_channels)
    self.global_features_1 = double_convolution(
        input_channels=input_channels,
        mid_channels=mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    # NOTE(review): this second branch is built with input_channels but is fed
    # output_channels-wide tensors at forward time; it only lines up when
    # input_channels == output_channels -- confirm intended usage.
    self.global_features_2 = double_convolution(
        input_channels=input_channels,
        mid_channels=mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.transformations_2 = global_transformations(input_channels, output_channels)

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    fused = self.transformations_1(x1, x2)
    features = self.global_features_1(fused)
    refined = self.global_features_2(features)
    result = self.transformations_2(features, refined)
    return result

global_transformations

Bases: Module

A global feature layer that processes global features from input channels and applies learned transformations to another input tensor.

This implementation is adapted from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Reference: J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/components.py
class global_transformations(torch.nn.Module):
    """
    A global feature layer that pools global statistics from one input tensor
    and uses them to modulate another input tensor with a learned affine
    transformation.

    This implementation is adapted from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Reference:
    J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."
    """

    def __init__(self, input_channels, output_channels):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        """
        super().__init__()

        def linear_block(in_features, out_features):
            # Fully connected layer followed by a leaky rectifier.
            return torch.nn.Sequential(
                torch.nn.Linear(in_features, out_features),
                torch.nn.LeakyReLU(0.2, inplace=True),
            )

        self.global_feature_1 = linear_block(input_channels, output_channels)
        self.global_feature_2 = linear_block(output_channels, output_channels)

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data (tensor to be modulated).
        x2             : torch.tensor
                         Second input data (source of global statistics).

        Returns
        -------
        result         : torch.tensor
                         Estimated output.
        """
        # Global average pooling over the spatial axes of x2.
        pooled = torch.mean(x2, dim=(2, 3))
        scale = self.global_feature_1(pooled)
        shift = self.global_feature_2(scale)
        # Broadcast the per-channel scale and shift over the spatial axes.
        scale = scale.unsqueeze(2).unsqueeze(3)
        shift = shift.unsqueeze(2).unsqueeze(3)
        result = x1 * scale + shift
        return result

__init__(input_channels, output_channels)

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
Source code in odak/learn/models/components.py
def __init__(self, input_channels, output_channels):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    """
    super().__init__()

    def linear_block(in_features, out_features):
        # Fully connected layer followed by a leaky rectifier.
        return torch.nn.Sequential(
            torch.nn.Linear(in_features, out_features),
            torch.nn.LeakyReLU(0.2, inplace=True),
        )

    self.global_feature_1 = linear_block(input_channels, output_channels)
    self.global_feature_2 = linear_block(output_channels, output_channels)

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data (tensor to be modulated).
    x2             : torch.tensor
                     Second input data (source of global statistics).

    Returns
    -------
    result         : torch.tensor
                     Estimated output.
    """
    # Global average pooling over the spatial axes of x2.
    pooled = torch.mean(x2, dim=(2, 3))
    scale = self.global_feature_1(pooled)
    shift = self.global_feature_2(scale)
    # Broadcast the per-channel scale and shift over the spatial axes.
    scale = scale.unsqueeze(2).unsqueeze(3)
    shift = shift.unsqueeze(2).unsqueeze(3)
    result = x1 * scale + shift
    return result

multi_layer_perceptron

Bases: Module

A multi-layer perceptron model.

Source code in odak/learn/models/models.py
class multi_layer_perceptron(torch.nn.Module):
    """
    A multi-layer perceptron model.
    """

    def __init__(
        self,
        dimensions,
        activation=torch.nn.ReLU(),
        bias=False,
        model_type="conventional",
        siren_multiplier=1.0,
        input_multiplier=None,
    ):
        """
        Initialize the multi-layer perceptron.

        Parameters
        ----------
        dimensions : list of int
            List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel).
        activation : torch.nn.Module, optional
            Nonlinear activation function. Default is `torch.nn.ReLU()`.
        bias : bool, optional
            If set to True, linear layers will include biases. Default is False.
        siren_multiplier : float, optional
            When using `SIREN` model type, this parameter functions as a hyperparameter.
            The original SIREN work uses 30.
            You can bypass this parameter by providing input that are not normalized and larger than one. Default is 1.0.
        input_multiplier : float, optional
            Initial value of the input multiplier before the very first layer.
        model_type : str, optional
            Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
            `conventional` refers to a standard multi layer perceptron.
            For `SIREN`, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
            For `Swish`, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017).
            For `FILM SIREN`, see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
            For `Gaussian`, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
            Default is "conventional".
        """
        super(multi_layer_perceptron, self).__init__()
        self.activation = activation
        self.bias = bias
        self.model_type = model_type
        self.layers = torch.nn.ModuleList()
        self.siren_multiplier = siren_multiplier
        self.dimensions = dimensions
        logger.info(
            f"Initializing multi_layer_perceptron: model_type={model_type}, "
            f"dimensions={dimensions}, bias={bias}, "
            f"siren_multiplier={siren_multiplier}"
        )
        # One linear layer per consecutive pair of dimensions.
        for in_features, out_features in zip(self.dimensions[:-1], self.dimensions[1:]):
            self.layers.append(torch.nn.Linear(in_features, out_features, bias=self.bias))
        if input_multiplier is not None:
            # Learnable elementwise gain applied to the raw input.
            self.input_multiplier = torch.nn.ParameterList()
            self.input_multiplier.append(
                torch.nn.Parameter(torch.ones(1, self.dimensions[0]) * input_multiplier)
            )
            logger.debug(f"Input multiplier initialized: {input_multiplier}")
        if self.model_type == "FILM SIREN":
            # Per-layer frequency and phase parameters (two rows each).
            self.alpha = torch.nn.ParameterList()
            for width in self.dimensions[1:]:
                self.alpha.append(torch.nn.Parameter(torch.randn(2, 1, width)))
            logger.debug("FILM SIREN alpha parameters initialized")
        if self.model_type == "Gaussian":
            # Per-layer spread parameters for the Gaussian activation.
            self.alpha = torch.nn.ParameterList()
            for width in self.dimensions[1:]:
                self.alpha.append(torch.nn.Parameter(torch.randn(1, 1, width)))
            logger.debug("Gaussian alpha parameters initialized")

    def forward(self, x):
        """
        Forward pass of the multi-layer perceptron.

        Parameters
        ----------
        x : torch.Tensor
            Input data.

        Returns
        -------
        result : torch.Tensor
            Estimated output.
        """
        result = x * self.input_multiplier[0] if hasattr(self, "input_multiplier") else x
        last_index = len(self.layers) - 1
        for layer_id, layer in enumerate(self.layers):
            result = layer(result)
            if layer_id == last_index:
                # The final layer is left linear for every model type.
                continue
            if self.model_type == "conventional":
                result = self.activation(result)
            elif self.model_type == "swish":
                result = swish(result)
            elif self.model_type == "SIREN":
                result = torch.sin(result * self.siren_multiplier)
            elif self.model_type == "FILM SIREN":
                result = torch.sin(
                    self.alpha[layer_id][0] * result + self.alpha[layer_id][1]
                )
            elif self.model_type == "Gaussian":
                result = gaussian(result, self.alpha[layer_id][0])
        return result

__init__(dimensions, activation=torch.nn.ReLU(), bias=False, model_type='conventional', siren_multiplier=1.0, input_multiplier=None)

Initialize the multi-layer perceptron.

Parameters:

  • dimensions (list of int) –

    List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel).

  • activation (Module, default: ReLU() ) –

    Nonlinear activation function. Default is torch.nn.ReLU().

  • bias (bool, default: False ) –

    If set to True, linear layers will include biases. Default is False.

  • siren_multiplier (float, default: 1.0 ) –

    When using SIREN model type, this parameter functions as a hyperparameter. The original SIREN work uses 30. You can bypass this parameter by providing input that are not normalized and larger than one. Default is 1.0.

  • input_multiplier (float, default: None ) –

    Initial value of the input multiplier before the very first layer.

  • model_type (str, default: 'conventional' ) –

    Model type: conventional, swish, SIREN, FILM SIREN, Gaussian. conventional refers to a standard multi layer perceptron. For SIREN, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473. For Swish, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017). For FILM SIREN, see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021. For Gaussian, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022. Default is "conventional".

Source code in odak/learn/models/models.py
def __init__(
    self,
    dimensions,
    activation=torch.nn.ReLU(),
    bias=False,
    model_type="conventional",
    siren_multiplier=1.0,
    input_multiplier=None,
):
    """
    Initialize the multi-layer perceptron.

    Parameters
    ----------
    dimensions : list of int
        List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel).
    activation : torch.nn.Module, optional
        Nonlinear activation function. Default is `torch.nn.ReLU()`.
    bias : bool, optional
        If set to True, linear layers will include biases. Default is False.
    siren_multiplier : float, optional
        When using `SIREN` model type, this parameter functions as a hyperparameter.
        The original SIREN work uses 30.
        You can bypass this parameter by providing input that are not normalized and larger than one. Default is 1.0.
    input_multiplier : float, optional
        Initial value of the input multiplier before the very first layer.
    model_type : str, optional
        Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
        `conventional` refers to a standard multi layer perceptron.
        For `SIREN`, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
        For `Swish`, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017).
        For `FILM SIREN`, see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
        For `Gaussian`, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
        Default is "conventional".
    """
    super(multi_layer_perceptron, self).__init__()
    self.activation = activation
    self.bias = bias
    self.model_type = model_type
    self.layers = torch.nn.ModuleList()
    self.siren_multiplier = siren_multiplier
    self.dimensions = dimensions
    logger.info(
        f"Initializing multi_layer_perceptron: model_type={model_type}, "
        f"dimensions={dimensions}, bias={bias}, "
        f"siren_multiplier={siren_multiplier}"
    )
    # One linear layer per consecutive pair of dimensions.
    for in_features, out_features in zip(self.dimensions[:-1], self.dimensions[1:]):
        self.layers.append(torch.nn.Linear(in_features, out_features, bias=self.bias))
    if input_multiplier is not None:
        # Learnable elementwise gain applied to the raw input.
        self.input_multiplier = torch.nn.ParameterList()
        self.input_multiplier.append(
            torch.nn.Parameter(torch.ones(1, self.dimensions[0]) * input_multiplier)
        )
        logger.debug(f"Input multiplier initialized: {input_multiplier}")
    if self.model_type == "FILM SIREN":
        # Per-layer frequency and phase parameters (two rows each).
        self.alpha = torch.nn.ParameterList()
        for width in self.dimensions[1:]:
            self.alpha.append(torch.nn.Parameter(torch.randn(2, 1, width)))
        logger.debug("FILM SIREN alpha parameters initialized")
    if self.model_type == "Gaussian":
        # Per-layer spread parameters for the Gaussian activation.
        self.alpha = torch.nn.ParameterList()
        for width in self.dimensions[1:]:
            self.alpha.append(torch.nn.Parameter(torch.randn(1, 1, width)))
        logger.debug("Gaussian alpha parameters initialized")

forward(x)

Forward pass of the multi-layer perceptron.

Parameters:

  • x (Tensor) –

    Input data.

Returns:

  • result ( Tensor ) –

    Estimated output.

Source code in odak/learn/models/models.py
def forward(self, x):
    """
    Forward pass of the multi-layer perceptron.

    Parameters
    ----------
    x : torch.Tensor
        Input data.

    Returns
    -------
    result : torch.Tensor
        Estimated output.
    """
    result = x * self.input_multiplier[0] if hasattr(self, "input_multiplier") else x
    last_index = len(self.layers) - 1
    for layer_id, layer in enumerate(self.layers):
        result = layer(result)
        if layer_id == last_index:
            # The final layer is left linear for every model type.
            continue
        if self.model_type == "conventional":
            result = self.activation(result)
        elif self.model_type == "swish":
            result = swish(result)
        elif self.model_type == "SIREN":
            result = torch.sin(result * self.siren_multiplier)
        elif self.model_type == "FILM SIREN":
            result = torch.sin(
                self.alpha[layer_id][0] * result + self.alpha[layer_id][1]
            )
        elif self.model_type == "Gaussian":
            result = gaussian(result, self.alpha[layer_id][0])
    return result

non_local_layer

Bases: Module

Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)

Source code in odak/learn/models/components.py
class non_local_layer(torch.nn.Module):
    """
    Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)
    """

    def __init__(
        self,
        input_channels=1024,
        bottleneck_channels=512,
        kernel_size=1,
        bias=False,
    ):
        """

        Parameters
        ----------
        input_channels      : int
                              Number of input channels.
        bottleneck_channels : int
                              Number of middle channels.
        kernel_size         : int
                              Kernel size.
        bias                : bool
                              Set to True to let convolutional layers have bias term.
        """
        super(non_local_layer, self).__init__()
        self.input_channels = input_channels
        self.bottleneck_channels = bottleneck_channels
        padding = kernel_size // 2
        # Value projection into the bottleneck.
        self.g = torch.nn.Conv2d(
            self.input_channels,
            self.bottleneck_channels,
            kernel_size=kernel_size,
            padding=padding,
            bias=bias,
        )
        # Projection back to the input channel count, closed by batch norm.
        self.W_z = torch.nn.Sequential(
            torch.nn.Conv2d(
                self.bottleneck_channels,
                self.input_channels,
                kernel_size=kernel_size,
                bias=bias,
                padding=padding,
            ),
            torch.nn.BatchNorm2d(self.input_channels),
        )
        # Zero-initialize the closing batch norm so the residual block
        # starts out as an identity mapping.
        torch.nn.init.constant_(self.W_z[1].weight, 0)
        torch.nn.init.constant_(self.W_z[1].bias, 0)

    def forward(self, x):
        """
        Forward model [zi = Wzyi + xi]

        Parameters
        ----------
        x               : torch.tensor
                          First input data.


        Returns
        -------
        z               : torch.tensor
                          Estimated output.
        """
        batch_size, channels, height, width = x.size()
        # Flatten spatial positions; attention compares every position pair.
        flat = x.view(batch_size, channels, -1).permute(0, 2, 1)
        values = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
        scores = torch.bmm(flat, flat.transpose(1, 2)) / (height * width)
        weights = torch.nn.functional.softmax(scores, dim=-1)
        attended = torch.bmm(weights, values)
        attended = attended.permute(0, 2, 1).contiguous()
        attended = attended.view(batch_size, self.bottleneck_channels, height, width)
        z = self.W_z(attended) + x
        return z

__init__(input_channels=1024, bottleneck_channels=512, kernel_size=1, bias=False)

Parameters:

  • input_channels
                  Number of input channels.
    
  • bottleneck_channels (int, default: 512 ) –
                  Number of middle channels.
    
  • kernel_size
                  Kernel size.
    
  • bias
                  Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=1024,
    bottleneck_channels=512,
    kernel_size=1,
    bias=False,
):
    """

    Parameters
    ----------
    input_channels      : int
                          Number of input channels.
    bottleneck_channels : int
                          Number of middle channels.
    kernel_size         : int
                          Kernel size.
    bias                : bool
                          Set to True to let convolutional layers have bias term.
    """
    super(non_local_layer, self).__init__()
    self.input_channels = input_channels
    self.bottleneck_channels = bottleneck_channels
    padding = kernel_size // 2
    # Value projection into the bottleneck.
    self.g = torch.nn.Conv2d(
        self.input_channels,
        self.bottleneck_channels,
        kernel_size=kernel_size,
        padding=padding,
        bias=bias,
    )
    # Projection back to the input channel count, closed by batch norm.
    self.W_z = torch.nn.Sequential(
        torch.nn.Conv2d(
            self.bottleneck_channels,
            self.input_channels,
            kernel_size=kernel_size,
            bias=bias,
            padding=padding,
        ),
        torch.nn.BatchNorm2d(self.input_channels),
    )
    # Zero-initialize the closing batch norm so the residual block
    # starts out as an identity mapping.
    torch.nn.init.constant_(self.W_z[1].weight, 0)
    torch.nn.init.constant_(self.W_z[1].bias, 0)

forward(x)

Forward model [zi = Wzyi + xi]

Parameters:

  • x
              First input data.
    

Returns:

  • z ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model [zi = Wzyi + xi]

    Parameters
    ----------
    x               : torch.tensor
                      First input data.


    Returns
    -------
    z               : torch.tensor
                      Estimated output.
    """
    batch_size, channels, height, width = x.size()
    # Flatten spatial positions; attention compares every position pair.
    flat = x.view(batch_size, channels, -1).permute(0, 2, 1)
    values = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
    scores = torch.bmm(flat, flat.transpose(1, 2)) / (height * width)
    weights = torch.nn.functional.softmax(scores, dim=-1)
    attended = torch.bmm(weights, values)
    attended = attended.permute(0, 2, 1).contiguous()
    attended = attended.view(batch_size, self.bottleneck_channels, height, width)
    z = self.W_z(attended) + x
    return z

normalization

Bases: Module

A normalization layer.

Source code in odak/learn/models/components.py
class normalization(torch.nn.Module):
    """
    A normalization layer.
    """

    def __init__(self, dim=1):
        """
        Normalization layer.

        Parameters
        ----------
        dim             : int
                          Dimension (axis) to normalize.
        """
        super().__init__()
        # Learnable per-channel gain applied after standardization.
        self.k = torch.nn.Parameter(torch.ones((1, dim, 1, 1)))

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        # Looser epsilon for reduced-precision inputs.
        eps = 1e-5 if x.dtype == torch.float32 else 1e-3
        mean = torch.mean(x, dim=1, keepdim=True)
        var = torch.var(x, dim=1, unbiased=False, keepdim=True)
        standardized = (x - mean) * (var + eps).rsqrt()
        return standardized * self.k

__init__(dim=1)

Normalization layer.

Parameters:

  • dim
              Dimension (axis) to normalize.
    
Source code in odak/learn/models/components.py
def __init__(self, dim=1):
    """
    Normalization layer.

    Parameters
    ----------
    dim             : int
                      Dimension (axis) to normalize.
    """
    super().__init__()
    # Learnable per-channel gain applied after standardization.
    self.k = torch.nn.Parameter(torch.ones((1, dim, 1, 1)))

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Looser epsilon for reduced-precision inputs.
    eps = 1e-5 if x.dtype == torch.float32 else 1e-3
    mean = torch.mean(x, dim=1, keepdim=True)
    var = torch.var(x, dim=1, unbiased=False, keepdim=True)
    standardized = (x - mean) * (var + eps).rsqrt()
    return standardized * self.k

positional_encoder

Bases: Module

A positional encoder module. This implementation follows this specific work: Martin-Brualla, Ricardo, Noha Radwan, Mehdi SM Sajjadi, Jonathan T. Barron, Alexey Dosovitskiy, and Daniel Duckworth. "Nerf in the wild: Neural radiance fields for unconstrained photo collections." In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pp. 7210-7219. 2021..

Source code in odak/learn/models/components.py
class positional_encoder(torch.nn.Module):
    """
    A positional encoder module.
    This implementation follows this specific work: `Martin-Brualla, Ricardo, Noha Radwan, Mehdi SM Sajjadi, Jonathan T. Barron, Alexey Dosovitskiy, and Daniel Duckworth. "Nerf in the wild: Neural radiance fields for unconstrained photo collections." In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pp. 7210-7219. 2021.`.
    """

    def __init__(self, L):
        """
        A positional encoder module.

        Parameters
        ----------
        L                   : int
                              Positional encoding level.
        """
        super(positional_encoder, self).__init__()
        # Number of frequency bands (powers of two) in the encoding.
        self.L = L

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x               : torch.tensor
                          Input data [b x n], where `b` is batch size, `n` is the feature size.

        Returns
        -------
        result          : torch.tensor
                          Result of the forward operation.
        """
        # Frequencies 2^0 ... 2^(L-1), broadcast against every feature.
        frequencies = (2 ** torch.arange(self.L, device=x.device)).view(1, 1, -1)
        scaled = x.unsqueeze(-1) * frequencies
        batch = x.shape[0]
        cosines = torch.cos(scaled).reshape(batch, -1)
        sines = torch.sin(scaled).reshape(batch, -1)
        # Concatenate the raw input with its cosine and sine encodings.
        return torch.cat((x, cosines, sines), dim=1)

__init__(L)

A positional encoder module.

Parameters:

  • L
                  Positional encoding level.
    
Source code in odak/learn/models/components.py
def __init__(self, L):
    """
    A positional encoder module.

    Parameters
    ----------
    L                   : int
                          Positional encoding level.
    """
    super(positional_encoder, self).__init__()
    # Number of frequency bands (powers of two) in the encoding.
    self.L = L

forward(x)

Forward model.

Parameters:

  • x
              Input data [b x n], where `b` is batch size, `n` is the feature size.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x               : torch.tensor
                      Input data [b x n], where `b` is batch size, `n` is the feature size.

    Returns
    -------
    result          : torch.tensor
                      Result of the forward operation.
    """
    # Frequencies 2^0 ... 2^(L-1), broadcast against every feature.
    frequencies = (2 ** torch.arange(self.L, device=x.device)).view(1, 1, -1)
    scaled = x.unsqueeze(-1) * frequencies
    batch = x.shape[0]
    cosines = torch.cos(scaled).reshape(batch, -1)
    sines = torch.sin(scaled).reshape(batch, -1)
    # Concatenate the raw input with its cosine and sine encodings.
    return torch.cat((x, cosines, sines), dim=1)

residual_attention_layer

Bases: Module

A residual block with an attention layer.

Source code in odak/learn/models/components.py
class residual_attention_layer(torch.nn.Module):
    """
    A residual block with an attention layer.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=1,
        bias=False,
        activation=torch.nn.ReLU(),
    ):
        """
        An attention layer class.

        Parameters
        ----------
        input_channels  : int, optional
                          Number of input channels.
        output_channels : int, optional
                          Number of middle channels.
        kernel_size     : int, optional
                          Kernel size.
        bias            : bool, optional
                          Set to True to let convolutional layers have bias term.
        activation      : torch.nn, optional
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        padding = kernel_size // 2

        def conv_bn():
            # Convolution followed by batch normalization.
            return torch.nn.Sequential(
                torch.nn.Conv2d(
                    input_channels,
                    output_channels,
                    kernel_size=kernel_size,
                    padding=padding,
                    bias=bias,
                ),
                torch.nn.BatchNorm2d(output_channels),
            )

        self.convolution0 = conv_bn()
        self.convolution1 = conv_bn()
        # Activation plus a closing convolution to form the attention map.
        self.final_layer = torch.nn.Sequential(
            self.activation,
            torch.nn.Conv2d(
                output_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=padding,
                bias=bias,
            ),
        )

    def forward(self, x0, x1):
        """
        Forward model.

        Parameters
        ----------
        x0             : torch.tensor
                         First input data.

        x1             : torch.tensor
                         Second input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        branch0 = self.convolution0(x0)
        branch1 = self.convolution1(x1)
        # The fused branches produce an attention map that gates x0.
        attention_map = self.final_layer(branch0 + branch1)
        result = attention_map * x0
        return result

__init__(input_channels=2, output_channels=2, kernel_size=1, bias=False, activation=torch.nn.ReLU())

An attention layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int or optional, default: 2 ) –
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=1,
    bias=False,
    activation=torch.nn.ReLU(),
):
    """
    An attention layer class.

    Parameters
    ----------
    input_channels  : int, optional
                      Number of input channels.
    output_channels : int, optional
                      Number of middle channels.
    kernel_size     : int, optional
                      Kernel size.
    bias            : bool, optional
                      Set to True to let convolutional layers have bias term.
    activation      : torch.nn, optional
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    padding = kernel_size // 2

    def conv_bn():
        # Convolution followed by batch normalization.
        return torch.nn.Sequential(
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=padding,
                bias=bias,
            ),
            torch.nn.BatchNorm2d(output_channels),
        )

    self.convolution0 = conv_bn()
    self.convolution1 = conv_bn()
    # Activation plus a closing convolution to form the attention map.
    self.final_layer = torch.nn.Sequential(
        self.activation,
        torch.nn.Conv2d(
            output_channels,
            output_channels,
            kernel_size=kernel_size,
            padding=padding,
            bias=bias,
        ),
    )

forward(x0, x1)

Forward model.

Parameters:

  • x0
             First input data.
    
  • x1
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x0, x1):
    """
    Forward model.

    Parameters
    ----------
    x0             : torch.tensor
                     First input data.

    x1             : torch.tensor
                     Second input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Embed each input separately, merge by summation, then use the gating
    # head's output to modulate the first input.
    merged = self.convolution0(x0) + self.convolution1(x1)
    attention = self.final_layer(merged)
    result = attention * x0
    return result

residual_layer

Bases: Module

A residual layer.

Source code in odak/learn/models/components.py
class residual_layer(torch.nn.Module):
    """
    A residual layer: the output is the input plus a learned correction.
    """

    def __init__(
        self,
        input_channels=2,
        mid_channels=16,
        kernel_size=3,
        bias=False,
        normalization=True,
        activation=torch.nn.ReLU(),
    ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of middle channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        # Map back to input_channels so the skip addition stays shape-compatible.
        self.convolution = double_convolution(
            input_channels,
            mid_channels=mid_channels,
            output_channels=input_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )
        self.activation = activation

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        # Residual connection.
        return x + self.convolution(x)

__init__(input_channels=2, mid_channels=16, kernel_size=3, bias=False, normalization=True, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    mid_channels=16,
    kernel_size=3,
    bias=False,
    normalization=True,
    activation=torch.nn.ReLU(),
):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of middle channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    # The double convolution maps back to the input channel count so that the
    # residual addition in forward() is shape-compatible.
    self.convolution = double_convolution(
        input_channels,
        mid_channels=mid_channels,
        output_channels=input_channels,
        kernel_size=kernel_size,
        normalization=normalization,
        bias=bias,
        activation=activation,
    )
    self.activation = activation

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Skip connection: input plus the learned residual.
    return x + self.convolution(x)

spatial_gate

Bases: Module

Spatial attention module that applies a convolution layer after channel pooling. This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.

Source code in odak/learn/models/components.py
class spatial_gate(torch.nn.Module):
    """
    Spatial attention module that applies a convolution layer after channel pooling.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.
    """

    def __init__(self):
        """
        Initializes the spatial gate module.
        """
        super().__init__()
        # Two pooled channels in, one attention map out, with a 7x7 receptive
        # field and no nonlinearity on the convolution itself.
        self.spatial = convolution_layer(
            2, 1, 7, bias=False, activation=torch.nn.Identity()
        )

    def channel_pool(self, x):
        """
        Applies max and average pooling on the channels.

        Parameters
        ----------
        x             : torch.tensor
                        Input tensor.

        Returns
        -------
        output        : torch.tensor
                        Output tensor.
        """
        # Collapse the channel dimension twice — once with max, once with mean
        # — and stack the two maps as new channels.
        max_pool = torch.max(x, dim=1, keepdim=True)[0]
        avg_pool = torch.mean(x, dim=1, keepdim=True)
        return torch.cat((max_pool, avg_pool), dim=1)

    def forward(self, x):
        """
        Forward pass of the SpatialGate module.

        Applies spatial attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the SpatialGate module.

        Returns
        -------
        scaled_x     : torch.tensor
                       Output tensor after applying spatial attention.
        """
        pooled = self.channel_pool(x)
        attention_map = torch.sigmoid(self.spatial(pooled))
        return x * attention_map

__init__()

Initializes the spatial gate module.

Source code in odak/learn/models/components.py
def __init__(self):
    """
    Initializes the spatial gate module.
    """
    super().__init__()
    # Two pooled channels in, one attention map out, with a 7x7 receptive
    # field and no nonlinearity on the convolution itself.
    self.spatial = convolution_layer(
        2, 1, 7, bias=False, activation=torch.nn.Identity()
    )

channel_pool(x)

Applies max and average pooling on the channels.

Parameters:

  • x
            Input tensor.
    

Returns:

  • output ( tensor ) –

    Output tensor.

Source code in odak/learn/models/components.py
def channel_pool(self, x):
    """
    Applies max and average pooling on the channels.

    Parameters
    ----------
    x             : torch.tensor
                    Input tensor.

    Returns
    -------
    output        : torch.tensor
                    Output tensor.
    """
    # Stack the channel-wise maximum and mean as two new channels.
    max_pool = torch.amax(x, dim=1, keepdim=True)
    avg_pool = torch.mean(x, dim=1, keepdim=True)
    output = torch.cat((max_pool, avg_pool), dim=1)
    return output

forward(x)

Forward pass of the SpatialGate module.

Applies spatial attention to the input tensor.

Parameters:

  • x
           Input tensor to the SpatialGate module.
    

Returns:

  • scaled_x ( tensor ) –

    Output tensor after applying spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the SpatialGate module.

    Applies spatial attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the SpatialGate module.

    Returns
    -------
    scaled_x     : torch.tensor
                   Output tensor after applying spatial attention.
    """
    # Pool the channels, turn the pooled maps into a [0, 1] attention map,
    # and modulate the input with it.
    pooled = self.channel_pool(x)
    attention_map = torch.sigmoid(self.spatial(pooled))
    scaled_x = x * attention_map
    return scaled_x

spatially_adaptive_convolution

Bases: Module

A spatially adaptive convolution layer.

References

C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions." C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation." C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."

Source code in odak/learn/models/components.py
class spatially_adaptive_convolution(torch.nn.Module):
    """
    A spatially adaptive convolution layer.

    References
    ----------

    C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions."
    C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation."
    C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initializes a spatially adaptive convolution layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn.Module
                          Activation function to apply. If None, no activation is applied.
        """
        super(spatially_adaptive_convolution, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        # The standard convolution is used only as a convenient source of an
        # initialized weight tensor; forward() never calls it directly.
        self.standard_convolution = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=self.output_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        # Re-expose the convolution weight as this module's learnable
        # parameter (shares storage with standard_convolution.weight).
        self.weight = torch.nn.Parameter(
            data=self.standard_convolution.weight, requires_grad=True
        )
        # NOTE(review): activation is stored but never applied in forward() —
        # confirm whether this is intentional.
        self.activation = activation

    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive convolution layer.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        sa_output          : torch.tensor
                            Estimated output tensor.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary: when the spatial sizes
        # do not line up with the stride, each tensor is grown by size % stride
        # along each dimension, split as evenly as possible between both sides.
        if sv_kernel_feature.size(-1) * self.stride != x.size(
            -1
        ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(
                sv_kernel_feature,
                (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
            )
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(
                x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
            )

        # Unfold the input into sliding local patches, shape
        # (1, C_i * k * k, L), so the convolution becomes a matrix product.
        input_feature = torch.nn.functional.unfold(
            x,
            kernel_size=(self.kernel_size, self.kernel_size),
            stride=self.stride,
            padding=self.padding,
        )

        # Flatten the spatially varying kernels to match the unfolded layout.
        # NOTE(review): the leading 1 fixes the batch size to one — confirm
        # callers never pass batched input.
        sv_kernel = sv_kernel_feature.reshape(
            1,
            self.input_channels * self.kernel_size * self.kernel_size,
            (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
        )

        # Flatten the spatially invariant weight to (C_o, C_i * k * k).
        si_kernel = self.weight.reshape(
            self.output_channels,
            self.input_channels * self.kernel_size * self.kernel_size,
        )

        # Modulate each unfolded patch element-wise by its per-location kernel.
        sv_feature = input_feature * sv_kernel

        # Contract over the patch dimension and restore the spatial layout.
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
            1,
            self.output_channels,
            (x.size(-2) // self.stride),
            (x.size(-1) // self.stride),
        )
        return sa_output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive convolution layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Activation function to apply. If None, no activation is applied.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initializes a spatially adaptive convolution layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn.Module
                      Activation function to apply. If None, no activation is applied.
    """
    super(spatially_adaptive_convolution, self).__init__()
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.activation = activation
    # A standard convolution is created only to reuse its default weight
    # initialization; its weight tensor is re-exposed below as this module's
    # learnable parameter (shared storage).
    self.standard_convolution = torch.nn.Conv2d(
        in_channels=input_channels,
        out_channels=self.output_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    self.weight = torch.nn.Parameter(
        data=self.standard_convolution.weight, requires_grad=True
    )

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive convolution layer.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • sa_output ( tensor ) –

    Estimated output tensor. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive convolution layer.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    sa_output          : torch.tensor
                        Estimated output tensor.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary: when the spatial sizes do
    # not line up with the stride, each tensor is grown by size % stride along
    # each dimension, split as evenly as possible between both sides.
    if sv_kernel_feature.size(-1) * self.stride != x.size(
        -1
    ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(
            sv_kernel_feature,
            (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
        )
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(
            x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
        )

    # Unfold the input into sliding local patches, shape (1, C_i * k * k, L),
    # so the convolution becomes a matrix product.
    input_feature = torch.nn.functional.unfold(
        x,
        kernel_size=(self.kernel_size, self.kernel_size),
        stride=self.stride,
        padding=self.padding,
    )

    # Flatten the spatially varying kernels to match the unfolded layout.
    # NOTE(review): the leading 1 fixes the batch size to one — confirm
    # callers never pass batched input.
    sv_kernel = sv_kernel_feature.reshape(
        1,
        self.input_channels * self.kernel_size * self.kernel_size,
        (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
    )

    # Flatten the spatially invariant weight to (C_o, C_i * k * k).
    si_kernel = self.weight.reshape(
        self.output_channels,
        self.input_channels * self.kernel_size * self.kernel_size,
    )

    # Modulate each unfolded patch element-wise by its per-location kernel.
    sv_feature = input_feature * sv_kernel

    # Contract over the patch dimension and restore the spatial layout.
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
        1,
        self.output_channels,
        (x.size(-2) // self.stride),
        (x.size(-1) // self.stride),
    )
    return sa_output

spatially_adaptive_module

Bases: Module

A spatially adaptive module that combines learned spatially adaptive convolutions.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/components.py
class spatially_adaptive_module(torch.nn.Module):
    """
    A spatially adaptive module that combines learned spatially adaptive convolutions.

    References
    ----------

    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initializes a spatially adaptive module.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super(spatially_adaptive_module, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        # One output channel is produced directly by the spatially varying
        # convolution in forward(); the learned weight covers the rest.
        self.output_channels_for_weight = self.output_channels - 1
        # The standard convolution is used only as a convenient source of an
        # initialized weight tensor; forward() never calls it directly.
        self.standard_convolution = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=self.output_channels_for_weight,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        # Re-expose the convolution weight as this module's learnable
        # parameter (shares storage with standard_convolution.weight).
        self.weight = torch.nn.Parameter(
            data=self.standard_convolution.weight, requires_grad=True
        )
        self.activation = activation

    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive module.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        output             : torch.tensor
                            Combined output tensor from standard and spatially adaptive convolutions.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary: when the spatial sizes
        # do not line up with the stride, each tensor is grown by size % stride
        # along each dimension, split between both sides.
        if sv_kernel_feature.size(-1) * self.stride != x.size(
            -1
        ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(
                sv_kernel_feature,
                (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
            )
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(
                x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
            )

        # Unfold the input into sliding local patches, shape (1, C_i * k * k, L),
        # so the convolution becomes a matrix product.
        input_feature = torch.nn.functional.unfold(
            x,
            kernel_size=(self.kernel_size, self.kernel_size),
            stride=self.stride,
            padding=self.padding,
        )

        # Flatten the spatially varying kernels to match the unfolded layout.
        # NOTE(review): the leading 1 fixes the batch size to one — confirm
        # callers never pass batched input.
        sv_kernel = sv_kernel_feature.reshape(
            1,
            self.input_channels * self.kernel_size * self.kernel_size,
            (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
        )

        # Apply sv_kernel to the input_feature (per-location modulation).
        sv_feature = input_feature * sv_kernel

        # First output channel: plain spatially varying convolution (sum over
        # the patch dimension).
        sv_output = torch.sum(sv_feature, dim=1).reshape(
            1, 1, (x.size(-2) // self.stride), (x.size(-1) // self.stride)
        )

        # Flatten the learned weight to (C_o - 1, C_i * k * k) for the
        # spatially adaptive channels.
        si_kernel = self.weight.reshape(
            self.output_channels_for_weight,
            self.input_channels * self.kernel_size * self.kernel_size,
        )

        # Remaining channels: contract the modulated patches with the learned
        # weight and restore the spatial layout.
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
            1,
            self.output_channels_for_weight,
            (x.size(-2) // self.stride),
            (x.size(-1) // self.stride),
        )

        # Concatenate both outputs along channels and apply the activation.
        output = self.activation(torch.cat((sv_output, sa_output), dim=1))
        return output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive module.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initializes a spatially adaptive module.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super(spatially_adaptive_module, self).__init__()
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.activation = activation
    # One output channel comes directly from the spatially varying convolution
    # in forward(); the learned weight covers the remaining channels.
    self.output_channels_for_weight = self.output_channels - 1
    # A standard convolution is created only to reuse its default weight
    # initialization; its weight tensor is re-exposed below as this module's
    # learnable parameter (shared storage).
    self.standard_convolution = torch.nn.Conv2d(
        in_channels=input_channels,
        out_channels=self.output_channels_for_weight,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    self.weight = torch.nn.Parameter(
        data=self.standard_convolution.weight, requires_grad=True
    )

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive module.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • output ( tensor ) –

    Combined output tensor from standard and spatially adaptive convolutions. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive module.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    output             : torch.tensor
                        Combined output tensor from standard and spatially adaptive convolutions.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary: when the spatial sizes do
    # not line up with the stride, each tensor is grown by size % stride along
    # each dimension, split between both sides.
    if sv_kernel_feature.size(-1) * self.stride != x.size(
        -1
    ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(
            sv_kernel_feature,
            (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
        )
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(
            x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
        )

    # Unfold the input into sliding local patches, shape (1, C_i * k * k, L),
    # so the convolution becomes a matrix product.
    input_feature = torch.nn.functional.unfold(
        x,
        kernel_size=(self.kernel_size, self.kernel_size),
        stride=self.stride,
        padding=self.padding,
    )

    # Flatten the spatially varying kernels to match the unfolded layout.
    # NOTE(review): the leading 1 fixes the batch size to one — confirm
    # callers never pass batched input.
    sv_kernel = sv_kernel_feature.reshape(
        1,
        self.input_channels * self.kernel_size * self.kernel_size,
        (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
    )

    # Apply sv_kernel to the input_feature (per-location modulation).
    sv_feature = input_feature * sv_kernel

    # First output channel: plain spatially varying convolution (sum over the
    # patch dimension).
    sv_output = torch.sum(sv_feature, dim=1).reshape(
        1, 1, (x.size(-2) // self.stride), (x.size(-1) // self.stride)
    )

    # Flatten the learned weight to (C_o - 1, C_i * k * k) for the spatially
    # adaptive channels.
    si_kernel = self.weight.reshape(
        self.output_channels_for_weight,
        self.input_channels * self.kernel_size * self.kernel_size,
    )

    # Remaining channels: contract the modulated patches with the learned
    # weight and restore the spatial layout.
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
        1,
        self.output_channels_for_weight,
        (x.size(-2) // self.stride),
        (x.size(-1) // self.stride),
    )

    # Concatenate both outputs along channels and apply the activation.
    output = self.activation(torch.cat((sv_output, sa_output), dim=1))
    return output

spatially_adaptive_unet

Bases: Module

Spatially varying U-Net model based on spatially adaptive convolution.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/models.py
class spatially_adaptive_unet(torch.nn.Module):
    """
    Spatially varying U-Net model based on spatially adaptive convolution.

    References
    ----------
    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """

    def __init__(
        self,
        depth=3,
        dimensions=8,
        input_channels=6,
        out_channels=6,
        kernel_size=3,
        bias=True,
        normalization=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initialize the spatially adaptive U-Net model.

        Parameters
        ----------
        depth : int, optional
            Number of upsampling and downsampling layers. Default is 3.
        dimensions : int, optional
            Number of dimensions. Default is 8.
        input_channels : int, optional
            Number of input channels. Default is 6.
        out_channels : int, optional
            Number of output channels. Default is 6.
        kernel_size : int, optional
            Kernel size for convolutional layers. Default is 3.
        bias : bool, optional
            Set to True to let convolutional layers learn a bias term. Default is True.
        normalization : bool, optional
            If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
        activation : torch.nn.Module, optional
            Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
        """
        super().__init__()
        self.depth = depth
        self.out_channels = out_channels
        logger.info(
            f"Initializing spatially_adaptive_unet: "
            f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
            f"out_channels={out_channels}, kernel_size={kernel_size}, "
            f"bias={bias}, normalization={normalization}"
        )
        # Stem: lift the raw input field to the base feature width (`dimensions`).
        self.inc = convolution_layer(
            input_channels=input_channels,
            output_channels=dimensions,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )

        # Each encoder entry is a ModuleList of
        # [AvgPool2d, double_convolution, spatially_adaptive_module];
        # the spatially adaptive module doubles the channel count per stage.
        self.encoder = torch.nn.ModuleList()
        for i in range(self.depth + 1):  # Downsampling layers
            down_in_channels = dimensions * (2**i)
            down_out_channels = 2 * down_in_channels
            pooling_layer = torch.nn.AvgPool2d(2)
            double_convolution_layer = double_convolution(
                input_channels=down_in_channels,
                mid_channels=down_in_channels,
                output_channels=down_in_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            sam = spatially_adaptive_module(
                input_channels=down_in_channels,
                output_channels=down_out_channels,
                kernel_size=kernel_size,
                bias=bias,
                activation=activation,
            )
            self.encoder.append(
                torch.nn.ModuleList([pooling_layer, double_convolution_layer, sam])
            )
            logger.debug(f"Added encoder block {i}: {down_in_channels} -> {down_out_channels}")
        # Bottleneck: a double convolution followed by a global feature layer,
        # both at the widest channel count, dimensions * 2**(depth + 1).
        self.global_feature_module = torch.nn.ModuleList()
        double_convolution_layer = double_convolution(
            input_channels=dimensions * (2 ** (depth + 1)),
            mid_channels=dimensions * (2 ** (depth + 1)),
            output_channels=dimensions * (2 ** (depth + 1)),
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        global_feature_layer = global_feature_module(
            input_channels=dimensions * (2 ** (depth + 1)),
            mid_channels=dimensions * (2 ** (depth + 1)),
            output_channels=dimensions * (2 ** (depth + 1)),
            kernel_size=kernel_size,
            bias=bias,
            activation=torch.nn.LeakyReLU(0.2, inplace=True),
        )
        self.global_feature_module.append(
            torch.nn.ModuleList([double_convolution_layer, global_feature_layer])
        )
        logger.debug("Added global feature module")

        # Decoder: transposed-convolution upsampling stages. The final stage
        # (i == 0) maps to `out_channels` through a 1x1 convolution with no
        # activation; every other stage halves the channel count.
        self.decoder = torch.nn.ModuleList()
        for i in range(depth, -1, -1):
            up_in_channels = dimensions * (2 ** (i + 1))
            up_mid_channels = up_in_channels // 2
            if i == 0:
                up_out_channels = self.out_channels
                upsample_layer = upsample_convtranspose2d_layer(
                    input_channels=up_in_channels,
                    output_channels=up_mid_channels,
                    kernel_size=2,
                    stride=2,
                    bias=bias,
                )
                conv_layer = torch.nn.Sequential(
                    convolution_layer(
                        input_channels=up_mid_channels,
                        output_channels=up_mid_channels,
                        kernel_size=kernel_size,
                        bias=bias,
                        normalization=normalization,
                        activation=activation,
                    ),
                    convolution_layer(
                        input_channels=up_mid_channels,
                        output_channels=up_out_channels,
                        kernel_size=1,
                        bias=bias,
                        normalization=normalization,
                        activation=None,
                    ),
                )
                self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
                logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
            else:
                up_out_channels = up_in_channels // 2
                upsample_layer = upsample_convtranspose2d_layer(
                    input_channels=up_in_channels,
                    output_channels=up_mid_channels,
                    kernel_size=2,
                    stride=2,
                    bias=bias,
                )
                conv_layer = double_convolution(
                    input_channels=up_mid_channels,
                    mid_channels=up_mid_channels,
                    output_channels=up_out_channels,
                    kernel_size=kernel_size,
                    bias=bias,
                    normalization=normalization,
                    activation=activation,
                )
                self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
                logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
        logger.info("spatially_adaptive_unet initialization completed")

    def forward(self, sv_kernel, field):
        """
        Forward pass of the spatially adaptive U-Net.

        Parameters
        ----------
        sv_kernel : list of torch.Tensor
            Learned spatially varying kernels.
            Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
            where C_i, H_i, and W_i represent the channel, height, and width
            of each feature at a certain scale.

        field : torch.Tensor
            Input field data.
            Dimension: (1, 6, H, W)

        Returns
        -------
        target_field : torch.Tensor
            Estimated output.
            Dimension: (1, 6, H, W)
        """
        x = self.inc(field)
        # downsampling_outputs holds, in order: the stem output (index 0),
        # then for each encoder stage its pooled tensor followed by its
        # spatially adaptive module output (SAM outputs at even indices >= 2).
        downsampling_outputs = [x]
        for i, down_layer in enumerate(self.encoder):
            x_down = down_layer[0](downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
            # sv_kernel is indexed back-to-front: encoder stage i consumes
            # sv_kernel[self.depth - i].
            sam_output = down_layer[2](
                x_down + down_layer[1](x_down), sv_kernel[self.depth - i]
            )
            downsampling_outputs.append(sam_output)
        global_feature = self.global_feature_module[0][0](downsampling_outputs[-1])
        global_feature = self.global_feature_module[0][1](
            downsampling_outputs[-1], global_feature
        )
        downsampling_outputs.append(global_feature)
        x_up = downsampling_outputs[-1]
        for i, up_layer in enumerate(self.decoder):
            # Skip connections: 2 * (self.depth - i) addresses the even
            # entries of downsampling_outputs (SAM outputs, then the stem
            # output at index 0 for the last decoder stage).
            x_up = up_layer[0](x_up, downsampling_outputs[2 * (self.depth - i)])
            x_up = up_layer[1](x_up)
        result = x_up
        return result

__init__(depth=3, dimensions=8, input_channels=6, out_channels=6, kernel_size=3, bias=True, normalization=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initialize the spatially adaptive U-Net model.

Parameters:

  • depth (int, default: 3 ) –

    Number of upsampling and downsampling layers. Default is 3.

  • dimensions (int, default: 8 ) –

    Number of dimensions. Default is 8.

  • input_channels (int, default: 6 ) –

    Number of input channels. Default is 6.

  • out_channels (int, default: 6 ) –

    Number of output channels. Default is 6.

  • kernel_size (int, default: 3 ) –

    Kernel size for convolutional layers. Default is 3.

  • bias (bool, default: True ) –

    Set to True to let convolutional layers learn a bias term. Default is True.

  • normalization (bool, default: False ) –

    If True, adds a Batch Normalization layer after the convolutional layer. Default is False.

  • activation (Module, default: LeakyReLU(0.2, inplace=True) ) –

    Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).

Source code in odak/learn/models/models.py
def __init__(
    self,
    depth=3,
    dimensions=8,
    input_channels=6,
    out_channels=6,
    kernel_size=3,
    bias=True,
    normalization=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initialize the spatially adaptive U-Net model.

    Parameters
    ----------
    depth : int, optional
        Number of upsampling and downsampling layers. Default is 3.
    dimensions : int, optional
        Number of dimensions. Default is 8.
    input_channels : int, optional
        Number of input channels. Default is 6.
    out_channels : int, optional
        Number of output channels. Default is 6.
    kernel_size : int, optional
        Kernel size for convolutional layers. Default is 3.
    bias : bool, optional
        Set to True to let convolutional layers learn a bias term. Default is True.
    normalization : bool, optional
        If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
    activation : torch.nn.Module, optional
        Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
    """
    super().__init__()
    self.depth = depth
    self.out_channels = out_channels
    logger.info(
        f"Initializing spatially_adaptive_unet: "
        f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
        f"out_channels={out_channels}, kernel_size={kernel_size}, "
        f"bias={bias}, normalization={normalization}"
    )
    # Stem: lift the raw input field to the base feature width (`dimensions`).
    self.inc = convolution_layer(
        input_channels=input_channels,
        output_channels=dimensions,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )

    # Each encoder entry is a ModuleList of
    # [AvgPool2d, double_convolution, spatially_adaptive_module];
    # the spatially adaptive module doubles the channel count per stage.
    self.encoder = torch.nn.ModuleList()
    for i in range(self.depth + 1):  # Downsampling layers
        down_in_channels = dimensions * (2**i)
        down_out_channels = 2 * down_in_channels
        pooling_layer = torch.nn.AvgPool2d(2)
        double_convolution_layer = double_convolution(
            input_channels=down_in_channels,
            mid_channels=down_in_channels,
            output_channels=down_in_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        sam = spatially_adaptive_module(
            input_channels=down_in_channels,
            output_channels=down_out_channels,
            kernel_size=kernel_size,
            bias=bias,
            activation=activation,
        )
        self.encoder.append(
            torch.nn.ModuleList([pooling_layer, double_convolution_layer, sam])
        )
        logger.debug(f"Added encoder block {i}: {down_in_channels} -> {down_out_channels}")
    # Bottleneck: a double convolution followed by a global feature layer,
    # both at the widest channel count, dimensions * 2**(depth + 1).
    self.global_feature_module = torch.nn.ModuleList()
    double_convolution_layer = double_convolution(
        input_channels=dimensions * (2 ** (depth + 1)),
        mid_channels=dimensions * (2 ** (depth + 1)),
        output_channels=dimensions * (2 ** (depth + 1)),
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    global_feature_layer = global_feature_module(
        input_channels=dimensions * (2 ** (depth + 1)),
        mid_channels=dimensions * (2 ** (depth + 1)),
        output_channels=dimensions * (2 ** (depth + 1)),
        kernel_size=kernel_size,
        bias=bias,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    )
    self.global_feature_module.append(
        torch.nn.ModuleList([double_convolution_layer, global_feature_layer])
    )
    logger.debug("Added global feature module")

    # Decoder: transposed-convolution upsampling stages. The final stage
    # (i == 0) maps to `out_channels` through a 1x1 convolution with no
    # activation; every other stage halves the channel count.
    self.decoder = torch.nn.ModuleList()
    for i in range(depth, -1, -1):
        up_in_channels = dimensions * (2 ** (i + 1))
        up_mid_channels = up_in_channels // 2
        if i == 0:
            up_out_channels = self.out_channels
            upsample_layer = upsample_convtranspose2d_layer(
                input_channels=up_in_channels,
                output_channels=up_mid_channels,
                kernel_size=2,
                stride=2,
                bias=bias,
            )
            conv_layer = torch.nn.Sequential(
                convolution_layer(
                    input_channels=up_mid_channels,
                    output_channels=up_mid_channels,
                    kernel_size=kernel_size,
                    bias=bias,
                    normalization=normalization,
                    activation=activation,
                ),
                convolution_layer(
                    input_channels=up_mid_channels,
                    output_channels=up_out_channels,
                    kernel_size=1,
                    bias=bias,
                    normalization=normalization,
                    activation=None,
                ),
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
        else:
            up_out_channels = up_in_channels // 2
            upsample_layer = upsample_convtranspose2d_layer(
                input_channels=up_in_channels,
                output_channels=up_mid_channels,
                kernel_size=2,
                stride=2,
                bias=bias,
            )
            conv_layer = double_convolution(
                input_channels=up_mid_channels,
                mid_channels=up_mid_channels,
                output_channels=up_out_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
    logger.info("spatially_adaptive_unet initialization completed")

forward(sv_kernel, field)

Forward pass of the spatially adaptive U-Net.

Parameters:

  • sv_kernel (list of torch.Tensor) –

    Learned spatially varying kernels. Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i), where C_i, H_i, and W_i represent the channel, height, and width of each feature at a certain scale.

  • field (Tensor) –

    Input field data. Dimension: (1, 6, H, W)

Returns:

  • target_field ( Tensor ) –

    Estimated output. Dimension: (1, 6, H, W)

Source code in odak/learn/models/models.py
def forward(self, sv_kernel, field):
    """
    Run the spatially adaptive U-Net on an input field.

    Parameters
    ----------
    sv_kernel : list of torch.Tensor
        Learned spatially varying kernels, one per encoder scale.
        Each element has dimension (1, C_i * kernel_size * kernel_size, H_i, W_i),
        where C_i, H_i, and W_i are the channel count, height, and width
        of the feature map at that scale.

    field : torch.Tensor
        Input field data of dimension (1, 6, H, W).

    Returns
    -------
    target_field : torch.Tensor
        Estimated output of dimension (1, 6, H, W).
    """
    # Stem features; skip_stack collects every intermediate the decoder
    # may need: [stem, pooled_0, sam_0, pooled_1, sam_1, ...].
    features = self.inc(field)
    skip_stack = [features]
    for level, (pool, refine, adapt) in enumerate(self.encoder):
        pooled = pool(skip_stack[-1])
        skip_stack.append(pooled)
        # Residual refinement, then spatially adaptive convolution with the
        # kernel list walked back-to-front.
        adapted = adapt(pooled + refine(pooled), sv_kernel[self.depth - level])
        skip_stack.append(adapted)
    # Bottleneck: double convolution followed by the global feature layer.
    bottleneck = self.global_feature_module[0][0](skip_stack[-1])
    bottleneck = self.global_feature_module[0][1](skip_stack[-1], bottleneck)
    skip_stack.append(bottleneck)
    # Decode, fusing the even-indexed skip entries (SAM outputs, then the
    # stem output for the final stage).
    decoded = skip_stack[-1]
    for level, (upsample, convolve) in enumerate(self.decoder):
        decoded = upsample(decoded, skip_stack[2 * (self.depth - level)])
        decoded = convolve(decoded)
    return decoded

spatially_varying_kernel_generation_model

Bases: Module

Spatially varying kernel generation model, revised from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Refer to: J. Huang, P. Zhu, M. Geng et al., "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/models.py
class spatially_varying_kernel_generation_model(torch.nn.Module):
    """
    Spatially varying kernel generation model, revised from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Refer to:
    J. Huang, P. Zhu, M. Geng et al. Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices.
    """

    def __init__(
        self,
        depth=3,
        dimensions=8,
        input_channels=7,
        kernel_size=3,
        bias=True,
        normalization=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initialize the spatially varying kernel generation model.

        Parameters
        ----------
        depth : int, optional
            Number of upsampling and downsampling layers. Default is 3.
        dimensions : int, optional
            Number of dimensions. Default is 8.
        input_channels : int, optional
            Number of input channels. Default is 7.
        kernel_size : int, optional
            Kernel size for convolutional layers. Default is 3.
        bias : bool, optional
            Set to True to let convolutional layers learn a bias term. Default is True.
        normalization : bool, optional
            If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
        activation : torch.nn.Module, optional
            Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
        """
        super().__init__()
        self.depth = depth
        logger.info(
            f"Initializing spatially_varying_kernel_generation_model: "
            f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
            f"kernel_size={kernel_size}, bias={bias}, normalization={normalization}"
        )
        # Stem: lift the concatenated focal surface + field to `dimensions` channels.
        self.inc = convolution_layer(
            input_channels=input_channels,
            output_channels=dimensions,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )

        # Encoder is a flat ModuleList alternating AvgPool2d and
        # double_convolution layers. The first (i == 0) and last (i == depth)
        # stages keep their channel width; intermediate stages double it.
        self.encoder = torch.nn.ModuleList()
        for i in range(depth + 1):  # downsampling layers
            if i == 0:
                in_channels = dimensions * (2**i)
                out_channels = dimensions * (2**i)
            elif i == depth:
                in_channels = dimensions * (2 ** (i - 1))
                out_channels = dimensions * (2 ** (i - 1))
            else:
                in_channels = dimensions * (2 ** (i - 1))
                out_channels = 2 * in_channels
            pooling_layer = torch.nn.AvgPool2d(2)
            double_convolution_layer = double_convolution(
                input_channels=in_channels,
                mid_channels=in_channels,
                output_channels=out_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            self.encoder.append(pooling_layer)
            self.encoder.append(double_convolution_layer)
            logger.debug(f"Added encoder block {i}: {in_channels} -> {out_channels}")

        # One kernel-generation branch per output scale. Each branch is a
        # ModuleList of AvgPool2d layers (to bring the focal surface and the
        # pooled encoder features to matching resolution) followed by a
        # three-convolution head emitting (2**(depth+i)) * kernel_size**2
        # kernel channels. The "+ 1" in svf_in_channels accounts for the
        # single-channel pooled focal surface; the extra `dimensions` term at
        # i == 1 matches the stem-width features concatenated at that scale
        # in forward().
        self.spatially_varying_feature = torch.nn.ModuleList()  # for kernel generation
        for i in range(depth, -1, -1):
            if i == 1:
                svf_in_channels = dimensions + 2 ** (self.depth + i) + 1
            else:
                svf_in_channels = 2 ** (self.depth + i) + 1
            svf_out_channels = (2 ** (self.depth + i)) * (kernel_size * kernel_size)
            svf_mid_channels = dimensions * (2 ** (self.depth - 1))
            spatially_varying_kernel_generation = torch.nn.ModuleList()
            for j in range(i, -1, -1):
                pooling_layer = torch.nn.AvgPool2d(2 ** (j + 1))
                spatially_varying_kernel_generation.append(pooling_layer)
            kernel_generation_block = torch.nn.Sequential(
                torch.nn.Conv2d(
                    in_channels=svf_in_channels,
                    out_channels=svf_mid_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
                activation,
                torch.nn.Conv2d(
                    in_channels=svf_mid_channels,
                    out_channels=svf_mid_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
                activation,
                torch.nn.Conv2d(
                    in_channels=svf_mid_channels,
                    out_channels=svf_out_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
            )
            spatially_varying_kernel_generation.append(kernel_generation_block)
            self.spatially_varying_feature.append(spatially_varying_kernel_generation)
            logger.debug(f"Added SVF block {i}: {svf_in_channels} -> {svf_out_channels}")

        # Decoder: the first entry is the global feature layer (consumed at
        # i == 0 in forward()); the remaining entries are
        # [upsample, double_convolution] pairs.
        self.decoder = torch.nn.ModuleList()
        global_feature_layer = global_feature_module(  # global feature layer
            input_channels=dimensions * (2 ** (depth - 1)),
            mid_channels=dimensions * (2 ** (depth - 1)),
            output_channels=dimensions * (2 ** (depth - 1)),
            kernel_size=kernel_size,
            bias=bias,
            activation=torch.nn.LeakyReLU(0.2, inplace=True),
        )
        self.decoder.append(global_feature_layer)
        for i in range(depth, 0, -1):
            if i == 2:
                up_in_channels = (dimensions // 2) * (2**i)
                up_out_channels = up_in_channels
                up_mid_channels = up_in_channels
            elif i == 1:
                up_in_channels = dimensions * 2
                up_out_channels = dimensions
                up_mid_channels = up_out_channels
            else:
                up_in_channels = (dimensions // 2) * (2**i)
                up_out_channels = up_in_channels // 2
                up_mid_channels = up_in_channels
            upsample_layer = upsample_convtranspose2d_layer(
                input_channels=up_in_channels,
                output_channels=up_mid_channels,
                kernel_size=2,
                stride=2,
                bias=bias,
            )
            # NOTE(review): mid_channels is left at double_convolution's
            # default here, unlike the encoder — confirm this is intended.
            conv_layer = double_convolution(
                input_channels=up_mid_channels,
                output_channels=up_out_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
        logger.info("spatially_varying_kernel_generation_model initialization completed")

    def forward(self, focal_surface, field):
        """
        Forward pass of the spatially varying kernel generation model.

        Parameters
        ----------
        focal_surface : torch.Tensor
            Input focal surface data.
            Dimension: (1, 1, H, W)

        field : torch.Tensor
            Input field data.
            Dimension: (1, 6, H, W)

        Returns
        -------
        sv_kernel : list of torch.Tensor
            Learned spatially varying kernels.
            Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
            where C_i, H_i, and W_i represent the channel, height, and width
            of each feature at a certain scale.
        """
        x = self.inc(torch.cat((focal_surface, field), dim=1))
        # downsampling_outputs[0] is the raw focal surface and [1] the stem
        # features; the encoder loop then appends every pooling/convolution
        # output, so pooled tensors land at even indices >= 2 and convolution
        # outputs at odd indices >= 3.
        downsampling_outputs = [focal_surface]
        downsampling_outputs.append(x)
        for i, down_layer in enumerate(self.encoder):
            x_down = down_layer(downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
        sv_kernels = []
        for i, (up_layer, svf_layer) in enumerate(
            zip(self.decoder, self.spatially_varying_feature)
        ):
            if i == 0:
                # Coarsest scale: apply the global feature layer rather than
                # an upsample/convolution pair.
                global_feature = up_layer(
                    downsampling_outputs[-2], downsampling_outputs[-1]
                )
                downsampling_outputs[-1] = global_feature
                sv_feature = [global_feature, downsampling_outputs[0]]
                # Cumulatively pool the focal surface with this branch's
                # finest pooling layer while collecting pooled encoder
                # features (even indices) from the other pyramid levels.
                for j in range(self.depth - i + 1):
                    sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                    if j > 0:
                        sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
                # Reorder the collected features into the channel order the
                # kernel-generation head was built for.
                # NOTE(review): index 4 only exists when depth >= 3, so this
                # permutation assumes the default depth — confirm before
                # using other depths.
                sv_feature = [
                    sv_feature[0],
                    sv_feature[1],
                    sv_feature[4],
                    sv_feature[2],
                    sv_feature[3],
                ]
                sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
                sv_kernels.append(sv_kernel)
            else:
                # Finer scales: upsample, merge with the matching encoder
                # convolution output (odd index), then generate kernels.
                x_up = up_layer[0](
                    downsampling_outputs[-1],
                    downsampling_outputs[2 * (self.depth + 1 - i) + 1],
                )
                x_up = up_layer[1](x_up)
                downsampling_outputs[-1] = x_up
                sv_feature = [x_up, downsampling_outputs[0]]
                for j in range(self.depth - i + 1):
                    sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                    if j > 0:
                        sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
                if i == 1:
                    sv_feature = [
                        sv_feature[0],
                        sv_feature[1],
                        sv_feature[3],
                        sv_feature[2],
                    ]
                sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
                sv_kernels.append(sv_kernel)
        return sv_kernels

__init__(depth=3, dimensions=8, input_channels=7, kernel_size=3, bias=True, normalization=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initialize the spatially varying kernel generation model.

Parameters:

  • depth (int, default: 3 ) –

    Number of upsampling and downsampling layers. Default is 3.

  • dimensions (int, default: 8 ) –

    Number of dimensions. Default is 8.

  • input_channels (int, default: 7 ) –

    Number of input channels. Default is 7.

  • kernel_size (int, default: 3 ) –

    Kernel size for convolutional layers. Default is 3.

  • bias (bool, default: True ) –

    Set to True to let convolutional layers learn a bias term. Default is True.

  • normalization (bool, default: False ) –

    If True, adds a Batch Normalization layer after the convolutional layer. Default is False.

  • activation (Module, default: LeakyReLU(0.2, inplace=True) ) –

    Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).

Source code in odak/learn/models/models.py
def __init__(
    self,
    depth=3,
    dimensions=8,
    input_channels=7,
    kernel_size=3,
    bias=True,
    normalization=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initialize the spatially varying kernel generation model.

    Parameters
    ----------
    depth : int, optional
        Number of upsampling and downsampling layers. Default is 3.
    dimensions : int, optional
        Number of dimensions. Default is 8.
    input_channels : int, optional
        Number of input channels. Default is 7.
    kernel_size : int, optional
        Kernel size for convolutional layers. Default is 3.
    bias : bool, optional
        Set to True to let convolutional layers learn a bias term. Default is True.
    normalization : bool, optional
        If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
    activation : torch.nn.Module, optional
        Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
    """
    # NOTE(review): the default `activation` module is instantiated once at
    # definition time and shared wherever the default is used — harmless for
    # a stateless activation, but confirm this sharing is intended.
    super().__init__()
    self.depth = depth
    logger.info(
        f"Initializing spatially_varying_kernel_generation_model: "
        f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
        f"kernel_size={kernel_size}, bias={bias}, normalization={normalization}"
    )
    # Stem: projects the concatenated input to `dimensions` channels.
    self.inc = convolution_layer(
        input_channels=input_channels,
        output_channels=dimensions,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )

    # Encoder: (depth + 1) blocks, each an average pooling followed by a
    # double convolution. Channel counts stay flat at the first and the last
    # block and double at every block in between.
    self.encoder = torch.nn.ModuleList()
    for i in range(depth + 1):  # downsampling layers
        if i == 0:
            in_channels = dimensions * (2**i)
            out_channels = dimensions * (2**i)
        elif i == depth:
            in_channels = dimensions * (2 ** (i - 1))
            out_channels = dimensions * (2 ** (i - 1))
        else:
            in_channels = dimensions * (2 ** (i - 1))
            out_channels = 2 * in_channels
        pooling_layer = torch.nn.AvgPool2d(2)
        double_convolution_layer = double_convolution(
            input_channels=in_channels,
            mid_channels=in_channels,
            output_channels=out_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.encoder.append(pooling_layer)
        self.encoder.append(double_convolution_layer)
        logger.debug(f"Added encoder block {i}: {in_channels} -> {out_channels}")

    # Kernel-generation branches, one per scale (i = depth .. 0). Each branch
    # stores (i + 1) pooling layers followed by a three-convolution block that
    # emits kernel_size * kernel_size weights per output channel.
    self.spatially_varying_feature = torch.nn.ModuleList()  # for kernel generation
    for i in range(depth, -1, -1):
        # The extra "+ 1" input channel accounts for the pooled focal surface
        # concatenated in forward(); the i == 1 scale additionally receives a
        # `dimensions`-channel feature — TODO confirm against forward().
        if i == 1:
            svf_in_channels = dimensions + 2 ** (self.depth + i) + 1
        else:
            svf_in_channels = 2 ** (self.depth + i) + 1
        svf_out_channels = (2 ** (self.depth + i)) * (kernel_size * kernel_size)
        svf_mid_channels = dimensions * (2 ** (self.depth - 1))
        spatially_varying_kernel_generation = torch.nn.ModuleList()
        for j in range(i, -1, -1):
            pooling_layer = torch.nn.AvgPool2d(2 ** (j + 1))
            spatially_varying_kernel_generation.append(pooling_layer)
        kernel_generation_block = torch.nn.Sequential(
            torch.nn.Conv2d(
                in_channels=svf_in_channels,
                out_channels=svf_mid_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            activation,
            torch.nn.Conv2d(
                in_channels=svf_mid_channels,
                out_channels=svf_mid_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            activation,
            torch.nn.Conv2d(
                in_channels=svf_mid_channels,
                out_channels=svf_out_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
        )
        spatially_varying_kernel_generation.append(kernel_generation_block)
        self.spatially_varying_feature.append(spatially_varying_kernel_generation)
        logger.debug(f"Added SVF block {i}: {svf_in_channels} -> {svf_out_channels}")

    # Decoder: one global-feature block at the deepest scale, then
    # (upsample, double convolution) pairs back toward the input resolution.
    # NOTE(review): the i == 2 and i == 1 special cases adjust channel counts;
    # verify them against the encoder's channel schedule above.
    self.decoder = torch.nn.ModuleList()
    global_feature_layer = global_feature_module(  # global feature layer
        input_channels=dimensions * (2 ** (depth - 1)),
        mid_channels=dimensions * (2 ** (depth - 1)),
        output_channels=dimensions * (2 ** (depth - 1)),
        kernel_size=kernel_size,
        bias=bias,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    )
    self.decoder.append(global_feature_layer)
    for i in range(depth, 0, -1):
        if i == 2:
            up_in_channels = (dimensions // 2) * (2**i)
            up_out_channels = up_in_channels
            up_mid_channels = up_in_channels
        elif i == 1:
            up_in_channels = dimensions * 2
            up_out_channels = dimensions
            up_mid_channels = up_out_channels
        else:
            up_in_channels = (dimensions // 2) * (2**i)
            up_out_channels = up_in_channels // 2
            up_mid_channels = up_in_channels
        upsample_layer = upsample_convtranspose2d_layer(
            input_channels=up_in_channels,
            output_channels=up_mid_channels,
            kernel_size=2,
            stride=2,
            bias=bias,
        )
        conv_layer = double_convolution(
            input_channels=up_mid_channels,
            output_channels=up_out_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
        logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
    logger.info("spatially_varying_kernel_generation_model initialization completed")

forward(focal_surface, field)

Forward pass of the spatially varying kernel generation model.

Parameters:

  • focal_surface (Tensor) –

    Input focal surface data. Dimension: (1, 1, H, W)

  • field (Tensor) –

    Input field data. Dimension: (1, 6, H, W)

Returns:

  • sv_kernel ( list of torch.Tensor ) –

    Learned spatially varying kernels. Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i), where C_i, H_i, and W_i represent the channel, height, and width of each feature at a certain scale.

Source code in odak/learn/models/models.py
def forward(self, focal_surface, field):
    """
    Forward pass of the spatially varying kernel generation model.

    Parameters
    ----------
    focal_surface : torch.Tensor
        Input focal surface data.
        Dimension: (1, 1, H, W)

    field : torch.Tensor
        Input field data.
        Dimension: (1, 6, H, W)

    Returns
    -------
    sv_kernel : list of torch.Tensor
        Learned spatially varying kernels.
        Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
        where C_i, H_i, and W_i represent the channel, height, and width
        of each feature at a certain scale.
    """
    x = self.inc(torch.cat((focal_surface, field), dim=1))
    # downsampling_outputs layout: index 0 holds the raw focal surface,
    # index 1 the stem output, and each following entry the output of one
    # encoder module in order (pooling and convolution outputs interleave,
    # since the encoder alternates AvgPool2d and double_convolution modules).
    downsampling_outputs = [focal_surface]
    downsampling_outputs.append(x)
    for i, down_layer in enumerate(self.encoder):
        x_down = down_layer(downsampling_outputs[-1])
        downsampling_outputs.append(x_down)
    sv_kernels = []
    for i, (up_layer, svf_layer) in enumerate(
        zip(self.decoder, self.spatially_varying_feature)
    ):
        if i == 0:
            # Deepest scale: fuse the last two encoder outputs through the
            # global feature module before generating kernels.
            global_feature = up_layer(
                downsampling_outputs[-2], downsampling_outputs[-1]
            )
            downsampling_outputs[-1] = global_feature
            sv_feature = [global_feature, downsampling_outputs[0]]
            for j in range(self.depth - i + 1):
                # Repeatedly pool the focal surface down to this scale; from
                # j == 1 on, also pool intermediate encoder features.
                sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                if j > 0:
                    sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
            # Reorder so channels match the concatenation order expected by
            # the kernel-generation block — TODO confirm against the channel
            # bookkeeping in __init__.
            sv_feature = [
                sv_feature[0],
                sv_feature[1],
                sv_feature[4],
                sv_feature[2],
                sv_feature[3],
            ]
            sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
            sv_kernels.append(sv_kernel)
        else:
            # Shallower scales: upsample the running feature and fuse it with
            # the encoder output at matching resolution (the index expression
            # 2 * (depth + 1 - i) + 1 selects that skip feature — verify).
            x_up = up_layer[0](
                downsampling_outputs[-1],
                downsampling_outputs[2 * (self.depth + 1 - i) + 1],
            )
            x_up = up_layer[1](x_up)
            downsampling_outputs[-1] = x_up
            sv_feature = [x_up, downsampling_outputs[0]]
            for j in range(self.depth - i + 1):
                sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                if j > 0:
                    sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
            # Only the i == 1 scale needs its last two features swapped to
            # match the expected channel order — see the i == 0 branch.
            if i == 1:
                sv_feature = [
                    sv_feature[0],
                    sv_feature[1],
                    sv_feature[3],
                    sv_feature[2],
                ]
            sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
            sv_kernels.append(sv_kernel)
    return sv_kernels

unet

Bases: Module

A U-Net model, heavily inspired from https://github.com/milesial/Pytorch-UNet/tree/master/unet and more can be read from Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. "U-net: Convolutional networks for biomedical image segmentation." Medical Image Computing and Computer-Assisted Intervention–MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18. Springer International Publishing, 2015.

Source code in odak/learn/models/models.py
class unet(torch.nn.Module):
    """
    A U-Net model. Heavily inspired by `https://github.com/milesial/Pytorch-UNet/tree/master/unet`;
    see Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. "U-net: Convolutional networks
    for biomedical image segmentation." MICCAI 2015, Part III 18, Springer, 2015 for details.
    """

    def __init__(
        self,
        depth=4,
        dimensions=64,
        input_channels=2,
        output_channels=1,
        bilinear=False,
        kernel_size=3,
        bias=False,
        activation=torch.nn.ReLU(inplace=True),
    ):
        """
        Initialize the U-Net model.

        Parameters
        ----------
        depth : int, optional
            Number of upsampling and downsampling layers. Default is 4.
        dimensions : int, optional
            Number of dimensions. Default is 64.
        input_channels : int, optional
            Number of input channels. Default is 2.
        output_channels : int, optional
            Number of output channels. Default is 1.
        bilinear : bool, optional
            Uses bilinear upsampling in upsampling layers when set True. Default is False.
        kernel_size : int, optional
            Kernel size for convolutional layers. Default is 3.
        bias : bool, optional
            Set True to let convolutional layers learn a bias term. Default is False.
        activation : torch.nn.Module, optional
            Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.ReLU(inplace=True).
        """
        super().__init__()
        logger.info(
            f"Initializing U-Net: depth={depth}, dimensions={dimensions}, "
            f"input_channels={input_channels}, output_channels={output_channels}, "
            f"bilinear={bilinear}, kernel_size={kernel_size}"
        )
        # Stem: project the input to the base channel width.
        self.inc = double_convolution(
            input_channels=input_channels,
            mid_channels=dimensions,
            output_channels=dimensions,
            kernel_size=kernel_size,
            bias=bias,
            activation=activation,
        )
        self.downsampling_layers = torch.nn.ModuleList()
        self.upsampling_layers = torch.nn.ModuleList()
        # Encoder: channel count doubles at every downsampling stage.
        for i in range(depth):
            in_channels = dimensions * (2 ** i)
            out_channels = in_channels * 2
            self.downsampling_layers.append(
                downsample_layer(
                    in_channels,
                    out_channels,
                    kernel_size=kernel_size,
                    bias=bias,
                    activation=activation,
                )
            )
            logger.debug(f"Added downsampling layer {i}: {in_channels} -> {out_channels}")
        # Decoder: mirrors the encoder, halving channels at every stage.
        for i in reversed(range(depth)):
            up_in_channels = dimensions * (2 ** (i + 1))
            up_out_channels = dimensions * (2 ** i)
            self.upsampling_layers.append(
                upsample_layer(
                    up_in_channels,
                    up_out_channels,
                    kernel_size=kernel_size,
                    bias=bias,
                    activation=activation,
                    bilinear=bilinear,
                )
            )
            logger.debug(f"Added upsampling layer: {up_in_channels} -> {up_out_channels}")
        # Final convolution maps the base channel width to the requested
        # output channels at the input resolution ("same" padding).
        self.outc = torch.nn.Conv2d(
            dimensions,
            output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        )
        logger.info("U-Net initialization completed")

    def forward(self, x):
        """
        Forward pass of the U-Net.

        Parameters
        ----------
        x : torch.Tensor
            Input data.

        Returns
        -------
        result : torch.Tensor
            Estimated output.
        """
        features = [self.inc(x)]
        for stage in self.downsampling_layers:
            features.append(stage(features[-1]))
        decoded = features[-1]
        # Skip connections are consumed from the deepest encoder feature outwards.
        for stage, skip in zip(self.upsampling_layers, reversed(features[:-1])):
            decoded = stage(decoded, skip)
        result = self.outc(decoded)
        return result

__init__(depth=4, dimensions=64, input_channels=2, output_channels=1, bilinear=False, kernel_size=3, bias=False, activation=torch.nn.ReLU(inplace=True))

Initialize the U-Net model.

Parameters:

  • depth (int, default: 4 ) –

    Number of upsampling and downsampling layers. Default is 4.

  • dimensions (int, default: 64 ) –

    Number of dimensions. Default is 64.

  • input_channels (int, default: 2 ) –

    Number of input channels. Default is 2.

  • output_channels (int, default: 1 ) –

    Number of output channels. Default is 1.

  • bilinear (bool, default: False ) –

    Uses bilinear upsampling in upsampling layers when set True. Default is False.

  • kernel_size (int, default: 3 ) –

    Kernel size for convolutional layers. Default is 3.

  • bias (bool, default: False ) –

    Set True to let convolutional layers learn a bias term. Default is False.

  • activation (Module, default: ReLU(inplace=True) ) –

    Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.ReLU(inplace=True).

Source code in odak/learn/models/models.py
def __init__(
    self,
    depth=4,
    dimensions=64,
    input_channels=2,
    output_channels=1,
    bilinear=False,
    kernel_size=3,
    bias=False,
    activation=torch.nn.ReLU(inplace=True),
):
    """
    Initialize the U-Net model.

    Parameters
    ----------
    depth : int, optional
        Number of upsampling and downsampling layers. Default is 4.
    dimensions : int, optional
        Number of dimensions. Default is 64.
    input_channels : int, optional
        Number of input channels. Default is 2.
    output_channels : int, optional
        Number of output channels. Default is 1.
    bilinear : bool, optional
        Uses bilinear upsampling in upsampling layers when set True. Default is False.
    kernel_size : int, optional
        Kernel size for convolutional layers. Default is 3.
    bias : bool, optional
        Set True to let convolutional layers learn a bias term. Default is False.
    activation : torch.nn.Module, optional
        Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.ReLU(inplace=True).
    """
    super(unet, self).__init__()
    logger.info(
        f"Initializing U-Net: depth={depth}, dimensions={dimensions}, "
        f"input_channels={input_channels}, output_channels={output_channels}, "
        f"bilinear={bilinear}, kernel_size={kernel_size}"
    )
    # Stem: project the input to the base channel width.
    self.inc = double_convolution(
        input_channels=input_channels,
        mid_channels=dimensions,
        output_channels=dimensions,
        kernel_size=kernel_size,
        bias=bias,
        activation=activation,
    )

    self.downsampling_layers = torch.nn.ModuleList()
    self.upsampling_layers = torch.nn.ModuleList()
    # Encoder: channel count doubles at every downsampling stage.
    for i in range(depth):  # downsampling layers
        in_channels = dimensions * (2**i)
        out_channels = dimensions * (2 ** (i + 1))
        down_layer = downsample_layer(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            bias=bias,
            activation=activation,
        )
        self.downsampling_layers.append(down_layer)
        logger.debug(f"Added downsampling layer {i}: {in_channels} -> {out_channels}")

    # Decoder: mirrors the encoder, halving channels at every stage.
    for i in range(depth - 1, -1, -1):  # upsampling layers
        up_in_channels = dimensions * (2 ** (i + 1))
        up_out_channels = dimensions * (2**i)
        up_layer = upsample_layer(
            up_in_channels,
            up_out_channels,
            kernel_size=kernel_size,
            bias=bias,
            activation=activation,
            bilinear=bilinear,
        )
        self.upsampling_layers.append(up_layer)
        logger.debug(f"Added upsampling layer: {up_in_channels} -> {up_out_channels}")
    # Final convolution maps to output_channels at the input resolution
    # ("same" padding).
    self.outc = torch.nn.Conv2d(
        dimensions,
        output_channels,
        kernel_size=kernel_size,
        padding=kernel_size // 2,
        bias=bias,
    )
    logger.info("U-Net initialization completed")

forward(x)

Forward pass of the U-Net.

Parameters:

  • x (Tensor) –

    Input data.

Returns:

  • result ( Tensor ) –

    Estimated output.

Source code in odak/learn/models/models.py
def forward(self, x):
    """
    Forward pass of the U-Net.

    Parameters
    ----------
    x : torch.Tensor
        Input data.

    Returns
    -------
    result : torch.Tensor
        Estimated output.
    """
    features = [self.inc(x)]
    for stage in self.downsampling_layers:
        features.append(stage(features[-1]))
    decoded = features[-1]
    # Skip connections are consumed from the deepest encoder feature outwards.
    for stage, skip in zip(self.upsampling_layers, reversed(features[:-1])):
        decoded = stage(decoded, skip)
    result = self.outc(decoded)
    return result

upsample_convtranspose2d_layer

Bases: Module

An upsampling convtranspose2d layer.

Source code in odak/learn/models/components.py
class upsample_convtranspose2d_layer(torch.nn.Module):
    """
    An upsampling layer based on a transposed convolution.
    Upsamples its first input, zero-pads it to the spatial size of the
    second input, and returns their element-wise sum.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=2,
        stride=2,
        bias=False,
    ):
        """
        An upsampling component based on a transposed convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size of the transposed convolution.
        stride          : int
                          Stride of the transposed convolution (upsampling factor).
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        """
        super().__init__()
        self.up = torch.nn.ConvTranspose2d(
            in_channels=input_channels,
            out_channels=output_channels,
            kernel_size=kernel_size,
            stride=stride,
            bias=bias,
        )

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         Tensor to be upsampled.
        x2             : torch.tensor
                         Skip tensor defining the target spatial size.


        Returns
        ----------
        result        : torch.tensor
                        Sum of the upsampled `x1` and `x2`.
        """
        upsampled = self.up(x1)
        height_gap = x2.size(2) - upsampled.size(2)
        width_gap = x2.size(3) - upsampled.size(3)
        # torch.nn.functional.pad takes [left, right, top, bottom].
        padding = [
            width_gap // 2,
            width_gap - width_gap // 2,
            height_gap // 2,
            height_gap - height_gap // 2,
        ]
        result = torch.nn.functional.pad(upsampled, padding) + x2
        return result

__init__(input_channels, output_channels, kernel_size=2, stride=2, bias=False)

An upsampling component based on a transposed convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=2,
    stride=2,
    bias=False,
):
    """
    An upsampling component based on a transposed convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size of the transposed convolution.
    stride          : int
                      Stride of the transposed convolution (upsampling factor).
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    """
    super().__init__()
    self.up = torch.nn.ConvTranspose2d(
        in_channels=input_channels,
        out_channels=output_channels,
        bias=bias,
        kernel_size=kernel_size,
        stride=stride,
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     Tensor to be upsampled.
    x2             : torch.tensor
                     Skip tensor defining the target spatial size.


    Returns
    ----------
    result        : torch.tensor
                    Sum of the upsampled `x1` and `x2`.
    """
    upsampled = self.up(x1)
    height_gap = x2.size(2) - upsampled.size(2)
    width_gap = x2.size(3) - upsampled.size(3)
    # torch.nn.functional.pad takes [left, right, top, bottom].
    padding = [
        width_gap // 2,
        width_gap - width_gap // 2,
        height_gap // 2,
        height_gap - height_gap // 2,
    ]
    result = torch.nn.functional.pad(upsampled, padding) + x2
    return result

upsample_layer

Bases: Module

An upsampling convolutional layer.

Source code in odak/learn/models/components.py
class upsample_layer(torch.nn.Module):
    """
    An upsampling convolutional layer.
    Upsamples its first input (bilinearly or with a transposed convolution),
    pads it to the spatial size of the second input, concatenates the two
    along the channel dimension, and refines the result with a double
    convolution.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
        bilinear=True,
    ):
        """
        An upsampling component followed by a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used (default torch.nn.ReLU()).
        bilinear        : bool
                          If set to True, bilinear sampling is used.
        """
        super().__init__()
        if bilinear:
            # Parameter-free upsampling; the double convolution consumes the
            # concatenated (skip + upsampled) channels.
            self.up = torch.nn.Upsample(
                scale_factor=2, mode="bilinear", align_corners=True
            )
            conv_input_channels = input_channels + output_channels
            conv_mid_channels = input_channels // 2
        else:
            # Learned upsampling: the transposed convolution halves the
            # channel count while doubling the spatial resolution.
            self.up = torch.nn.ConvTranspose2d(
                input_channels, input_channels // 2, kernel_size=2, stride=2
            )
            conv_input_channels = input_channels
            conv_mid_channels = output_channels
        self.conv = double_convolution(
            input_channels=conv_input_channels,
            mid_channels=conv_mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         Tensor to be upsampled.
        x2             : torch.tensor
                         Skip tensor concatenated with the upsampled `x1`.


        Returns
        ----------
        result        : torch.tensor
                        Result of the forward operation
        """
        upsampled = self.up(x1)
        height_gap = x2.size(2) - upsampled.size(2)
        width_gap = x2.size(3) - upsampled.size(3)
        # torch.nn.functional.pad takes [left, right, top, bottom].
        upsampled = torch.nn.functional.pad(
            upsampled,
            [
                width_gap // 2,
                width_gap - width_gap // 2,
                height_gap // 2,
                height_gap - height_gap // 2,
            ],
        )
        result = self.conv(torch.cat([x2, upsampled], dim=1))
        return result

__init__(input_channels, output_channels, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU(), bilinear=True)

An upsampling component followed by a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
  • bilinear
              If set to True, bilinear sampling is used.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
    bilinear=True,
):
    """
    An upsampling component followed by a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used (default torch.nn.ReLU()).
    bilinear        : bool
                      If set to True, bilinear sampling is used.
    """
    super(upsample_layer, self).__init__()
    if bilinear:
        # Parameter-free upsampling; the double convolution consumes the
        # concatenated (skip + upsampled) channels.
        self.up = torch.nn.Upsample(
            scale_factor=2, mode="bilinear", align_corners=True
        )
        self.conv = double_convolution(
            input_channels=input_channels + output_channels,
            mid_channels=input_channels // 2,
            output_channels=output_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )
    else:
        # Learned upsampling: the transposed convolution halves the channel
        # count while doubling the spatial resolution.
        self.up = torch.nn.ConvTranspose2d(
            input_channels, input_channels // 2, kernel_size=2, stride=2
        )
        self.conv = double_convolution(
            input_channels=input_channels,
            mid_channels=output_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     Tensor to be upsampled.
    x2             : torch.tensor
                     Skip tensor concatenated with the upsampled `x1`.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """
    upsampled = self.up(x1)
    height_gap = x2.size(2) - upsampled.size(2)
    width_gap = x2.size(3) - upsampled.size(3)
    # torch.nn.functional.pad takes [left, right, top, bottom].
    upsampled = torch.nn.functional.pad(
        upsampled,
        [
            width_gap // 2,
            width_gap - width_gap // 2,
            height_gap // 2,
            height_gap - height_gap // 2,
        ],
    )
    result = self.conv(torch.cat([x2, upsampled], dim=1))
    return result

gaussian(x, multiplier=1.0)

A Gaussian non-linear activation. For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

Parameters:

  • x
           Input data.
    
  • multiplier
           Multiplier.
    

Returns:

  • result ( float or tensor ) –

    Output data.

Source code in odak/learn/models/components.py
def gaussian(x, multiplier=1.0):
    """
    A Gaussian non-linear activation.
    For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

    Parameters
    ----------
    x            : float or torch.tensor
                   Input data.
    multiplier   : float or torch.tensor
                   Scale factor applied to the input before squaring.

    Returns
    -------
    result       : float or torch.tensor
                   Output data.
    """
    scaled = multiplier * x
    result = torch.exp(-(scaled ** 2))
    return result

swish(x)

A swish non-linear activation. For more details: https://en.wikipedia.org/wiki/Swish_function

Parameters:

  • x
             Input.
    

Returns:

  • out ( float or tensor ) –

    Output.

Source code in odak/learn/models/components.py
def swish(x):
    """
    A swish non-linear activation.
    For more details: https://en.wikipedia.org/wiki/Swish_function

    Parameters
    -----------
    x              : float or torch.tensor
                     Input.

    Returns
    -------
    out            : float or torch.tensor
                     Output.
    """
    gate = torch.sigmoid(x)
    out = x * gate
    return out