Skip to content

odak.learn.models

odak.learn.models

Provides necessary definitions for components used in machine learning and deep learning.

channel_gate

Bases: Module

Channel attention module with various pooling strategies. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class channel_gate(torch.nn.Module):
    """
    Channel attention module with various pooling strategies.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=None):
        """
        Initializes the channel gate module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the intermediate layer.
        pool_types      : list
                          List of pooling operations to apply ("avg" and/or "max").
                          Defaults to ["avg", "max"] when None.
        """
        super().__init__()
        if pool_types is None:
            # Avoid a shared mutable default argument across instances.
            pool_types = ["avg", "max"]
        self.gate_channels = gate_channels
        hidden_channels = gate_channels // reduction_ratio
        if hidden_channels == 0:
            # Guard against a zero-sized hidden layer when reduction_ratio > gate_channels.
            hidden_channels = 1
        self.mlp = torch.nn.Sequential(
            convolutional_block_attention.Flatten(),
            torch.nn.Linear(gate_channels, hidden_channels),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_channels, gate_channels),
        )
        self.pool_types = pool_types

    def forward(self, x):
        """
        Forward pass of the ChannelGate module.

        Applies channel-wise attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the ChannelGate module (a 4D NCHW tensor).

        Returns
        -------
        output       : torch.tensor
                       Output tensor after applying channel attention.

        Raises
        ------
        ValueError
                       If an unsupported pooling type is configured or pool_types is empty.
        """
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type == "avg":
                pool = torch.nn.functional.avg_pool2d(
                    x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3))
                )
            elif pool_type == "max":
                pool = torch.nn.functional.max_pool2d(
                    x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3))
                )
            else:
                # Previously an unknown pool type silently reused a stale pooling
                # result (or raised a confusing NameError); fail loudly instead.
                raise ValueError("Unsupported pool type: {}".format(pool_type))
            channel_att_raw = self.mlp(pool)
            channel_att_sum = (
                channel_att_raw
                if channel_att_sum is None
                else channel_att_sum + channel_att_raw
            )
        if channel_att_sum is None:
            # An empty pool_types list previously crashed inside torch.sigmoid(None).
            raise ValueError("pool_types must contain at least one pooling operation.")
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        output = x * scale
        return output

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'])

Initializes the channel gate module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the intermediate layer.
    
  • pool_types
              List of pooling operations to apply.
    
Source code in odak/learn/models/components.py
def __init__(self, gate_channels, reduction_ratio=16, pool_types=None):
    """
    Initializes the channel gate module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the intermediate layer.
    pool_types      : list
                      List of pooling operations to apply ("avg" and/or "max").
                      Defaults to ["avg", "max"] when None.
    """
    super().__init__()
    if pool_types is None:
        # Avoid a shared mutable default argument across instances.
        pool_types = ["avg", "max"]
    self.gate_channels = gate_channels
    hidden_channels = gate_channels // reduction_ratio
    if hidden_channels == 0:
        # Guard against a zero-sized hidden layer when reduction_ratio > gate_channels.
        hidden_channels = 1
    self.mlp = torch.nn.Sequential(
        convolutional_block_attention.Flatten(),
        torch.nn.Linear(gate_channels, hidden_channels),
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_channels, gate_channels),
    )
    self.pool_types = pool_types

forward(x)

Forward pass of the ChannelGate module.

Applies channel-wise attention to the input tensor.

Parameters:

  • x
           Input tensor to the ChannelGate module.
    

Returns:

  • output ( tensor ) –

    Output tensor after applying channel attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the ChannelGate module.

    Applies channel-wise attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the ChannelGate module (a 4D NCHW tensor).

    Returns
    -------
    output       : torch.tensor
                   Output tensor after applying channel attention.

    Raises
    ------
    ValueError
                   If an unsupported pooling type is configured or pool_types is empty.
    """
    channel_att_sum = None
    for pool_type in self.pool_types:
        if pool_type == "avg":
            pool = torch.nn.functional.avg_pool2d(
                x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3))
            )
        elif pool_type == "max":
            pool = torch.nn.functional.max_pool2d(
                x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3))
            )
        else:
            # Previously an unknown pool type silently reused a stale pooling
            # result (or raised a confusing NameError); fail loudly instead.
            raise ValueError("Unsupported pool type: {}".format(pool_type))
        channel_att_raw = self.mlp(pool)
        channel_att_sum = (
            channel_att_raw
            if channel_att_sum is None
            else channel_att_sum + channel_att_raw
        )
    if channel_att_sum is None:
        # An empty pool_types list previously crashed inside torch.sigmoid(None).
        raise ValueError("pool_types must contain at least one pooling operation.")
    scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
    output = x * scale
    return output

convolution_layer

Bases: Module

A convolution layer.

Source code in odak/learn/models/components.py
class convolution_layer(torch.nn.Module):
    """
    A convolution layer.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        bias=False,
        stride=1,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        stride          : int
                          Stride of the convolution.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None (or any falsy
                          value), no activation layer is added.
        """
        super().__init__()
        # Padding of kernel_size // 2 preserves spatial size for odd kernels at stride 1.
        layers = [
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=kernel_size // 2,
                bias=bias,
            )
        ]
        if normalization:
            layers.append(torch.nn.BatchNorm2d(output_channels))
        if activation:
            layers.append(activation)
        self.model = torch.nn.Sequential(*layers)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, output_channels=2, kernel_size=3, bias=False, stride=1, normalization=False, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    bias=False,
    stride=1,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    stride          : int
                      Stride of the convolution.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None (or any falsy
                      value), no activation layer is added.
    """
    super().__init__()
    # Padding of kernel_size // 2 preserves spatial size for odd kernels at stride 1.
    layers = [
        torch.nn.Conv2d(
            input_channels,
            output_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            bias=bias,
        )
    ]
    if normalization:
        layers.append(torch.nn.BatchNorm2d(output_channels))
    if activation:
        layers.append(activation)
    self.model = torch.nn.Sequential(*layers)

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Propagate the input through the stored sequential model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    return self.model(x)

convolutional_block_attention

Bases: Module

Convolutional Block Attention Module (CBAM) class. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class convolutional_block_attention(torch.nn.Module):
    """
    Convolutional Block Attention Module (CBAM) class.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """

    def __init__(
        self,
        gate_channels,
        reduction_ratio=16,
        pool_types=None,
        no_spatial=False,
    ):
        """
        Initializes the convolutional block attention module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the channel attention.
        pool_types      : list
                          List of pooling operations to apply for channel attention.
                          Defaults to ["avg", "max"] when None.
        no_spatial      : bool
                          If True, spatial attention is not applied.
        """
        super(convolutional_block_attention, self).__init__()
        if pool_types is None:
            # Avoid a shared mutable default argument across instances.
            pool_types = ["avg", "max"]
        self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.spatial_gate = spatial_gate()

    class Flatten(torch.nn.Module):
        """
        Flattens the input tensor to a 2D matrix.
        """

        def forward(self, x):
            return x.view(x.size(0), -1)

    def forward(self, x):
        """
        Forward pass of the convolutional block attention module.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the CBAM module.

        Returns
        -------
        x_out        : torch.tensor
                       Output tensor after applying channel and spatial attention.
        """
        x_out = self.channel_gate(x)
        if not self.no_spatial:
            x_out = self.spatial_gate(x_out)
        return x_out

Flatten

Bases: Module

Flattens the input tensor to a 2D matrix.

Source code in odak/learn/models/components.py
class Flatten(torch.nn.Module):
    """
    Collapse every non-batch dimension of the input into one, yielding a 2D tensor.
    """

    def forward(self, x):
        batch_size = x.size(0)
        return x.view(batch_size, -1)

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False)

Initializes the convolutional block attention module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the channel attention.
    
  • pool_types
              List of pooling operations to apply for channel attention.
    
  • no_spatial
              If True, spatial attention is not applied.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    gate_channels,
    reduction_ratio=16,
    pool_types=None,
    no_spatial=False,
):
    """
    Initializes the convolutional block attention module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the channel attention.
    pool_types      : list
                      List of pooling operations to apply for channel attention.
                      Defaults to ["avg", "max"] when None.
    no_spatial      : bool
                      If True, spatial attention is not applied.
    """
    super(convolutional_block_attention, self).__init__()
    if pool_types is None:
        # Avoid a shared mutable default argument across instances.
        pool_types = ["avg", "max"]
    self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
    self.no_spatial = no_spatial
    if not no_spatial:
        self.spatial_gate = spatial_gate()

forward(x)

Forward pass of the convolutional block attention module.

Parameters:

  • x
           Input tensor to the CBAM module.
    

Returns:

  • x_out ( tensor ) –

    Output tensor after applying channel and spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Apply channel attention, then (unless disabled) spatial attention.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the CBAM module.

    Returns
    -------
    x_out        : torch.tensor
                   Output tensor after applying channel and spatial attention.
    """
    x_out = self.channel_gate(x)
    if self.no_spatial:
        return x_out
    return self.spatial_gate(x_out)

double_convolution

Bases: Module

A double convolution layer.

Source code in odak/learn/models/components.py
class double_convolution(torch.nn.Module):
    """
    A double convolution layer.
    """

    def __init__(
        self,
        input_channels=2,
        mid_channels=None,
        output_channels=2,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        Double convolution model.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of channels in the hidden layer between two convolutions.
                          Defaults to output_channels when None.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, no activation
                          layer is added by the underlying convolution layers.
        """
        super().__init__()
        if mid_channels is None:
            # Idiomatic None check (was isinstance(mid_channels, type(None))).
            mid_channels = output_channels
        self.activation = activation
        self.model = torch.nn.Sequential(
            convolution_layer(
                input_channels=input_channels,
                output_channels=mid_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=self.activation,
            ),
            convolution_layer(
                input_channels=mid_channels,
                output_channels=output_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=self.activation,
            ),
        )

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, mid_channels=None, output_channels=2, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU())

Double convolution model.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of channels in the hidden layer between two convolutions.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    mid_channels=None,
    output_channels=2,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    Double convolution model.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of channels in the hidden layer between two convolutions.
                      Defaults to output_channels when None.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, no activation
                      layer is added by the underlying convolution layers.
    """
    super().__init__()
    if mid_channels is None:
        # Idiomatic None check (was isinstance(mid_channels, type(None))).
        mid_channels = output_channels
    self.activation = activation
    self.model = torch.nn.Sequential(
        convolution_layer(
            input_channels=input_channels,
            output_channels=mid_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=self.activation,
        ),
        convolution_layer(
            input_channels=mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=self.activation,
        ),
    )

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Run the input through both convolution layers in sequence.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    return self.model(x)

downsample_layer

Bases: Module

A downscaling component followed by a double convolution.

Source code in odak/learn/models/components.py
class downsample_layer(torch.nn.Module):
    """
    A downscaling component followed by a double convolution.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A downscaling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used.
        """
        super().__init__()
        # Halve the spatial resolution first, then refine features.
        pooling = torch.nn.MaxPool2d(2)
        convolutions = double_convolution(
            input_channels=input_channels,
            mid_channels=output_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.maxpool_conv = torch.nn.Sequential(pooling, convolutions)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x              : torch.tensor
                         First input data.



        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        return self.maxpool_conv(x)

__init__(input_channels, output_channels, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU())

A downscaling component with a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A downscaling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used.
    """
    super().__init__()
    # Halve the spatial resolution first, then refine features.
    pooling = torch.nn.MaxPool2d(2)
    convolutions = double_convolution(
        input_channels=input_channels,
        mid_channels=output_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.maxpool_conv = torch.nn.Sequential(pooling, convolutions)

forward(x)

Forward model.

Parameters:

  • x
             First input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Downscale the input and apply the double convolution.

    Parameters
    ----------
    x              : torch.tensor
                     First input data.



    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    return self.maxpool_conv(x)

gaussian_2d

Bases: Module

2D Gaussian model for learning image representations using 2D Gaussian primitives.

This model represents an image as a weighted sum of 2D Gaussians, each defined by: - widths (std_x, std_y): Standard deviations along x and y axes - offsets (offset_x, offset_y): Center positions in normalized coordinates - rotations: Rotation angles for each Gaussian - alphas: Opacity/weight coefficients

Parameters:

  • number_of_elements (int, default: 10 ) –
                Number of 2D Gaussian elements to use. Default is 10.
    

Attributes:

  • widths ((Parameter, shape(2, 1, N))) –

    Standard deviations for x and y dimensions.

  • offsets ((Parameter, shape(2, 1, N))) –

    Center offsets in x and y directions.

  • rotations ((Parameter, shape(1, N))) –

    Rotation angles in radians for each Gaussian.

  • alphas ((Parameter, shape(1, N))) –

    Opacity/weight coefficients blended with tanh activation.

Examples:

>>> model = gaussian_2d(number_of_elements=50)
>>> x = torch.linspace(-1, 1, 256)
>>> y = torch.linspace(-1, 1, 256)
>>> X, Y = torch.meshgrid(x, y, indexing='ij')
>>> output = model(X, Y)
Notes
  • All parameters are initialized on CPU by default. For GPU acceleration, call .to(device) after initializing this model.
  • Input coordinates x and y should typically be normalized to [-1, 1].
  • Output is the sum of weighted Gaussians passed through tanh().
Source code in odak/learn/models/gaussians.py
class gaussian_2d(torch.nn.Module):
    """
    2D Gaussian model for learning image representations using 2D Gaussian primitives.

    This model represents an image with a set of 2D Gaussians, each defined by:
    - widths (std_x, std_y): Standard deviations along x and y axes
    - offsets (offset_x, offset_y): Center positions in normalized coordinates
    - rotations: Rotation angles for each Gaussian
    - alphas: Opacity/weight coefficients

    Parameters
    ----------
    number_of_elements : int, optional
                        Number of 2D Gaussian elements to use. Default is 10.

    Attributes
    ----------
    widths      : torch.nn.Parameter, shape (2, 1, N)
                  Standard deviations for x and y dimensions.
    offsets     : torch.nn.Parameter, shape (2, 1, N)
                  Center offsets in x and y directions.
    rotations   : torch.nn.Parameter, shape (1, N)
                  Rotation angles in radians for each Gaussian.
    alphas      : torch.nn.Parameter, shape (1, N)
                  Opacity/weight coefficients blended with tanh activation.

    Examples
    --------
    >>> model = gaussian_2d(number_of_elements=50)
    >>> x = torch.linspace(-1, 1, 256)
    >>> y = torch.linspace(-1, 1, 256)
    >>> X, Y = torch.meshgrid(x, y, indexing='ij')
    >>> output = model(X, Y)

    Notes
    -----
    - All parameters are initialized on CPU by default. For GPU acceleration,
      call .to(device) after initializing this model.
    - Input coordinates x and y should typically be normalized to [-1, 1].
    - The forward pass returns each Gaussian's tanh-activated contribution along
      the trailing dimension; sum over that dimension to composite a single image.
    """

    def __init__(self, number_of_elements=10):
        """
        Initialize the 2D Gaussian model.

        Parameters
        ----------
        number_of_elements : int
                            Number of Gaussian elements (default: 10).

        Raises
        ------
        ValueError
                            If number_of_elements is not a positive integer.
        """
        super(gaussian_2d, self).__init__()

        if not isinstance(number_of_elements, int) or number_of_elements <= 0:
            # Report the offending value, not just its type, so that e.g. -5
            # produces an actionable message instead of just "got int".
            raise ValueError(
                "number_of_elements must be a positive integer, got {!r} of type {}".format(
                    number_of_elements, type(number_of_elements).__name__
                )
            )

        self.number_of_elements = number_of_elements

        # Learnable parameters: widths/offsets are (2, 1, N); rotations/alphas are (1, N).
        self.widths = torch.nn.Parameter(torch.rand(2, 1, self.number_of_elements))
        self.offsets = torch.nn.Parameter(
            torch.randn(2, 1, self.number_of_elements)
        )
        self.rotations = torch.nn.Parameter(torch.randn(1, self.number_of_elements))
        self.alphas = torch.nn.Parameter(torch.randn(1, self.number_of_elements))

        # Re-sample parameters into sensible default ranges.
        self.initialize_parameters_uniformly()

    def initialize_parameters_uniformly(self, ranges=None):
        """
        Initialize parameters using uniform-like distributions within specified ranges.

        Despite the name, this method re-samples the model parameters from normal
        distributions whose mean and standard deviation are derived from the
        provided ranges. For a range [a, b], it uses:
            mean = (a + b) / 2
            std  = (b - a) / 4

        Parameters
        ----------
        ranges : dict or None, optional
                Dictionary specifying custom initialization ranges. Keys can include:
                - 'widths': tuple of (min, max) for Gaussian widths
                - 'offsets': tuple of (min, max) for center offsets
                - 'rotations': tuple of (min, max) for rotation angles in radians
                - 'alphas': tuple of (min, max) for opacity values

                If None, default ranges are used:
                {
                    "widths": (0.1, 0.5),
                    "offsets": (-1.0, 1.0),
                    "rotations": (0.0, 2*pi),
                    "alphas": (0.1, 0.2)
                }

        Notes
        -----
        - Uses torch.no_grad() to avoid tracking gradients during initialization.
        - Parameters are initialized in-place using normal_() method.
        """
        with torch.no_grad():
            default_ranges = {
                "widths": (0.1, 0.5),
                "offsets": (-1.0, 1.0),
                "rotations": (0.0, 2 * torch.pi),
                "alphas": (0.1, 0.2),
            }

            if ranges is None:
                ranges = default_ranges

            # Initialize widths (std_x and std_y).
            if "widths" in ranges:
                self.widths.normal_(
                    mean=(ranges["widths"][0] + ranges["widths"][1]) / 2,
                    std=(ranges["widths"][1] - ranges["widths"][0]) / 4,
                )
            else:
                self.widths.normal_(mean=0.3, std=0.1)

            # Initialize offsets (offset_x and offset_y).
            if "offsets" in ranges:
                self.offsets.normal_(
                    mean=(ranges["offsets"][0] + ranges["offsets"][1]) / 2,
                    std=(ranges["offsets"][1] - ranges["offsets"][0]) / 4,
                )
            else:
                self.offsets.normal_(mean=0.0, std=0.5)

            # Initialize rotations.
            if "rotations" in ranges:
                self.rotations.normal_(
                    mean=(ranges["rotations"][0] + ranges["rotations"][1]) / 2,
                    std=(ranges["rotations"][1] - ranges["rotations"][0]) / 4,
                )
            else:
                self.rotations.normal_(mean=torch.pi, std=torch.pi / 2)

            # Initialize alphas (opacity coefficients).
            if "alphas" in ranges:
                self.alphas.normal_(
                    mean=(ranges["alphas"][0] + ranges["alphas"][1]) / 2,
                    std=(ranges["alphas"][1] - ranges["alphas"][0]) / 4,
                )
            else:
                self.alphas.normal_(mean=0.15, std=0.05)

    def forward(self, x, y, residual=1e-6):
        """
        Forward pass: evaluate the 2D Gaussian model at given coordinates.

        Evaluates every 2D Gaussian at the input grid coordinates (x, y). Each
        Gaussian is rotated and translated according to its learned parameters.

        Parameters
        ----------
        x : torch.Tensor
            X-coordinates of the evaluation grid. Shape should broadcast with y.
        y : torch.Tensor
            Y-coordinates of the evaluation grid. Shape should broadcast with x.
        residual : float, optional
                   Small constant added to the width magnitudes to avoid division
                   by zero (default: 1e-6).

        Returns
        -------
        results : torch.Tensor
                  The per-Gaussian contributions at the input coordinates, shaped
                  by broadcasting x, y with the parameter shapes (trailing dim N).
                  Values are alphas * exp(-r) passed through tanh().

        Notes
        -----
        - Coordinates are first rotated using learned rotation angles, then
          translated by learned offsets for each Gaussian.
        - Supports multiple input shapes via PyTorch broadcasting: grid inputs
          (H, W) broadcast to (H, W, N); flattened inputs (N, 1) broadcast
          directly with the parameters.
        """
        # Rotate coordinates according to each Gaussian's rotation angle.
        cos_rot = torch.cos(self.rotations)  # Shape: (1, N)
        sin_rot = torch.sin(self.rotations)  # Shape: (1, N)
        x_r = x * cos_rot - y * sin_rot
        y_r = x * sin_rot + y * cos_rot

        # Translate by learned offsets (broadcasts from (2, 1, N) to input shape x (N,)).
        x_n = x_r + self.offsets[0]
        y_n = y_r + self.offsets[1]

        # The widths are squared below, so only their magnitude matters; adding
        # `residual` to the absolute value guards the division against zero-width
        # Gaussians. (Previously `residual` was accepted but never used.)
        std_x = self.widths[0].abs() + residual
        std_y = self.widths[1].abs() + residual
        r = (x_n / std_x) ** 2 + (y_n / std_y) ** 2
        gaussians = torch.exp(-r)

        # Apply alpha weights and tanh activation per Gaussian element.
        results = self.alphas * gaussians
        results = torch.tanh(results)

        return results

__init__(number_of_elements=10)

Initialize the 2D Gaussian model.

Parameters:

  • number_of_elements (int, default: 10 ) –
                Number of Gaussian elements (default: 10).
    
Source code in odak/learn/models/gaussians.py
def __init__(self, number_of_elements=10):
    """
    Initialize the 2D Gaussian model.

    Parameters
    ----------
    number_of_elements : int
                        Number of Gaussian elements (default: 10).
    """
    super(gaussian_2d, self).__init__()

    if not isinstance(number_of_elements, int) or number_of_elements <= 0:
        raise ValueError(
            "number_of_elements must be a positive integer, got {}".format(
                type(number_of_elements).__name__
            )
        )

    self.number_of_elements = number_of_elements

    # Initialize parameters as learnable tensors
    self.widths = torch.nn.Parameter(torch.rand(2, 1, self.number_of_elements))
    self.offsets = torch.nn.Parameter(
        torch.randn(2, 1, self.number_of_elements)
    )
    self.rotations = torch.nn.Parameter(torch.randn(1, self.number_of_elements))
    self.alphas = torch.nn.Parameter(torch.randn(1, self.number_of_elements))

    # Apply uniform initialization
    self.initialize_parameters_uniformly()

forward(x, y, residual=1e-06)

Forward pass: evaluate the 2D Gaussian model at given coordinates.

Computes a weighted sum of 2D Gaussians evaluated at the input grid
coordinates (x, y). Each Gaussian is rotated and translated according
to its learned parameters.
Parameters
x : torch.Tensor
    X-coordinates of the evaluation grid. Shape should broadcast with y.
y : torch.Tensor
    Y-coordinates of the evaluation grid. Shape should broadcast with x.
residual : float, optional
           Small constant to avoid numerical issues (default: 1e-6).
Returns
results : torch.Tensor
          The evaluated Gaussian field at input coordinates. The output
          shape is determined by broadcasting x, y with the parameter shapes.
          Values are passed through tanh() activation and multiplied by alphas.
Notes
- Coordinates are first rotated using learned rotation angles.
- Then translated by learned offsets for each Gaussian.
- The 2D Gaussian function is evaluated as exp(-(x^2 + y^2)) scaled by widths.
- Final output: tanh(alphas * gaussians), kept per element (no summation over elements is applied).
Notes
  • Supports multiple input shapes via PyTorch broadcasting
  • For grid inputs (H, W): automatically broadcasts to (H, W, N_elements)
  • For flattened inputs (N, 1): broadcasts directly with parameters
Source code in odak/learn/models/gaussians.py
def forward(self, x, y, residual=1e-6):
    """
    Forward pass: evaluate the 2D Gaussian model at given coordinates.

    Computes a weighted sum of 2D Gaussians evaluated at the input grid
    coordinates (x, y). Each Gaussian is rotated and translated according
    to its learned parameters.

    Parameters
    ----------
    x : torch.Tensor
        X-coordinates of the evaluation grid. Shape should broadcast with y.
    y : torch.Tensor
        Y-coordinates of the evaluation grid. Shape should broadcast with x.
    residual : float, optional
               Small constant to avoid numerical issues (default: 1e-6).

    Returns
    -------
    results : torch.Tensor
              The evaluated Gaussian field at input coordinates. The output
              shape is determined by broadcasting x, y with the parameter shapes.
              Values are passed through tanh() activation and multiplied by alphas.

    Notes
    -----
    - Coordinates are first rotated using learned rotation angles.
    - Then translated by learned offsets for each Gaussian.
    - The 2D Gaussian function is evaluated as exp(-(x^2 + y^2)) scaled by widths.
    - Final output: tanh(alphas * gaussians) summed over all elements.

Notes
-----
- Supports multiple input shapes via PyTorch broadcasting
- For grid inputs (H, W): automatically broadcasts to (H, W, N_elements)
- For flattened inputs (N, 1): broadcasts directly with parameters
"""
    # PyTorch broadcasting handles shape alignment automatically
    # Input shapes: x, y can be (H, W), (H*W,), or (-1, 1)
    # Parameters are stored as (2, 1, N) for offsets/widths and (1, N) for rotations/alphas

    # Rotate coordinates according to each Gaussian's rotation angle
    cos_rot = torch.cos(self.rotations)  # Shape: (1, N)
    sin_rot = torch.sin(self.rotations)  # Shape: (1, N)

    # Broadcasting: x (*), y (*) automatically expand with cos_rot/sin_rot
    x_r = x * cos_rot - y * sin_rot
    y_r = x * sin_rot + y * cos_rot

    # Translate by learned offsets (broadcasts from (2, 1, N) to input shape × (N,))
    x_n = x_r + self.offsets[0]  # Shape: (..., N)
    y_n = y_r + self.offsets[1]

    # Evaluate 2D Gaussian function with learned widths (standard deviations)
    r = (x_n / self.widths[0]) ** 2 + (y_n / self.widths[1]) ** 2
    gaussians = torch.exp(-r)

    # Apply alpha weights and tanh activation
    results = self.alphas * gaussians
    results = torch.tanh(results)

    return results

initialize_parameters_uniformly(ranges=None)

Initialize parameters using uniform-like distributions within specified ranges.

This method re-samples the model parameters from normal distributions whose mean and standard deviation are derived from the provided ranges. For a range [a, b], it uses: mean = (a + b) / 2 std = (b - a) / 4

Parameters:

  • ranges (dict or None, default: None ) –
    Dictionary specifying custom initialization ranges. Keys can include:
    - 'widths': tuple of (min, max) for Gaussian widths
    - 'offsets': tuple of (min, max) for center offsets
    - 'rotations': tuple of (min, max) for rotation angles in radians
    - 'alphas': tuple of (min, max) for opacity values
    
    If None, default ranges are used:
    {
        "widths": (0.1, 0.5),
        "offsets": (-1.0, 1.0),
        "rotations": (0.0, 2*pi),
        "alphas": (0.1, 0.2)
    }
    
Notes
  • Uses torch.no_grad() to avoid tracking gradients during initialization.
  • Parameters are initialized in-place using normal_() method.
Source code in odak/learn/models/gaussians.py
def initialize_parameters_uniformly(self, ranges=None):
    """
    Initialize parameters using normal distributions derived from ranges.

    This method re-samples the model parameters from normal distributions
    whose mean and standard deviation are derived from the provided ranges.
    For a range [a, b], it uses:
        mean = (a + b) / 2
        std  = (b - a) / 4

    Parameters
    ----------
    ranges : dict or None, optional
            Dictionary specifying custom initialization ranges. Keys can include:
            - 'widths': tuple of (min, max) for Gaussian widths
            - 'offsets': tuple of (min, max) for center offsets
            - 'rotations': tuple of (min, max) for rotation angles in radians
            - 'alphas': tuple of (min, max) for opacity values

            Missing keys fall back to the default range for that parameter,
            so partial dictionaries are allowed. The defaults are:
            {
                "widths": (0.1, 0.5),
                "offsets": (-1.0, 1.0),
                "rotations": (0.0, 2*pi),
                "alphas": (0.1, 0.2)
            }

    Notes
    -----
    - Uses torch.no_grad() to avoid tracking gradients during initialization.
    - Parameters are initialized in-place using normal_() method.
    - The previous implementation duplicated the defaults in per-key
      fallback branches, with an inconsistent std for alphas (0.05 instead
      of (0.2 - 0.1) / 4 = 0.025); merging user ranges over a single
      defaults table keeps all four parameters in sync.
    """
    default_ranges = {
        "widths": (0.1, 0.5),
        "offsets": (-1.0, 1.0),
        "rotations": (0.0, 2 * torch.pi),
        "alphas": (0.1, 0.2),
    }
    merged = dict(default_ranges)
    if ranges is not None:
        merged.update(ranges)
    with torch.no_grad():
        # Only iterate the known parameter names; unknown user keys are
        # ignored, matching the previous behavior.
        for name in default_ranges:
            low, high = merged[name]
            # mean/std chosen so roughly 95% of samples land in [low, high].
            getattr(self, name).normal_(mean=(low + high) / 2, std=(high - low) / 4)

gaussian_3d_volume

Bases: Module

Initialize the 3D Gaussian volume model. This model is useful for learning voxelized 3D volumes.

Parameters:

  • number_of_elements (int, default: 10 ) –
                 Number of Gaussian elements in the volume (default: 10).
    
  • initial_centers
                 Initial centers of the Gaussians (shape: [N, 3]). If not provided,
                 random initialization is used where N is `number_of_elements`.
    
  • initial_angles
                 Initial angles defining the orientation of each Gaussian. If not
                 provided, random initialization is used.
    
  • initial_scales
                 Initial scales controlling the spread (variance) of each Gaussian.
                 If not provided, random initialization is used.
    
  • initial_alphas
                 Initial alphas controlling the blending between Gaussians.
                 If not provided, random initialization is used.
    
Source code in odak/learn/models/gaussians.py
(Rendered line-number gutter for lines 399–824 of odak/learn/models/gaussians.py omitted — extraction artifact, the numbers carry no content.)
class gaussian_3d_volume(torch.nn.Module):
    """
    Initialize the 3D Gaussian volume model. This model is useful for learning voxelized 3D volumes.

    Parameters
    ----------
    number_of_elements : int
                         Number of Gaussian elements in the volume (default: 10).
    initial_centers    : torch.Tensor or None, optional
                         Initial centers of the Gaussians (shape: [N, 3]). If not provided,
                         random initialization is used where N is `number_of_elements`.
    initial_angles     : torch.Tensor or None, optional
                         Initial angles defining the orientation of each Gaussian. If not
                         provided, random initialization is used.
    initial_scales     : torch.Tensor or None, optional
                         Initial scales controlling the spread (variance) of each Gaussian.
                         If not provided, random initialization is used.
    initial_alphas     : torch.Tensor or None, optional
                         Initial alphas controlling the blending between Gaussians.
                         If not provided, random initialization is used.
    """

    def __init__(
        self,
        number_of_elements=10,
        initial_centers=None,
        initial_angles=None,
        initial_scales=None,
        initial_alphas=None,
    ):
        """
        Initialize the 3D Gaussian volume model.

        Parameters
        ----------
        number_of_elements : int
                            Number of Gaussian elements in the volume (default: 10).
        initial_centers    : torch.Tensor or None
                            Initial centers of the Gaussians (shape: [N, 3]).
        initial_angles     : torch.Tensor or None
                            Initial angles for orientation.
        initial_scales     : torch.Tensor or None
                            Initial scales for variance.
        initial_alphas     : torch.Tensor or None
                            Initial alphas for blending.

        Device Placement
        ----------------
        All parameters are initialized on CPU by default. For GPU acceleration,
        call .to(device) after initializing this model.
        Example:
            model = gaussian_3d_volume().cuda()  # or .to('cuda')
        """
        super(gaussian_3d_volume, self).__init__()
        self.number_of_elements = number_of_elements
        self.initialize_parameters(
            centers=initial_centers,
            angles=initial_angles,
            scales=initial_scales,
            alphas=initial_alphas,
        )
        self.l2_loss = torch.nn.MSELoss()
        self.l1_loss = torch.nn.L1Loss()

    def initialize_parameters(
        self,
        centers=None,
        angles=None,
        scales=None,
        alphas=None,
        device=torch.device("cpu"),
    ):
        """
        Initialize model parameters using PyTorch tensors.

        Parameters
        ----------
        centers : torch.Tensor, optional
                  If None (default), initializes as a tensor of shape
                  (number_of_elements, 3) with values sampled from standard normal distribution.
        angles  : torch.Tensor, optional
                  If None (default), initializes similarly to centers: shape (n,3).
        scales  : torch.Tensor, optional
                  If None (default), initializes as a tensor of shape
                  (number_of_elements, 3) with values uniformly distributed between 0 and 1.
        alphas  : torch.Tensor, optional
                  If None (default), initializes as a tensor of shape
                  (number_of_elements, 1) with values uniformly distributed between 0 and 1.
        device  : torch.device
                  Device to be used to define the parameters.
                  Make sure to pass the device you use with this model for proper manual parameter initialization.
        """
        if isinstance(centers, type(None)):
            centers = torch.randn(self.number_of_elements, 3, device=device)
        if isinstance(angles, type(None)):
            angles = torch.randn(self.number_of_elements, 3, device=device)
        if isinstance(scales, type(None)):
            scales = torch.rand(self.number_of_elements, 3, device=device)
        if isinstance(alphas, type(None)):
            alphas = torch.rand(self.number_of_elements, 1, device=device)
        self.centers = torch.nn.Parameter(centers)
        self.angles = torch.nn.Parameter(angles)
        self.scales = torch.nn.Parameter(scales)
        self.alphas = torch.nn.Parameter(alphas)

    def forward(self, points, test=False):
        """
        Forward pass: evaluate the 3D Gaussian volume at given points.

        Parameters
        ----------
        points            : torch.Tensor,  shape (N, 3)
                            Input points at which to evaluate the Gaussian volume, where each row is a 3D point.
        test              : bool, optional
                            If True, disables gradient computation (default: False).

        Returns
        -------
        total_intensities : torch.Tensor
                            Total intensities at the input points, weighted by alphas.
        """
        # A bare `torch.no_grad()` statement has no effect; a gradient
        # context keyed on `test` actually honors the documented contract.
        with torch.set_grad_enabled(not test):
            intensities = evaluate_3d_gaussians(
                points=points,
                centers=self.centers,
                scales=self.scales,
                angles=self.angles * 180,
                opacity=self.alphas,
            )
            total_intensities = torch.mean(intensities, dim=-1)
        return total_intensities

    def optimize(
        self,
        points,
        ground_truth,
        loss_weights,
        learning_rate=1e-2,
        number_of_epochs=10,
        scheduler_power=1,
        save_at_every=1,
        max_norm=None,
        weights_filename=None,
    ):
        """
        Optimize model parameters using AdamW and a polynomial learning rate scheduler.

        Parameters
        ----------
        points           : torch.Tensor
                           Input data points for the model.
        ground_truth     : torch.Tensor
                           Ground truth values corresponding to the input points.
        loss_weights     : dict
                           Dictionary of weights for each loss component.
        learning_rate    : float, optional
                           Learning rate for the optimizer. Default is 1e-2.
        number_of_epochs : int, optional
                           Number of training epochs. Default is 10.
        scheduler_power  : float, optional
                           Power parameter for the polynomial learning rate scheduler. Default is 1.
        save_at_every    : int
                           Save model weights every `save_at_every` epochs. Default is 1.
        max_norm         : float, optional
                           By default it is None, when set clips the gradient with the given threshold.
        weights_filename : str, optional
                           Filename for saving model weights. If None, weights are not saved.

        Notes
        -----
        - Uses AdamW optimizer and PolynomialLR scheduler.
        - Logs loss at each epoch and saves weights periodically when a
          `weights_filename` is provided.
        """
        optimizer = torch.optim.AdamW(self.parameters(), lr=learning_rate)
        scheduler = torch.optim.lr_scheduler.PolynomialLR(
            optimizer,
            total_iters=number_of_epochs,
            power=scheduler_power,
            last_epoch=-1,
        )
        # Pre-set the description so the final log call cannot raise a
        # NameError when number_of_epochs == 0 and the loop never runs.
        description = "gaussian_3d_volume model loss: n/a"
        t_epoch = tqdm(range(number_of_epochs), leave=False, dynamic_ncols=True)
        for epoch_id in t_epoch:
            optimizer.zero_grad()
            estimates = self.forward(points)
            loss = self.evaluate(
                estimates,
                ground_truth,
                epoch_id=epoch_id,
                epoch_count=number_of_epochs,
                weights=loss_weights,
            )
            # The graph is rebuilt by forward() every epoch, so retaining
            # it across backward passes is unnecessary.
            loss.backward()
            if not isinstance(max_norm, type(None)):
                torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm)
            optimizer.step()
            scheduler.step()
            description = "gaussian_3d_volume model loss:{:.4f}".format(loss.item())
            t_epoch.set_description(description)
            # Only save when a filename is given; save_weights() would
            # otherwise fail while validating a None path.
            if (
                not isinstance(weights_filename, type(None))
                and epoch_id % save_at_every == save_at_every - 1
            ):
                self.save_weights(weights_filename)
        logger.info(description)

    def _utilization_gap(self, low_indices, high_indices):
        """
        Sum of absolute std/mean gaps between the least and most opaque
        Gaussians, taken over each axis of centers and scales and over alphas.
        """
        gap = 0.0
        for tensor in (self.centers, self.scales):
            for statistic in (torch.std, torch.mean):
                for axis in range(3):
                    gap = gap + torch.abs(
                        statistic(tensor[low_indices, axis])
                        - statistic(tensor[high_indices, axis])
                    )
        for statistic in (torch.mean, torch.std):
            gap = gap + torch.abs(
                statistic(self.alphas[low_indices])
                - statistic(self.alphas[high_indices])
            )
        return gap

    def evaluate(
        self,
        estimate,
        ground_truth,
        epoch_id=0,
        epoch_count=1,
        weights={
            "content": {
                "l2": 1e0,
                "l1": 0e-0,
            },
            "alpha": {"smaller": 0e-0, "larger": 0e-0, "threshold": [0.0, 1.0]},
            "scale": {
                "smaller": 0e-0,
                "larger": 0e-0,
                "threshold": [0.0, 1.0],
            },
            "angle": 0e-0,
            "center": 0e-0,
            "utilization": {"l2": 0e0, "percentile": 0},
        },
    ):
        """
        Compute the weighted training loss.

        Parameters
        ----------
        estimate     : torch.Tensor
                       Model's output estimate.
        ground_truth : torch.Tensor
                       Ground truth values.
        epoch_id     : int, optional
                       ID of the starting epoch. Default: 0.
        epoch_count  : int, optional
                       Total number of epochs for training. Default: 1.
        weights      : dict, optional
                       Dictionary containing weights for various loss components:
                       - content: {'l2': float, 'l1': float}
                       - scale: {'smaller': float, 'larger': float, 'threshold': List[float]}
                       - alpha: {'smaller': float, 'larger': float, 'threshold': List[float]}
                       - angle : float
                       - center: float
                       - utilization: {'l2': float, 'percentile': int}
                       The default dictionary is never mutated; it previously
                       contained a duplicate 'alpha' key whose float value
                       replaced the 'alpha' sub-dictionary, making every call
                       with the default weights fail with a TypeError.

        Returns
        -------
        loss         : torch.Tensor
                       Scalar loss combining the enabled components.
        """
        loss = 0.0
        if weights["content"]["l2"] != 0.0:
            loss_l2_content = self.l2_loss(estimate, ground_truth)
            loss += weights["content"]["l2"] * loss_l2_content
        if weights["content"]["l1"] != 0.0:
            loss_l1_content = self.l1_loss(estimate, ground_truth)
            loss += weights["content"]["l1"] * loss_l1_content
        # Penalize scales/alphas straying outside their threshold bands.
        if weights["scale"]["smaller"] != 0.0:
            threshold = weights["scale"]["threshold"][0]
            loss_scales_smaller = torch.sum(
                torch.abs(self.scales[self.scales < threshold])
            )
            loss += loss_scales_smaller * weights["scale"]["smaller"]
        if weights["scale"]["larger"] != 0.0:
            threshold = weights["scale"]["threshold"][1]
            loss_scales_larger = torch.sum(self.scales[self.scales > threshold])
            loss += loss_scales_larger * weights["scale"]["larger"]
        if weights["alpha"]["smaller"] != 0.0:
            threshold = weights["alpha"]["threshold"][0]
            loss_alphas_smaller = torch.sum(
                torch.abs(self.alphas[self.alphas < threshold])
            )
            loss += loss_alphas_smaller * weights["alpha"]["smaller"]
        if weights["alpha"]["larger"] != 0.0:
            threshold = weights["alpha"]["threshold"][1]
            loss_alphas_larger = torch.sum(self.alphas[self.alphas > threshold])
            loss += loss_alphas_larger * weights["alpha"]["larger"]
        # Keep angles within [-1, 1] and centers within the unit cube.
        if weights["angle"] != 0.0:
            loss_angle = torch.sum(self.angles[self.angles > 1.0]) + torch.sum(
                torch.abs(self.angles[self.angles < -1.0])
            )
            loss += weights["angle"] * loss_angle
        if weights["center"] != 0.0:
            centers = torch.abs(self.centers)
            loss_center = torch.sum(centers[centers > 1.0])
            loss += weights["center"] * loss_center
        if weights["utilization"]["l2"] != 0:
            # Compare statistics of the bottom vs. top `percentile` of
            # Gaussians by |alpha| to encourage all elements to be used.
            n = self.alphas.numel()
            k = int(weights["utilization"]["percentile"] / 100.0 * n)
            _, low_indices = torch.topk(torch.abs(self.alphas), k, dim=0, largest=False)
            _, high_indices = torch.topk(torch.abs(self.alphas), k, dim=0, largest=True)
            loss_utilization = self._utilization_gap(low_indices, high_indices)
            loss_distribution = (
                torch.std(self.centers[:, 0])
                + torch.std(self.centers[:, 1])
                + torch.std(self.centers[:, 2])
                + torch.std(self.scales[:, 0])
                + torch.std(self.scales[:, 1])
                + torch.std(self.scales[:, 2])
                + torch.std(self.alphas)
            )
            # Decay ramps the utilization penalty up as training progresses.
            decay = 1.0 - ((epoch_count - epoch_id) / epoch_count)
            loss += (
                decay
                * weights["utilization"]["l2"]
                * (loss_distribution + loss_utilization)
            )
        return loss

    def save_weights(self, weights_filename):
        """
        Save the model weights to a specified file.

        Parameters
        ----------
        weights_filename : str
                            Path or filename where the weights will be saved. The path can include
                            relative paths and tilde notation (~), which will be expanded by `validate_path`.

        Example
        -------
        # Save model weights to current directory with filename 'model_weights.pth'
        save_weights('model_weights.pth')

        # Save model weights to home directory using ~ notation
        save_weights('~/.weights.pth')

        Raises
        ------
        ValueError : If path validation fails or extension is not allowed.
        """
        safe_path = validate_path(
            weights_filename, allowed_extensions=[".pth", ".pt", ".bin"]
        )
        torch.save(self.state_dict(), safe_path)
        logger.info("gaussian_3d_volume model weights saved: {}".format(safe_path))

    def load_weights(self, weights_filename=None, device=torch.device("cpu")):
        """
        Load model weights from a file.

        Parameters
        ----------
        weights_filename : str
                            Path to the weights file. If None, no weights are loaded.
        device           : torch.device, optional
                            Device to load the weights onto (default: 'cpu').

        Raises
        ------
        ValueError       : If path validation fails or extension is not allowed.
        FileNotFoundError: If file does not exist after validation.

        Notes
        -----
        - If `weights_filename` is a valid file, the model state is updated and set to eval mode.
        - The file path is validated for security (tilde expanded, path traversal blocked).
        - A log message is emitted upon successful loading.
        """
        if not isinstance(weights_filename, type(None)):
            safe_path = validate_path(
                weights_filename, allowed_extensions=[".pth", ".pt", ".bin"]
            )
            if os.path.isfile(safe_path):
                self.load_state_dict(
                    torch.load(safe_path, weights_only=True, map_location=device)
                )
                self.eval()
                logger.info(
                    "gaussian_3d_volume model weights loaded: {}".format(safe_path)
                )
__init__(number_of_elements=10, initial_centers=None, initial_angles=None, initial_scales=None, initial_alphas=None)

Initialize the 3D Gaussian volume model.

Parameters:

  • number_of_elements (int, default: 10 ) –
                Number of Gaussian elements in the volume (default: 10).
    
  • initial_centers
                Initial centers of the Gaussians (shape: [N, 3]).
    
  • initial_angles
                Initial angles for orientation.
    
  • initial_scales
                Initial scales for variance.
    
  • initial_alphas
                Initial alphas for blending.
    
Device Placement

All parameters are initialized on CPU by default. For GPU acceleration, call .to(device) after initializing this model. Example: model = gaussian_3d_volume().cuda() # or .to('cuda')

Source code in odak/learn/models/gaussians.py
def __init__(
    self,
    number_of_elements=10,
    initial_centers=None,
    initial_angles=None,
    initial_scales=None,
    initial_alphas=None,
):
    """
    Initialize the 3D Gaussian volume model.

    Parameters
    ----------
    number_of_elements : int
                        Number of Gaussian elements in the volume (default: 10).
    initial_centers    : torch.Tensor or None, optional
                        Initial centers of the Gaussians (shape: [N, 3]).
    initial_angles     : torch.Tensor or None, optional
                        Initial angles for orientation.
    initial_scales     : torch.Tensor or None, optional
                        Initial scales for variance.
    initial_alphas     : torch.Tensor or None, optional
                        Initial alphas for blending.

    Device Placement
    ----------------
    All parameters are initialized on CPU by default. For GPU acceleration,
    call .to(device) after initializing this model.
    Example:
        model = gaussian_3d_volume().cuda()  # or .to('cuda')
    """
    super(gaussian_3d_volume, self).__init__()
    self.number_of_elements = number_of_elements
    # Delegate parameter creation; any None falls back to random init there.
    self.initialize_parameters(
        centers=initial_centers,
        angles=initial_angles,
        scales=initial_scales,
        alphas=initial_alphas,
    )
    # Loss modules reused by evaluate() for the content terms.
    self.l2_loss = torch.nn.MSELoss()
    self.l1_loss = torch.nn.L1Loss()

evaluate(estimate, ground_truth, epoch_id=0, epoch_count=1, weights={'content': {'l2': 1.0, 'l1': 0.0}, 'alpha': {'smaller': 0.0, 'larger': 0.0, 'threshold': [0.0, 1.0]}, 'scale': {'smaller': 0.0, 'larger': 0.0, 'threshold': [0.0, 1.0]}, 'angle': 0.0, 'center': 0.0, 'utilization': {'l2': 0.0, 'percentile': 0}})

Parameters:

  • estimate
           Model's output estimate.
    
  • ground_truth (Tensor) –
           Ground truth values.
    
  • epoch_id
           ID of the starting epoch. Default: 0.
    
  • epoch_count
           Total number of epochs for training. Default: 1.
    
  • weights
           Dictionary containing weights for various loss components:
           - content: {'l2': float, 'l1': float}
           - scale: {'smaller': float, 'larger': float, 'threshold': List[float]}
           - alpha: {'smaller': float, 'larger': float, 'threshold': List[float]}
           - angle : float
           - center: float
           - utilization: {'l2': float, 'percentile': int}
    
Source code in odak/learn/models/gaussians.py
def evaluate(
    self,
    estimate,
    ground_truth,
    epoch_id=0,
    epoch_count=1,
    weights=None,
):
    """
    Compute the weighted training loss between an estimate and ground truth.

    Parameters
    ----------
    estimate     : torch.Tensor
                   Model's output estimate.
    ground_truth : torch.Tensor
                   Ground truth values.
    epoch_id     : int, optional
                   ID of the starting epoch. Default: 0.
    epoch_count  : int, optional
                   Total number of epochs for training. Default: 1.
    weights      : dict, optional
                   Dictionary containing weights for various loss components:
                   - content: {'l2': float, 'l1': float}
                   - scale: {'smaller': float, 'larger': float, 'threshold': List[float]}
                   - alpha: {'smaller': float, 'larger': float, 'threshold': List[float]}
                   - angle : float
                   - center: float
                   - utilization: {'l2': float, 'percentile': int}
                   When None (default), only the L2 content loss is enabled.

    Returns
    -------
    loss         : torch.Tensor
                   Scalar loss combining all enabled components.
    """
    # Build the default weights lazily. The previous default declared the
    # "alpha" key twice: the later scalar entry overwrote the nested dict,
    # so `weights["alpha"]["smaller"]` raised TypeError with default
    # arguments. A None sentinel also avoids the mutable-default pitfall.
    if weights is None:
        weights = {
            "content": {"l2": 1e0, "l1": 0e0},
            "alpha": {"smaller": 0e0, "larger": 0e0, "threshold": [0.0, 1.0]},
            "scale": {"smaller": 0e0, "larger": 0e0, "threshold": [0.0, 1.0]},
            "angle": 0e0,
            "center": 0e0,
            "utilization": {"l2": 0e0, "percentile": 0},
        }
    loss = 0.0
    if weights["content"]["l2"] != 0.0:
        loss += weights["content"]["l2"] * self.l2_loss(estimate, ground_truth)
    if weights["content"]["l1"] != 0.0:
        loss += weights["content"]["l1"] * self.l1_loss(estimate, ground_truth)
    # Penalize scales falling outside the configured [low, high] thresholds.
    if weights["scale"]["smaller"] != 0.0:
        threshold = weights["scale"]["threshold"][0]
        loss += weights["scale"]["smaller"] * torch.sum(
            torch.abs(self.scales[self.scales < threshold])
        )
    if weights["scale"]["larger"] != 0.0:
        threshold = weights["scale"]["threshold"][1]
        loss += weights["scale"]["larger"] * torch.sum(
            self.scales[self.scales > threshold]
        )
    # Penalize alphas falling outside the configured [low, high] thresholds.
    if weights["alpha"]["smaller"] != 0.0:
        threshold = weights["alpha"]["threshold"][0]
        loss += weights["alpha"]["smaller"] * torch.sum(
            torch.abs(self.alphas[self.alphas < threshold])
        )
    if weights["alpha"]["larger"] != 0.0:
        threshold = weights["alpha"]["threshold"][1]
        loss += weights["alpha"]["larger"] * torch.sum(
            self.alphas[self.alphas > threshold]
        )
    # Keep angles within the normalized [-1, 1] range.
    if weights["angle"] != 0.0:
        loss_angle = torch.sum(self.angles[self.angles > 1.0]) + torch.sum(
            torch.abs(self.angles[self.angles < -1.0])
        )
        loss += weights["angle"] * loss_angle
    # Keep centers within the normalized [-1, 1] cube.
    if weights["center"] != 0.0:
        centers = torch.abs(self.centers)
        loss += weights["center"] * torch.sum(centers[centers > 1.0])
    # Encourage the least-opaque Gaussians to match the statistics of the
    # most-opaque ones, so every element stays utilized.
    if weights["utilization"]["l2"] != 0:
        n = self.alphas.numel()
        k = int(weights["utilization"]["percentile"] / 100.0 * n)
        if k > 0:  # k == 0 would select empty tensors and yield NaN stds.
            _, low_indices = torch.topk(
                torch.abs(self.alphas), k, dim=0, largest=False
            )
            _, high_indices = torch.topk(
                torch.abs(self.alphas), k, dim=0, largest=True
            )
            loss_utilization = torch.abs(
                torch.mean(self.alphas[low_indices])
                - torch.mean(self.alphas[high_indices])
            ) + torch.abs(
                torch.std(self.alphas[low_indices])
                - torch.std(self.alphas[high_indices])
            )
            loss_distribution = torch.std(self.alphas)
            # Compare mean/std of each coordinate of centers and scales
            # between the low-alpha and high-alpha populations.
            for tensor in (self.centers, self.scales):
                for dim in range(3):
                    loss_utilization = (
                        loss_utilization
                        + torch.abs(
                            torch.std(tensor[low_indices, dim])
                            - torch.std(tensor[high_indices, dim])
                        )
                        + torch.abs(
                            torch.mean(tensor[low_indices, dim])
                            - torch.mean(tensor[high_indices, dim])
                        )
                    )
                    loss_distribution = loss_distribution + torch.std(tensor[:, dim])
            # Ramp the utilization term in linearly over the training run.
            decay = 1.0 - ((epoch_count - epoch_id) / epoch_count)
            loss += (
                decay
                * weights["utilization"]["l2"]
                * (loss_distribution + loss_utilization)
            )
    return loss

forward(points, test=False)

Forward pass: evaluate the 3D Gaussian volume at given points.

Parameters:

  • points
                Input points at which to evaluate the Gaussian volume, where each row is a 3D point.
    
  • test
                If True, disables gradient computation (default: False).
    

Returns:

  • total_intensities ( Tensor ) –

    Total intensities at the input points, weighted by alphas.

Source code in odak/learn/models/gaussians.py
def forward(self, points, test=False):
    """
    Forward pass: evaluate the 3D Gaussian volume at given points.

    Parameters
    ----------
    points            : torch.Tensor,  shape (N, 3)
                        Input points at which to evaluate the Gaussian volume, where each row is a 3D point.
    test              : bool, optional
                        If True, disables gradient computation (default: False).

    Returns
    -------
    total_intensities : torch.Tensor
                        Total intensities at the input points, weighted by alphas.
    """
    import contextlib

    # Bug fix: a bare `torch.no_grad()` statement creates and immediately
    # discards the context manager without disabling gradients; it must be
    # entered with `with` to take effect.
    guard = torch.no_grad() if test else contextlib.nullcontext()
    with guard:
        intensities = evaluate_3d_gaussians(
            points=points,
            centers=self.centers,
            scales=self.scales,
            # Angles are stored in a normalized range; presumably the
            # scaling by 180 converts them to degrees -- TODO confirm.
            angles=self.angles * 180,
            opacity=self.alphas,
        )
        total_intensities = torch.mean(intensities, dim=-1)
    return total_intensities

initialize_parameters(centers=None, angles=None, scales=None, alphas=None, device=torch.device('cpu'))

Initialize model parameters using PyTorch tensors.

Parameters:

  • centers (Tensor, default: None ) –
      If None (default), initializes as a tensor of shape
      (number_of_elements, 3) with values sampled from standard normal distribution.
    
  • angles
      If None (default), initializes similarly to centers: shape (n,3).
    
  • scales
      If None (default), initializes as a tensor of shape
      (number_of_elements, 3) with values uniformly distributed between 0 and 1.
    
  • alphas
      If None (default), initializes as a tensor of shape
      (number_of_elements, 1) with values uniformly distributed between 0 and 1.
    
  • device
      Device to be used to define the parameters.
      Make sure to pass the device you use with this model for proper manual parameter initialization.
    
Source code in odak/learn/models/gaussians.py
def initialize_parameters(
    self,
    centers=None,
    angles=None,
    scales=None,
    alphas=None,
    device=torch.device("cpu"),
):
    """
    Initialize model parameters using PyTorch tensors.

    Parameters
    ----------
    centers : torch.Tensor, optional
              If None (default), initializes as a tensor of shape
              (number_of_elements, 3) with values sampled from standard normal distribution.
    angles  : torch.Tensor, optional
              If None (default), initializes similarly to centers: shape (n,3).
    scales  : torch.Tensor, optional
              If None (default), initializes as a tensor of shape
              (number_of_elements, 3) with values uniformly distributed between 0 and 1.
    alphas  : torch.Tensor, optional
              If None (default), initializes as a tensor of shape
              (number_of_elements, 1) with values uniformly distributed between 0 and 1.
    device  : torch.device
              Device to be used to define the parameters.
              Make sure to pass the device you use with this model for proper manual parameter initialization.
    """
    # `x is None` is the idiomatic form of `isinstance(x, type(None))`.
    if centers is None:
        centers = torch.randn(self.number_of_elements, 3, device=device)
    if angles is None:
        angles = torch.randn(self.number_of_elements, 3, device=device)
    if scales is None:
        scales = torch.rand(self.number_of_elements, 3, device=device)
    if alphas is None:
        alphas = torch.rand(self.number_of_elements, 1, device=device)
    # Register everything as trainable parameters on the module.
    self.centers = torch.nn.Parameter(centers)
    self.angles = torch.nn.Parameter(angles)
    self.scales = torch.nn.Parameter(scales)
    self.alphas = torch.nn.Parameter(alphas)

load_weights(weights_filename=None, device=torch.device('cpu'))

Load model weights from a file.

Parameters:

  • weights_filename (str, default: None ) –
                Path to the weights file. If None, no weights are loaded.
    
  • device
                Device to load the weights onto (default: 'cpu').
    

Raises:

  • ValueError : If path validation fails or extension is not allowed.
  • FileNotFoundError: If file does not exist after validation.
Notes
  • If weights_filename is a valid file, the model state is updated and set to eval mode.
  • The file path is validated for security (tilde expanded, path traversal blocked).
  • A log message is emitted upon successful loading.
Source code in odak/learn/models/gaussians.py
def load_weights(self, weights_filename=None, device=torch.device("cpu")):
    """
    Load model weights from a file.

    Parameters
    ----------
    weights_filename : str
                        Path to the weights file. If None, no weights are loaded.
    device           : torch.device, optional
                        Device to load the weights onto (default: 'cpu').

    Raises
    ------
    ValueError       : If path validation fails or extension is not allowed.
    FileNotFoundError: If file does not exist after validation.

    Notes
    -----
    - If `weights_filename` is a valid file, the model state is updated and set to eval mode.
    - The file path is validated for security (tilde expanded, path traversal blocked).
    - A log message is emitted upon successful loading.
    """
    if weights_filename is None:
        return
    safe_path = validate_path(
        weights_filename, allowed_extensions=[".pth", ".pt", ".bin"]
    )
    # Bug fix: the documented FileNotFoundError was never raised; a missing
    # file was silently ignored. Raising matches the docstring and the
    # behavior of gaussians_2d.load_weights.
    if not os.path.isfile(safe_path):
        raise FileNotFoundError("Weights file not found: {}".format(safe_path))
    self.load_state_dict(
        torch.load(safe_path, weights_only=True, map_location=device)
    )
    self.eval()
    logger.info(
        "gaussian_3d_volume model weights loaded: {}".format(safe_path)
    )

optimize(points, ground_truth, loss_weights, learning_rate=0.01, number_of_epochs=10, scheduler_power=1, save_at_every=1, max_norm=None, weights_filename=None)

Optimize model parameters using AdamW and a polynomial learning rate scheduler.

Parameters:

  • points
               Input data points for the model.
    
  • ground_truth
               Ground truth values corresponding to the input points.
    
  • loss_weights
               Dictionary of weights for each loss component.
    
  • learning_rate
               Learning rate for the optimizer. Default is 1e-2.
    
  • number_of_epochs (int, default: 10 ) –
               Number of training epochs. Default is 10.
    
  • scheduler_power
               Power parameter for the polynomial learning rate scheduler. Default is 1.
    
  • save_at_every
               Save model weights every `save_at_every` epochs. Default is 1.
    
  • max_norm
               By default it is None, when set clips the gradient with the given threshold.
    
  • weights_filename (str, default: None ) –
               Filename for saving model weights. If None, weights are not saved.
    
Notes
  • Uses AdamW optimizer and PolynomialLR scheduler.
  • Logs loss at each epoch and saves weights periodically.
Source code in odak/learn/models/gaussians.py
def optimize(
    self,
    points,
    ground_truth,
    loss_weights,
    learning_rate=1e-2,
    number_of_epochs=10,
    scheduler_power=1,
    save_at_every=1,
    max_norm=None,
    weights_filename=None,
):
    """
    Optimize model parameters using AdamW and a polynomial learning rate scheduler.

    Parameters
    ----------
    points           : torch.Tensor
                       Input data points for the model.
    ground_truth     : torch.Tensor
                       Ground truth values corresponding to the input points.
    loss_weights     : dict
                       Dictionary of weights for each loss component.
    learning_rate    : float, optional
                       Learning rate for the optimizer. Default is 1e-2.
    number_of_epochs : int, optional
                       Number of training epochs. Default is 10.
    scheduler_power  : float, optional
                       Power parameter for the polynomial learning rate scheduler. Default is 1.
    save_at_every    : int
                       Save model weights every `save_at_every` epochs. Default is 1.
    max_norm         : float, optional
                       By default it is None, when set clips the gradient with the given threshold.
    weights_filename : str, optional
                       Filename for saving model weights. If None, weights are not saved.

    Notes
    -----
    - Uses AdamW optimizer and PolynomialLR scheduler.
    - Logs loss at each epoch and saves weights periodically.
    """
    optimizer = torch.optim.AdamW(self.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.PolynomialLR(
        optimizer,
        total_iters=number_of_epochs,
        power=scheduler_power,
        last_epoch=-1,
    )
    # Pre-seed the description so the final log line cannot raise NameError
    # when number_of_epochs == 0.
    description = "gaussian_3d_volume model loss: n/a"
    t_epoch = tqdm(range(number_of_epochs), leave=False, dynamic_ncols=True)
    for epoch_id in t_epoch:
        optimizer.zero_grad()
        estimates = self.forward(points)
        loss = self.evaluate(
            estimates,
            ground_truth,
            epoch_id=epoch_id,
            epoch_count=number_of_epochs,
            weights=loss_weights,
        )
        # retain_graph = True allows back-propagating through a graph that
        # may have been built outside this loop (e.g., `points` carrying
        # gradients from an upstream computation).
        loss.backward(retain_graph=True)
        if max_norm is not None:
            torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm)
        optimizer.step()
        scheduler.step()
        description = "gaussian_3d_volume model loss:{:.4f}".format(loss.item())
        t_epoch.set_description(description)
        # Bug fix: honor the documented contract that no weights are saved
        # when weights_filename is None (save_weights would fail on None).
        if weights_filename is not None and epoch_id % save_at_every == save_at_every - 1:
            self.save_weights(weights_filename)
    logger.info(description)

save_weights(weights_filename)

Save the model weights to a specified file.

Parameters:

  • weights_filename (str) –
                Path or filename where the weights will be saved. The path can include
                relative paths and tilde notation (~), which will be expanded by `validate_path`.
    
Example:

Save model weights to current directory with filename 'model_weights.pth'

save_weights('model_weights.pth')

Save model weights to home directory using ~ notation

save_weights('~/.weights.pth')

Raises:

  • ValueError : If path validation fails or extension is not allowed.
Source code in odak/learn/models/gaussians.py
def save_weights(self, weights_filename):
    """
    Persist the model state dictionary to disk.

    Parameters
    ----------
    weights_filename : str
                        Destination path for the weights. Relative paths and
                        tilde notation (~) are supported; both are expanded
                        and checked by `validate_path`.

    Example:
    --------
    # Save model weights to current directory with filename 'model_weights.pth'
    save_weights('model_weights.pth')

    # Save model weights to home directory using ~ notation
    save_weights('~/.weights.pth')

    Raises
    ------
    ValueError : If path validation fails or extension is not allowed.
    """
    permitted_extensions = [".pth", ".pt", ".bin"]
    destination = validate_path(
        weights_filename,
        allowed_extensions=permitted_extensions,
    )
    torch.save(self.state_dict(), destination)
    logger.info("gaussian_3d_volume model weights saved: {}".format(destination))

gaussians_2d

Bases: Module

Wrapper class for the 2D Gaussian model with loss computation and evaluation utilities.

This class wraps gaussian_2d and provides additional functionality: - Loss functions (L1, L2) pre-initialized - Weight saving/loading methods - Model parameter counting

Parameters:

  • number_of_elements (int, default: 10 ) –
                Number of 2D Gaussian primitives in the model (default: 10).
    
  • logger
                 Logger instance for tracking progress. If None, creates a new one.
    

Attributes:

  • model (gaussian_2d) –

    The underlying primitive Gaussian model.

  • l2_loss (MSELoss) –

    Mean squared error loss function.

  • l1_loss (L1Loss) –

    L1 absolute loss function.

  • logger (Logger) –

    Logger instance for info/debug messages.

Examples:

>>> model = gaussians_2d(number_of_elements=50)
>>> x = torch.linspace(-1, 1, 256)
>>> y = torch.linspace(-1, 1, 256)
>>> X, Y = torch.meshgrid(x, y, indexing='ij')
>>> output = model(X, Y, test=False)
Notes
  • The test flag in forward() controls gradient computation (not recommended use).
  • Use standard training loop with optimizer.zero_grad(), loss.backward(), optimizer.step().
Source code in odak/learn/models/gaussians.py
class gaussians_2d(torch.nn.Module):
    """
    Wrapper class for the 2D Gaussian model with loss computation and evaluation utilities.

    This class wraps `gaussian_2d` and adds:
    - Pre-initialized loss functions (L1, L2) on the underlying primitives
    - Weight saving/loading helpers
    - Trainable parameter counting

    Parameters
    ----------
    number_of_elements : int, optional
                        Number of 2D Gaussian primitives in the model (default: 10).
    logger             : logging.Logger or None, optional
                         Logger instance for tracking progress. If None, creates a new one.

    Attributes
    ----------
    model       : gaussian_2d
                  The underlying primitive Gaussian model.
    logger      : logging.Logger
                  Logger instance for info/debug messages.

    Examples
    --------
    >>> model = gaussians_2d(number_of_elements=50)
    >>> x = torch.linspace(-1, 1, 256)
    >>> y = torch.linspace(-1, 1, 256)
    >>> X, Y = torch.meshgrid(x, y, indexing='ij')
    >>> output = model(X, Y, test=False)

    Notes
    -----
    - The `test` flag in forward() controls gradient computation (not recommended use).
    - Use standard training loop with optimizer.zero_grad(), loss.backward(), optimizer.step().
    """

    def __init__(
        self,
        number_of_elements=10,
        logger=None,
    ):
        """
        Initialize the gaussians_2d wrapper model.

        Parameters
        ----------
        number_of_elements : int
                            Number of 2D Gaussian elements (default: 10).
        logger             : logging.Logger or None
                             Logger instance (default: creates new logger).
        """
        super(gaussians_2d, self).__init__()
        valid_count = isinstance(number_of_elements, int) and number_of_elements > 0
        if not valid_count:
            # Report the offending value for ints, the offending type otherwise.
            if isinstance(number_of_elements, int):
                detail = str(number_of_elements)
            else:
                detail = type(number_of_elements).__name__
            raise ValueError(
                "number_of_elements must be a positive integer, got {}".format(detail)
            )
        self.number_of_elements = number_of_elements
        self.model = gaussian_2d(number_of_elements=self.number_of_elements)
        # Total count of trainable parameters across all submodules.
        self.total_params = sum(
            parameter.numel()
            for parameter in self.parameters()
            if parameter.requires_grad
        )
        # Fall back to the package-wide logger when none is supplied.
        from ...log import logger as default_logger

        self.logger = default_logger if logger is None else logger

    def forward(
        self,
        x,
        y,
        test=False,
    ):
        """
        Forward pass through the Gaussian model.

        Parameters
        ----------
        x      : torch.Tensor
                 X-coordinates of evaluation grid.
        y      : torch.Tensor
                 Y-coordinates of evaluation grid.
        test   : bool, optional
                 If True, runs in no_grad mode (default: False).

        Returns
        -------
        result : torch.Tensor
                 The summed Gaussian field with shape matching x/y grids plus one dimension.

        Notes
        -----
        The `test` flag is deprecated. Use standard training pattern:
        ```python
        model.train()  # Enable gradients
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        ```
        """
        if test:
            with torch.no_grad():
                field = self.model(x=x, y=y)
        else:
            field = self.model(x=x, y=y)
        # Collapse the per-Gaussian axis, then append a singleton channel.
        summed = torch.sum(field, dim=-1)
        return summed.unsqueeze(-1)

    def save_weights(self, weights_filename):
        """
        Save model weights to a file.

        Parameters
        ----------
        weights_filename : str
                          Path to save weights (must end with .pt, .pth, or similar).
        """
        from ...tools.file import validate_path

        destination = validate_path(
            os.path.expanduser(weights_filename),
            allowed_extensions=[".pt", ".pth"],
        )
        torch.save(self.state_dict(), destination)
        self.logger.info("Model weights saved to: {}".format(destination))

    def load_weights(
        self,
        weights_filename=None,
        device=torch.device("cpu"),
    ):
        """
        Load model weights from a file.

        Parameters
        ----------
        weights_filename : str or None
                          Path to weights file. If None, skips loading.
        device           : torch.device, optional
                          Device to load weights onto (default: CPU).
        """
        if weights_filename is None:
            return
        from ...tools.file import validate_path

        source = validate_path(
            os.path.expanduser(weights_filename),
            allowed_extensions=[".pt", ".pth"],
        )
        if not os.path.isfile(source):
            raise FileNotFoundError("Weights file not found: {}".format(source))
        state = torch.load(source, weights_only=True, map_location=device)
        self.load_state_dict(state)
        self.eval()  # Switch to evaluation mode after loading.
        self.logger.info("Model weights loaded from: {}".format(source))

__init__(number_of_elements=10, logger=None)

Initialize the gaussians_2d wrapper model.

Parameters:

  • number_of_elements (int, default: 10 ) –
                Number of 2D Gaussian elements (default: 10).
    
  • logger
                 Logger instance (default: creates new logger).
    
Source code in odak/learn/models/gaussians.py
def __init__(
    self,
    number_of_elements=10,
    logger=None,
):
    """
    Initialize the gaussians_2d wrapper model.

    Parameters
    ----------
    number_of_elements : int
                        Number of 2D Gaussian elements (default: 10).
    logger             : logging.Logger or None
                         Logger instance (default: creates new logger).
    """
    super(gaussians_2d, self).__init__()
    valid_count = isinstance(number_of_elements, int) and number_of_elements > 0
    if not valid_count:
        # Report the offending value for ints, the offending type otherwise.
        if isinstance(number_of_elements, int):
            detail = str(number_of_elements)
        else:
            detail = type(number_of_elements).__name__
        raise ValueError(
            "number_of_elements must be a positive integer, got {}".format(detail)
        )
    self.number_of_elements = number_of_elements
    self.model = gaussian_2d(number_of_elements=self.number_of_elements)
    # Total count of trainable parameters across all submodules.
    self.total_params = sum(
        parameter.numel()
        for parameter in self.parameters()
        if parameter.requires_grad
    )
    # Fall back to the package-wide logger when none is supplied.
    from ...log import logger as default_logger

    self.logger = default_logger if logger is None else logger

forward(x, y, test=False)

Forward pass through the Gaussian model.

Parameters:

  • x
     X-coordinates of evaluation grid.
    
  • y
     Y-coordinates of evaluation grid.
    
  • test
     If True, runs in no_grad mode (default: False).
    

Returns:

  • result ( Tensor ) –

    The summed Gaussian field with shape matching x/y grids plus one dimension.

Notes

The test flag is deprecated. Use standard training pattern:

model.train()  # Enable gradients
loss = criterion(output, target)
loss.backward()
optimizer.step()

Source code in odak/learn/models/gaussians.py
def forward(
    self,
    x,
    y,
    test=False,
):
    """
    Forward pass through the Gaussian model.

    Parameters
    ----------
    x      : torch.Tensor
             X-coordinates of evaluation grid.
    y      : torch.Tensor
             Y-coordinates of evaluation grid.
    test   : bool, optional
             If True, runs in no_grad mode (default: False).

    Returns
    -------
    result : torch.Tensor
             The summed Gaussian field with shape matching x/y grids plus one dimension.

    Notes
    -----
    The `test` flag is deprecated. Use standard training pattern:
    ```python
    model.train()  # Enable gradients
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    ```
    """
    if test:
        with torch.no_grad():
            field = self.model(x=x, y=y)
    else:
        field = self.model(x=x, y=y)
    # Collapse the per-Gaussian axis, then append a singleton channel.
    summed = torch.sum(field, dim=-1)
    return summed.unsqueeze(-1)

load_weights(weights_filename=None, device=torch.device('cpu'))

Load model weights from a file.

Parameters:

  • weights_filename (str or None, default: None ) –
              Path to weights file. If None, skips loading.
    
  • device
              Device to load weights onto (default: CPU).
    
Source code in odak/learn/models/gaussians.py
def load_weights(
    self,
    weights_filename=None,
    device=torch.device("cpu"),
):
    """
    Load model weights from a file.

    Parameters
    ----------
    weights_filename : str or None
                      Path to weights file. If None, skips loading.
    device           : torch.device, optional
                      Device to load weights onto (default: CPU).
    """
    if weights_filename is None:
        return
    from ...tools.file import validate_path

    source = validate_path(
        os.path.expanduser(weights_filename),
        allowed_extensions=[".pt", ".pth"],
    )
    if not os.path.isfile(source):
        raise FileNotFoundError("Weights file not found: {}".format(source))
    state = torch.load(source, weights_only=True, map_location=device)
    self.load_state_dict(state)
    self.eval()  # Switch to evaluation mode after loading.
    self.logger.info("Model weights loaded from: {}".format(source))

save_weights(weights_filename)

Save model weights to a file.

Parameters:

  • weights_filename (str) –
              Path to save weights (must end with .pt, .pth, or similar).
    
Source code in odak/learn/models/gaussians.py
def save_weights(self, weights_filename):
    """
    Save model weights to a file.

    Parameters
    ----------
    weights_filename : str
                      Path to save weights (must end with .pt, .pth, or similar).
    """
    from ...tools.file import validate_path

    destination = validate_path(
        os.path.expanduser(weights_filename),
        allowed_extensions=[".pt", ".pth"],
    )
    torch.save(self.state_dict(), destination)
    self.logger.info("Model weights saved to: {}".format(destination))

global_feature_module

Bases: Module

A global feature layer that processes global features from input channels and applies them to another input tensor via learned transformations.

Source code in odak/learn/models/components.py
class global_feature_module(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies them to another input tensor via learned transformations.
    """

    def __init__(
        self,
        input_channels,
        mid_channels,
        output_channels,
        kernel_size,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels  : int
                          Number of mid channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        # Both double-convolution branches share one configuration.
        convolution_options = {
            "input_channels": input_channels,
            "mid_channels": mid_channels,
            "output_channels": output_channels,
            "kernel_size": kernel_size,
            "bias": bias,
            "normalization": normalization,
            "activation": activation,
        }
        self.transformations_1 = global_transformations(input_channels, output_channels)
        self.global_features_1 = double_convolution(**convolution_options)
        self.global_features_2 = double_convolution(**convolution_options)
        self.transformations_2 = global_transformations(input_channels, output_channels)

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        merged = self.transformations_1(x1, x2)
        features_first = self.global_features_1(merged)
        features_second = self.global_features_2(features_first)
        result = self.transformations_2(features_first, features_second)
        return result

__init__(input_channels, mid_channels, output_channels, kernel_size, bias=False, normalization=False, activation=torch.nn.ReLU())

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of mid channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    mid_channels,
    output_channels,
    kernel_size,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels  : int
                      Number of mid channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    # Global modulation stages placed before and after the two convolutional
    # refinement stages (see forward()).
    self.transformations_1 = global_transformations(input_channels, output_channels)
    self.global_features_1 = double_convolution(
        input_channels=input_channels,
        mid_channels=mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.global_features_2 = double_convolution(
        input_channels=input_channels,
        mid_channels=mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.transformations_2 = global_transformations(input_channels, output_channels)

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    # Globally modulate the inputs, refine with two double convolutions, then
    # apply a second global modulation between intermediate and refined maps.
    global_tensor_1 = self.transformations_1(x1, x2)
    y1 = self.global_features_1(global_tensor_1)
    y2 = self.global_features_2(y1)
    global_tensor_2 = self.transformations_2(y1, y2)
    return global_tensor_2

global_transformations

Bases: Module

A global feature layer that processes global features from input channels and applies learned transformations to another input tensor.

This implementation is adapted from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Reference: J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/components.py
class global_transformations(torch.nn.Module):
    """
    A global feature layer that derives per-channel scale and shift terms
    from one input tensor and uses them to modulate another input tensor.

    This implementation is adapted from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Reference:
    J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."
    """

    def __init__(self, input_channels, output_channels):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        """
        super().__init__()
        # First stage maps spatially pooled channel statistics to a
        # per-channel scale; the second derives a per-channel shift from it.
        self.global_feature_1 = torch.nn.Sequential(
            torch.nn.Linear(input_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace=True),
        )
        self.global_feature_2 = torch.nn.Sequential(
            torch.nn.Linear(output_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace=True),
        )

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         Tensor to be modulated.
        x2             : torch.tensor
                         Tensor supplying the global channel statistics.

        Returns
        ----------
        result         : torch.tensor
                         Estimated output (x1 scaled and shifted per channel).
        """
        # Global average pool over the spatial axes of x2.
        pooled = x2.mean(dim=(2, 3))
        scale = self.global_feature_1(pooled)
        shift = self.global_feature_2(scale)
        # Restore singleton spatial dimensions so both terms broadcast over x1.
        scale = scale[:, :, None, None]
        shift = shift[:, :, None, None]
        result = x1 * scale + shift
        return result

__init__(input_channels, output_channels)

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
Source code in odak/learn/models/components.py
def __init__(self, input_channels, output_channels):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    """
    super().__init__()
    # First stage maps pooled channel statistics to a per-channel scale.
    self.global_feature_1 = torch.nn.Sequential(
        torch.nn.Linear(input_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace=True),
    )
    # Second stage derives a per-channel shift from the scale features.
    self.global_feature_2 = torch.nn.Sequential(
        torch.nn.Linear(output_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace=True),
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    # Global average pool x2 over its spatial axes into per-channel stats.
    y = torch.mean(x2, dim=(2, 3))
    y1 = self.global_feature_1(y)
    y2 = self.global_feature_2(y1)
    # Restore singleton spatial dimensions so the terms broadcast over x1.
    y1 = y1.unsqueeze(2).unsqueeze(3)
    y2 = y2.unsqueeze(2).unsqueeze(3)
    # y1 acts as a per-channel scale and y2 as a per-channel shift.
    result = x1 * y1 + y2
    return result

multi_layer_perceptron

Bases: Module

A multi-layer perceptron model.

Source code in odak/learn/models/models.py
class multi_layer_perceptron(torch.nn.Module):
    """
    A multi-layer perceptron model.
    """

    def __init__(
        self,
        dimensions,
        activation=torch.nn.ReLU(),
        bias=False,
        model_type="conventional",
        siren_multiplier=1.0,
        input_multiplier=None,
    ):
        """
        Initialize the multi-layer perceptron.

        Parameters
        ----------
        dimensions : list of int
            List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel).
        activation : torch.nn.Module, optional
            Nonlinear activation function. Default is `torch.nn.ReLU()`.
        bias : bool, optional
            If set to True, linear layers will include biases. Default is False.
        siren_multiplier : float, optional
            When using `SIREN` model type, this parameter functions as a hyperparameter.
            The original SIREN work uses 30.
            You can bypass this parameter by providing input that are not normalized and larger than one. Default is 1.0.
        input_multiplier : float, optional
            Initial value of the input multiplier before the very first layer.
        model_type : str, optional
            Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
            `conventional` refers to a standard multi layer perceptron.
            For `SIREN`, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
            For `Swish`, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017).
            For `FILM SIREN`, see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
            For `Gaussian`, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
            Default is "conventional".
        """
        super(multi_layer_perceptron, self).__init__()
        self.activation = activation
        self.bias = bias
        self.model_type = model_type
        self.layers = torch.nn.ModuleList()
        self.siren_multiplier = siren_multiplier
        self.dimensions = dimensions
        logger.info(
            f"Initializing multi_layer_perceptron: model_type={model_type}, "
            f"dimensions={dimensions}, bias={bias}, "
            f"siren_multiplier={siren_multiplier}"
        )
        # One linear layer per consecutive pair of entries in dimensions.
        for i in range(len(self.dimensions) - 1):
            self.layers.append(
                torch.nn.Linear(
                    self.dimensions[i], self.dimensions[i + 1], bias=self.bias
                )
            )
        # Optional learnable elementwise scaling applied to the raw input.
        if not isinstance(input_multiplier, type(None)):
            self.input_multiplier = torch.nn.ParameterList()
            self.input_multiplier.append(
                torch.nn.Parameter(torch.ones(1, self.dimensions[0]) * input_multiplier)
            )
            logger.debug(f"Input multiplier initialized: {input_multiplier}")
        # FILM SIREN learns a per-layer scale (index 0) and shift (index 1)
        # for the sine activation; Gaussian learns one spread parameter.
        if self.model_type == "FILM SIREN":
            self.alpha = torch.nn.ParameterList()
            for j in self.dimensions[1::]:
                self.alpha.append(torch.nn.Parameter(torch.randn(2, 1, j)))
            logger.debug("FILM SIREN alpha parameters initialized")
        if self.model_type == "Gaussian":
            self.alpha = torch.nn.ParameterList()
            for j in self.dimensions[1::]:
                self.alpha.append(torch.nn.Parameter(torch.randn(1, 1, j)))
            logger.debug("Gaussian alpha parameters initialized")

    def forward(self, x):
        """
        Forward pass of the multi-layer perceptron.

        Parameters
        ----------
        x : torch.Tensor
            Input data.

        Returns
        -------
        result : torch.Tensor
            Estimated output.
        """
        if hasattr(self, "input_multiplier"):
            result = x * self.input_multiplier[0]
        else:
            result = x
        # The last layer stays linear (no activation) for every model type.
        for layer_id, layer in enumerate(self.layers):
            result = layer(result)
            if self.model_type == "conventional" and layer_id != len(self.layers) - 1:
                result = self.activation(result)
            elif self.model_type == "swish" and layer_id != len(self.layers) - 1:
                result = swish(result)
            elif self.model_type == "SIREN" and layer_id != len(self.layers) - 1:
                result = torch.sin(result * self.siren_multiplier)
            elif self.model_type == "FILM SIREN" and layer_id != len(self.layers) - 1:
                result = torch.sin(
                    self.alpha[layer_id][0] * result + self.alpha[layer_id][1]
                )
            elif self.model_type == "Gaussian" and layer_id != len(self.layers) - 1:
                result = gaussian(result, self.alpha[layer_id][0])
        return result

__init__(dimensions, activation=torch.nn.ReLU(), bias=False, model_type='conventional', siren_multiplier=1.0, input_multiplier=None)

Initialize the multi-layer perceptron.

Parameters:

  • dimensions (list of int) –

    List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel).

  • activation (Module, default: ReLU() ) –

    Nonlinear activation function. Default is torch.nn.ReLU().

  • bias (bool, default: False ) –

    If set to True, linear layers will include biases. Default is False.

  • siren_multiplier (float, default: 1.0 ) –

    When using SIREN model type, this parameter functions as a hyperparameter. The original SIREN work uses 30. You can bypass this parameter by providing input that are not normalized and larger than one. Default is 1.0.

  • input_multiplier (float, default: None ) –

    Initial value of the input multiplier before the very first layer.

  • model_type (str, default: 'conventional' ) –

    Model type: conventional, swish, SIREN, FILM SIREN, Gaussian. conventional refers to a standard multi layer perceptron. For SIREN, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473. For Swish, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017). For FILM SIREN, see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021. For Gaussian, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022. Default is "conventional".

Source code in odak/learn/models/models.py
def __init__(
    self,
    dimensions,
    activation=torch.nn.ReLU(),
    bias=False,
    model_type="conventional",
    siren_multiplier=1.0,
    input_multiplier=None,
):
    """
    Initialize the multi-layer perceptron.

    Parameters
    ----------
    dimensions : list of int
        List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel).
    activation : torch.nn.Module, optional
        Nonlinear activation function. Default is `torch.nn.ReLU()`.
    bias : bool, optional
        If set to True, linear layers will include biases. Default is False.
    siren_multiplier : float, optional
        When using `SIREN` model type, this parameter functions as a hyperparameter.
        The original SIREN work uses 30.
        You can bypass this parameter by providing input that are not normalized and larger than one. Default is 1.0.
    input_multiplier : float, optional
        Initial value of the input multiplier before the very first layer.
    model_type : str, optional
        Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
        `conventional` refers to a standard multi layer perceptron.
        For `SIREN`, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
        For `Swish`, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017).
        For `FILM SIREN`, see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
        For `Gaussian`, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
        Default is "conventional".
    """
    super(multi_layer_perceptron, self).__init__()
    self.activation = activation
    self.bias = bias
    self.model_type = model_type
    self.layers = torch.nn.ModuleList()
    self.siren_multiplier = siren_multiplier
    self.dimensions = dimensions
    logger.info(
        f"Initializing multi_layer_perceptron: model_type={model_type}, "
        f"dimensions={dimensions}, bias={bias}, "
        f"siren_multiplier={siren_multiplier}"
    )
    # One linear layer per consecutive pair of entries in dimensions.
    for i in range(len(self.dimensions) - 1):
        self.layers.append(
            torch.nn.Linear(
                self.dimensions[i], self.dimensions[i + 1], bias=self.bias
            )
        )
    # Optional learnable elementwise scaling applied to the raw input.
    if not isinstance(input_multiplier, type(None)):
        self.input_multiplier = torch.nn.ParameterList()
        self.input_multiplier.append(
            torch.nn.Parameter(torch.ones(1, self.dimensions[0]) * input_multiplier)
        )
        logger.debug(f"Input multiplier initialized: {input_multiplier}")
    # FILM SIREN learns a per-layer scale (index 0) and shift (index 1) for
    # the sine activation; Gaussian learns one spread parameter per layer.
    if self.model_type == "FILM SIREN":
        self.alpha = torch.nn.ParameterList()
        for j in self.dimensions[1::]:
            self.alpha.append(torch.nn.Parameter(torch.randn(2, 1, j)))
        logger.debug("FILM SIREN alpha parameters initialized")
    if self.model_type == "Gaussian":
        self.alpha = torch.nn.ParameterList()
        for j in self.dimensions[1::]:
            self.alpha.append(torch.nn.Parameter(torch.randn(1, 1, j)))
        logger.debug("Gaussian alpha parameters initialized")

forward(x)

Forward pass of the multi-layer perceptron.

Parameters:

  • x (Tensor) –

    Input data.

Returns:

  • result ( Tensor ) –

    Estimated output.

Source code in odak/learn/models/models.py
def forward(self, x):
    """
    Forward pass of the multi-layer perceptron.

    Parameters
    ----------
    x : torch.Tensor
        Input data.

    Returns
    -------
    result : torch.Tensor
        Estimated output.
    """
    # Optional learnable elementwise scaling applied before the first layer.
    if hasattr(self, "input_multiplier"):
        result = x * self.input_multiplier[0]
    else:
        result = x
    # The last layer stays linear (no activation) for every model type.
    for layer_id, layer in enumerate(self.layers):
        result = layer(result)
        if self.model_type == "conventional" and layer_id != len(self.layers) - 1:
            result = self.activation(result)
        elif self.model_type == "swish" and layer_id != len(self.layers) - 1:
            result = swish(result)
        elif self.model_type == "SIREN" and layer_id != len(self.layers) - 1:
            result = torch.sin(result * self.siren_multiplier)
        elif self.model_type == "FILM SIREN" and layer_id != len(self.layers) - 1:
            result = torch.sin(
                self.alpha[layer_id][0] * result + self.alpha[layer_id][1]
            )
        elif self.model_type == "Gaussian" and layer_id != len(self.layers) - 1:
            result = gaussian(result, self.alpha[layer_id][0])
    return result

non_local_layer

Bases: Module

Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)

Source code in odak/learn/models/components.py
class non_local_layer(torch.nn.Module):
    """
    Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)
    """

    def __init__(
        self,
        input_channels=1024,
        bottleneck_channels=512,
        kernel_size=1,
        bias=False,
    ):
        """

        Parameters
        ----------
        input_channels      : int
                              Number of input channels.
        bottleneck_channels : int
                              Number of middle channels.
        kernel_size         : int
                              Kernel size.
        bias                : bool
                              Set to True to let convolutional layers have bias term.
        """
        super(non_local_layer, self).__init__()
        self.input_channels = input_channels
        self.bottleneck_channels = bottleneck_channels
        # Projection of the input into the bottleneck space (1x1 by default).
        self.g = torch.nn.Conv2d(
            self.input_channels,
            self.bottleneck_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        )
        # W_z maps the attended bottleneck features back to input channels.
        self.W_z = torch.nn.Sequential(
            torch.nn.Conv2d(
                self.bottleneck_channels,
                self.input_channels,
                kernel_size=kernel_size,
                bias=bias,
                padding=kernel_size // 2,
            ),
            torch.nn.BatchNorm2d(self.input_channels),
        )
        # Zero-initialize the trailing BatchNorm so the block starts as an
        # identity mapping (W_y is zero at initialization, hence z = x).
        torch.nn.init.constant_(self.W_z[1].weight, 0)
        torch.nn.init.constant_(self.W_z[1].bias, 0)

    def forward(self, x):
        """
        Forward model [zi = Wzyi + xi]

        Parameters
        ----------
        x               : torch.tensor
                          First input data.


        Returns
        ----------
        z               : torch.tensor
                          Estimated output.
        """
        batch_size, channels, height, width = x.size()
        # NOTE(review): theta and phi are the raw input flattened over space
        # (no 1x1 embedding) — only g projects to the bottleneck channels.
        theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
        phi = x.view(batch_size, channels, -1).permute(0, 2, 1)
        g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
        # Pairwise affinities between spatial positions, scaled by the number
        # of positions before the softmax.
        attn = torch.bmm(theta, phi.transpose(1, 2)) / (height * width)
        attn = torch.nn.functional.softmax(attn, dim=-1)
        y = (
            torch.bmm(attn, g)
            .permute(0, 2, 1)
            .contiguous()
            .view(batch_size, self.bottleneck_channels, height, width)
        )
        W_y = self.W_z(y)
        # Residual connection keeps the output close to the input early on.
        z = W_y + x
        return z

__init__(input_channels=1024, bottleneck_channels=512, kernel_size=1, bias=False)

Parameters:

  • input_channels
                  Number of input channels.
    
  • bottleneck_channels (int, default: 512 ) –
                  Number of middle channels.
    
  • kernel_size
                  Kernel size.
    
  • bias
                  Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=1024,
    bottleneck_channels=512,
    kernel_size=1,
    bias=False,
):
    """

    Parameters
    ----------
    input_channels      : int
                          Number of input channels.
    bottleneck_channels : int
                          Number of middle channels.
    kernel_size         : int
                          Kernel size.
    bias                : bool
                          Set to True to let convolutional layers have bias term.
    """
    super(non_local_layer, self).__init__()
    self.input_channels = input_channels
    self.bottleneck_channels = bottleneck_channels
    # Projection of the input into the bottleneck space (1x1 by default).
    self.g = torch.nn.Conv2d(
        self.input_channels,
        self.bottleneck_channels,
        kernel_size=kernel_size,
        padding=kernel_size // 2,
        bias=bias,
    )
    # W_z maps the attended bottleneck features back to input channels.
    self.W_z = torch.nn.Sequential(
        torch.nn.Conv2d(
            self.bottleneck_channels,
            self.input_channels,
            kernel_size=kernel_size,
            bias=bias,
            padding=kernel_size // 2,
        ),
        torch.nn.BatchNorm2d(self.input_channels),
    )
    # Zero-initialize the trailing BatchNorm so the block starts as an
    # identity mapping (W_y is zero at initialization, hence z = x).
    torch.nn.init.constant_(self.W_z[1].weight, 0)
    torch.nn.init.constant_(self.W_z[1].bias, 0)

forward(x)

Forward model [zi = Wzyi + xi]

Parameters:

  • x
              First input data.
    

Returns:

  • z ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model [zi = Wzyi + xi]

    Parameters
    ----------
    x               : torch.tensor
                      First input data.


    Returns
    ----------
    z               : torch.tensor
                      Estimated output.
    """
    batch_size, channels, height, width = x.size()
    # NOTE(review): theta and phi are the raw input flattened over space (no
    # 1x1 embedding) — only g projects to the bottleneck channels.
    theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
    phi = x.view(batch_size, channels, -1).permute(0, 2, 1)
    g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
    # Pairwise affinities between spatial positions, scaled by the number of
    # positions before the softmax.
    attn = torch.bmm(theta, phi.transpose(1, 2)) / (height * width)
    attn = torch.nn.functional.softmax(attn, dim=-1)
    y = (
        torch.bmm(attn, g)
        .permute(0, 2, 1)
        .contiguous()
        .view(batch_size, self.bottleneck_channels, height, width)
    )
    W_y = self.W_z(y)
    # Residual connection keeps the output close to the input early on.
    z = W_y + x
    return z

normalization

Bases: Module

A normalization layer.

Source code in odak/learn/models/components.py
class normalization(torch.nn.Module):
    """
    A learnable channel normalization layer.

    Standardizes the input along the channel axis and rescales it with a
    learnable per-channel gain.
    """

    def __init__(
        self,
        dim=1,
    ):
        """
        Normalization layer.


        Parameters
        ----------
        dim             : int
                          Dimension (axis) to normalize.
        """
        super().__init__()
        # Learnable per-channel gain, broadcastable over (batch, dim, H, W).
        self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        # A looser epsilon is used for non-float32 (reduced precision) inputs.
        epsilon = 1e-5 if x.dtype == torch.float32 else 1e-3
        sigma_squared = x.var(dim=1, unbiased=False, keepdim=True)
        mu = x.mean(dim=1, keepdim=True)
        standardized = (x - mu) * torch.rsqrt(sigma_squared + epsilon)
        return standardized * self.k

__init__(dim=1)

Normalization layer.

Parameters:

  • dim
              Dimension (axis) to normalize.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    dim=1,
):
    """
    Normalization layer.


    Parameters
    ----------
    dim             : int
                      Dimension (axis) to normalize.
    """
    super().__init__()
    # Learnable per-channel gain, broadcastable over (batch, dim, H, W).
    self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    # A looser epsilon is used for non-float32 (reduced precision) inputs.
    eps = 1e-5 if x.dtype == torch.float32 else 1e-3
    var = torch.var(x, dim=1, unbiased=False, keepdim=True)
    mean = torch.mean(x, dim=1, keepdim=True)
    # Standardize along dim 1, then apply the learnable per-channel gain.
    result = (x - mean) * (var + eps).rsqrt() * self.k
    return result

positional_encoder

Bases: Module

A positional encoder module. This implementation follows this specific work: Martin-Brualla, Ricardo, Noha Radwan, Mehdi SM Sajjadi, Jonathan T. Barron, Alexey Dosovitskiy, and Daniel Duckworth. "Nerf in the wild: Neural radiance fields for unconstrained photo collections." In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pp. 7210-7219. 2021..

Source code in odak/learn/models/components.py
class positional_encoder(torch.nn.Module):
    """
    Sinusoidal positional encoder.

    This implementation follows this specific work: `Martin-Brualla, Ricardo, Noha Radwan, Mehdi SM Sajjadi, Jonathan T. Barron, Alexey Dosovitskiy, and Daniel Duckworth. "Nerf in the wild: Neural radiance fields for unconstrained photo collections." In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pp. 7210-7219. 2021.`.
    """

    def __init__(self, L):
        """
        A positional encoder module.

        Parameters
        ----------
        L                   : int
                              Positional encoding level (number of frequency octaves).
        """
        super().__init__()
        self.L = L

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x               : torch.tensor
                          Input data [b x n], where `b` is batch size, `n` is the feature size.

        Returns
        ----------
        results         : torch.tensor
                          Input concatenated with its cosine and sine encodings,
                          shaped [b x (n + 2 * n * L)].
        """
        # Frequencies 2^0, 2^1, ..., 2^(L-1), broadcast over every feature.
        octaves = torch.arange(self.L, device=x.device)
        frequencies = (2 ** octaves).reshape(1, 1, -1)
        scaled = x.unsqueeze(-1) * frequencies
        batch_size = x.shape[0]
        cosines = torch.cos(scaled).reshape(batch_size, -1)
        sines = torch.sin(scaled).reshape(batch_size, -1)
        return torch.cat((x, cosines, sines), dim=1)

__init__(L)

A positional encoder module.

Parameters:

  • L
                  Positional encoding level.
    
Source code in odak/learn/models/components.py
def __init__(self, L):
    """
    A positional encoder module.

    Parameters
    ----------
    L                   : int
                          Positional encoding level.
    """
    super(positional_encoder, self).__init__()
    # Number of frequency octaves used when encoding in forward().
    self.L = L

forward(x)

Forward model.

Parameters:

  • x
              Input data [b x n], where `b` is batch size, `n` is the feature size.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x               : torch.tensor
                      Input data [b x n], where `b` is batch size, `n` is the feature size.

    Returns
    ----------
    result          : torch.tensor
                      Result of the forward operation.
    """
    # Frequencies 2^0, 2^1, ..., 2^(L-1), broadcast over every feature.
    freqs = 2 ** torch.arange(self.L, device=x.device)
    freqs = freqs.view(1, 1, -1)
    results_cos = torch.cos(x.unsqueeze(-1) * freqs).reshape(x.shape[0], -1)
    results_sin = torch.sin(x.unsqueeze(-1) * freqs).reshape(x.shape[0], -1)
    # Output width is n * (1 + 2 * L): raw input plus cos and sin encodings.
    results = torch.cat((x, results_cos, results_sin), dim=1)
    return results

residual_attention_layer

Bases: Module

A residual block with an attention layer.

Source code in odak/learn/models/components.py
class residual_attention_layer(torch.nn.Module):
    """
    A residual block with an attention layer.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=1,
        bias=False,
        activation=torch.nn.ReLU(),
    ):
        """
        An attention layer class.


        Parameters
        ----------
        input_channels  : int or optional
                          Number of input channels.
        output_channels : int or optional
                          Number of middle channels.
        kernel_size     : int or optional
                          Kernel size.
        bias            : bool or optional
                          Set to True to let convolutional layers have bias term.
        activation      : torch.nn or optional
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        # Parallel convolution + batch-norm branches, one per input tensor.
        self.convolution0 = torch.nn.Sequential(
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            torch.nn.BatchNorm2d(output_channels),
        )
        self.convolution1 = torch.nn.Sequential(
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            torch.nn.BatchNorm2d(output_channels),
        )
        # Produces the attention map applied multiplicatively to x0 in forward().
        self.final_layer = torch.nn.Sequential(
            self.activation,
            torch.nn.Conv2d(
                output_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
        )

    def forward(self, x0, x1):
        """
        Forward model.

        Parameters
        ----------
        x0             : torch.tensor
                         First input data.

        x1             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        y0 = self.convolution0(x0)
        y1 = self.convolution1(x1)
        y2 = torch.add(y0, y1)
        # The fused features act as an attention map that gates the first input.
        result = self.final_layer(y2) * x0
        return result

__init__(input_channels=2, output_channels=2, kernel_size=1, bias=False, activation=torch.nn.ReLU())

An attention layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int or optional, default: 2 ) –
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=1,
    bias=False,
    activation=torch.nn.ReLU(),
):
    """
    An attention layer class.


    Parameters
    ----------
    input_channels  : int or optional
                      Number of input channels.
    output_channels : int or optional
                      Number of middle channels.
    kernel_size     : int or optional
                      Kernel size.
    bias            : bool or optional
                      Set to True to let convolutional layers have bias term.
    activation      : torch.nn or optional
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation

    def branch():
        # Builds one gating branch: same-padding convolution followed by
        # batch normalization. Each call returns an independent module with
        # its own freshly initialized weights.
        return torch.nn.Sequential(
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            torch.nn.BatchNorm2d(output_channels),
        )

    # Two parallel branches, one per input of forward().
    self.convolution0 = branch()
    self.convolution1 = branch()
    # Final mixing stage: nonlinearity followed by a same-padding convolution.
    self.final_layer = torch.nn.Sequential(
        self.activation,
        torch.nn.Conv2d(
            output_channels,
            output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        ),
    )

forward(x0, x1)

Forward model.

Parameters:

  • x0
             First input data.
    
  • x1
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x0, x1):
    """
    Forward model.

    Parameters
    ----------
    x0             : torch.tensor
                     First input data.

    x1             : torch.tensor
                     Second input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Each input goes through its own convolution + batch norm branch.
    fused = self.convolution0(x0) + self.convolution1(x1)
    # The fused features gate the first input multiplicatively.
    result = self.final_layer(fused) * x0
    return result

residual_layer

Bases: Module

A residual layer.

Source code in odak/learn/models/components.py
class residual_layer(torch.nn.Module):
    """
    A residual layer.
    """

    def __init__(
        self,
        input_channels=2,
        mid_channels=16,
        kernel_size=3,
        bias=False,
        normalization=True,
        activation=torch.nn.ReLU(),
    ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of middle channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        # The residual branch maps back to `input_channels` channels so the
        # skip connection in forward() can be added elementwise.
        branch_options = dict(
            mid_channels=mid_channels,
            output_channels=input_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )
        self.convolution = double_convolution(input_channels, **branch_options)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output (input plus the convolved features).
        """
        # Skip connection: add the residual branch output back onto the input.
        return x + self.convolution(x)

__init__(input_channels=2, mid_channels=16, kernel_size=3, bias=False, normalization=True, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    mid_channels=16,
    kernel_size=3,
    bias=False,
    normalization=True,
    activation=torch.nn.ReLU(),
):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of middle channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    # The residual branch returns to `input_channels` channels so that the
    # skip connection in forward() can be added to its output elementwise.
    branch_options = dict(
        mid_channels=mid_channels,
        output_channels=input_channels,
        kernel_size=kernel_size,
        normalization=normalization,
        bias=bias,
        activation=activation,
    )
    self.convolution = double_convolution(input_channels, **branch_options)

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output (the input plus its convolved features).
    """
    # Skip connection: add the residual branch output back onto the input.
    return x + self.convolution(x)

spatial_gate

Bases: Module

Spatial attention module that applies a convolution layer after channel pooling. This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.

Source code in odak/learn/models/components.py
class spatial_gate(torch.nn.Module):
    """
    Spatial attention module that applies a convolution layer after channel pooling.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.
    """

    def __init__(self):
        """
        Initializes the spatial gate module.
        """
        super().__init__()
        # A single 7x7 convolution without a nonlinearity turns the two
        # pooled channel maps into a one-channel attention map.
        self.spatial = convolution_layer(
            2, 1, 7, bias=False, activation=torch.nn.Identity()
        )

    def channel_pool(self, x):
        """
        Applies max and average pooling on the channels.

        Parameters
        ----------
        x             : torch.tensor
                        Input tensor.

        Returns
        -------
        output        : torch.tensor
                        Output tensor with two channels: channel-wise max
                        followed by channel-wise mean.
        """
        pooled = [
            torch.max(x, 1)[0].unsqueeze(1),
            torch.mean(x, 1).unsqueeze(1),
        ]
        return torch.cat(pooled, dim=1)

    def forward(self, x):
        """
        Forward pass of the SpatialGate module.

        Applies spatial attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the SpatialGate module.

        Returns
        -------
        scaled_x     : torch.tensor
                       Output tensor after applying spatial attention.
        """
        # Pool across channels, convolve to one map, squash to (0, 1), gate.
        attention = torch.sigmoid(self.spatial(self.channel_pool(x)))
        return x * attention

__init__()

Initializes the spatial gate module.

Source code in odak/learn/models/components.py
def __init__(self):
    """
    Initializes the spatial gate module.
    """
    super().__init__()
    # One 7x7 convolution without a nonlinearity maps the two pooled
    # channel maps produced by channel_pool() to a single attention map.
    self.spatial = convolution_layer(
        2, 1, 7, bias=False, activation=torch.nn.Identity()
    )

channel_pool(x)

Applies max and average pooling on the channels.

Parameters:

  • x
            Input tensor.
    

Returns:

  • output ( tensor ) –

    Output tensor.

Source code in odak/learn/models/components.py
def channel_pool(self, x):
    """
    Applies max and average pooling on the channels.

    Parameters
    ----------
    x             : torch.tensor
                    Input tensor.

    Returns
    -------
    output        : torch.tensor
                    Output tensor with two channels: channel-wise max
                    followed by channel-wise mean.
    """
    pooled = [
        torch.max(x, 1)[0].unsqueeze(1),
        torch.mean(x, 1).unsqueeze(1),
    ]
    return torch.cat(pooled, dim=1)

forward(x)

Forward pass of the SpatialGate module.

Applies spatial attention to the input tensor.

Parameters:

  • x
           Input tensor to the SpatialGate module.
    

Returns:

  • scaled_x ( tensor ) –

    Output tensor after applying spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the SpatialGate module.

    Applies spatial attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the SpatialGate module.

    Returns
    -------
    scaled_x     : torch.tensor
                   Output tensor after applying spatial attention.
    """
    # Pool across channels, convolve to an attention map, squash it to
    # (0, 1) with a sigmoid, and gate the input multiplicatively.
    attention_map = self.spatial(self.channel_pool(x))
    return x * torch.sigmoid(attention_map)

spatially_adaptive_convolution

Bases: Module

A spatially adaptive convolution layer.

References

C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions." C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation." C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."

Source code in odak/learn/models/components.py
class spatially_adaptive_convolution(torch.nn.Module):
    """
    A spatially adaptive convolution layer.

    References
    ----------

    C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions."
    C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation."
    C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initializes a spatially adaptive convolution layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn.Module
                          Activation function. NOTE(review): stored but not
                          applied anywhere in forward() — confirm whether
                          this is intentional.
        """
        super(spatially_adaptive_convolution, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        # The standard convolution only supplies an initialized weight
        # tensor; forward() uses `self.weight` directly (shared storage).
        self.standard_convolution = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=self.output_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        self.weight = torch.nn.Parameter(
            data=self.standard_convolution.weight, requires_grad=True
        )
        self.activation = activation

    def _pad_to_stride(self, tensor):
        """
        Symmetric zero padding so both spatial sizes divide the stride evenly.
        """
        rows = tensor.size(-2) % self.stride
        cols = tensor.size(-1) % self.stride
        return torch.nn.functional.pad(
            tensor, (cols // 2, cols - cols // 2, rows // 2, rows - rows // 2)
        )

    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive convolution layer.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        sa_output          : torch.tensor
                            Estimated output tensor.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad both tensors when the kernel-feature grid does not line up
        # with the input grid at the configured stride.
        height_mismatch = sv_kernel_feature.size(-2) * self.stride != x.size(-2)
        width_mismatch = sv_kernel_feature.size(-1) * self.stride != x.size(-1)
        if height_mismatch or width_mismatch:
            sv_kernel_feature = self._pad_to_stride(sv_kernel_feature)
            x = self._pad_to_stride(x)

        rows_out = x.size(-2) // self.stride
        cols_out = x.size(-1) // self.stride

        # Unfold the input into (1, C * k * k, rows_out * cols_out) columns.
        patches = torch.nn.functional.unfold(
            x,
            kernel_size=(self.kernel_size, self.kernel_size),
            stride=self.stride,
            padding=self.padding,
        )

        # One spatially varying kernel column per output location.
        sv_kernel = sv_kernel_feature.reshape(
            1,
            self.input_channels * self.kernel_size * self.kernel_size,
            rows_out * cols_out,
        )

        # Spatially invariant weights as a (C_out, C_in * k * k) matrix.
        si_kernel = self.weight.reshape(
            self.output_channels,
            self.input_channels * self.kernel_size * self.kernel_size,
        )

        # Modulate each patch by its location-specific kernel, then mix
        # channels with the shared weight matrix.
        modulated = patches * sv_kernel
        sa_output = torch.matmul(si_kernel, modulated).reshape(
            1,
            self.output_channels,
            rows_out,
            cols_out,
        )
        return sa_output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive convolution layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Activation function to apply. If None, no activation is applied.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initializes a spatially adaptive convolution layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn.Module
                      Activation function to apply. If None, no activation is applied.
                      NOTE(review): stored but not applied in this layer's
                      forward pass — confirm whether this is intentional.
    """
    super(spatially_adaptive_convolution, self).__init__()
    self.kernel_size = kernel_size
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.stride = stride
    self.padding = padding
    # The standard convolution is created to obtain a conventionally
    # initialized weight tensor; forward() uses `self.weight` directly.
    self.standard_convolution = torch.nn.Conv2d(
        in_channels=input_channels,
        out_channels=self.output_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    # Shares storage with standard_convolution.weight.
    self.weight = torch.nn.Parameter(
        data=self.standard_convolution.weight, requires_grad=True
    )
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive convolution layer.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • sa_output ( tensor ) –

    Estimated output tensor. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive convolution layer.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    sa_output          : torch.tensor
                        Estimated output tensor.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    def pad_to_stride(tensor):
        # Symmetric zero padding so spatial sizes divide the stride evenly.
        rows = tensor.size(-2) % self.stride
        cols = tensor.size(-1) % self.stride
        return torch.nn.functional.pad(
            tensor, (cols // 2, cols - cols // 2, rows // 2, rows - rows // 2)
        )

    # Pad both tensors when the kernel-feature grid does not line up with
    # the input grid at the configured stride.
    if (
        sv_kernel_feature.size(-1) * self.stride != x.size(-1)
        or sv_kernel_feature.size(-2) * self.stride != x.size(-2)
    ):
        sv_kernel_feature = pad_to_stride(sv_kernel_feature)
        x = pad_to_stride(x)

    rows_out = x.size(-2) // self.stride
    cols_out = x.size(-1) // self.stride

    # Unfold the input into (1, C * k * k, rows_out * cols_out) columns.
    patches = torch.nn.functional.unfold(
        x,
        kernel_size=(self.kernel_size, self.kernel_size),
        stride=self.stride,
        padding=self.padding,
    )

    # One spatially varying kernel column per output location.
    sv_kernel = sv_kernel_feature.reshape(
        1,
        self.input_channels * self.kernel_size * self.kernel_size,
        rows_out * cols_out,
    )

    # Spatially invariant weights as a (C_out, C_in * k * k) matrix.
    si_kernel = self.weight.reshape(
        self.output_channels,
        self.input_channels * self.kernel_size * self.kernel_size,
    )

    # Modulate each patch by its location-specific kernel, then mix
    # channels with the shared weight matrix.
    modulated = patches * sv_kernel
    return torch.matmul(si_kernel, modulated).reshape(
        1,
        self.output_channels,
        rows_out,
        cols_out,
    )

spatially_adaptive_module

Bases: Module

A spatially adaptive module that combines learned spatially adaptive convolutions.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/components.py
class spatially_adaptive_module(torch.nn.Module):
    """
    A spatially adaptive module that combines learned spatially adaptive convolutions.

    References
    ----------

    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initializes a spatially adaptive module.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super(spatially_adaptive_module, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        # One output channel comes from the plain spatially varying sum in
        # forward(); the learned weights produce the remaining channels.
        self.output_channels_for_weight = self.output_channels - 1
        # The standard convolution only supplies an initialized weight
        # tensor; forward() uses `self.weight` directly (shared storage).
        self.standard_convolution = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=self.output_channels_for_weight,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        self.weight = torch.nn.Parameter(
            data=self.standard_convolution.weight, requires_grad=True
        )
        self.activation = activation

    def _pad_to_stride(self, tensor):
        """
        Symmetric zero padding so both spatial sizes divide the stride evenly.
        """
        rows = tensor.size(-2) % self.stride
        cols = tensor.size(-1) % self.stride
        return torch.nn.functional.pad(
            tensor, (cols // 2, cols - cols // 2, rows // 2, rows - rows // 2)
        )

    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive module.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        output             : torch.tensor
                            Combined output tensor from standard and spatially adaptive convolutions.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad both tensors when the kernel-feature grid does not line up
        # with the input grid at the configured stride.
        height_mismatch = sv_kernel_feature.size(-2) * self.stride != x.size(-2)
        width_mismatch = sv_kernel_feature.size(-1) * self.stride != x.size(-1)
        if height_mismatch or width_mismatch:
            sv_kernel_feature = self._pad_to_stride(sv_kernel_feature)
            x = self._pad_to_stride(x)

        rows_out = x.size(-2) // self.stride
        cols_out = x.size(-1) // self.stride

        # Unfold the input into (1, C * k * k, rows_out * cols_out) columns.
        patches = torch.nn.functional.unfold(
            x,
            kernel_size=(self.kernel_size, self.kernel_size),
            stride=self.stride,
            padding=self.padding,
        )

        # One spatially varying kernel column per output location.
        sv_kernel = sv_kernel_feature.reshape(
            1,
            self.input_channels * self.kernel_size * self.kernel_size,
            rows_out * cols_out,
        )

        # Modulate each patch by its location-specific kernel.
        modulated = patches * sv_kernel

        # Channel 0: plain spatially varying convolution (per-location sum).
        sv_output = torch.sum(modulated, dim=1).reshape(
            1, 1, rows_out, cols_out
        )

        # Remaining channels: mix with the shared learned weight matrix.
        si_kernel = self.weight.reshape(
            self.output_channels_for_weight,
            self.input_channels * self.kernel_size * self.kernel_size,
        )
        sa_output = torch.matmul(si_kernel, modulated).reshape(
            1,
            self.output_channels_for_weight,
            rows_out,
            cols_out,
        )

        # Concatenate both results and apply the nonlinearity.
        return self.activation(torch.cat((sv_output, sa_output), dim=1))

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive module.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initializes a spatially adaptive module.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super(spatially_adaptive_module, self).__init__()
    self.kernel_size = kernel_size
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.stride = stride
    self.padding = padding
    # One output channel is produced by the plain spatially varying sum in
    # forward(); the learned weights cover the remaining channels.
    self.output_channels_for_weight = self.output_channels - 1
    # The standard convolution is created to obtain a conventionally
    # initialized weight tensor; forward() uses `self.weight` directly.
    self.standard_convolution = torch.nn.Conv2d(
        in_channels=input_channels,
        out_channels=self.output_channels_for_weight,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    # Shares storage with standard_convolution.weight.
    self.weight = torch.nn.Parameter(
        data=self.standard_convolution.weight, requires_grad=True
    )
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive module.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • output ( tensor ) –

    Combined output tensor from standard and spatially adaptive convolutions. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive module.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    output             : torch.tensor
                        Combined output tensor from standard and spatially adaptive convolutions.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    def pad_to_stride(tensor):
        # Symmetric zero padding so spatial sizes divide the stride evenly.
        rows = tensor.size(-2) % self.stride
        cols = tensor.size(-1) % self.stride
        return torch.nn.functional.pad(
            tensor, (cols // 2, cols - cols // 2, rows // 2, rows - rows // 2)
        )

    # Pad both tensors when the kernel-feature grid does not line up with
    # the input grid at the configured stride.
    if (
        sv_kernel_feature.size(-1) * self.stride != x.size(-1)
        or sv_kernel_feature.size(-2) * self.stride != x.size(-2)
    ):
        sv_kernel_feature = pad_to_stride(sv_kernel_feature)
        x = pad_to_stride(x)

    rows_out = x.size(-2) // self.stride
    cols_out = x.size(-1) // self.stride

    # Unfold the input into (1, C * k * k, rows_out * cols_out) columns.
    patches = torch.nn.functional.unfold(
        x,
        kernel_size=(self.kernel_size, self.kernel_size),
        stride=self.stride,
        padding=self.padding,
    )

    # One spatially varying kernel column per output location.
    sv_kernel = sv_kernel_feature.reshape(
        1,
        self.input_channels * self.kernel_size * self.kernel_size,
        rows_out * cols_out,
    )

    # Modulate each patch by its location-specific kernel.
    modulated = patches * sv_kernel

    # Channel 0: plain spatially varying convolution (per-location sum).
    sv_output = torch.sum(modulated, dim=1).reshape(1, 1, rows_out, cols_out)

    # Remaining channels: mix with the shared learned weight matrix.
    si_kernel = self.weight.reshape(
        self.output_channels_for_weight,
        self.input_channels * self.kernel_size * self.kernel_size,
    )
    sa_output = torch.matmul(si_kernel, modulated).reshape(
        1,
        self.output_channels_for_weight,
        rows_out,
        cols_out,
    )

    # Concatenate both results and apply the nonlinearity.
    return self.activation(torch.cat((sv_output, sa_output), dim=1))

spatially_adaptive_unet

Bases: Module

Spatially varying U-Net model based on spatially adaptive convolution.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/models.py
class spatially_adaptive_unet(torch.nn.Module):
    """
    Spatially varying U-Net model based on spatially adaptive convolution.

    References
    ----------
    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """

    def __init__(
        self,
        depth=3,
        dimensions=8,
        input_channels=6,
        out_channels=6,
        kernel_size=3,
        bias=True,
        normalization=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initialize the spatially adaptive U-Net model.

        Parameters
        ----------
        depth : int, optional
            Number of upsampling and downsampling layers. Default is 3.
        dimensions : int, optional
            Number of dimensions (base feature width; doubled per encoder stage). Default is 8.
        input_channels : int, optional
            Number of input channels. Default is 6.
        out_channels : int, optional
            Number of output channels. Default is 6.
        kernel_size : int, optional
            Kernel size for convolutional layers. Default is 3.
        bias : bool, optional
            Set to True to let convolutional layers learn a bias term. Default is True.
        normalization : bool, optional
            If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
        activation : torch.nn.Module, optional
            Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
        """
        super().__init__()
        self.depth = depth
        self.out_channels = out_channels
        logger.info(
            f"Initializing spatially_adaptive_unet: "
            f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
            f"out_channels={out_channels}, kernel_size={kernel_size}, "
            f"bias={bias}, normalization={normalization}"
        )
        # Stem: lift the input field from input_channels to the base feature width.
        self.inc = convolution_layer(
            input_channels=input_channels,
            output_channels=dimensions,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )

        # Encoder: each stage halves the spatial resolution (AvgPool2d(2)) and
        # doubles the channel width through a spatially adaptive module (SAM).
        # NOTE(review): one `activation` module instance is shared across every
        # layer; with inplace=True this is presumably intentional — confirm.
        self.encoder = torch.nn.ModuleList()
        for i in range(self.depth + 1):  # Downsampling layers
            down_in_channels = dimensions * (2**i)
            down_out_channels = 2 * down_in_channels
            pooling_layer = torch.nn.AvgPool2d(2)
            # Residual refinement block (in == out channels; added to its input in forward()).
            double_convolution_layer = double_convolution(
                input_channels=down_in_channels,
                mid_channels=down_in_channels,
                output_channels=down_in_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            sam = spatially_adaptive_module(
                input_channels=down_in_channels,
                output_channels=down_out_channels,
                kernel_size=kernel_size,
                bias=bias,
                activation=activation,
            )
            self.encoder.append(
                torch.nn.ModuleList([pooling_layer, double_convolution_layer, sam])
            )
            logger.debug(f"Added encoder block {i}: {down_in_channels} -> {down_out_channels}")
        # Bottleneck: refine the deepest features and fuse them with a learned
        # global descriptor (see global_feature_module).
        self.global_feature_module = torch.nn.ModuleList()
        double_convolution_layer = double_convolution(
            input_channels=dimensions * (2 ** (depth + 1)),
            mid_channels=dimensions * (2 ** (depth + 1)),
            output_channels=dimensions * (2 ** (depth + 1)),
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        global_feature_layer = global_feature_module(
            input_channels=dimensions * (2 ** (depth + 1)),
            mid_channels=dimensions * (2 ** (depth + 1)),
            output_channels=dimensions * (2 ** (depth + 1)),
            kernel_size=kernel_size,
            bias=bias,
            activation=torch.nn.LeakyReLU(0.2, inplace=True),
        )
        self.global_feature_module.append(
            torch.nn.ModuleList([double_convolution_layer, global_feature_layer])
        )
        logger.debug("Added global feature module")

        # Decoder: runs from the deepest scale (i == depth) up to the output
        # scale (i == 0); the final stage projects to out_channels through a
        # 1x1 convolution with no activation.
        self.decoder = torch.nn.ModuleList()
        for i in range(depth, -1, -1):
            up_in_channels = dimensions * (2 ** (i + 1))
            up_mid_channels = up_in_channels // 2
            if i == 0:
                up_out_channels = self.out_channels
                upsample_layer = upsample_convtranspose2d_layer(
                    input_channels=up_in_channels,
                    output_channels=up_mid_channels,
                    kernel_size=2,
                    stride=2,
                    bias=bias,
                )
                conv_layer = torch.nn.Sequential(
                    convolution_layer(
                        input_channels=up_mid_channels,
                        output_channels=up_mid_channels,
                        kernel_size=kernel_size,
                        bias=bias,
                        normalization=normalization,
                        activation=activation,
                    ),
                    convolution_layer(
                        input_channels=up_mid_channels,
                        output_channels=up_out_channels,
                        kernel_size=1,
                        bias=bias,
                        normalization=normalization,
                        activation=None,
                    ),
                )
                self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
                logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
            else:
                up_out_channels = up_in_channels // 2
                upsample_layer = upsample_convtranspose2d_layer(
                    input_channels=up_in_channels,
                    output_channels=up_mid_channels,
                    kernel_size=2,
                    stride=2,
                    bias=bias,
                )
                conv_layer = double_convolution(
                    input_channels=up_mid_channels,
                    mid_channels=up_mid_channels,
                    output_channels=up_out_channels,
                    kernel_size=kernel_size,
                    bias=bias,
                    normalization=normalization,
                    activation=activation,
                )
                self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
                logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
        logger.info("spatially_adaptive_unet initialization completed")

    def forward(self, sv_kernel, field):
        """
        Forward pass of the spatially adaptive U-Net.

        Parameters
        ----------
        sv_kernel : list of torch.Tensor
            Learned spatially varying kernels.
            Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
            where C_i, H_i, and W_i represent the channel, height, and width
            of each feature at a certain scale.

        field : torch.Tensor
            Input field data.
            Dimension: (1, 6, H, W)

        Returns
        -------
        target_field : torch.Tensor
            Estimated output.
            Dimension: (1, 6, H, W)
        """
        # NOTE(review): repeated AvgPool2d(2) stages suggest H and W should be
        # divisible by 2 ** (depth + 1) for skip shapes to line up — confirm.
        x = self.inc(field)
        downsampling_outputs = [x]
        for i, down_layer in enumerate(self.encoder):
            # down_layer = [pooling, double_convolution, sam]
            x_down = down_layer[0](downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
            # Residual connection around the double convolution, then SAM with
            # the kernel for this scale; indexing is reversed, so sv_kernel is
            # presumably ordered coarsest-first — confirm against the kernel
            # generation model's output order.
            sam_output = down_layer[2](
                x_down + down_layer[1](x_down), sv_kernel[self.depth - i]
            )
            downsampling_outputs.append(sam_output)
        global_feature = self.global_feature_module[0][0](downsampling_outputs[-1])
        global_feature = self.global_feature_module[0][1](
            downsampling_outputs[-1], global_feature
        )
        downsampling_outputs.append(global_feature)
        x_up = downsampling_outputs[-1]
        for i, up_layer in enumerate(self.decoder):
            # Even indices of downsampling_outputs hold the skip features
            # (index 0 is the stem output; 2, 4, ... are SAM outputs).
            x_up = up_layer[0](x_up, downsampling_outputs[2 * (self.depth - i)])
            x_up = up_layer[1](x_up)
        result = x_up
        return result

__init__(depth=3, dimensions=8, input_channels=6, out_channels=6, kernel_size=3, bias=True, normalization=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initialize the spatially adaptive U-Net model.

Parameters:

  • depth (int, default: 3 ) –

    Number of upsampling and downsampling layers. Default is 3.

  • dimensions (int, default: 8 ) –

    Number of dimensions. Default is 8.

  • input_channels (int, default: 6 ) –

    Number of input channels. Default is 6.

  • out_channels (int, default: 6 ) –

    Number of output channels. Default is 6.

  • kernel_size (int, default: 3 ) –

    Kernel size for convolutional layers. Default is 3.

  • bias (bool, default: True ) –

    Set to True to let convolutional layers learn a bias term. Default is True.

  • normalization (bool, default: False ) –

    If True, adds a Batch Normalization layer after the convolutional layer. Default is False.

  • activation (Module, default: LeakyReLU(0.2, inplace=True) ) –

    Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).

Source code in odak/learn/models/models.py
def __init__(
    self,
    depth=3,
    dimensions=8,
    input_channels=6,
    out_channels=6,
    kernel_size=3,
    bias=True,
    normalization=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initialize the spatially adaptive U-Net model.

    Parameters
    ----------
    depth : int, optional
        Number of upsampling and downsampling layers. Default is 3.
    dimensions : int, optional
        Number of dimensions (base feature width; doubled per encoder stage). Default is 8.
    input_channels : int, optional
        Number of input channels. Default is 6.
    out_channels : int, optional
        Number of output channels. Default is 6.
    kernel_size : int, optional
        Kernel size for convolutional layers. Default is 3.
    bias : bool, optional
        Set to True to let convolutional layers learn a bias term. Default is True.
    normalization : bool, optional
        If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
    activation : torch.nn.Module, optional
        Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
    """
    super().__init__()
    self.depth = depth
    self.out_channels = out_channels
    logger.info(
        f"Initializing spatially_adaptive_unet: "
        f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
        f"out_channels={out_channels}, kernel_size={kernel_size}, "
        f"bias={bias}, normalization={normalization}"
    )
    # Stem: lift the input field from input_channels to the base feature width.
    self.inc = convolution_layer(
        input_channels=input_channels,
        output_channels=dimensions,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )

    # Encoder: each stage halves the spatial resolution (AvgPool2d(2)) and
    # doubles the channel width through a spatially adaptive module (SAM).
    self.encoder = torch.nn.ModuleList()
    for i in range(self.depth + 1):  # Downsampling layers
        down_in_channels = dimensions * (2**i)
        down_out_channels = 2 * down_in_channels
        pooling_layer = torch.nn.AvgPool2d(2)
        # Residual refinement block (in == out channels; added to its input in forward()).
        double_convolution_layer = double_convolution(
            input_channels=down_in_channels,
            mid_channels=down_in_channels,
            output_channels=down_in_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        sam = spatially_adaptive_module(
            input_channels=down_in_channels,
            output_channels=down_out_channels,
            kernel_size=kernel_size,
            bias=bias,
            activation=activation,
        )
        self.encoder.append(
            torch.nn.ModuleList([pooling_layer, double_convolution_layer, sam])
        )
        logger.debug(f"Added encoder block {i}: {down_in_channels} -> {down_out_channels}")
    # Bottleneck: refine the deepest features and fuse them with a learned
    # global descriptor (see global_feature_module).
    self.global_feature_module = torch.nn.ModuleList()
    double_convolution_layer = double_convolution(
        input_channels=dimensions * (2 ** (depth + 1)),
        mid_channels=dimensions * (2 ** (depth + 1)),
        output_channels=dimensions * (2 ** (depth + 1)),
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    global_feature_layer = global_feature_module(
        input_channels=dimensions * (2 ** (depth + 1)),
        mid_channels=dimensions * (2 ** (depth + 1)),
        output_channels=dimensions * (2 ** (depth + 1)),
        kernel_size=kernel_size,
        bias=bias,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    )
    self.global_feature_module.append(
        torch.nn.ModuleList([double_convolution_layer, global_feature_layer])
    )
    logger.debug("Added global feature module")

    # Decoder: runs from the deepest scale (i == depth) up to the output
    # scale (i == 0); the final stage projects to out_channels through a
    # 1x1 convolution with no activation.
    self.decoder = torch.nn.ModuleList()
    for i in range(depth, -1, -1):
        up_in_channels = dimensions * (2 ** (i + 1))
        up_mid_channels = up_in_channels // 2
        if i == 0:
            up_out_channels = self.out_channels
            upsample_layer = upsample_convtranspose2d_layer(
                input_channels=up_in_channels,
                output_channels=up_mid_channels,
                kernel_size=2,
                stride=2,
                bias=bias,
            )
            conv_layer = torch.nn.Sequential(
                convolution_layer(
                    input_channels=up_mid_channels,
                    output_channels=up_mid_channels,
                    kernel_size=kernel_size,
                    bias=bias,
                    normalization=normalization,
                    activation=activation,
                ),
                convolution_layer(
                    input_channels=up_mid_channels,
                    output_channels=up_out_channels,
                    kernel_size=1,
                    bias=bias,
                    normalization=normalization,
                    activation=None,
                ),
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
        else:
            up_out_channels = up_in_channels // 2
            upsample_layer = upsample_convtranspose2d_layer(
                input_channels=up_in_channels,
                output_channels=up_mid_channels,
                kernel_size=2,
                stride=2,
                bias=bias,
            )
            conv_layer = double_convolution(
                input_channels=up_mid_channels,
                mid_channels=up_mid_channels,
                output_channels=up_out_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
    logger.info("spatially_adaptive_unet initialization completed")

forward(sv_kernel, field)

Forward pass of the spatially adaptive U-Net.

Parameters:

  • sv_kernel (list of torch.Tensor) –

    Learned spatially varying kernels. Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i), where C_i, H_i, and W_i represent the channel, height, and width of each feature at a certain scale.

  • field (Tensor) –

    Input field data. Dimension: (1, 6, H, W)

Returns:

  • target_field ( Tensor ) –

    Estimated output. Dimension: (1, 6, H, W)

Source code in odak/learn/models/models.py
def forward(self, sv_kernel, field):
    """
    Forward pass of the spatially adaptive U-Net.

    Parameters
    ----------
    sv_kernel : list of torch.Tensor
        Learned spatially varying kernels.
        Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
        where C_i, H_i, and W_i represent the channel, height, and width
        of each feature at a certain scale.

    field : torch.Tensor
        Input field data.
        Dimension: (1, 6, H, W)

    Returns
    -------
    target_field : torch.Tensor
        Estimated output.
        Dimension: (1, 6, H, W)
    """
    x = self.inc(field)
    downsampling_outputs = [x]
    for i, down_layer in enumerate(self.encoder):
        x_down = down_layer[0](downsampling_outputs[-1])
        downsampling_outputs.append(x_down)
        sam_output = down_layer[2](
            x_down + down_layer[1](x_down), sv_kernel[self.depth - i]
        )
        downsampling_outputs.append(sam_output)
    global_feature = self.global_feature_module[0][0](downsampling_outputs[-1])
    global_feature = self.global_feature_module[0][1](
        downsampling_outputs[-1], global_feature
    )
    downsampling_outputs.append(global_feature)
    x_up = downsampling_outputs[-1]
    for i, up_layer in enumerate(self.decoder):
        x_up = up_layer[0](x_up, downsampling_outputs[2 * (self.depth - i)])
        x_up = up_layer[1](x_up)
    result = x_up
    return result

spatially_varying_kernel_generation_model

Bases: Module

Spatially varying kernel generation model, revised from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Refer to: J. Huang, P. Zhu, M. Geng et al., "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/models.py
class spatially_varying_kernel_generation_model(torch.nn.Module):
    """
    Spatially_varying_kernel_generation_model revised from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Refer to:
    J. Huang, P. Zhu, M. Geng et al. Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices.
    """

    def __init__(
        self,
        depth=3,
        dimensions=8,
        input_channels=7,
        kernel_size=3,
        bias=True,
        normalization=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initialize the spatially varying kernel generation model.

        Parameters
        ----------
        depth : int, optional
            Number of upsampling and downsampling layers. Default is 3.
        dimensions : int, optional
            Number of dimensions (base feature width). Default is 8.
        input_channels : int, optional
            Number of input channels. Default is 7
            (1 focal-surface channel + 6 field channels; see forward()).
        kernel_size : int, optional
            Kernel size for convolutional layers. Default is 3.
        bias : bool, optional
            Set to True to let convolutional layers learn a bias term. Default is True.
        normalization : bool, optional
            If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
        activation : torch.nn.Module, optional
            Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
        """
        super().__init__()
        self.depth = depth
        logger.info(
            f"Initializing spatially_varying_kernel_generation_model: "
            f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
            f"kernel_size={kernel_size}, bias={bias}, normalization={normalization}"
        )
        # Stem: fuse focal surface and field into the base feature width.
        self.inc = convolution_layer(
            input_channels=input_channels,
            output_channels=dimensions,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )

        # Encoder: flat list alternating [AvgPool2d, double_convolution] per
        # stage; first and last stages keep the channel count, middle stages
        # double it.
        self.encoder = torch.nn.ModuleList()
        for i in range(depth + 1):  # downsampling layers
            if i == 0:
                in_channels = dimensions * (2**i)
                out_channels = dimensions * (2**i)
            elif i == depth:
                in_channels = dimensions * (2 ** (i - 1))
                out_channels = dimensions * (2 ** (i - 1))
            else:
                in_channels = dimensions * (2 ** (i - 1))
                out_channels = 2 * in_channels
            pooling_layer = torch.nn.AvgPool2d(2)
            double_convolution_layer = double_convolution(
                input_channels=in_channels,
                mid_channels=in_channels,
                output_channels=out_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            self.encoder.append(pooling_layer)
            self.encoder.append(double_convolution_layer)
            logger.debug(f"Added encoder block {i}: {in_channels} -> {out_channels}")

        # One kernel-prediction head per scale (coarsest-first). Each head is a
        # ModuleList of AvgPool2d layers (to bring the focal surface and skip
        # features down to the head's resolution) followed by a 3-conv block
        # that predicts kernel_size * kernel_size weights per channel.
        self.spatially_varying_feature = torch.nn.ModuleList()  # for kernel generation
        for i in range(depth, -1, -1):
            if i == 1:
                svf_in_channels = dimensions + 2 ** (self.depth + i) + 1
            else:
                svf_in_channels = 2 ** (self.depth + i) + 1
            svf_out_channels = (2 ** (self.depth + i)) * (kernel_size * kernel_size)
            svf_mid_channels = dimensions * (2 ** (self.depth - 1))
            spatially_varying_kernel_generation = torch.nn.ModuleList()
            for j in range(i, -1, -1):
                pooling_layer = torch.nn.AvgPool2d(2 ** (j + 1))
                spatially_varying_kernel_generation.append(pooling_layer)
            kernel_generation_block = torch.nn.Sequential(
                torch.nn.Conv2d(
                    in_channels=svf_in_channels,
                    out_channels=svf_mid_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
                activation,
                torch.nn.Conv2d(
                    in_channels=svf_mid_channels,
                    out_channels=svf_mid_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
                activation,
                torch.nn.Conv2d(
                    in_channels=svf_mid_channels,
                    out_channels=svf_out_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
            )
            spatially_varying_kernel_generation.append(kernel_generation_block)
            self.spatially_varying_feature.append(spatially_varying_kernel_generation)
            logger.debug(f"Added SVF block {i}: {svf_in_channels} -> {svf_out_channels}")

        # Decoder: first entry is the global feature layer applied at the
        # bottleneck, followed by [upsample, double_convolution] pairs.
        self.decoder = torch.nn.ModuleList()
        global_feature_layer = global_feature_module(  # global feature layer
            input_channels=dimensions * (2 ** (depth - 1)),
            mid_channels=dimensions * (2 ** (depth - 1)),
            output_channels=dimensions * (2 ** (depth - 1)),
            kernel_size=kernel_size,
            bias=bias,
            activation=torch.nn.LeakyReLU(0.2, inplace=True),
        )
        self.decoder.append(global_feature_layer)
        for i in range(depth, 0, -1):
            if i == 2:
                up_in_channels = (dimensions // 2) * (2**i)
                up_out_channels = up_in_channels
                up_mid_channels = up_in_channels
            elif i == 1:
                up_in_channels = dimensions * 2
                up_out_channels = dimensions
                up_mid_channels = up_out_channels
            else:
                up_in_channels = (dimensions // 2) * (2**i)
                up_out_channels = up_in_channels // 2
                up_mid_channels = up_in_channels
            upsample_layer = upsample_convtranspose2d_layer(
                input_channels=up_in_channels,
                output_channels=up_mid_channels,
                kernel_size=2,
                stride=2,
                bias=bias,
            )
            conv_layer = double_convolution(
                input_channels=up_mid_channels,
                output_channels=up_out_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
        logger.info("spatially_varying_kernel_generation_model initialization completed")

    def forward(self, focal_surface, field):
        """
        Forward pass of the spatially varying kernel generation model.

        Parameters
        ----------
        focal_surface : torch.Tensor
            Input focal surface data.
            Dimension: (1, 1, H, W)

        field : torch.Tensor
            Input field data.
            Dimension: (1, 6, H, W)

        Returns
        -------
        sv_kernel : list of torch.Tensor
            Learned spatially varying kernels.
            Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
            where C_i, H_i, and W_i represent the channel, height, and width
            of each feature at a certain scale.
        """
        x = self.inc(torch.cat((focal_surface, field), dim=1))
        # downsampling_outputs layout: [0] focal surface, [1] stem output, then
        # alternating pooled / convolved features per encoder stage (even
        # indices >= 2 hold the pooled features used below).
        downsampling_outputs = [focal_surface]
        downsampling_outputs.append(x)
        for i, down_layer in enumerate(self.encoder):
            x_down = down_layer(downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
        sv_kernels = []
        for i, (up_layer, svf_layer) in enumerate(
            zip(self.decoder, self.spatially_varying_feature)
        ):
            if i == 0:
                # Bottleneck stage: fuse with the global descriptor first.
                global_feature = up_layer(
                    downsampling_outputs[-2], downsampling_outputs[-1]
                )
                downsampling_outputs[-1] = global_feature
                sv_feature = [global_feature, downsampling_outputs[0]]
                # Repeatedly pool the focal surface down to this head's
                # resolution while collecting pooled encoder skips.
                for j in range(self.depth - i + 1):
                    sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                    if j > 0:
                        sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
                # NOTE(review): this explicit reordering hard-codes depth == 3
                # (indices 2..4 only exist then) — confirm for other depths.
                sv_feature = [
                    sv_feature[0],
                    sv_feature[1],
                    sv_feature[4],
                    sv_feature[2],
                    sv_feature[3],
                ]
                sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
                sv_kernels.append(sv_kernel)
            else:
                # Upsample and merge with the matching encoder skip feature.
                x_up = up_layer[0](
                    downsampling_outputs[-1],
                    downsampling_outputs[2 * (self.depth + 1 - i) + 1],
                )
                x_up = up_layer[1](x_up)
                downsampling_outputs[-1] = x_up
                sv_feature = [x_up, downsampling_outputs[0]]
                for j in range(self.depth - i + 1):
                    sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                    if j > 0:
                        sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
                if i == 1:
                    # NOTE(review): like the i == 0 branch, this ordering is
                    # presumably tuned for depth == 3 — confirm.
                    sv_feature = [
                        sv_feature[0],
                        sv_feature[1],
                        sv_feature[3],
                        sv_feature[2],
                    ]
                sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
                sv_kernels.append(sv_kernel)
        return sv_kernels

__init__(depth=3, dimensions=8, input_channels=7, kernel_size=3, bias=True, normalization=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initialize the spatially varying kernel generation model.

Parameters:

  • depth (int, default: 3 ) –

    Number of upsampling and downsampling layers. Default is 3.

  • dimensions (int, default: 8 ) –

    Number of dimensions. Default is 8.

  • input_channels (int, default: 7 ) –

    Number of input channels. Default is 7.

  • kernel_size (int, default: 3 ) –

    Kernel size for convolutional layers. Default is 3.

  • bias (bool, default: True ) –

    Set to True to let convolutional layers learn a bias term. Default is True.

  • normalization (bool, default: False ) –

    If True, adds a Batch Normalization layer after the convolutional layer. Default is False.

  • activation (Module, default: LeakyReLU(0.2, inplace=True) ) –

    Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).

Source code in odak/learn/models/models.py
def __init__(
    self,
    depth=3,
    dimensions=8,
    input_channels=7,
    kernel_size=3,
    bias=True,
    normalization=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initialize the spatially varying kernel generation model.

    Parameters
    ----------
    depth : int, optional
        Number of upsampling and downsampling layers. Default is 3.
    dimensions : int, optional
        Number of dimensions. Default is 8.
    input_channels : int, optional
        Number of input channels. Default is 7.
    kernel_size : int, optional
        Kernel size for convolutional layers. Default is 3.
    bias : bool, optional
        Set to True to let convolutional layers learn a bias term. Default is True.
    normalization : bool, optional
        If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
    activation : torch.nn.Module, optional
        Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
    """
    super().__init__()
    self.depth = depth
    logger.info(
        f"Initializing spatially_varying_kernel_generation_model: "
        f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
        f"kernel_size={kernel_size}, bias={bias}, normalization={normalization}"
    )
    # Stem: lifts the concatenated (focal surface + field) input to
    # `dimensions` channels.
    self.inc = convolution_layer(
        input_channels=input_channels,
        output_channels=dimensions,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )

    # Encoder: depth + 1 stages. Each stage appends TWO entries to the
    # ModuleList (an AvgPool2d and a double convolution), so `forward`
    # records two outputs per stage. Channel width doubles in the middle
    # stages and stays flat at the first and last stage.
    self.encoder = torch.nn.ModuleList()
    for i in range(depth + 1):  # downsampling layers
        if i == 0:
            in_channels = dimensions * (2**i)
            out_channels = dimensions * (2**i)
        elif i == depth:
            in_channels = dimensions * (2 ** (i - 1))
            out_channels = dimensions * (2 ** (i - 1))
        else:
            in_channels = dimensions * (2 ** (i - 1))
            out_channels = 2 * in_channels
        pooling_layer = torch.nn.AvgPool2d(2)
        double_convolution_layer = double_convolution(
            input_channels=in_channels,
            mid_channels=in_channels,
            output_channels=out_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.encoder.append(pooling_layer)
        self.encoder.append(double_convolution_layer)
        logger.debug(f"Added encoder block {i}: {in_channels} -> {out_channels}")

    # Kernel-generation branch: one ModuleList per scale, built coarse to
    # fine. Each list holds AvgPool2d layers (used in `forward` to bring
    # auxiliary features down to the current scale) followed by a
    # three-convolution block emitting the spatially varying kernel.
    self.spatially_varying_feature = torch.nn.ModuleList()  # for kernel generation
    for i in range(depth, -1, -1):
        # NOTE(review): the trailing `+ 1` in the input width presumably
        # accounts for the single-channel focal surface concatenated in
        # `forward`; confirm against models.py.
        if i == 1:
            svf_in_channels = dimensions + 2 ** (self.depth + i) + 1
        else:
            svf_in_channels = 2 ** (self.depth + i) + 1
        svf_out_channels = (2 ** (self.depth + i)) * (kernel_size * kernel_size)
        svf_mid_channels = dimensions * (2 ** (self.depth - 1))
        spatially_varying_kernel_generation = torch.nn.ModuleList()
        for j in range(i, -1, -1):
            pooling_layer = torch.nn.AvgPool2d(2 ** (j + 1))
            spatially_varying_kernel_generation.append(pooling_layer)
        # NOTE(review): the same `activation` module instance is reused in
        # several Sequential blocks; harmless for stateless activations.
        kernel_generation_block = torch.nn.Sequential(
            torch.nn.Conv2d(
                in_channels=svf_in_channels,
                out_channels=svf_mid_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            activation,
            torch.nn.Conv2d(
                in_channels=svf_mid_channels,
                out_channels=svf_mid_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            activation,
            torch.nn.Conv2d(
                in_channels=svf_mid_channels,
                out_channels=svf_out_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
        )
        spatially_varying_kernel_generation.append(kernel_generation_block)
        self.spatially_varying_feature.append(spatially_varying_kernel_generation)
        logger.debug(f"Added SVF block {i}: {svf_in_channels} -> {svf_out_channels}")

    # Decoder: first entry is a global feature module; the remaining
    # entries are [transposed-conv upsample, double convolution] pairs.
    self.decoder = torch.nn.ModuleList()
    global_feature_layer = global_feature_module(  # global feature layer
        input_channels=dimensions * (2 ** (depth - 1)),
        mid_channels=dimensions * (2 ** (depth - 1)),
        output_channels=dimensions * (2 ** (depth - 1)),
        kernel_size=kernel_size,
        bias=bias,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    )
    self.decoder.append(global_feature_layer)
    for i in range(depth, 0, -1):
        # NOTE(review): the i == 2 and i == 1 special cases mirror the
        # non-uniform channel widths produced by the encoder above;
        # confirm intended behavior for depth != 3.
        if i == 2:
            up_in_channels = (dimensions // 2) * (2**i)
            up_out_channels = up_in_channels
            up_mid_channels = up_in_channels
        elif i == 1:
            up_in_channels = dimensions * 2
            up_out_channels = dimensions
            up_mid_channels = up_out_channels
        else:
            up_in_channels = (dimensions // 2) * (2**i)
            up_out_channels = up_in_channels // 2
            up_mid_channels = up_in_channels
        upsample_layer = upsample_convtranspose2d_layer(
            input_channels=up_in_channels,
            output_channels=up_mid_channels,
            kernel_size=2,
            stride=2,
            bias=bias,
        )
        conv_layer = double_convolution(
            input_channels=up_mid_channels,
            output_channels=up_out_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
        logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
    logger.info("spatially_varying_kernel_generation_model initialization completed")

forward(focal_surface, field)

Forward pass of the spatially varying kernel generation model.

Parameters:

  • focal_surface (Tensor) –

    Input focal surface data. Dimension: (1, 1, H, W)

  • field (Tensor) –

    Input field data. Dimension: (1, 6, H, W)

Returns:

  • sv_kernel ( list of torch.Tensor ) –

    Learned spatially varying kernels. Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i), where C_i, H_i, and W_i represent the channel, height, and width of each feature at a certain scale.

Source code in odak/learn/models/models.py
def forward(self, focal_surface, field):
    """
    Forward pass of the spatially varying kernel generation model.

    Parameters
    ----------
    focal_surface : torch.Tensor
        Input focal surface data.
        Dimension: (1, 1, H, W)

    field : torch.Tensor
        Input field data.
        Dimension: (1, 6, H, W)

    Returns
    -------
    sv_kernel : list of torch.Tensor
        Learned spatially varying kernels.
        Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
        where C_i, H_i, and W_i represent the channel, height, and width
        of each feature at a certain scale.
    """
    # Stem: concatenate the 1-channel focal surface with the 6-channel field.
    x = self.inc(torch.cat((focal_surface, field), dim=1))
    # downsampling_outputs[0] is the raw focal surface; index 1 is the stem
    # output; afterwards every encoder entry appends its own output
    # (pooling and convolution are separate entries, two per stage).
    downsampling_outputs = [focal_surface]
    downsampling_outputs.append(x)
    for i, down_layer in enumerate(self.encoder):
        x_down = down_layer(downsampling_outputs[-1])
        downsampling_outputs.append(x_down)
    sv_kernels = []
    for i, (up_layer, svf_layer) in enumerate(
        zip(self.decoder, self.spatially_varying_feature)
    ):
        if i == 0:
            # Coarsest scale: `up_layer` is the global feature module,
            # fusing the two deepest encoder outputs.
            global_feature = up_layer(
                downsampling_outputs[-2], downsampling_outputs[-1]
            )
            downsampling_outputs[-1] = global_feature
            # sv_feature[1] is repeatedly average-pooled down to this scale;
            # extra entries collect pooled intermediate encoder features.
            sv_feature = [global_feature, downsampling_outputs[0]]
            for j in range(self.depth - i + 1):
                sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                if j > 0:
                    sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
            # Reorder before concatenation — presumably to match the channel
            # layout the kernel-generation block was constructed for;
            # TODO(review): confirm behavior for depth != 3.
            sv_feature = [
                sv_feature[0],
                sv_feature[1],
                sv_feature[4],
                sv_feature[2],
                sv_feature[3],
            ]
            sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
            sv_kernels.append(sv_kernel)
        else:
            # Finer scales: transposed-conv upsample fused with the matching
            # encoder output (two list entries per encoder stage, hence the
            # 2 * (...) + 1 indexing), then a double convolution.
            x_up = up_layer[0](
                downsampling_outputs[-1],
                downsampling_outputs[2 * (self.depth + 1 - i) + 1],
            )
            x_up = up_layer[1](x_up)
            downsampling_outputs[-1] = x_up
            sv_feature = [x_up, downsampling_outputs[0]]
            for j in range(self.depth - i + 1):
                sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                if j > 0:
                    sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
            if i == 1:
                # Same channel-layout reordering as above for this scale.
                sv_feature = [
                    sv_feature[0],
                    sv_feature[1],
                    sv_feature[3],
                    sv_feature[2],
                ]
            sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
            sv_kernels.append(sv_kernel)
    return sv_kernels

unet

Bases: Module

A U-Net model, heavily inspired from https://github.com/milesial/Pytorch-UNet/tree/master/unet and more can be read from Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. "U-net: Convolutional networks for biomedical image segmentation." Medical Image Computing and Computer-Assisted Intervention–MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18. Springer International Publishing, 2015.

Source code in odak/learn/models/models.py
class unet(torch.nn.Module):
    """
    A U-Net model, heavily inspired from `https://github.com/milesial/Pytorch-UNet/tree/master/unet` and more can be read from Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. "U-net: Convolutional networks for biomedical image segmentation." Medical Image Computing and Computer-Assisted Intervention–MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18. Springer International Publishing, 2015.
    """

    def __init__(
        self,
        depth=4,
        dimensions=64,
        input_channels=2,
        output_channels=1,
        bilinear=False,
        kernel_size=3,
        bias=False,
        activation=torch.nn.ReLU(inplace=True),
    ):
        """
        Initialize the U-Net model.

        Parameters
        ----------
        depth : int, optional
            Number of upsampling and downsampling layers. Default is 4.
        dimensions : int, optional
            Number of dimensions. Default is 64.
        input_channels : int, optional
            Number of input channels. Default is 2.
        output_channels : int, optional
            Number of output channels. Default is 1.
        bilinear : bool, optional
            Uses bilinear upsampling in upsampling layers when set True. Default is False.
        kernel_size : int, optional
            Kernel size for convolutional layers. Default is 3.
        bias : bool, optional
            Set True to let convolutional layers learn a bias term. Default is False.
        activation : torch.nn.Module, optional
            Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.ReLU(inplace=True).
        """
        super(unet, self).__init__()
        logger.info(
            f"Initializing U-Net: depth={depth}, dimensions={dimensions}, "
            f"input_channels={input_channels}, output_channels={output_channels}, "
            f"bilinear={bilinear}, kernel_size={kernel_size}"
        )
        # Stem block lifting the input to the base channel width.
        self.inc = double_convolution(
            input_channels=input_channels,
            mid_channels=dimensions,
            output_channels=dimensions,
            kernel_size=kernel_size,
            bias=bias,
            activation=activation,
        )
        self.downsampling_layers = torch.nn.ModuleList()
        self.upsampling_layers = torch.nn.ModuleList()
        # Channel widths at each scale: dimensions, 2 * dimensions, ...
        widths = [dimensions * (2 ** level) for level in range(depth + 1)]
        for i, (narrow, wide) in enumerate(zip(widths[:-1], widths[1:])):
            self.downsampling_layers.append(
                downsample_layer(
                    narrow,
                    wide,
                    kernel_size=kernel_size,
                    bias=bias,
                    activation=activation,
                )
            )
            logger.debug(f"Added downsampling layer {i}: {narrow} -> {wide}")
        # Decoder mirrors the encoder widths in reverse order.
        for wide, narrow in zip(widths[:0:-1], widths[-2::-1]):
            self.upsampling_layers.append(
                upsample_layer(
                    wide,
                    narrow,
                    kernel_size=kernel_size,
                    bias=bias,
                    activation=activation,
                    bilinear=bilinear,
                )
            )
            logger.debug(f"Added upsampling layer: {wide} -> {narrow}")
        self.outc = torch.nn.Conv2d(
            dimensions,
            output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        )
        logger.info("U-Net initialization completed")

    def forward(self, x):
        """
        Forward pass of the U-Net.

        Parameters
        ----------
        x : torch.Tensor
            Input data.

        Returns
        -------
        result : torch.Tensor
            Estimated output.
        """
        features = [self.inc(x)]
        for encoder_stage in self.downsampling_layers:
            features.append(encoder_stage(features[-1]))
        decoded = features[-1]
        # Pair each decoder stage with the encoder output at the same scale.
        for decoder_stage, skip in zip(
            self.upsampling_layers, reversed(features[:-1])
        ):
            decoded = decoder_stage(decoded, skip)
        return self.outc(decoded)

__init__(depth=4, dimensions=64, input_channels=2, output_channels=1, bilinear=False, kernel_size=3, bias=False, activation=torch.nn.ReLU(inplace=True))

Initialize the U-Net model.

Parameters:

  • depth (int, default: 4 ) –

    Number of upsampling and downsampling layers. Default is 4.

  • dimensions (int, default: 64 ) –

    Number of dimensions. Default is 64.

  • input_channels (int, default: 2 ) –

    Number of input channels. Default is 2.

  • output_channels (int, default: 1 ) –

    Number of output channels. Default is 1.

  • bilinear (bool, default: False ) –

    Uses bilinear upsampling in upsampling layers when set True. Default is False.

  • kernel_size (int, default: 3 ) –

    Kernel size for convolutional layers. Default is 3.

  • bias (bool, default: False ) –

    Set True to let convolutional layers learn a bias term. Default is False.

  • activation (Module, default: ReLU(inplace=True) ) –

    Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.ReLU(inplace=True).

Source code in odak/learn/models/models.py
def __init__(
    self,
    depth=4,
    dimensions=64,
    input_channels=2,
    output_channels=1,
    bilinear=False,
    kernel_size=3,
    bias=False,
    activation=torch.nn.ReLU(inplace=True),
):
    """
    Initialize the U-Net model.

    Parameters
    ----------
    depth : int, optional
        Number of upsampling and downsampling layers. Default is 4.
    dimensions : int, optional
        Number of dimensions. Default is 64.
    input_channels : int, optional
        Number of input channels. Default is 2.
    output_channels : int, optional
        Number of output channels. Default is 1.
    bilinear : bool, optional
        Uses bilinear upsampling in upsampling layers when set True. Default is False.
    kernel_size : int, optional
        Kernel size for convolutional layers. Default is 3.
    bias : bool, optional
        Set True to let convolutional layers learn a bias term. Default is False.
    activation : torch.nn.Module, optional
        Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.ReLU(inplace=True).
    """
    super(unet, self).__init__()
    logger.info(
        f"Initializing U-Net: depth={depth}, dimensions={dimensions}, "
        f"input_channels={input_channels}, output_channels={output_channels}, "
        f"bilinear={bilinear}, kernel_size={kernel_size}"
    )
    # Stem block lifting the input to the base channel width.
    self.inc = double_convolution(
        input_channels=input_channels,
        mid_channels=dimensions,
        output_channels=dimensions,
        kernel_size=kernel_size,
        bias=bias,
        activation=activation,
    )
    self.downsampling_layers = torch.nn.ModuleList()
    self.upsampling_layers = torch.nn.ModuleList()
    # Channel widths at each scale: dimensions, 2 * dimensions, ...
    widths = [dimensions * (2 ** level) for level in range(depth + 1)]
    for i, (narrow, wide) in enumerate(zip(widths[:-1], widths[1:])):
        self.downsampling_layers.append(
            downsample_layer(
                narrow,
                wide,
                kernel_size=kernel_size,
                bias=bias,
                activation=activation,
            )
        )
        logger.debug(f"Added downsampling layer {i}: {narrow} -> {wide}")
    # Decoder mirrors the encoder widths in reverse order.
    for wide, narrow in zip(widths[:0:-1], widths[-2::-1]):
        self.upsampling_layers.append(
            upsample_layer(
                wide,
                narrow,
                kernel_size=kernel_size,
                bias=bias,
                activation=activation,
                bilinear=bilinear,
            )
        )
        logger.debug(f"Added upsampling layer: {wide} -> {narrow}")
    self.outc = torch.nn.Conv2d(
        dimensions,
        output_channels,
        kernel_size=kernel_size,
        padding=kernel_size // 2,
        bias=bias,
    )
    logger.info("U-Net initialization completed")

forward(x)

Forward pass of the U-Net.

Parameters:

  • x (Tensor) –

    Input data.

Returns:

  • result ( Tensor ) –

    Estimated output.

Source code in odak/learn/models/models.py
def forward(self, x):
    """
    Forward pass of the U-Net.

    Parameters
    ----------
    x : torch.Tensor
        Input data.

    Returns
    -------
    result : torch.Tensor
        Estimated output.
    """
    features = [self.inc(x)]
    for encoder_stage in self.downsampling_layers:
        features.append(encoder_stage(features[-1]))
    decoded = features[-1]
    # Pair each decoder stage with the encoder output at the same scale.
    for decoder_stage, skip in zip(
        self.upsampling_layers, reversed(features[:-1])
    ):
        decoded = decoder_stage(decoded, skip)
    return self.outc(decoded)

upsample_convtranspose2d_layer

Bases: Module

An upsampling convtranspose2d layer.

Source code in odak/learn/models/components.py
class upsample_convtranspose2d_layer(torch.nn.Module):
    """
    An upsampling convtranspose2d layer.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=2,
        stride=2,
        bias=False,
    ):
        """
        An upsampling component backed by a single transposed convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size of the transposed convolution.
        stride          : int
                          Stride of the transposed convolution.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        """
        super().__init__()
        # A single transposed convolution performs the spatial upsampling.
        self.up = torch.nn.ConvTranspose2d(
            in_channels=input_channels,
            out_channels=output_channels,
            kernel_size=kernel_size,
            stride=stride,
            bias=bias,
        )

    def forward(self, x1, x2):
        """
        Upsample `x1`, zero-pad it to the spatial size of `x2`, and return
        their element-wise sum.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Result of the forward operation
        """
        upsampled = self.up(x1)
        height_gap = x2.size(2) - upsampled.size(2)
        width_gap = x2.size(3) - upsampled.size(3)
        upsampled = torch.nn.functional.pad(
            upsampled,
            [
                width_gap // 2,
                width_gap - width_gap // 2,
                height_gap // 2,
                height_gap - height_gap // 2,
            ],
        )
        return upsampled + x2

__init__(input_channels, output_channels, kernel_size=2, stride=2, bias=False)

An upsampling component based on a single transposed convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=2,
    stride=2,
    bias=False,
):
    """
    An upsampling component backed by a single transposed convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size of the transposed convolution.
    stride          : int
                      Stride of the transposed convolution.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    """
    super().__init__()
    # A single transposed convolution performs the spatial upsampling.
    self.up = torch.nn.ConvTranspose2d(
        in_channels=input_channels,
        out_channels=output_channels,
        kernel_size=kernel_size,
        stride=stride,
        bias=bias,
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Upsample `x1`, zero-pad it to the spatial size of `x2`, and return
    their element-wise sum.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """
    upsampled = self.up(x1)
    height_gap = x2.size(2) - upsampled.size(2)
    width_gap = x2.size(3) - upsampled.size(3)
    upsampled = torch.nn.functional.pad(
        upsampled,
        [
            width_gap // 2,
            width_gap - width_gap // 2,
            height_gap // 2,
            height_gap - height_gap // 2,
        ],
    )
    return upsampled + x2

upsample_layer

Bases: Module

An upsampling convolutional layer.

Source code in odak/learn/models/components.py
class upsample_layer(torch.nn.Module):
    """
    An upsampling convolutional layer.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
        bilinear=True,
    ):
        """
        An upsampling component followed by a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        bilinear        : bool
                          If set to True, bilinear sampling is used.
        """
        super(upsample_layer, self).__init__()
        if bilinear:
            # Parameter-free bilinear upsampling; the concatenated skip
            # connection widens the convolution input accordingly.
            self.up = torch.nn.Upsample(
                scale_factor=2, mode="bilinear", align_corners=True
            )
            conv_input_channels = input_channels + output_channels
            conv_mid_channels = input_channels // 2
        else:
            # Learned upsampling via a transposed convolution that halves
            # the channel count before the skip concatenation.
            self.up = torch.nn.ConvTranspose2d(
                input_channels, input_channels // 2, kernel_size=2, stride=2
            )
            conv_input_channels = input_channels
            conv_mid_channels = output_channels
        self.conv = double_convolution(
            input_channels=conv_input_channels,
            mid_channels=conv_mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )

    def forward(self, x1, x2):
        """
        Upsample `x1`, zero-pad it to the spatial size of `x2`, then run a
        double convolution on their channel-wise concatenation.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Result of the forward operation
        """
        upsampled = self.up(x1)
        height_gap = x2.size(2) - upsampled.size(2)
        width_gap = x2.size(3) - upsampled.size(3)
        upsampled = torch.nn.functional.pad(
            upsampled,
            [
                width_gap // 2,
                width_gap - width_gap // 2,
                height_gap // 2,
                height_gap - height_gap // 2,
            ],
        )
        merged = torch.cat([x2, upsampled], dim=1)
        return self.conv(merged)

__init__(input_channels, output_channels, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU(), bilinear=True)

An upsampling component followed by a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
  • bilinear
              If set to True, bilinear sampling is used.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
    bilinear=True,
):
    """
    An upsampling component followed by a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    bilinear        : bool
                      If set to True, bilinear sampling is used.
    """
    super(upsample_layer, self).__init__()
    if bilinear:
        # Parameter-free bilinear upsampling; the concatenated skip
        # connection widens the convolution input accordingly.
        self.up = torch.nn.Upsample(
            scale_factor=2, mode="bilinear", align_corners=True
        )
        conv_input_channels = input_channels + output_channels
        conv_mid_channels = input_channels // 2
    else:
        # Learned upsampling via a transposed convolution that halves the
        # channel count before the skip concatenation.
        self.up = torch.nn.ConvTranspose2d(
            input_channels, input_channels // 2, kernel_size=2, stride=2
        )
        conv_input_channels = input_channels
        conv_mid_channels = output_channels
    self.conv = double_convolution(
        input_channels=conv_input_channels,
        mid_channels=conv_mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        normalization=normalization,
        bias=bias,
        activation=activation,
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Upsample `x1`, zero-pad it to the spatial size of `x2`, then run a
    double convolution on their channel-wise concatenation.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """
    upsampled = self.up(x1)
    height_gap = x2.size(2) - upsampled.size(2)
    width_gap = x2.size(3) - upsampled.size(3)
    upsampled = torch.nn.functional.pad(
        upsampled,
        [
            width_gap // 2,
            width_gap - width_gap // 2,
            height_gap // 2,
            height_gap - height_gap // 2,
        ],
    )
    merged = torch.cat([x2, upsampled], dim=1)
    return self.conv(merged)

evaluate_3d_gaussians(points, centers=torch.zeros(1, 3), scales=torch.ones(1, 3), angles=torch.zeros(1, 3), opacity=torch.ones(1, 1))

Evaluate 3D Gaussian functions at given points, with optional rotation.

Parameters:

  • points
          The 3D points at which to evaluate the Gaussians.
    
  • centers
          The centers of the Gaussians.
    
  • scales
          The standard deviations (spread) of the Gaussians along each axis.
    
  • angles
          The rotation angles (in radians) for each Gaussian, applied to the points.
    
  • opacity
          Opacity of the Gaussians.
    

Returns:

  • intensities ( (Tensor, shape[n, 1]) ) –

    The evaluated Gaussian intensities at each point.

Source code in odak/learn/tools/function.py
def evaluate_3d_gaussians(
    points,
    centers=torch.zeros(1, 3),
    scales=torch.ones(1, 3),
    angles=torch.zeros(1, 3),
    opacity=torch.ones(1, 1),
) -> torch.Tensor:
    """
    Evaluate 3D Gaussian functions at given points, with optional rotation.

    Parameters
    ----------
    points      : torch.Tensor, shape [n, 3]
                  The 3D points at which to evaluate the Gaussians.
    centers     : torch.Tensor, shape [n, 3]
                  The centers of the Gaussians.
    scales      : torch.Tensor, shape [n, 3]
                  The standard deviations (spread) of the Gaussians along each axis.
    angles      : torch.Tensor, shape [n, 3]
                  The rotation angles (in radians) for each Gaussian, applied to the points.
    opacity     : torch.Tensor, shape [n, 1]
                  Opacity of the Gaussians.

    Returns
    -------
    intensities : torch.Tensor, shape [n, 1]
                  The evaluated Gaussian intensities at each point.
    """
    # Rotate the sample points about each Gaussian's center, then express
    # them relative to that center.
    rotated, _, _, _ = rotate_points(point=points, angles=angles, origin=centers)
    offsets = rotated - centers.unsqueeze(0)
    spreads = scales.unsqueeze(0)
    # Log of the unnormalized density: -0.5 * sum((offset / sigma) ** 2).
    log_density = torch.sum(-0.5 * (offsets / spreads) ** 2, dim=-1)
    # Normalization constant of a 3D Gaussian: (2 * pi) ** 1.5 * prod(sigma).
    normalizer = (spreads[:, :, 0] * spreads[:, :, 1] * spreads[:, :, 2]) * (
        2.0 * torch.pi
    ) ** (3.0 / 2.0)
    intensities = opacity.T * (torch.exp(log_density) / normalizer)
    return intensities

gaussian(x, multiplier=1.0)

A Gaussian non-linear activation. For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

Parameters:

  • x
           Input data.
    
  • multiplier
           Multiplier.
    

Returns:

  • result ( float or tensor ) –

    Output data.

Source code in odak/learn/models/components.py
def gaussian(x, multiplier=1.0):
    """
    A Gaussian non-linear activation.
    For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

    Parameters
    ----------
    x            : float or torch.tensor
                   Input data.
    multiplier   : float or torch.tensor
                   Multiplier controlling the width of the Gaussian bump.

    Returns
    -------
    result       : float or torch.tensor
                   Output data.
    """
    # exp(-(multiplier * x)^2): peaks at 1 for x == 0, decays smoothly to 0.
    result = torch.exp(-((multiplier * x) ** 2))
    return result

swish(x)

A swish non-linear activation. For more details: https://en.wikipedia.org/wiki/Swish_function

Parameters:

  • x
             Input.
    

Returns:

  • out ( float or tensor ) –

    Output.

Source code in odak/learn/models/components.py
def swish(x):
    """
    A swish non-linear activation.
    For more details: https://en.wikipedia.org/wiki/Swish_function

    Parameters
    ----------
    x              : float or torch.tensor
                     Input.

    Returns
    -------
    out            : float or torch.tensor
                     Output.
    """
    # swish(x) = x * sigmoid(x).
    gate = torch.sigmoid(x)
    return x * gate

validate_path(path, allowed_extensions=None)

Validates a file path to ensure it is safe to use.

Parameters:

  • path
              Path to validate.
    
  • allowed_extensions (list, default: None ) –
                  List of allowed extensions (e.g., ['.png', '.jpg']).
                  If None, all extensions are allowed.
    

Returns:

  • safe_path ( str ) –

    The validated and secured path (with tilde expanded).

Raises:

  • ValueError : If path traversal attempt detected or extension not allowed.
  • TypeError : If path is not a string.
Source code in odak/tools/file.py
def validate_path(path, allowed_extensions=None):
    """
    Validates a file path to ensure it is safe to use.

    Parameters
    ----------
    path               : str
                         Path to validate.
    allowed_extensions : list
                         List of allowed extensions (e.g., ['.png', '.jpg']).
                         If None, all extensions are allowed.

    Returns
    -------
    safe_path          : str
                         The validated and secured path (with tilde expanded).

    Raises
    ------
    ValueError         : If a path traversal attempt is detected or the extension is not allowed.
    TypeError          : If path is not a string.
    """
    if not isinstance(path, str):
        raise TypeError(f"Path must be a string, got {type(path).__name__}")

    # Null bytes can truncate paths in C-backed filesystem APIs.
    if "\x00" in path:
        raise ValueError("Null bytes not allowed in path")

    # Reject any '..' path component (either separator) BEFORE expansion, so a
    # traversal cannot hide behind '~' expansion. This single check replaces
    # the previous nested test, which missed backslash-delimited '..' on POSIX.
    if re.search(r"(^|[/\\])\.\.([/\\]|$)", path):
        raise ValueError("Path traversal detected: '..' not allowed in path")

    # Reject remote URLs masquerading as file paths.
    if re.search(r"https?://|ftp://", path.lower()):
        raise ValueError("URL protocols not allowed in file paths")

    path = os.path.expanduser(path)
    resolved_path = os.path.abspath(path)

    # Reject Windows UNC shares (\\server\share) and device paths (\\?\, \\.\).
    # The previous regex here was garbled (it matched the literal text '\\\|')
    # and never caught real UNC paths; a prefix check is correct and clearer.
    if path.startswith("\\\\") or path.startswith("//."):
        raise ValueError("UNC/device paths not allowed")

    if len(resolved_path) > 260:  # Windows MAX_PATH limit
        raise ValueError("Path exceeds maximum allowed length (260 characters)")

    if allowed_extensions is not None:
        _, file_ext = os.path.splitext(path)
        # Accept both '.png' and 'png' spellings in allowed_extensions.
        allowed_normalized = [
            ext.lower() if ext.startswith(".") else f".{ext}"
            for ext in allowed_extensions
        ]
        if file_ext.lower() not in allowed_normalized:
            raise ValueError(
                f"File extension '{file_ext}' is not allowed. "
                f"Allowed: {allowed_extensions}"
            )

    logger.debug(f"Path validated: {path}")
    return resolved_path

channel_gate

Bases: Module

Channel attention module with various pooling strategies. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class channel_gate(torch.nn.Module):
    """
    Channel attention module with various pooling strategies.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=("avg", "max")):
        """
        Initializes the channel gate module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the intermediate layer.
        pool_types      : list or tuple
                          Pooling operations to apply ("avg" and/or "max").
        """
        super().__init__()
        self.gate_channels = gate_channels
        # Floor the bottleneck width at 1 so large reduction ratios cannot
        # produce a zero-width Linear layer.
        hidden_channels = max(gate_channels // reduction_ratio, 1)
        # torch.nn.Flatten() (start_dim=1) is equivalent to the previous
        # custom Flatten (x.view(x.size(0), -1)) and removes the forward
        # reference to convolutional_block_attention.
        self.mlp = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(gate_channels, hidden_channels),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_channels, gate_channels),
        )
        self.pool_types = pool_types

    def forward(self, x):
        """
        Forward pass of the ChannelGate module.

        Applies channel-wise attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the ChannelGate module.

        Returns
        -------
        output       : torch.tensor
                       Output tensor after applying channel attention.
        """
        channel_att_sum = None
        # Pool the whole spatial extent down to a single value per channel.
        spatial = (x.size(2), x.size(3))
        for pool_type in self.pool_types:
            if pool_type == "avg":
                pooled = torch.nn.functional.avg_pool2d(x, spatial, stride=spatial)
            elif pool_type == "max":
                pooled = torch.nn.functional.max_pool2d(x, spatial, stride=spatial)
            else:
                # Previously an unknown entry silently reused the last pooled
                # tensor (or crashed on the first iteration); fail loudly instead.
                raise ValueError(f"Unsupported pool type: {pool_type}")
            channel_att_raw = self.mlp(pooled)
            channel_att_sum = (
                channel_att_raw
                if channel_att_sum is None
                else channel_att_sum + channel_att_raw
            )
        # Broadcast the per-channel gate over the spatial dimensions.
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        output = x * scale
        return output

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'])

Initializes the channel gate module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the intermediate layer.
    
  • pool_types
              List of pooling operations to apply.
    
Source code in odak/learn/models/components.py
def __init__(self, gate_channels, reduction_ratio=16, pool_types=["avg", "max"]):
    """
    Initializes the channel gate module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the intermediate layer.
    pool_types      : list
                      List of pooling operations to apply.
                      NOTE(review): mutable default argument — shared across
                      calls; safe here only because it is never mutated.
    """
    super().__init__()
    self.gate_channels = gate_channels
    # Bottleneck width of the shared MLP; floored at 1 so a large
    # reduction_ratio cannot produce a zero-width Linear layer.
    hidden_channels = gate_channels // reduction_ratio
    if hidden_channels == 0:
        hidden_channels = 1
    # Squeeze-style MLP: Flatten reshapes its input to [batch, features]
    # (see convolutional_block_attention.Flatten), then the two Linear
    # layers reduce to hidden_channels and project back to gate_channels.
    self.mlp = torch.nn.Sequential(
        convolutional_block_attention.Flatten(),
        torch.nn.Linear(gate_channels, hidden_channels),
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_channels, gate_channels),
    )
    self.pool_types = pool_types

forward(x)

Forward pass of the ChannelGate module.

Applies channel-wise attention to the input tensor.

Parameters:

  • x
           Input tensor to the ChannelGate module.
    

Returns:

  • output ( tensor ) –

    Output tensor after applying channel attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the ChannelGate module.

    Applies channel-wise attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the ChannelGate module.

    Returns
    -------
    output       : torch.tensor
                   Output tensor after applying channel attention.
    """
    # Pool the whole spatial extent down to one value per channel.
    spatial = (x.size(2), x.size(3))
    attention = None
    for pool_type in self.pool_types:
        if pool_type == "avg":
            pooled = torch.nn.functional.avg_pool2d(x, spatial, stride=spatial)
        elif pool_type == "max":
            pooled = torch.nn.functional.max_pool2d(x, spatial, stride=spatial)
        raw = self.mlp(pooled)
        # Accumulate the attention logits across pooling strategies.
        attention = raw if attention is None else attention + raw
    # Broadcast the per-channel gate back over the spatial dimensions.
    scale = torch.sigmoid(attention).unsqueeze(2).unsqueeze(3).expand_as(x)
    return x * scale

convolution_layer

Bases: Module

A convolution layer.

Source code in odak/learn/models/components.py
class convolution_layer(torch.nn.Module):
    """
    A convolution layer.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        bias=False,
        stride=1,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        stride          : int
                          Stride of the convolution.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used.
                          If None, no activation layer is added.
        """
        super().__init__()
        # kernel_size // 2 padding preserves the spatial size for odd kernels
        # when stride == 1.
        layers = [
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=kernel_size // 2,
                bias=bias,
            )
        ]
        if normalization:
            layers.append(torch.nn.BatchNorm2d(output_channels))
        if activation:
            # A falsy activation (e.g. None) is skipped on purpose.
            layers.append(activation)
        self.model = torch.nn.Sequential(*layers)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, output_channels=2, kernel_size=3, bias=False, stride=1, normalization=False, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
            Nonlinear activation layer to be used. If None, no activation layer is added.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    bias=False,
    stride=1,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    stride          : int
                      Stride of the convolution.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used.
                      If None, no activation layer is added.
    """
    super().__init__()
    # kernel_size // 2 padding preserves the spatial size for odd kernels
    # when stride == 1.
    layers = [
        torch.nn.Conv2d(
            input_channels,
            output_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            bias=bias,
        )
    ]
    if normalization:
        layers.append(torch.nn.BatchNorm2d(output_channels))
    if activation:
        # A falsy activation (e.g. None) is skipped on purpose.
        layers.append(activation)
    self.model = torch.nn.Sequential(*layers)

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Run the stored sequential model on the input.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    return self.model(x)

convolutional_block_attention

Bases: Module

Convolutional Block Attention Module (CBAM) class. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class convolutional_block_attention(torch.nn.Module):
    """
    Convolutional Block Attention Module (CBAM) class.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """

    def __init__(
        self,
        gate_channels,
        reduction_ratio=16,
        pool_types=("avg", "max"),
        no_spatial=False,
    ):
        """
        Initializes the convolutional block attention module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the channel attention.
        pool_types      : list or tuple
                          Pooling operations to apply for channel attention.
        no_spatial      : bool
                          If True, spatial attention is not applied.
        """
        # Zero-argument super() matches the style used elsewhere in this module.
        super().__init__()
        self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.spatial_gate = spatial_gate()

    class Flatten(torch.nn.Module):
        """
        Flattens the input tensor to a 2D [batch, features] matrix.
        """

        def forward(self, x):
            return x.view(x.size(0), -1)

    def forward(self, x):
        """
        Forward pass of the convolutional block attention module.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the CBAM module.

        Returns
        -------
        x_out        : torch.tensor
                       Output tensor after applying channel and spatial attention.
        """
        x_out = self.channel_gate(x)
        if not self.no_spatial:
            x_out = self.spatial_gate(x_out)
        return x_out

Flatten

Bases: Module

Flattens the input tensor to a 2D matrix.

Source code in odak/learn/models/components.py
class Flatten(torch.nn.Module):
    """
    Flattens an input tensor into a 2D [batch, features] matrix.
    """

    def forward(self, x):
        # Keep the batch dimension, collapse everything else.
        batch_size = x.size(0)
        return x.view(batch_size, -1)

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False)

Initializes the convolutional block attention module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the channel attention.
    
  • pool_types
              List of pooling operations to apply for channel attention.
    
  • no_spatial
              If True, spatial attention is not applied.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    gate_channels,
    reduction_ratio=16,
    pool_types=["avg", "max"],
    no_spatial=False,
):
    """
    Initializes the convolutional block attention module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the channel attention.
    pool_types      : list
                      List of pooling operations to apply for channel attention.
                      NOTE(review): mutable default argument — shared across
                      calls; safe here only because it is never mutated.
    no_spatial      : bool
                      If True, spatial attention is not applied.
    """
    super(convolutional_block_attention, self).__init__()
    # Channel attention is always applied; spatial attention is optional.
    self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
    self.no_spatial = no_spatial
    if not no_spatial:
        # spatial_gate is defined elsewhere in this module.
        self.spatial_gate = spatial_gate()

forward(x)

Forward pass of the convolutional block attention module.

Parameters:

  • x
           Input tensor to the CBAM module.
    

Returns:

  • x_out ( tensor ) –

    Output tensor after applying channel and spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the convolutional block attention module.

    Applies channel attention first, then (optionally) spatial attention.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the CBAM module.

    Returns
    -------
    x_out        : torch.tensor
                   Output tensor after applying channel and spatial attention.
    """
    attended = self.channel_gate(x)
    if self.no_spatial:
        return attended
    return self.spatial_gate(attended)

double_convolution

Bases: Module

A double convolution layer.

Source code in odak/learn/models/components.py
class double_convolution(torch.nn.Module):
    """
    A double convolution layer.
    """

    def __init__(
        self,
        input_channels=2,
        mid_channels=None,
        output_channels=2,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        Double convolution model.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of channels in the hidden layer between two convolutions.
                          If None, defaults to output_channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after each convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used.
                          If None, no activation layer is added.
        """
        super().__init__()
        # Idiomatic None check (was: isinstance(mid_channels, type(None))).
        if mid_channels is None:
            mid_channels = output_channels
        self.activation = activation
        self.model = torch.nn.Sequential(
            convolution_layer(
                input_channels=input_channels,
                output_channels=mid_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=self.activation,
            ),
            convolution_layer(
                input_channels=mid_channels,
                output_channels=output_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=self.activation,
            ),
        )

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, mid_channels=None, output_channels=2, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU())

Double convolution model.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of channels in the hidden layer between two convolutions.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    mid_channels=None,
    output_channels=2,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    Double convolution model.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of channels in the hidden layer between two convolutions.
                      If None, defaults to output_channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after each convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used.
                      If None, no activation layer is added.
    """
    super().__init__()
    # NOTE(review): `mid_channels is None` would be the idiomatic spelling.
    if isinstance(mid_channels, type(None)):
        mid_channels = output_channels
    self.activation = activation
    # Two stacked convolution layers: input -> mid -> output channels.
    self.model = torch.nn.Sequential(
        convolution_layer(
            input_channels=input_channels,
            output_channels=mid_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=self.activation,
        ),
        convolution_layer(
            input_channels=mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=self.activation,
        ),
    )

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Delegate to the two stacked convolution layers.
    output = self.model(x)
    return output

downsample_layer

Bases: Module

A downscaling component followed by a double convolution.

Source code in odak/learn/models/components.py
class downsample_layer(torch.nn.Module):
    """
    A downscaling component followed by a double convolution.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A downscaling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used.
        """
        super().__init__()
        # Halve the spatial resolution first, then refine the features with
        # a double convolution.
        downscale = torch.nn.MaxPool2d(2)
        refine = double_convolution(
            input_channels=input_channels,
            mid_channels=output_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.maxpool_conv = torch.nn.Sequential(downscale, refine)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x              : torch.tensor
                         First input data.

        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        return self.maxpool_conv(x)

__init__(input_channels, output_channels, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU())

A downscaling component with a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A downscaling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used.
    """
    super().__init__()
    # Halve the spatial resolution with max pooling, then refine features
    # with a double convolution.
    self.maxpool_conv = torch.nn.Sequential(
        torch.nn.MaxPool2d(2),
        double_convolution(
            input_channels=input_channels,
            mid_channels=output_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        ),
    )

forward(x)

Forward model.

Parameters:

  • x
             First input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x              : torch.tensor
                     First input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Max-pool then double-convolve, as assembled in __init__.
    return self.maxpool_conv(x)

global_feature_module

Bases: Module

A global feature layer that processes global features from input channels and applies them to another input tensor via learned transformations.

Source code in odak/learn/models/components.py
class global_feature_module(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies them to another input tensor via learned transformations.
    """

    def __init__(
        self,
        input_channels,
        mid_channels,
        output_channels,
        kernel_size,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of mid channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used.
                          If None, no activation layer is added.
        """
        super().__init__()
        # global_transformations is defined elsewhere in this module.
        self.transformations_1 = global_transformations(input_channels, output_channels)
        self.global_features_1 = double_convolution(
            input_channels=input_channels,
            mid_channels=mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        # NOTE(review): global_features_2 consumes y1 (which has
        # output_channels channels, see forward) yet is constructed with
        # input_channels. This only works when
        # input_channels == output_channels — confirm this is intended.
        self.global_features_2 = double_convolution(
            input_channels=input_channels,
            mid_channels=mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.transformations_2 = global_transformations(input_channels, output_channels)

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        global_tensor_1 = self.transformations_1(x1, x2)
        y1 = self.global_features_1(global_tensor_1)
        y2 = self.global_features_2(y1)
        # NOTE(review): transformations_2 was built with input_channels but
        # receives y1/y2 (output_channels) — see the note in __init__.
        global_tensor_2 = self.transformations_2(y1, y2)
        return global_tensor_2

__init__(input_channels, mid_channels, output_channels, kernel_size, bias=False, normalization=False, activation=torch.nn.ReLU())

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of mid channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    mid_channels,
    output_channels,
    kernel_size,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of mid channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    # Two global transformation stages sandwich two double-convolution
    # feature extractors; both extractors share the same configuration.
    self.transformations_1 = global_transformations(input_channels, output_channels)
    self.global_features_1 = double_convolution(
        input_channels=input_channels,
        mid_channels=mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.global_features_2 = double_convolution(
        input_channels=input_channels,
        mid_channels=mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.transformations_2 = global_transformations(input_channels, output_channels)

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Transform both inputs, extract features twice, then fuse the two
    # feature tensors with the second global transformation.
    global_tensor_1 = self.transformations_1(x1, x2)
    y1 = self.global_features_1(global_tensor_1)
    y2 = self.global_features_2(y1)
    global_tensor_2 = self.transformations_2(y1, y2)
    return global_tensor_2

global_transformations

Bases: Module

A global feature layer that processes global features from input channels and applies learned transformations to another input tensor.

This implementation is adapted from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Reference: J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/components.py
class global_transformations(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies learned transformations to another input tensor.

    This implementation is adapted from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Reference:
    J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."
    """

    def __init__(self, input_channels, output_channels):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        """
        super().__init__()
        # First branch projects pooled features, second branch refines them;
        # both use a linear layer followed by a leaky rectifier.
        self.global_feature_1 = torch.nn.Sequential(
            torch.nn.Linear(input_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace=True),
        )
        self.global_feature_2 = torch.nn.Sequential(
            torch.nn.Linear(output_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace=True),
        )

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        -------
        result         : torch.tensor
                        Estimated output.
        """
        # Global average pool x2 over its spatial axes, derive a per-channel
        # scale and shift, then apply both to x1.
        pooled = torch.mean(x2, dim=(2, 3))
        scale = self.global_feature_1(pooled)
        shift = self.global_feature_2(scale)
        scale = scale[:, :, None, None]
        shift = shift[:, :, None, None]
        result = x1 * scale + shift
        return result

__init__(input_channels, output_channels)

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
Source code in odak/learn/models/components.py
def __init__(self, input_channels, output_channels):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    """
    super().__init__()
    # Two small MLP branches: the first maps pooled features to the output
    # width, the second refines the result at constant width.
    self.global_feature_1 = torch.nn.Sequential(
        torch.nn.Linear(input_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace=True),
    )
    self.global_feature_2 = torch.nn.Sequential(
        torch.nn.Linear(output_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace=True),
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Global average pool x2 over its spatial axes, then derive a
    # per-channel scale (y1) and shift (y2) that are applied to x1.
    y = torch.mean(x2, dim=(2, 3))
    y1 = self.global_feature_1(y)
    y2 = self.global_feature_2(y1)
    y1 = y1.unsqueeze(2).unsqueeze(3)
    y2 = y2.unsqueeze(2).unsqueeze(3)
    result = x1 * y1 + y2
    return result

non_local_layer

Bases: Module

Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)

Source code in odak/learn/models/components.py
class non_local_layer(torch.nn.Module):
    """
    Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)
    """

    def __init__(
        self,
        input_channels=1024,
        bottleneck_channels=512,
        kernel_size=1,
        bias=False,
    ):
        """

        Parameters
        ----------
        input_channels      : int
                              Number of input channels.
        bottleneck_channels : int
                              Number of middle channels.
        kernel_size         : int
                              Kernel size.
        bias                : bool
                              Set to True to let convolutional layers have bias term.
        """
        super().__init__()
        self.input_channels = input_channels
        self.bottleneck_channels = bottleneck_channels
        # Value projection into the bottleneck width.
        self.g = torch.nn.Conv2d(
            self.input_channels,
            self.bottleneck_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        )
        # Output projection back to the input width.
        self.W_z = torch.nn.Sequential(
            torch.nn.Conv2d(
                self.bottleneck_channels,
                self.input_channels,
                kernel_size=kernel_size,
                bias=bias,
                padding=kernel_size // 2,
            ),
            torch.nn.BatchNorm2d(self.input_channels),
        )
        # Zero-initialize the batch-norm affine parameters so W_z outputs
        # zero at start; the residual connection then makes this block an
        # identity mapping until training moves the parameters.
        torch.nn.init.constant_(self.W_z[1].weight, 0)
        torch.nn.init.constant_(self.W_z[1].bias, 0)

    def forward(self, x):
        """
        Forward model [zi = Wzyi + xi]

        Parameters
        ----------
        x               : torch.tensor
                          First input data.


        Returns
        -------
        z               : torch.tensor
                          Estimated output.
        """
        batch_size, channels, height, width = x.size()
        theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
        # phi is consumed as (batch, channels, pixels); the original
        # permute-then-transpose round trip was redundant, so use the
        # flattened view directly.
        phi = x.view(batch_size, channels, -1)
        g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
        # Pairwise pixel affinities, normalized by pixel count before softmax.
        attn = torch.bmm(theta, phi) / (height * width)
        attn = torch.nn.functional.softmax(attn, dim=-1)
        y = (
            torch.bmm(attn, g)
            .permute(0, 2, 1)
            .contiguous()
            .view(batch_size, self.bottleneck_channels, height, width)
        )
        W_y = self.W_z(y)
        z = W_y + x
        return z

__init__(input_channels=1024, bottleneck_channels=512, kernel_size=1, bias=False)

Parameters:

  • input_channels
                  Number of input channels.
    
  • bottleneck_channels (int, default: 512 ) –
                  Number of middle channels.
    
  • kernel_size
                  Kernel size.
    
  • bias
                  Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=1024,
    bottleneck_channels=512,
    kernel_size=1,
    bias=False,
):
    """

    Parameters
    ----------
    input_channels      : int
                          Number of input channels.
    bottleneck_channels : int
                          Number of middle channels.
    kernel_size         : int
                          Kernel size.
    bias                : bool
                          Set to True to let convolutional layers have bias term.
    """
    super(non_local_layer, self).__init__()
    self.input_channels = input_channels
    self.bottleneck_channels = bottleneck_channels
    # Value projection into the bottleneck width.
    self.g = torch.nn.Conv2d(
        self.input_channels,
        self.bottleneck_channels,
        kernel_size=kernel_size,
        padding=kernel_size // 2,
        bias=bias,
    )
    # Output projection back to the input width.
    self.W_z = torch.nn.Sequential(
        torch.nn.Conv2d(
            self.bottleneck_channels,
            self.input_channels,
            kernel_size=kernel_size,
            bias=bias,
            padding=kernel_size // 2,
        ),
        torch.nn.BatchNorm2d(self.input_channels),
    )
    # Zero-initialize the batch-norm affine parameters so W_z outputs zero
    # at start; with the residual connection the block begins as identity.
    torch.nn.init.constant_(self.W_z[1].weight, 0)
    torch.nn.init.constant_(self.W_z[1].bias, 0)

forward(x)

Forward model [zi = Wzyi + xi]

Parameters:

  • x
              First input data.
    

Returns:

  • z ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model [zi = Wzyi + xi]

    Parameters
    ----------
    x               : torch.tensor
                      First input data.


    Returns
    -------
    z               : torch.tensor
                      Estimated output.
    """
    batch_size, channels, height, width = x.size()
    theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
    # NOTE(review): phi is permuted here and transposed straight back below;
    # it is equivalent to x.view(batch_size, channels, -1).
    phi = x.view(batch_size, channels, -1).permute(0, 2, 1)
    g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
    # Pairwise pixel affinities, normalized by pixel count before softmax.
    attn = torch.bmm(theta, phi.transpose(1, 2)) / (height * width)
    attn = torch.nn.functional.softmax(attn, dim=-1)
    y = (
        torch.bmm(attn, g)
        .permute(0, 2, 1)
        .contiguous()
        .view(batch_size, self.bottleneck_channels, height, width)
    )
    W_y = self.W_z(y)
    z = W_y + x
    return z

normalization

Bases: Module

A normalization layer.

Source code in odak/learn/models/components.py
class normalization(torch.nn.Module):
    """
    A normalization layer.
    """

    def __init__(
        self,
        dim=1,
    ):
        """
        Normalization layer.


        Parameters
        ----------
        dim             : int
                          Number of channels; sizes the learnable per-channel gain.
                          Statistics are always computed over axis 1.
        """
        super().__init__()
        # Learnable per-channel gain applied after standardization.
        self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))

    def forward(self, x):
        """
        Forward model.

        Standardizes `x` over its channel axis (axis 1) and scales the
        result by the learnable gain `k`.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        # Looser epsilon for reduced-precision inputs.
        epsilon = 1e-5 if x.dtype == torch.float32 else 1e-3
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_variance = x.var(dim=1, unbiased=False, keepdim=True)
        return (x - channel_mean) * torch.rsqrt(channel_variance + epsilon) * self.k

__init__(dim=1)

Normalization layer.

Parameters:

  • dim
              Dimension (axis) to normalize.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    dim=1,
):
    """
    Normalization layer.


    Parameters
    ----------
    dim             : int
                      Dimension (axis) to normalize.
    """
    super().__init__()
    # Learnable per-channel gain, applied after standardization.
    self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Looser epsilon for reduced-precision dtypes.
    eps = 1e-5 if x.dtype == torch.float32 else 1e-3
    # Statistics are always taken over axis 1 (channels).
    var = torch.var(x, dim=1, unbiased=False, keepdim=True)
    mean = torch.mean(x, dim=1, keepdim=True)
    result = (x - mean) * (var + eps).rsqrt() * self.k
    return result

positional_encoder

Bases: Module

A positional encoder module. This implementation follows this specific work: Martin-Brualla, Ricardo, Noha Radwan, Mehdi SM Sajjadi, Jonathan T. Barron, Alexey Dosovitskiy, and Daniel Duckworth. "Nerf in the wild: Neural radiance fields for unconstrained photo collections." In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pp. 7210-7219. 2021.

Source code in odak/learn/models/components.py
class positional_encoder(torch.nn.Module):
    """
    A positional encoder module.
    This implementation follows this specific work: `Martin-Brualla, Ricardo, Noha Radwan, Mehdi SM Sajjadi, Jonathan T. Barron, Alexey Dosovitskiy, and Daniel Duckworth. "Nerf in the wild: Neural radiance fields for unconstrained photo collections." In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pp. 7210-7219. 2021.`.
    """

    def __init__(self, L):
        """
        A positional encoder module.

        Parameters
        ----------
        L                   : int
                              Positional encoding level.
        """
        super().__init__()
        self.L = L

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x               : torch.tensor
                          Input data [b x n], where `b` is batch size, `n` is the feature size.

        Returns
        -------
        results         : torch.tensor
                          Encoded features [b x n * (1 + 2 * L)]: the input
                          followed by its cosine and sine encodings.
        """
        # Frequency bands 2^0 ... 2^(L-1), broadcastable against (b, n, 1).
        frequencies = (2 ** torch.arange(self.L, device=x.device)).reshape(1, 1, -1)
        scaled = x.unsqueeze(-1) * frequencies
        batch_size = x.shape[0]
        cosines = torch.cos(scaled).reshape(batch_size, -1)
        sines = torch.sin(scaled).reshape(batch_size, -1)
        return torch.cat((x, cosines, sines), dim=1)

__init__(L)

A positional encoder module.

Parameters:

  • L
                  Positional encoding level.
    
Source code in odak/learn/models/components.py
def __init__(self, L):
    """
    A positional encoder module.

    Parameters
    ----------
    L                   : int
                          Positional encoding level; number of frequency
                          bands generated per input feature.
    """
    super(positional_encoder, self).__init__()
    self.L = L

forward(x)

Forward model.

Parameters:

  • x
              Input data [b x n], where `b` is batch size, `n` is the feature size.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x               : torch.tensor
                      Input data [b x n], where `b` is batch size, `n` is the feature size.

    Returns
    -------
    results         : torch.tensor
                      Result of the forward operation, sized [b x n * (1 + 2 * L)].
    """
    # Frequency bands 2^0 ... 2^(L-1), broadcast against x's trailing axis.
    freqs = 2 ** torch.arange(self.L, device=x.device)
    freqs = freqs.view(1, 1, -1)
    results_cos = torch.cos(x.unsqueeze(-1) * freqs).reshape(x.shape[0], -1)
    results_sin = torch.sin(x.unsqueeze(-1) * freqs).reshape(x.shape[0], -1)
    # Keep the raw input alongside its cosine and sine encodings.
    results = torch.cat((x, results_cos, results_sin), dim=1)
    return results

residual_attention_layer

Bases: Module

A residual block with an attention layer.

Source code in odak/learn/models/components.py
class residual_attention_layer(torch.nn.Module):
    """
    A residual block with an attention layer.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=1,
        bias=False,
        activation=torch.nn.ReLU(),
    ):
        """
        An attention layer class.


        Parameters
        ----------
        input_channels  : int or optional
                          Number of input channels.
        output_channels : int or optional
                          Number of middle channels.
        kernel_size     : int or optional
                          Kernel size.
        bias            : bool or optional
                          Set to True to let convolutional layers have bias term.
        activation      : torch.nn or optional
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation

        def convolution_with_normalization():
            # Convolution followed by batch normalization; one branch per input.
            return torch.nn.Sequential(
                torch.nn.Conv2d(
                    input_channels,
                    output_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
                torch.nn.BatchNorm2d(output_channels),
            )

        self.convolution0 = convolution_with_normalization()
        self.convolution1 = convolution_with_normalization()
        # Activation followed by the attention-producing convolution.
        self.final_layer = torch.nn.Sequential(
            self.activation,
            torch.nn.Conv2d(
                output_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
        )

    def forward(self, x0, x1):
        """
        Forward model.

        Parameters
        ----------
        x0             : torch.tensor
                         First input data.

        x1             : torch.tensor
                         Second input data.


        Returns
        -------
        result         : torch.tensor
                         Estimated output.
        """
        # Sum the two convolved branches, derive an attention map, and use
        # it to modulate the first input.
        summed = self.convolution0(x0) + self.convolution1(x1)
        attention = self.final_layer(summed)
        result = attention * x0
        return result

__init__(input_channels=2, output_channels=2, kernel_size=1, bias=False, activation=torch.nn.ReLU())

An attention layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int or optional, default: 2 ) –
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=1,
    bias=False,
    activation=torch.nn.ReLU(),
):
    """
    An attention layer class.


    Parameters
    ----------
    input_channels  : int or optional
                      Number of input channels.
    output_channels : int or optional
                      Number of middle channels.
    kernel_size     : int or optional
                      Kernel size.
    bias            : bool or optional
                      Set to True to let convolutional layers have bias term.
    activation      : torch.nn or optional
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    # Two parallel convolution + batch-norm branches, one per input tensor.
    self.convolution0 = torch.nn.Sequential(
        torch.nn.Conv2d(
            input_channels,
            output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        ),
        torch.nn.BatchNorm2d(output_channels),
    )
    self.convolution1 = torch.nn.Sequential(
        torch.nn.Conv2d(
            input_channels,
            output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        ),
        torch.nn.BatchNorm2d(output_channels),
    )
    # Activation followed by the attention-producing convolution.
    self.final_layer = torch.nn.Sequential(
        self.activation,
        torch.nn.Conv2d(
            output_channels,
            output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        ),
    )

forward(x0, x1)

Forward model.

Parameters:

  • x0
             First input data.
    
  • x1
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x0, x1):
    """
    Forward model.

    Parameters
    ----------
    x0             : torch.tensor
                     First input data.

    x1             : torch.tensor
                     Second input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Sum the two convolved branches, derive an attention map, and use it
    # to modulate the first input.
    y0 = self.convolution0(x0)
    y1 = self.convolution1(x1)
    y2 = torch.add(y0, y1)
    result = self.final_layer(y2) * x0
    return result

residual_layer

Bases: Module

A residual layer.

Source code in odak/learn/models/components.py
class residual_layer(torch.nn.Module):
    """
    A residual layer.
    """

    def __init__(
        self,
        input_channels=2,
        mid_channels=16,
        kernel_size=3,
        bias=False,
        normalization=True,
        activation=torch.nn.ReLU(),
    ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of middle channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        # Output width matches the input so the skip connection can add.
        self.convolution = double_convolution(
            input_channels,
            mid_channels=mid_channels,
            output_channels=input_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        residual = self.convolution(x)
        return x + residual

__init__(input_channels=2, mid_channels=16, kernel_size=3, bias=False, normalization=True, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    mid_channels=16,
    kernel_size=3,
    bias=False,
    normalization=True,
    activation=torch.nn.ReLU(),
):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of middle channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    # Output width matches the input so the skip connection can add.
    self.convolution = double_convolution(
        input_channels,
        mid_channels=mid_channels,
        output_channels=input_channels,
        kernel_size=kernel_size,
        normalization=normalization,
        bias=bias,
        activation=activation,
    )

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Residual connection: input plus the double-convolution output.
    x0 = self.convolution(x)
    return x + x0

spatial_gate

Bases: Module

Spatial attention module that applies a convolution layer after channel pooling. This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.

Source code in odak/learn/models/components.py
class spatial_gate(torch.nn.Module):
    """
    Spatial attention module that applies a convolution layer after channel pooling.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.
    """

    def __init__(self):
        """
        Initializes the spatial gate module.
        """
        super().__init__()
        # 7x7 receptive field over the stacked (max, mean) pooled maps.
        kernel_size = 7
        self.spatial = convolution_layer(
            2, 1, kernel_size, bias=False, activation=torch.nn.Identity()
        )

    def channel_pool(self, x):
        """
        Applies max and average pooling on the channels.

        Parameters
        ----------
        x             : torch.tensor
                        Input tensor.

        Returns
        -------
        output        : torch.tensor
                        Output tensor.
        """
        pooled_max, _ = torch.max(x, dim=1, keepdim=True)
        pooled_mean = torch.mean(x, dim=1, keepdim=True)
        output = torch.cat((pooled_max, pooled_mean), dim=1)
        return output

    def forward(self, x):
        """
        Forward pass of the SpatialGate module.

        Applies spatial attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the SpatialGate module.

        Returns
        -------
        scaled_x     : torch.tensor
                       Output tensor after applying spatial attention.
        """
        pooled = self.channel_pool(x)
        attention_map = torch.sigmoid(self.spatial(pooled))
        scaled_x = x * attention_map
        return scaled_x

__init__()

Initializes the spatial gate module.

Source code in odak/learn/models/components.py
def __init__(self):
    """
    Initializes the spatial gate module.
    """
    super().__init__()
    # 7x7 receptive field over the stacked (max, mean) pooled maps.
    kernel_size = 7
    self.spatial = convolution_layer(
        2, 1, kernel_size, bias=False, activation=torch.nn.Identity()
    )

channel_pool(x)

Applies max and average pooling on the channels.

Parameters:

  • x
            Input tensor.
    

Returns:

  • output ( tensor ) –

    Output tensor.

Source code in odak/learn/models/components.py
def channel_pool(self, x):
    """
    Applies max and average pooling on the channels.

    Parameters
    ----------
    x             : torch.tensor
                    Input tensor.

    Returns
    -------
    output        : torch.tensor
                    Output tensor with two channels: the per-pixel channel
                    maximum followed by the per-pixel channel mean.
    """
    max_pool = torch.max(x, 1)[0].unsqueeze(1)
    avg_pool = torch.mean(x, 1).unsqueeze(1)
    output = torch.cat((max_pool, avg_pool), dim=1)
    return output

forward(x)

Forward pass of the SpatialGate module.

Applies spatial attention to the input tensor.

Parameters:

  • x
           Input tensor to the SpatialGate module.
    

Returns:

  • scaled_x ( tensor ) –

    Output tensor after applying spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the SpatialGate module.

    Applies spatial attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the SpatialGate module.

    Returns
    -------
    scaled_x     : torch.tensor
                   Output tensor after applying spatial attention.
    """
    # Pool over channels, convolve, and squash to a (0, 1) attention map.
    x_compress = self.channel_pool(x)
    x_out = self.spatial(x_compress)
    scale = torch.sigmoid(x_out)
    scaled_x = x * scale
    return scaled_x

spatially_adaptive_convolution

Bases: Module

A spatially adaptive convolution layer.

References

C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions." C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation." C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."

Source code in odak/learn/models/components.py
class spatially_adaptive_convolution(torch.nn.Module):
    """
    A spatially adaptive convolution layer.

    Combines a learned, spatially invariant kernel (`self.weight`) with a
    per-pixel, spatially varying kernel supplied at forward time.

    References
    ----------

    C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions."
    C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation."
    C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initializes a spatially adaptive convolution layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn.Module
                          Activation function to apply. If None, no activation is applied.
        """
        super(spatially_adaptive_convolution, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        # The standard Conv2d is only used to obtain a conventionally
        # initialized weight tensor of the right shape; its own forward
        # is never called in this class.
        self.standard_convolution = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=self.output_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        # Learnable spatially invariant kernel, initialized from the
        # standard convolution's weight.
        self.weight = torch.nn.Parameter(
            data=self.standard_convolution.weight, requires_grad=True
        )
        # NOTE(review): `self.activation` is stored but never applied in
        # forward() below — confirm whether an activation was intended here.
        self.activation = activation

    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive convolution layer.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        sa_output          : torch.tensor
                            Estimated output tensor.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary
        # (presumably so the spatial sizes become divisible by the stride
        # and the reshapes below line up — TODO confirm).
        if sv_kernel_feature.size(-1) * self.stride != x.size(
            -1
        ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(
                sv_kernel_feature,
                (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
            )
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(
                x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
            )

        # Unfold the input tensor for matrix multiplication
        # (each column of the result holds one flattened k x k patch of x).
        input_feature = torch.nn.functional.unfold(
            x,
            kernel_size=(self.kernel_size, self.kernel_size),
            stride=self.stride,
            padding=self.padding,
        )

        # Resize sv_kernel_feature to match the input feature
        sv_kernel = sv_kernel_feature.reshape(
            1,
            self.input_channels * self.kernel_size * self.kernel_size,
            (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
        )

        # Resize weight to match the input channels and kernel size
        si_kernel = self.weight.reshape(
            self.output_channels,
            self.input_channels * self.kernel_size * self.kernel_size,
        )

        # Apply spatially varying kernels
        # (element-wise modulation of every unfolded patch by its local kernel).
        sv_feature = input_feature * sv_kernel

        # Perform matrix multiplication
        # (spatially invariant kernel applied to the modulated patches).
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
            1,
            self.output_channels,
            (x.size(-2) // self.stride),
            (x.size(-1) // self.stride),
        )
        return sa_output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive convolution layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Activation function to apply. If None, no activation is applied.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Constructs a spatially adaptive convolution layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn.Module
                      Activation function to apply. If None, no activation is applied.
    """
    super(spatially_adaptive_convolution, self).__init__()
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    # A conventional convolution provides a well-initialized weight tensor.
    self.standard_convolution = torch.nn.Conv2d(
        in_channels=input_channels,
        out_channels=self.output_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    # Expose that weight as the learnable spatially invariant kernel.
    self.weight = torch.nn.Parameter(
        data=self.standard_convolution.weight, requires_grad=True
    )
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive convolution layer.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • sa_output ( tensor ) –

    Estimated output tensor. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive convolution layer.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    sa_output          : torch.tensor
                        Estimated output tensor.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary
    # (presumably so the spatial sizes become divisible by the stride and
    # the reshapes below line up — TODO confirm).
    if sv_kernel_feature.size(-1) * self.stride != x.size(
        -1
    ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(
            sv_kernel_feature,
            (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
        )
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(
            x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
        )

    # Unfold the input tensor for matrix multiplication
    # (each column of the result holds one flattened k x k patch of x).
    input_feature = torch.nn.functional.unfold(
        x,
        kernel_size=(self.kernel_size, self.kernel_size),
        stride=self.stride,
        padding=self.padding,
    )

    # Resize sv_kernel_feature to match the input feature
    sv_kernel = sv_kernel_feature.reshape(
        1,
        self.input_channels * self.kernel_size * self.kernel_size,
        (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
    )

    # Resize weight to match the input channels and kernel size
    si_kernel = self.weight.reshape(
        self.output_channels,
        self.input_channels * self.kernel_size * self.kernel_size,
    )

    # Apply spatially varying kernels
    # (element-wise modulation of every patch by its local kernel).
    sv_feature = input_feature * sv_kernel

    # Perform matrix multiplication
    # (spatially invariant kernel applied to the modulated patches).
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
        1,
        self.output_channels,
        (x.size(-2) // self.stride),
        (x.size(-1) // self.stride),
    )
    return sa_output

spatially_adaptive_module

Bases: Module

A spatially adaptive module that combines learned spatially adaptive convolutions.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/components.py
class spatially_adaptive_module(torch.nn.Module):
    """
    A spatially adaptive module that combines learned spatially adaptive convolutions.

    References
    ----------

    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initializes a spatially adaptive module.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super(spatially_adaptive_module, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        # One channel of the final output comes from the plain spatially
        # varying convolution (sv_output in forward), so the learned kernel
        # only needs to produce the remaining output_channels - 1 channels.
        self.output_channels_for_weight = self.output_channels - 1
        # The standard Conv2d only supplies a conventionally initialized
        # weight tensor; its own forward is never called.
        self.standard_convolution = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=self.output_channels_for_weight,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        # Learnable spatially invariant kernel.
        self.weight = torch.nn.Parameter(
            data=self.standard_convolution.weight, requires_grad=True
        )
        self.activation = activation

    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive module.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        output             : torch.tensor
                            Combined output tensor from standard and spatially adaptive convolutions.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary
        # (presumably so the spatial sizes become divisible by the stride
        # and the reshapes below line up — TODO confirm).
        if sv_kernel_feature.size(-1) * self.stride != x.size(
            -1
        ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(
                sv_kernel_feature,
                (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
            )
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(
                x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
            )

        # Unfold the input tensor for matrix multiplication
        # (each column of the result holds one flattened k x k patch of x).
        input_feature = torch.nn.functional.unfold(
            x,
            kernel_size=(self.kernel_size, self.kernel_size),
            stride=self.stride,
            padding=self.padding,
        )

        # Resize sv_kernel_feature to match the input feature
        sv_kernel = sv_kernel_feature.reshape(
            1,
            self.input_channels * self.kernel_size * self.kernel_size,
            (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
        )

        # Apply sv_kernel to the input_feature
        sv_feature = input_feature * sv_kernel

        # Original spatially varying convolution output
        # (summing the modulated patch entries yields a one-channel map).
        sv_output = torch.sum(sv_feature, dim=1).reshape(
            1, 1, (x.size(-2) // self.stride), (x.size(-1) // self.stride)
        )

        # Reshape weight for spatially adaptive convolution
        si_kernel = self.weight.reshape(
            self.output_channels_for_weight,
            self.input_channels * self.kernel_size * self.kernel_size,
        )

        # Apply si_kernel on sv convolution output
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
            1,
            self.output_channels_for_weight,
            (x.size(-2) // self.stride),
            (x.size(-1) // self.stride),
        )

        # Combine the outputs and apply activation function
        # (1 + (output_channels - 1) channels -> output_channels channels).
        output = self.activation(torch.cat((sv_output, sa_output), dim=1))
        return output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive module.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Constructs a spatially adaptive module.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super(spatially_adaptive_module, self).__init__()
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    # One output channel is produced by the plain spatially varying
    # convolution, so the learned kernel covers the remaining channels.
    self.output_channels_for_weight = self.output_channels - 1
    # A conventional convolution provides a well-initialized weight tensor.
    self.standard_convolution = torch.nn.Conv2d(
        in_channels=input_channels,
        out_channels=self.output_channels_for_weight,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    # Expose that weight as the learnable spatially invariant kernel.
    self.weight = torch.nn.Parameter(
        data=self.standard_convolution.weight, requires_grad=True
    )
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive module.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • output ( tensor ) –

    Combined output tensor from standard and spatially adaptive convolutions. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive module.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    output             : torch.tensor
                        Combined output tensor from standard and spatially adaptive convolutions.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary
    # (presumably so the spatial sizes become divisible by the stride and
    # the reshapes below line up — TODO confirm).
    if sv_kernel_feature.size(-1) * self.stride != x.size(
        -1
    ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(
            sv_kernel_feature,
            (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
        )
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(
            x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
        )

    # Unfold the input tensor for matrix multiplication
    # (each column of the result holds one flattened k x k patch of x).
    input_feature = torch.nn.functional.unfold(
        x,
        kernel_size=(self.kernel_size, self.kernel_size),
        stride=self.stride,
        padding=self.padding,
    )

    # Resize sv_kernel_feature to match the input feature
    sv_kernel = sv_kernel_feature.reshape(
        1,
        self.input_channels * self.kernel_size * self.kernel_size,
        (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
    )

    # Apply sv_kernel to the input_feature
    sv_feature = input_feature * sv_kernel

    # Original spatially varying convolution output
    # (summing the modulated patch entries yields a one-channel map).
    sv_output = torch.sum(sv_feature, dim=1).reshape(
        1, 1, (x.size(-2) // self.stride), (x.size(-1) // self.stride)
    )

    # Reshape weight for spatially adaptive convolution
    si_kernel = self.weight.reshape(
        self.output_channels_for_weight,
        self.input_channels * self.kernel_size * self.kernel_size,
    )

    # Apply si_kernel on sv convolution output
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
        1,
        self.output_channels_for_weight,
        (x.size(-2) // self.stride),
        (x.size(-1) // self.stride),
    )

    # Combine the outputs and apply activation function
    # (1 + (output_channels - 1) channels -> output_channels channels).
    output = self.activation(torch.cat((sv_output, sa_output), dim=1))
    return output

upsample_convtranspose2d_layer

Bases: Module

An upsampling convtranspose2d layer.

Source code in odak/learn/models/components.py
class upsample_convtranspose2d_layer(torch.nn.Module):
    """
    An upsampling convtranspose2d layer.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=2,
        stride=2,
        bias=False,
    ):
        """
        An upsampling component based on a transposed convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size of the transposed convolution.
        stride          : int
                          Stride of the transposed convolution.
        bias            : bool
                          Set to True to let the transposed convolution have a bias term.
        """
        super().__init__()
        self.up = torch.nn.ConvTranspose2d(
            in_channels=input_channels,
            out_channels=output_channels,
            bias=bias,
            kernel_size=kernel_size,
            stride=stride,
        )

    def forward(self, x1, x2):
        """
        Forward model.

        Upsamples `x1` with the transposed convolution, pads it to match the
        spatial size of `x2`, and returns their element-wise sum.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data (tensor to be upsampled).
        x2             : torch.tensor
                         Second input data (added to the upsampled result).


        Returns
        -------
        result        : torch.tensor
                        Result of the forward operation
        """
        x1 = self.up(x1)
        # Pad x1 so its spatial dimensions match x2 before the addition.
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = torch.nn.functional.pad(
            x1, [diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2]
        )
        result = x1 + x2
        return result

__init__(input_channels, output_channels, kernel_size=2, stride=2, bias=False)

An upsampling component based on a transposed convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=2,
    stride=2,
    bias=False,
):
    """
    An upsampling component based on a transposed convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size of the transposed convolution.
    stride          : int
                      Stride of the transposed convolution.
    bias            : bool
                      Set to True to let the transposed convolution have a bias term.
    """
    super().__init__()
    self.up = torch.nn.ConvTranspose2d(
        in_channels=input_channels,
        out_channels=output_channels,
        bias=bias,
        kernel_size=kernel_size,
        stride=stride,
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Upsamples the first input, pads it to the spatial size of the second
    input, and returns their element-wise sum.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """
    upsampled = self.up(x1)
    delta_h = x2.size()[2] - upsampled.size()[2]
    delta_w = x2.size()[3] - upsampled.size()[3]
    upsampled = torch.nn.functional.pad(
        upsampled,
        [delta_w // 2, delta_w - delta_w // 2, delta_h // 2, delta_h - delta_h // 2],
    )
    result = upsampled + x2
    return result

upsample_layer

Bases: Module

An upsampling convolutional layer.

Source code in odak/learn/models/components.py
class upsample_layer(torch.nn.Module):
    """
    An upsampling convolutional layer.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
        bilinear=True,
    ):
        """
        An upsampling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        bilinear        : bool
                          If set to True, bilinear sampling is used.
        """
        super(upsample_layer, self).__init__()
        if bilinear:
            # Bilinear upsampling keeps the channel count, so the double
            # convolution receives input_channels + output_channels channels
            # after the concatenation in forward().
            self.up = torch.nn.Upsample(
                scale_factor=2, mode="bilinear", align_corners=True
            )
            self.conv = double_convolution(
                input_channels=input_channels + output_channels,
                mid_channels=input_channels // 2,
                output_channels=output_channels,
                kernel_size=kernel_size,
                normalization=normalization,
                bias=bias,
                activation=activation,
            )
        else:
            # The transposed convolution halves the channel count while
            # upsampling, hence the different channel arithmetic below.
            self.up = torch.nn.ConvTranspose2d(
                input_channels, input_channels // 2, kernel_size=2, stride=2
            )
            self.conv = double_convolution(
                input_channels=input_channels,
                mid_channels=output_channels,
                output_channels=output_channels,
                kernel_size=kernel_size,
                normalization=normalization,
                bias=bias,
                activation=activation,
            )

    def forward(self, x1, x2):
        """
        Forward model.

        Upsamples `x1`, pads it to the spatial size of `x2`, concatenates the
        two along the channel dimension, and applies a double convolution.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data (tensor to be upsampled).
        x2             : torch.tensor
                         Second input data (e.g. a skip connection).


        Returns
        -------
        result        : torch.tensor
                        Result of the forward operation
        """
        x1 = self.up(x1)
        # Pad x1 so its spatial dimensions match x2 before concatenation.
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = torch.nn.functional.pad(
            x1, [diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2]
        )
        x = torch.cat([x2, x1], dim=1)
        result = self.conv(x)
        return result

__init__(input_channels, output_channels, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU(), bilinear=True)

An upsampling component with a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
  • bilinear
              If set to True, bilinear sampling is used.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
    bilinear=True,
):
    """
    An upsampling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    bilinear        : bool
                      If set to True, bilinear sampling is used.
    """
    super(upsample_layer, self).__init__()
    if bilinear:
        # Bilinear upsampling keeps the channel count, so the double
        # convolution receives the concatenated channels in forward().
        self.up = torch.nn.Upsample(
            scale_factor=2, mode="bilinear", align_corners=True
        )
        self.conv = double_convolution(
            input_channels=input_channels + output_channels,
            mid_channels=input_channels // 2,
            output_channels=output_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )
    else:
        # The transposed convolution halves the channel count while
        # upsampling, hence the different channel arithmetic below.
        self.up = torch.nn.ConvTranspose2d(
            input_channels, input_channels // 2, kernel_size=2, stride=2
        )
        self.conv = double_convolution(
            input_channels=input_channels,
            mid_channels=output_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Upsamples the first input, pads it to the spatial size of the second
    input, concatenates both along the channel dimension, and applies the
    double convolution.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """
    upsampled = self.up(x1)
    delta_h = x2.size()[2] - upsampled.size()[2]
    delta_w = x2.size()[3] - upsampled.size()[3]
    upsampled = torch.nn.functional.pad(
        upsampled,
        [delta_w // 2, delta_w - delta_w // 2, delta_h // 2, delta_h - delta_h // 2],
    )
    merged = torch.cat([x2, upsampled], dim=1)
    result = self.conv(merged)
    return result

gaussian(x, multiplier=1.0)

A Gaussian non-linear activation. For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

Parameters:

  • x
           Input data.
    
  • multiplier
           Multiplier.
    

Returns:

  • result ( float or tensor ) –

    Output data.

Source code in odak/learn/models/components.py
def gaussian(x, multiplier=1.0):
    """
    A Gaussian non-linear activation.
    For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

    Parameters
    ----------
    x            : float or torch.tensor
                   Input data.
    multiplier   : float or torch.tensor
                   Multiplier.

    Returns
    -------
    result       : float or torch.tensor
                   Output data.
    """
    scaled = multiplier * x
    result = torch.exp(-scaled * scaled)
    return result

swish(x)

A swish non-linear activation. For more details: https://en.wikipedia.org/wiki/Swish_function

Parameters:

  • x
             Input.
    

Returns:

  • out ( float or tensor ) –

    Output.

Source code in odak/learn/models/components.py
def swish(x):
    """
    A swish non-linear activation.
    For more details: https://en.wikipedia.org/wiki/Swish_function

    Parameters
    ----------
    x              : float or torch.tensor
                     Input.

    Returns
    -------
    out            : float or torch.tensor
                     Output.
    """
    return torch.sigmoid(x) * x

channel_gate

Bases: Module

Channel attention module with various pooling strategies. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class channel_gate(torch.nn.Module):
    """
    Channel attention module with various pooling strategies.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=None):
        """
        Initializes the channel gate module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the intermediate layer.
        pool_types      : list
                          List of pooling operations to apply ("avg" and/or "max").
                          Defaults to ["avg", "max"] when None.
        """
        super().__init__()
        # Avoid a mutable default argument; fall back to the historical default.
        if pool_types is None:
            pool_types = ["avg", "max"]
        self.gate_channels = gate_channels
        # Keep at least one hidden unit for very small channel counts.
        hidden_channels = max(gate_channels // reduction_ratio, 1)
        # torch.nn.Flatten() collapses everything after the batch dimension,
        # matching the previous project-local Flatten helper while removing the
        # dependency on convolutional_block_attention.
        self.mlp = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(gate_channels, hidden_channels),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_channels, gate_channels),
        )
        self.pool_types = pool_types

    def forward(self, x):
        """
        Forward pass of the ChannelGate module.

        Applies channel-wise attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the ChannelGate module.

        Returns
        -------
        output       : torch.tensor
                       Output tensor after applying channel attention.

        Raises
        ------
        ValueError
            If an entry of pool_types is neither "avg" nor "max".
        """
        channel_att_sum = None
        spatial_dims = (x.size(2), x.size(3))
        for pool_type in self.pool_types:
            if pool_type == "avg":
                pool = torch.nn.functional.avg_pool2d(
                    x, spatial_dims, stride=spatial_dims
                )
            elif pool_type == "max":
                pool = torch.nn.functional.max_pool2d(
                    x, spatial_dims, stride=spatial_dims
                )
            else:
                # Previously an unknown entry silently reused (or never defined)
                # the previous pooling result; fail loudly instead.
                raise ValueError("Unsupported pool type: {}".format(pool_type))
            channel_att_raw = self.mlp(pool)
            channel_att_sum = (
                channel_att_raw
                if channel_att_sum is None
                else channel_att_sum + channel_att_raw
            )
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        output = x * scale
        return output

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'])

Initializes the channel gate module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the intermediate layer.
    
  • pool_types
              List of pooling operations to apply.
    
Source code in odak/learn/models/components.py
def __init__(self, gate_channels, reduction_ratio=16, pool_types=["avg", "max"]):
    """
    Initializes the channel gate module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the intermediate layer.
    pool_types      : list
                      List of pooling operations to apply.
    """
    super().__init__()
    self.gate_channels = gate_channels
    self.pool_types = pool_types
    # Keep at least one hidden unit even for tiny channel counts.
    hidden_channels = max(gate_channels // reduction_ratio, 1)
    self.mlp = torch.nn.Sequential(
        convolutional_block_attention.Flatten(),
        torch.nn.Linear(gate_channels, hidden_channels),
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_channels, gate_channels),
    )

forward(x)

Forward pass of the ChannelGate module.

Applies channel-wise attention to the input tensor.

Parameters:

  • x
           Input tensor to the ChannelGate module.
    

Returns:

  • output ( tensor ) –

    Output tensor after applying channel attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the ChannelGate module.

    Applies channel-wise attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the ChannelGate module.

    Returns
    -------
    output       : torch.tensor
                   Output tensor after applying channel attention.
    """
    spatial_size = (x.size(2), x.size(3))
    attention = None
    for pool_type in self.pool_types:
        if pool_type == "avg":
            pooled = torch.nn.functional.avg_pool2d(x, spatial_size, stride=spatial_size)
        elif pool_type == "max":
            pooled = torch.nn.functional.max_pool2d(x, spatial_size, stride=spatial_size)
        raw_attention = self.mlp(pooled)
        if attention is None:
            attention = raw_attention
        else:
            attention = attention + raw_attention
    scale = torch.sigmoid(attention).unsqueeze(2).unsqueeze(3).expand_as(x)
    output = x * scale
    return output

convolution_layer

Bases: Module

A convolution layer.

Source code in odak/learn/models/components.py
class convolution_layer(torch.nn.Module):
    """
    A convolution layer.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        bias=False,
        stride=1,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size. Padding of kernel_size // 2 preserves the
                          spatial size for a stride of one.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        stride          : int
                          Stride of the convolution.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer appended after the convolution.
                          Defaults to torch.nn.ReLU(); pass None (or any falsy
                          value) to omit the activation entirely.
        """
        super().__init__()
        layers = [
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=kernel_size // 2,
                bias=bias,
            )
        ]
        if normalization:
            layers.append(torch.nn.BatchNorm2d(output_channels))
        if activation:
            layers.append(activation)
        self.model = torch.nn.Sequential(*layers)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.

        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, output_channels=2, kernel_size=3, bias=False, stride=1, normalization=False, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    bias=False,
    stride=1,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    convolution = torch.nn.Conv2d(
        input_channels,
        output_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=kernel_size // 2,
        bias=bias,
    )
    layers = [convolution]
    if normalization:
        layers.append(torch.nn.BatchNorm2d(output_channels))
    if activation:
        layers.append(activation)
    self.model = torch.nn.Sequential(*layers)

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Delegate to the composed sequential model.
    return self.model(x)

convolutional_block_attention

Bases: Module

Convolutional Block Attention Module (CBAM) class. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class convolutional_block_attention(torch.nn.Module):
    """
    Convolutional Block Attention Module (CBAM) class.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """

    def __init__(
        self,
        gate_channels,
        reduction_ratio=16,
        pool_types=None,
        no_spatial=False,
    ):
        """
        Initializes the convolutional block attention module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the channel attention.
        pool_types      : list
                          List of pooling operations to apply for channel attention.
                          Defaults to ["avg", "max"] when None.
        no_spatial      : bool
                          If True, spatial attention is not applied.
        """
        super().__init__()
        # Avoid a mutable default argument; fall back to the historical default.
        if pool_types is None:
            pool_types = ["avg", "max"]
        self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.spatial_gate = spatial_gate()

    class Flatten(torch.nn.Module):
        """
        Flattens the input tensor to a 2D matrix.
        """

        def forward(self, x):
            # Collapse every dimension after the batch dimension.
            return x.view(x.size(0), -1)

    def forward(self, x):
        """
        Forward pass of the convolutional block attention module.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the CBAM module.

        Returns
        -------
        x_out        : torch.tensor
                       Output tensor after applying channel and spatial attention.
        """
        x_out = self.channel_gate(x)
        if not self.no_spatial:
            x_out = self.spatial_gate(x_out)
        return x_out

Flatten

Bases: Module

Flattens the input tensor to a 2D matrix.

Source code in odak/learn/models/components.py
class Flatten(torch.nn.Module):
    """
    Flattens the input tensor to a 2D matrix.
    """

    def forward(self, x):
        # Collapse all dimensions after the batch dimension.
        batch_size = x.size(0)
        return x.view(batch_size, -1)

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False)

Initializes the convolutional block attention module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the channel attention.
    
  • pool_types
              List of pooling operations to apply for channel attention.
    
  • no_spatial
              If True, spatial attention is not applied.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    gate_channels,
    reduction_ratio=16,
    pool_types=["avg", "max"],
    no_spatial=False,
):
    """
    Initializes the convolutional block attention module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the channel attention.
    pool_types      : list
                      List of pooling operations to apply for channel attention.
    no_spatial      : bool
                      If True, spatial attention is not applied.
    """
    super(convolutional_block_attention, self).__init__()
    self.no_spatial = no_spatial
    self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
    if not self.no_spatial:
        self.spatial_gate = spatial_gate()

forward(x)

Forward pass of the convolutional block attention module.

Parameters:

  • x
           Input tensor to the CBAM module.
    

Returns:

  • x_out ( tensor ) –

    Output tensor after applying channel and spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the convolutional block attention module.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the CBAM module.

    Returns
    -------
    x_out        : torch.tensor
                   Output tensor after applying channel and spatial attention.
    """
    x_out = self.channel_gate(x)
    if self.no_spatial:
        return x_out
    return self.spatial_gate(x_out)

double_convolution

Bases: Module

A double convolution layer.

Source code in odak/learn/models/components.py
class double_convolution(torch.nn.Module):
    """
    A double convolution layer.
    """

    def __init__(
        self,
        input_channels=2,
        mid_channels=None,
        output_channels=2,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        Double convolution model.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of channels in the hidden layer between two convolutions.
                          Defaults to output_channels when None.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer appended after each convolution.
                          If None, no activation is applied.
        """
        super().__init__()
        # Idiomatic None check (was isinstance(mid_channels, type(None))).
        if mid_channels is None:
            mid_channels = output_channels
        self.activation = activation
        self.model = torch.nn.Sequential(
            convolution_layer(
                input_channels=input_channels,
                output_channels=mid_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=self.activation,
            ),
            convolution_layer(
                input_channels=mid_channels,
                output_channels=output_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=self.activation,
            ),
        )

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.

        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, mid_channels=None, output_channels=2, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU())

Double convolution model.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of channels in the hidden layer between two convolutions.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    mid_channels=None,
    output_channels=2,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    Double convolution model.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of channels in the hidden layer between two convolutions.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    if mid_channels is None:
        mid_channels = output_channels
    self.activation = activation
    # Options common to both convolution stages.
    shared_options = dict(
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=self.activation,
    )
    first = convolution_layer(
        input_channels=input_channels,
        output_channels=mid_channels,
        **shared_options,
    )
    second = convolution_layer(
        input_channels=mid_channels,
        output_channels=output_channels,
        **shared_options,
    )
    self.model = torch.nn.Sequential(first, second)

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Run both convolution stages in sequence.
    return self.model(x)

downsample_layer

Bases: Module

A downscaling component followed by a double convolution.

Source code in odak/learn/models/components.py
class downsample_layer(torch.nn.Module):
    """
    A downscaling component followed by a double convolution.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A downscaling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        # Halve the spatial resolution, then refine features.
        pooling = torch.nn.MaxPool2d(2)
        convolutions = double_convolution(
            input_channels=input_channels,
            mid_channels=output_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.maxpool_conv = torch.nn.Sequential(pooling, convolutions)

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x              : torch.tensor
                         First input data.

        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        return self.maxpool_conv(x)

__init__(input_channels, output_channels, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU())

A downscaling component with a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A downscaling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    # Halve the spatial resolution, then refine features.
    pooling = torch.nn.MaxPool2d(2)
    convolutions = double_convolution(
        input_channels=input_channels,
        mid_channels=output_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.maxpool_conv = torch.nn.Sequential(pooling, convolutions)

forward(x)

Forward model.

Parameters:

  • x
             First input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x              : torch.tensor
                     First input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Max-pool then double convolution, as configured at construction time.
    return self.maxpool_conv(x)

global_feature_module

Bases: Module

A global feature layer that processes global features from input channels and applies them to another input tensor via learned transformations.

Source code in odak/learn/models/components.py
class global_feature_module(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies them to another input tensor via learned transformations.
    """

    def __init__(
        self,
        input_channels,
        mid_channels,
        output_channels,
        kernel_size,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
    ):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of mid channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.transformations_1 = global_transformations(input_channels, output_channels)
        self.global_features_1 = double_convolution(
            input_channels=input_channels,
            mid_channels=mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        # NOTE(review): global_features_2 and transformations_2 are built with
        # input_channels, but in forward() they receive y1 / y2 which carry
        # output_channels channels. This only lines up when
        # input_channels == output_channels -- confirm intended usage.
        self.global_features_2 = double_convolution(
            input_channels=input_channels,
            mid_channels=mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.transformations_2 = global_transformations(input_channels, output_channels)

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        # Two transformation/convolution rounds; the second transformation
        # fuses the intermediate feature maps y1 and y2.
        global_tensor_1 = self.transformations_1(x1, x2)
        y1 = self.global_features_1(global_tensor_1)
        y2 = self.global_features_2(y1)
        global_tensor_2 = self.transformations_2(y1, y2)
        return global_tensor_2

__init__(input_channels, mid_channels, output_channels, kernel_size, bias=False, normalization=False, activation=torch.nn.ReLU())

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of mid channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    mid_channels,
    output_channels,
    kernel_size,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of mid channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.transformations_1 = global_transformations(input_channels, output_channels)
    self.global_features_1 = double_convolution(
        input_channels=input_channels,
        mid_channels=mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    # NOTE(review): this second branch is built with input_channels but is fed
    # output_channels-wide tensors at forward time; it only lines up when
    # input_channels == output_channels -- confirm intended usage.
    self.global_features_2 = double_convolution(
        input_channels=input_channels,
        mid_channels=mid_channels,
        output_channels=output_channels,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    self.transformations_2 = global_transformations(input_channels, output_channels)

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    fused = self.transformations_1(x1, x2)
    features = self.global_features_1(fused)
    refined = self.global_features_2(features)
    result = self.transformations_2(features, refined)
    return result

global_transformations

Bases: Module

A global feature layer that processes global features from input channels and applies learned transformations to another input tensor.

This implementation is adapted from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Reference: J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/components.py
class global_transformations(torch.nn.Module):
    """
    A global feature layer that pools global statistics from one input tensor
    and uses them to modulate another input tensor with a learned affine
    transformation.

    This implementation is adapted from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Reference:
    J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."
    """

    def __init__(self, input_channels, output_channels):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        """
        super().__init__()

        def linear_block(in_features, out_features):
            # Fully connected layer followed by a leaky rectifier.
            return torch.nn.Sequential(
                torch.nn.Linear(in_features, out_features),
                torch.nn.LeakyReLU(0.2, inplace=True),
            )

        self.global_feature_1 = linear_block(input_channels, output_channels)
        self.global_feature_2 = linear_block(output_channels, output_channels)

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data (tensor to be modulated).
        x2             : torch.tensor
                         Second input data (source of global statistics).

        Returns
        -------
        result         : torch.tensor
                         Estimated output.
        """
        # Global average pooling over the spatial axes of x2.
        pooled = torch.mean(x2, dim=(2, 3))
        scale = self.global_feature_1(pooled)
        shift = self.global_feature_2(scale)
        # Broadcast the per-channel scale and shift over the spatial axes.
        scale = scale.unsqueeze(2).unsqueeze(3)
        shift = shift.unsqueeze(2).unsqueeze(3)
        result = x1 * scale + shift
        return result

__init__(input_channels, output_channels)

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
Source code in odak/learn/models/components.py
def __init__(self, input_channels, output_channels):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    """
    super().__init__()

    def linear_block(in_features, out_features):
        # Fully connected layer followed by a leaky rectifier.
        return torch.nn.Sequential(
            torch.nn.Linear(in_features, out_features),
            torch.nn.LeakyReLU(0.2, inplace=True),
        )

    self.global_feature_1 = linear_block(input_channels, output_channels)
    self.global_feature_2 = linear_block(output_channels, output_channels)

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data (tensor to be modulated).
    x2             : torch.tensor
                     Second input data (source of global statistics).

    Returns
    -------
    result         : torch.tensor
                     Estimated output.
    """
    # Global average pooling over the spatial axes of x2.
    pooled = torch.mean(x2, dim=(2, 3))
    scale = self.global_feature_1(pooled)
    shift = self.global_feature_2(scale)
    # Broadcast the per-channel scale and shift over the spatial axes.
    scale = scale.unsqueeze(2).unsqueeze(3)
    shift = shift.unsqueeze(2).unsqueeze(3)
    result = x1 * scale + shift
    return result

multi_layer_perceptron

Bases: Module

A multi-layer perceptron model.

Source code in odak/learn/models/models.py
class multi_layer_perceptron(torch.nn.Module):
    """
    A multi-layer perceptron model.
    """

    def __init__(
        self,
        dimensions,
        activation=torch.nn.ReLU(),
        bias=False,
        model_type="conventional",
        siren_multiplier=1.0,
        input_multiplier=None,
    ):
        """
        Initialize the multi-layer perceptron.

        Parameters
        ----------
        dimensions : list of int
            List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel).
        activation : torch.nn.Module, optional
            Nonlinear activation function. Default is `torch.nn.ReLU()`.
        bias : bool, optional
            If set to True, linear layers will include biases. Default is False.
        siren_multiplier : float, optional
            When using `SIREN` model type, this parameter functions as a hyperparameter.
            The original SIREN work uses 30.
            You can bypass this parameter by providing input that are not normalized and larger than one. Default is 1.0.
        input_multiplier : float, optional
            Initial value of the input multiplier before the very first layer.
        model_type : str, optional
            Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
            `conventional` refers to a standard multi layer perceptron.
            For `SIREN`, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
            For `Swish`, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017).
            For `FILM SIREN`, see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
            For `Gaussian`, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
            Default is "conventional".
        """
        super(multi_layer_perceptron, self).__init__()
        self.activation = activation
        self.bias = bias
        self.model_type = model_type
        self.layers = torch.nn.ModuleList()
        self.siren_multiplier = siren_multiplier
        self.dimensions = dimensions
        logger.info(
            f"Initializing multi_layer_perceptron: model_type={model_type}, "
            f"dimensions={dimensions}, bias={bias}, "
            f"siren_multiplier={siren_multiplier}"
        )
        # One linear layer per consecutive pair of dimensions.
        for in_features, out_features in zip(self.dimensions[:-1], self.dimensions[1:]):
            self.layers.append(torch.nn.Linear(in_features, out_features, bias=self.bias))
        if input_multiplier is not None:
            # Learnable elementwise gain applied to the raw input.
            self.input_multiplier = torch.nn.ParameterList()
            self.input_multiplier.append(
                torch.nn.Parameter(torch.ones(1, self.dimensions[0]) * input_multiplier)
            )
            logger.debug(f"Input multiplier initialized: {input_multiplier}")
        if self.model_type == "FILM SIREN":
            # Per-layer frequency and phase parameters (two rows each).
            self.alpha = torch.nn.ParameterList()
            for width in self.dimensions[1:]:
                self.alpha.append(torch.nn.Parameter(torch.randn(2, 1, width)))
            logger.debug("FILM SIREN alpha parameters initialized")
        if self.model_type == "Gaussian":
            # Per-layer spread parameters for the Gaussian activation.
            self.alpha = torch.nn.ParameterList()
            for width in self.dimensions[1:]:
                self.alpha.append(torch.nn.Parameter(torch.randn(1, 1, width)))
            logger.debug("Gaussian alpha parameters initialized")

    def forward(self, x):
        """
        Forward pass of the multi-layer perceptron.

        Parameters
        ----------
        x : torch.Tensor
            Input data.

        Returns
        -------
        result : torch.Tensor
            Estimated output.
        """
        result = x * self.input_multiplier[0] if hasattr(self, "input_multiplier") else x
        last_index = len(self.layers) - 1
        for layer_id, layer in enumerate(self.layers):
            result = layer(result)
            if layer_id == last_index:
                # The final layer is left linear for every model type.
                continue
            if self.model_type == "conventional":
                result = self.activation(result)
            elif self.model_type == "swish":
                result = swish(result)
            elif self.model_type == "SIREN":
                result = torch.sin(result * self.siren_multiplier)
            elif self.model_type == "FILM SIREN":
                result = torch.sin(
                    self.alpha[layer_id][0] * result + self.alpha[layer_id][1]
                )
            elif self.model_type == "Gaussian":
                result = gaussian(result, self.alpha[layer_id][0])
        return result

__init__(dimensions, activation=torch.nn.ReLU(), bias=False, model_type='conventional', siren_multiplier=1.0, input_multiplier=None)

Initialize the multi-layer perceptron.

Parameters:

  • dimensions (list of int) –

    List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel).

  • activation (Module, default: ReLU() ) –

    Nonlinear activation function. Default is torch.nn.ReLU().

  • bias (bool, default: False ) –

    If set to True, linear layers will include biases. Default is False.

  • siren_multiplier (float, default: 1.0 ) –

    When using SIREN model type, this parameter functions as a hyperparameter. The original SIREN work uses 30. You can bypass this parameter by providing input that are not normalized and larger than one. Default is 1.0.

  • input_multiplier (float, default: None ) –

    Initial value of the input multiplier before the very first layer.

  • model_type (str, default: 'conventional' ) –

    Model type: conventional, swish, SIREN, FILM SIREN, Gaussian. conventional refers to a standard multi layer perceptron. For SIREN, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473. For Swish, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017). For FILM SIREN, see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021. For Gaussian, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022. Default is "conventional".

Source code in odak/learn/models/models.py
def __init__(
    self,
    dimensions,
    activation=torch.nn.ReLU(),
    bias=False,
    model_type="conventional",
    siren_multiplier=1.0,
    input_multiplier=None,
):
    """
    Initialize the multi-layer perceptron.

    Parameters
    ----------
    dimensions : list of int
        List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel).
    activation : torch.nn.Module, optional
        Nonlinear activation function. Default is `torch.nn.ReLU()`.
    bias : bool, optional
        If set to True, linear layers will include biases. Default is False.
    siren_multiplier : float, optional
        When using `SIREN` model type, this parameter functions as a hyperparameter.
        The original SIREN work uses 30.
        You can bypass this parameter by providing input that are not normalized and larger than one. Default is 1.0.
    input_multiplier : float, optional
        Initial value of the input multiplier before the very first layer.
    model_type : str, optional
        Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
        `conventional` refers to a standard multi layer perceptron.
        For `SIREN`, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
        For `Swish`, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017).
        For `FILM SIREN`, see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
        For `Gaussian`, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
        Default is "conventional".
    """
    super(multi_layer_perceptron, self).__init__()
    self.activation = activation
    self.bias = bias
    self.model_type = model_type
    self.layers = torch.nn.ModuleList()
    self.siren_multiplier = siren_multiplier
    self.dimensions = dimensions
    logger.info(
        f"Initializing multi_layer_perceptron: model_type={model_type}, "
        f"dimensions={dimensions}, bias={bias}, "
        f"siren_multiplier={siren_multiplier}"
    )
    # One linear layer per consecutive pair of dimensions.
    for in_features, out_features in zip(self.dimensions[:-1], self.dimensions[1:]):
        self.layers.append(torch.nn.Linear(in_features, out_features, bias=self.bias))
    if input_multiplier is not None:
        # Learnable elementwise gain applied to the raw input.
        self.input_multiplier = torch.nn.ParameterList()
        self.input_multiplier.append(
            torch.nn.Parameter(torch.ones(1, self.dimensions[0]) * input_multiplier)
        )
        logger.debug(f"Input multiplier initialized: {input_multiplier}")
    if self.model_type == "FILM SIREN":
        # Per-layer frequency and phase parameters (two rows each).
        self.alpha = torch.nn.ParameterList()
        for width in self.dimensions[1:]:
            self.alpha.append(torch.nn.Parameter(torch.randn(2, 1, width)))
        logger.debug("FILM SIREN alpha parameters initialized")
    if self.model_type == "Gaussian":
        # Per-layer spread parameters for the Gaussian activation.
        self.alpha = torch.nn.ParameterList()
        for width in self.dimensions[1:]:
            self.alpha.append(torch.nn.Parameter(torch.randn(1, 1, width)))
        logger.debug("Gaussian alpha parameters initialized")

forward(x)

Forward pass of the multi-layer perceptron.

Parameters:

  • x (Tensor) –

    Input data.

Returns:

  • result ( Tensor ) –

    Estimated output.

Source code in odak/learn/models/models.py
def forward(self, x):
    """
    Forward pass of the multi-layer perceptron.

    Parameters
    ----------
    x : torch.Tensor
        Input data.

    Returns
    -------
    result : torch.Tensor
        Estimated output.
    """
    result = x * self.input_multiplier[0] if hasattr(self, "input_multiplier") else x
    last_index = len(self.layers) - 1
    for layer_id, layer in enumerate(self.layers):
        result = layer(result)
        if layer_id == last_index:
            # The final layer is left linear for every model type.
            continue
        if self.model_type == "conventional":
            result = self.activation(result)
        elif self.model_type == "swish":
            result = swish(result)
        elif self.model_type == "SIREN":
            result = torch.sin(result * self.siren_multiplier)
        elif self.model_type == "FILM SIREN":
            result = torch.sin(
                self.alpha[layer_id][0] * result + self.alpha[layer_id][1]
            )
        elif self.model_type == "Gaussian":
            result = gaussian(result, self.alpha[layer_id][0])
    return result

non_local_layer

Bases: Module

Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)

Source code in odak/learn/models/components.py
class non_local_layer(torch.nn.Module):
    """
    Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)
    """

    def __init__(
        self,
        input_channels=1024,
        bottleneck_channels=512,
        kernel_size=1,
        bias=False,
    ):
        """

        Parameters
        ----------
        input_channels      : int
                              Number of input channels.
        bottleneck_channels : int
                              Number of middle channels.
        kernel_size         : int
                              Kernel size.
        bias                : bool
                              Set to True to let convolutional layers have bias term.
        """
        super(non_local_layer, self).__init__()
        self.input_channels = input_channels
        self.bottleneck_channels = bottleneck_channels
        padding = kernel_size // 2
        # Value projection into the bottleneck.
        self.g = torch.nn.Conv2d(
            self.input_channels,
            self.bottleneck_channels,
            kernel_size=kernel_size,
            padding=padding,
            bias=bias,
        )
        # Projection back to the input channel count, closed by batch norm.
        self.W_z = torch.nn.Sequential(
            torch.nn.Conv2d(
                self.bottleneck_channels,
                self.input_channels,
                kernel_size=kernel_size,
                bias=bias,
                padding=padding,
            ),
            torch.nn.BatchNorm2d(self.input_channels),
        )
        # Zero-initialize the closing batch norm so the residual block
        # starts out as an identity mapping.
        torch.nn.init.constant_(self.W_z[1].weight, 0)
        torch.nn.init.constant_(self.W_z[1].bias, 0)

    def forward(self, x):
        """
        Forward model [zi = Wzyi + xi]

        Parameters
        ----------
        x               : torch.tensor
                          First input data.


        Returns
        -------
        z               : torch.tensor
                          Estimated output.
        """
        batch_size, channels, height, width = x.size()
        # Flatten spatial positions; attention compares every position pair.
        flat = x.view(batch_size, channels, -1).permute(0, 2, 1)
        values = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
        scores = torch.bmm(flat, flat.transpose(1, 2)) / (height * width)
        weights = torch.nn.functional.softmax(scores, dim=-1)
        attended = torch.bmm(weights, values)
        attended = attended.permute(0, 2, 1).contiguous()
        attended = attended.view(batch_size, self.bottleneck_channels, height, width)
        z = self.W_z(attended) + x
        return z

__init__(input_channels=1024, bottleneck_channels=512, kernel_size=1, bias=False)

Parameters:

  • input_channels
                  Number of input channels.
    
  • bottleneck_channels (int, default: 512 ) –
                  Number of middle channels.
    
  • kernel_size
                  Kernel size.
    
  • bias
                  Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=1024,
    bottleneck_channels=512,
    kernel_size=1,
    bias=False,
):
    """

    Parameters
    ----------
    input_channels      : int
                          Number of input channels.
    bottleneck_channels : int
                          Number of middle channels.
    kernel_size         : int
                          Kernel size.
    bias                : bool
                          Set to True to let convolutional layers have bias term.
    """
    super(non_local_layer, self).__init__()
    self.input_channels = input_channels
    self.bottleneck_channels = bottleneck_channels
    padding = kernel_size // 2
    # Value projection into the bottleneck.
    self.g = torch.nn.Conv2d(
        self.input_channels,
        self.bottleneck_channels,
        kernel_size=kernel_size,
        padding=padding,
        bias=bias,
    )
    # Projection back to the input channel count, closed by batch norm.
    self.W_z = torch.nn.Sequential(
        torch.nn.Conv2d(
            self.bottleneck_channels,
            self.input_channels,
            kernel_size=kernel_size,
            bias=bias,
            padding=padding,
        ),
        torch.nn.BatchNorm2d(self.input_channels),
    )
    # Zero-initialize the closing batch norm so the residual block
    # starts out as an identity mapping.
    torch.nn.init.constant_(self.W_z[1].weight, 0)
    torch.nn.init.constant_(self.W_z[1].bias, 0)

forward(x)

Forward model [zi = Wzyi + xi]

Parameters:

  • x
              First input data.
    

Returns:

  • z ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model [zi = Wzyi + xi]

    Parameters
    ----------
    x               : torch.tensor
                      First input data.


    Returns
    -------
    z               : torch.tensor
                      Estimated output.
    """
    batch_size, channels, height, width = x.size()
    # Flatten spatial positions; attention compares every position pair.
    flat = x.view(batch_size, channels, -1).permute(0, 2, 1)
    values = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
    scores = torch.bmm(flat, flat.transpose(1, 2)) / (height * width)
    weights = torch.nn.functional.softmax(scores, dim=-1)
    attended = torch.bmm(weights, values)
    attended = attended.permute(0, 2, 1).contiguous()
    attended = attended.view(batch_size, self.bottleneck_channels, height, width)
    z = self.W_z(attended) + x
    return z

normalization

Bases: Module

A normalization layer.

Source code in odak/learn/models/components.py
class normalization(torch.nn.Module):
    """
    A normalization layer.
    """

    def __init__(self, dim=1):
        """
        Normalization layer.

        Parameters
        ----------
        dim             : int
                          Dimension (axis) to normalize.
        """
        super().__init__()
        # Learnable per-channel gain applied after standardization.
        self.k = torch.nn.Parameter(torch.ones((1, dim, 1, 1)))

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        # Looser epsilon for reduced-precision inputs.
        eps = 1e-5 if x.dtype == torch.float32 else 1e-3
        mean = torch.mean(x, dim=1, keepdim=True)
        var = torch.var(x, dim=1, unbiased=False, keepdim=True)
        standardized = (x - mean) * (var + eps).rsqrt()
        return standardized * self.k

__init__(dim=1)

Normalization layer.

Parameters:

  • dim
              Dimension (axis) to normalize.
    
Source code in odak/learn/models/components.py
def __init__(self, dim=1):
    """
    Normalization layer.

    Parameters
    ----------
    dim             : int
                      Dimension (axis) to normalize.
    """
    super().__init__()
    # Learnable per-channel gain applied after standardization.
    self.k = torch.nn.Parameter(torch.ones((1, dim, 1, 1)))

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Looser epsilon for reduced-precision inputs.
    eps = 1e-5 if x.dtype == torch.float32 else 1e-3
    mean = torch.mean(x, dim=1, keepdim=True)
    var = torch.var(x, dim=1, unbiased=False, keepdim=True)
    standardized = (x - mean) * (var + eps).rsqrt()
    return standardized * self.k

positional_encoder

Bases: Module

A positional encoder module. This implementation follows this specific work: Martin-Brualla, Ricardo, Noha Radwan, Mehdi SM Sajjadi, Jonathan T. Barron, Alexey Dosovitskiy, and Daniel Duckworth. "Nerf in the wild: Neural radiance fields for unconstrained photo collections." In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pp. 7210-7219. 2021..

Source code in odak/learn/models/components.py
class positional_encoder(torch.nn.Module):
    """
    A positional encoder module.
    This implementation follows this specific work: `Martin-Brualla, Ricardo, Noha Radwan, Mehdi SM Sajjadi, Jonathan T. Barron, Alexey Dosovitskiy, and Daniel Duckworth. "Nerf in the wild: Neural radiance fields for unconstrained photo collections." In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pp. 7210-7219. 2021.`.
    """

    def __init__(self, L):
        """
        A positional encoder module.

        Parameters
        ----------
        L                   : int
                              Positional encoding level.
        """
        super(positional_encoder, self).__init__()
        # Number of frequency bands (powers of two) in the encoding.
        self.L = L

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x               : torch.tensor
                          Input data [b x n], where `b` is batch size, `n` is the feature size.

        Returns
        -------
        result          : torch.tensor
                          Result of the forward operation.
        """
        # Frequencies 2^0 ... 2^(L-1), broadcast against every feature.
        frequencies = (2 ** torch.arange(self.L, device=x.device)).view(1, 1, -1)
        scaled = x.unsqueeze(-1) * frequencies
        batch = x.shape[0]
        cosines = torch.cos(scaled).reshape(batch, -1)
        sines = torch.sin(scaled).reshape(batch, -1)
        # Concatenate the raw input with its cosine and sine encodings.
        return torch.cat((x, cosines, sines), dim=1)

__init__(L)

A positional encoder module.

Parameters:

  • L
                  Positional encoding level.
    
Source code in odak/learn/models/components.py
def __init__(self, L):
    """
    A positional encoder module.

    Parameters
    ----------
    L                   : int
                          Positional encoding level.
    """
    super(positional_encoder, self).__init__()
    # Number of frequency bands (powers of two) in the encoding.
    self.L = L

forward(x)

Forward model.

Parameters:

  • x
              Input data [b x n], where `b` is batch size, `n` is the feature size.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x               : torch.tensor
                      Input data [b x n], where `b` is batch size, `n` is the feature size.

    Returns
    -------
    result          : torch.tensor
                      Result of the forward operation.
    """
    # Frequencies 2^0 ... 2^(L-1), broadcast against every feature.
    frequencies = (2 ** torch.arange(self.L, device=x.device)).view(1, 1, -1)
    scaled = x.unsqueeze(-1) * frequencies
    batch = x.shape[0]
    cosines = torch.cos(scaled).reshape(batch, -1)
    sines = torch.sin(scaled).reshape(batch, -1)
    # Concatenate the raw input with its cosine and sine encodings.
    return torch.cat((x, cosines, sines), dim=1)

residual_attention_layer

Bases: Module

A residual block with an attention layer.

Source code in odak/learn/models/components.py
class residual_attention_layer(torch.nn.Module):
    """
    A residual block with an attention layer.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=1,
        bias=False,
        activation=torch.nn.ReLU(),
    ):
        """
        An attention layer class.

        Parameters
        ----------
        input_channels  : int, optional
                          Number of input channels.
        output_channels : int, optional
                          Number of middle channels.
        kernel_size     : int, optional
                          Kernel size.
        bias            : bool, optional
                          Set to True to let convolutional layers have bias term.
        activation      : torch.nn, optional
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        padding = kernel_size // 2

        def conv_bn():
            # Convolution followed by batch normalization.
            return torch.nn.Sequential(
                torch.nn.Conv2d(
                    input_channels,
                    output_channels,
                    kernel_size=kernel_size,
                    padding=padding,
                    bias=bias,
                ),
                torch.nn.BatchNorm2d(output_channels),
            )

        self.convolution0 = conv_bn()
        self.convolution1 = conv_bn()
        # Activation plus a closing convolution to form the attention map.
        self.final_layer = torch.nn.Sequential(
            self.activation,
            torch.nn.Conv2d(
                output_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=padding,
                bias=bias,
            ),
        )

    def forward(self, x0, x1):
        """
        Forward model.

        Parameters
        ----------
        x0             : torch.tensor
                         First input data.

        x1             : torch.tensor
                         Second input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        branch0 = self.convolution0(x0)
        branch1 = self.convolution1(x1)
        # The fused branches produce an attention map that gates x0.
        attention_map = self.final_layer(branch0 + branch1)
        result = attention_map * x0
        return result

__init__(input_channels=2, output_channels=2, kernel_size=1, bias=False, activation=torch.nn.ReLU())

An attention layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int or optional, default: 2 ) –
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=1,
    bias=False,
    activation=torch.nn.ReLU(),
):
    """
    An attention layer class.

    Parameters
    ----------
    input_channels  : int, optional
                      Number of input channels.
    output_channels : int, optional
                      Number of middle channels.
    kernel_size     : int, optional
                      Kernel size.
    bias            : bool, optional
                      Set to True to let convolutional layers have bias term.
    activation      : torch.nn, optional
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    padding = kernel_size // 2

    def conv_bn():
        # Convolution followed by batch normalization.
        return torch.nn.Sequential(
            torch.nn.Conv2d(
                input_channels,
                output_channels,
                kernel_size=kernel_size,
                padding=padding,
                bias=bias,
            ),
            torch.nn.BatchNorm2d(output_channels),
        )

    self.convolution0 = conv_bn()
    self.convolution1 = conv_bn()
    # Activation plus a closing convolution to form the attention map.
    self.final_layer = torch.nn.Sequential(
        self.activation,
        torch.nn.Conv2d(
            output_channels,
            output_channels,
            kernel_size=kernel_size,
            padding=padding,
            bias=bias,
        ),
    )

forward(x0, x1)

Forward model.

Parameters:

  • x0
             First input data.
    
  • x1
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x0, x1):
    """
    Forward model.

    Parameters
    ----------
    x0             : torch.tensor
                     First input data.

    x1             : torch.tensor
                     Second input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Embed each input separately, merge by summation, then use the gating
    # head's output to modulate the first input.
    merged = self.convolution0(x0) + self.convolution1(x1)
    attention = self.final_layer(merged)
    result = attention * x0
    return result

residual_layer

Bases: Module

A residual layer.

Source code in odak/learn/models/components.py
class residual_layer(torch.nn.Module):
    """
    A residual layer: the output is the input plus a learned correction.
    """

    def __init__(
        self,
        input_channels=2,
        mid_channels=16,
        kernel_size=3,
        bias=False,
        normalization=True,
        activation=torch.nn.ReLU(),
    ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of middle channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        # Map back to input_channels so the skip addition stays shape-compatible.
        self.convolution = double_convolution(
            input_channels,
            mid_channels=mid_channels,
            output_channels=input_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )
        self.activation = activation

    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        # Residual connection.
        return x + self.convolution(x)

__init__(input_channels=2, mid_channels=16, kernel_size=3, bias=False, normalization=True, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    mid_channels=16,
    kernel_size=3,
    bias=False,
    normalization=True,
    activation=torch.nn.ReLU(),
):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of middle channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    # The double convolution maps back to the input channel count so that the
    # residual addition in forward() is shape-compatible.
    self.convolution = double_convolution(
        input_channels,
        mid_channels=mid_channels,
        output_channels=input_channels,
        kernel_size=kernel_size,
        normalization=normalization,
        bias=bias,
        activation=activation,
    )
    self.activation = activation

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    # Skip connection: input plus the learned residual.
    return x + self.convolution(x)

spatial_gate

Bases: Module

Spatial attention module that applies a convolution layer after channel pooling. This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.

Source code in odak/learn/models/components.py
class spatial_gate(torch.nn.Module):
    """
    Spatial attention module that applies a convolution layer after channel pooling.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.
    """

    def __init__(self):
        """
        Initializes the spatial gate module.
        """
        super().__init__()
        # Two pooled channels in, one attention map out, with a 7x7 receptive
        # field and no nonlinearity on the convolution itself.
        self.spatial = convolution_layer(
            2, 1, 7, bias=False, activation=torch.nn.Identity()
        )

    def channel_pool(self, x):
        """
        Applies max and average pooling on the channels.

        Parameters
        ----------
        x             : torch.tensor
                        Input tensor.

        Returns
        -------
        output        : torch.tensor
                        Output tensor.
        """
        # Collapse the channel dimension twice — once with max, once with mean
        # — and stack the two maps as new channels.
        max_pool = torch.max(x, dim=1, keepdim=True)[0]
        avg_pool = torch.mean(x, dim=1, keepdim=True)
        return torch.cat((max_pool, avg_pool), dim=1)

    def forward(self, x):
        """
        Forward pass of the SpatialGate module.

        Applies spatial attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the SpatialGate module.

        Returns
        -------
        scaled_x     : torch.tensor
                       Output tensor after applying spatial attention.
        """
        pooled = self.channel_pool(x)
        attention_map = torch.sigmoid(self.spatial(pooled))
        return x * attention_map

__init__()

Initializes the spatial gate module.

Source code in odak/learn/models/components.py
def __init__(self):
    """
    Initializes the spatial gate module.
    """
    super().__init__()
    # Two pooled channels in, one attention map out, with a 7x7 receptive
    # field and no nonlinearity on the convolution itself.
    self.spatial = convolution_layer(
        2, 1, 7, bias=False, activation=torch.nn.Identity()
    )

channel_pool(x)

Applies max and average pooling on the channels.

Parameters:

  • x
            Input tensor.
    

Returns:

  • output ( tensor ) –

    Output tensor.

Source code in odak/learn/models/components.py
def channel_pool(self, x):
    """
    Applies max and average pooling on the channels.

    Parameters
    ----------
    x             : torch.tensor
                    Input tensor.

    Returns
    -------
    output        : torch.tensor
                    Output tensor.
    """
    # Stack the channel-wise maximum and mean as two new channels.
    max_pool = torch.amax(x, dim=1, keepdim=True)
    avg_pool = torch.mean(x, dim=1, keepdim=True)
    output = torch.cat((max_pool, avg_pool), dim=1)
    return output

forward(x)

Forward pass of the SpatialGate module.

Applies spatial attention to the input tensor.

Parameters:

  • x
           Input tensor to the SpatialGate module.
    

Returns:

  • scaled_x ( tensor ) –

    Output tensor after applying spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the SpatialGate module.

    Applies spatial attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the SpatialGate module.

    Returns
    -------
    scaled_x     : torch.tensor
                   Output tensor after applying spatial attention.
    """
    # Pool the channels, turn the pooled maps into a [0, 1] attention map,
    # and modulate the input with it.
    pooled = self.channel_pool(x)
    attention_map = torch.sigmoid(self.spatial(pooled))
    scaled_x = x * attention_map
    return scaled_x

spatially_adaptive_convolution

Bases: Module

A spatially adaptive convolution layer.

References

C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions." C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation." C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."

Source code in odak/learn/models/components.py
class spatially_adaptive_convolution(torch.nn.Module):
    """
    A spatially adaptive convolution layer.

    References
    ----------

    C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions."
    C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation."
    C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initializes a spatially adaptive convolution layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn.Module
                          Activation function to apply. If None, no activation is applied.
        """
        super(spatially_adaptive_convolution, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        # The standard convolution is used only as a convenient source of an
        # initialized weight tensor; forward() never calls it directly.
        self.standard_convolution = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=self.output_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        # Re-expose the convolution weight as this module's learnable
        # parameter (shares storage with standard_convolution.weight).
        self.weight = torch.nn.Parameter(
            data=self.standard_convolution.weight, requires_grad=True
        )
        # NOTE(review): activation is stored but never applied in forward() —
        # confirm whether this is intentional.
        self.activation = activation

    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive convolution layer.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        sa_output          : torch.tensor
                            Estimated output tensor.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary: when the spatial sizes
        # do not line up with the stride, each tensor is grown by size % stride
        # along each dimension, split as evenly as possible between both sides.
        if sv_kernel_feature.size(-1) * self.stride != x.size(
            -1
        ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(
                sv_kernel_feature,
                (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
            )
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(
                x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
            )

        # Unfold the input into sliding local patches, shape
        # (1, C_i * k * k, L), so the convolution becomes a matrix product.
        input_feature = torch.nn.functional.unfold(
            x,
            kernel_size=(self.kernel_size, self.kernel_size),
            stride=self.stride,
            padding=self.padding,
        )

        # Flatten the spatially varying kernels to match the unfolded layout.
        # NOTE(review): the leading 1 fixes the batch size to one — confirm
        # callers never pass batched input.
        sv_kernel = sv_kernel_feature.reshape(
            1,
            self.input_channels * self.kernel_size * self.kernel_size,
            (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
        )

        # Flatten the spatially invariant weight to (C_o, C_i * k * k).
        si_kernel = self.weight.reshape(
            self.output_channels,
            self.input_channels * self.kernel_size * self.kernel_size,
        )

        # Modulate each unfolded patch element-wise by its per-location kernel.
        sv_feature = input_feature * sv_kernel

        # Contract over the patch dimension and restore the spatial layout.
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
            1,
            self.output_channels,
            (x.size(-2) // self.stride),
            (x.size(-1) // self.stride),
        )
        return sa_output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive convolution layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Activation function to apply. If None, no activation is applied.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initializes a spatially adaptive convolution layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn.Module
                      Activation function to apply. If None, no activation is applied.
    """
    super(spatially_adaptive_convolution, self).__init__()
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.activation = activation
    # A standard convolution is created only to reuse its default weight
    # initialization; its weight tensor is re-exposed below as this module's
    # learnable parameter (shared storage).
    self.standard_convolution = torch.nn.Conv2d(
        in_channels=input_channels,
        out_channels=self.output_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    self.weight = torch.nn.Parameter(
        data=self.standard_convolution.weight, requires_grad=True
    )

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive convolution layer.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • sa_output ( tensor ) –

    Estimated output tensor. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive convolution layer.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    sa_output          : torch.tensor
                        Estimated output tensor.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary: when the spatial sizes do
    # not line up with the stride, each tensor is grown by size % stride along
    # each dimension, split as evenly as possible between both sides.
    if sv_kernel_feature.size(-1) * self.stride != x.size(
        -1
    ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(
            sv_kernel_feature,
            (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
        )
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(
            x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
        )

    # Unfold the input into sliding local patches, shape (1, C_i * k * k, L),
    # so the convolution becomes a matrix product.
    input_feature = torch.nn.functional.unfold(
        x,
        kernel_size=(self.kernel_size, self.kernel_size),
        stride=self.stride,
        padding=self.padding,
    )

    # Flatten the spatially varying kernels to match the unfolded layout.
    # NOTE(review): the leading 1 fixes the batch size to one — confirm
    # callers never pass batched input.
    sv_kernel = sv_kernel_feature.reshape(
        1,
        self.input_channels * self.kernel_size * self.kernel_size,
        (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
    )

    # Flatten the spatially invariant weight to (C_o, C_i * k * k).
    si_kernel = self.weight.reshape(
        self.output_channels,
        self.input_channels * self.kernel_size * self.kernel_size,
    )

    # Modulate each unfolded patch element-wise by its per-location kernel.
    sv_feature = input_feature * sv_kernel

    # Contract over the patch dimension and restore the spatial layout.
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
        1,
        self.output_channels,
        (x.size(-2) // self.stride),
        (x.size(-1) // self.stride),
    )
    return sa_output

spatially_adaptive_module

Bases: Module

A spatially adaptive module that combines learned spatially adaptive convolutions.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/components.py
class spatially_adaptive_module(torch.nn.Module):
    """
    A spatially adaptive module that combines learned spatially adaptive convolutions.

    References
    ----------

    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """

    def __init__(
        self,
        input_channels=2,
        output_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initializes a spatially adaptive module.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super(spatially_adaptive_module, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        # One output channel is produced directly by the spatially varying
        # convolution in forward(); the learned weight covers the rest.
        self.output_channels_for_weight = self.output_channels - 1
        # The standard convolution is used only as a convenient source of an
        # initialized weight tensor; forward() never calls it directly.
        self.standard_convolution = torch.nn.Conv2d(
            in_channels=input_channels,
            out_channels=self.output_channels_for_weight,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        # Re-expose the convolution weight as this module's learnable
        # parameter (shares storage with standard_convolution.weight).
        self.weight = torch.nn.Parameter(
            data=self.standard_convolution.weight, requires_grad=True
        )
        self.activation = activation

    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive module.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        output             : torch.tensor
                            Combined output tensor from standard and spatially adaptive convolutions.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary: when the spatial sizes
        # do not line up with the stride, each tensor is grown by size % stride
        # along each dimension, split between both sides.
        if sv_kernel_feature.size(-1) * self.stride != x.size(
            -1
        ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(
                sv_kernel_feature,
                (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
            )
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(
                x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
            )

        # Unfold the input into sliding local patches, shape (1, C_i * k * k, L),
        # so the convolution becomes a matrix product.
        input_feature = torch.nn.functional.unfold(
            x,
            kernel_size=(self.kernel_size, self.kernel_size),
            stride=self.stride,
            padding=self.padding,
        )

        # Flatten the spatially varying kernels to match the unfolded layout.
        # NOTE(review): the leading 1 fixes the batch size to one — confirm
        # callers never pass batched input.
        sv_kernel = sv_kernel_feature.reshape(
            1,
            self.input_channels * self.kernel_size * self.kernel_size,
            (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
        )

        # Apply sv_kernel to the input_feature (per-location modulation).
        sv_feature = input_feature * sv_kernel

        # First output channel: plain spatially varying convolution (sum over
        # the patch dimension).
        sv_output = torch.sum(sv_feature, dim=1).reshape(
            1, 1, (x.size(-2) // self.stride), (x.size(-1) // self.stride)
        )

        # Flatten the learned weight to (C_o - 1, C_i * k * k) for the
        # spatially adaptive channels.
        si_kernel = self.weight.reshape(
            self.output_channels_for_weight,
            self.input_channels * self.kernel_size * self.kernel_size,
        )

        # Remaining channels: contract the modulated patches with the learned
        # weight and restore the spatial layout.
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
            1,
            self.output_channels_for_weight,
            (x.size(-2) // self.stride),
            (x.size(-1) // self.stride),
        )

        # Concatenate both outputs along channels and apply the activation.
        output = self.activation(torch.cat((sv_output, sa_output), dim=1))
        return output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive module.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels=2,
    output_channels=2,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initializes a spatially adaptive module.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super(spatially_adaptive_module, self).__init__()
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.activation = activation
    # One output channel comes directly from the spatially varying convolution
    # in forward(); the learned weight covers the remaining channels.
    self.output_channels_for_weight = self.output_channels - 1
    # A standard convolution is created only to reuse its default weight
    # initialization; its weight tensor is re-exposed below as this module's
    # learnable parameter (shared storage).
    self.standard_convolution = torch.nn.Conv2d(
        in_channels=input_channels,
        out_channels=self.output_channels_for_weight,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    self.weight = torch.nn.Parameter(
        data=self.standard_convolution.weight, requires_grad=True
    )

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive module.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • output ( tensor ) –

    Combined output tensor from standard and spatially adaptive convolutions. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive module.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    output             : torch.tensor
                        Combined output tensor from standard and spatially adaptive convolutions.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary: when the spatial sizes do
    # not line up with the stride, each tensor is grown by size % stride along
    # each dimension, split between both sides.
    if sv_kernel_feature.size(-1) * self.stride != x.size(
        -1
    ) or sv_kernel_feature.size(-2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(
            sv_kernel_feature,
            (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2),
        )
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(
            x, (diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2)
        )

    # Unfold the input into sliding local patches, shape (1, C_i * k * k, L),
    # so the convolution becomes a matrix product.
    input_feature = torch.nn.functional.unfold(
        x,
        kernel_size=(self.kernel_size, self.kernel_size),
        stride=self.stride,
        padding=self.padding,
    )

    # Flatten the spatially varying kernels to match the unfolded layout.
    # NOTE(review): the leading 1 fixes the batch size to one — confirm
    # callers never pass batched input.
    sv_kernel = sv_kernel_feature.reshape(
        1,
        self.input_channels * self.kernel_size * self.kernel_size,
        (x.size(-2) // self.stride) * (x.size(-1) // self.stride),
    )

    # Apply sv_kernel to the input_feature (per-location modulation).
    sv_feature = input_feature * sv_kernel

    # First output channel: plain spatially varying convolution (sum over the
    # patch dimension).
    sv_output = torch.sum(sv_feature, dim=1).reshape(
        1, 1, (x.size(-2) // self.stride), (x.size(-1) // self.stride)
    )

    # Flatten the learned weight to (C_o - 1, C_i * k * k) for the spatially
    # adaptive channels.
    si_kernel = self.weight.reshape(
        self.output_channels_for_weight,
        self.input_channels * self.kernel_size * self.kernel_size,
    )

    # Remaining channels: contract the modulated patches with the learned
    # weight and restore the spatial layout.
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
        1,
        self.output_channels_for_weight,
        (x.size(-2) // self.stride),
        (x.size(-1) // self.stride),
    )

    # Concatenate both outputs along channels and apply the activation.
    output = self.activation(torch.cat((sv_output, sa_output), dim=1))
    return output

spatially_adaptive_unet

Bases: Module

Spatially varying U-Net model based on spatially adaptive convolution.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/models.py
class spatially_adaptive_unet(torch.nn.Module):
    """
    Spatially varying U-Net model based on spatially adaptive convolution.

    References
    ----------
    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """

    def __init__(
        self,
        depth=3,
        dimensions=8,
        input_channels=6,
        out_channels=6,
        kernel_size=3,
        bias=True,
        normalization=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initialize the spatially adaptive U-Net model.

        Parameters
        ----------
        depth : int, optional
            Number of upsampling and downsampling layers. Default is 3.
        dimensions : int, optional
            Number of dimensions. Default is 8.
        input_channels : int, optional
            Number of input channels. Default is 6.
        out_channels : int, optional
            Number of output channels. Default is 6.
        kernel_size : int, optional
            Kernel size for convolutional layers. Default is 3.
        bias : bool, optional
            Set to True to let convolutional layers learn a bias term. Default is True.
        normalization : bool, optional
            If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
        activation : torch.nn.Module, optional
            Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
        """
        super().__init__()
        self.depth = depth
        self.out_channels = out_channels
        logger.info(
            f"Initializing spatially_adaptive_unet: "
            f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
            f"out_channels={out_channels}, kernel_size={kernel_size}, "
            f"bias={bias}, normalization={normalization}"
        )
        # Stem: lift the raw input field to the base feature width (`dimensions`).
        self.inc = convolution_layer(
            input_channels=input_channels,
            output_channels=dimensions,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )

        # Each encoder entry is a ModuleList of
        # [AvgPool2d, double_convolution, spatially_adaptive_module];
        # the spatially adaptive module doubles the channel count per stage.
        self.encoder = torch.nn.ModuleList()
        for i in range(self.depth + 1):  # Downsampling layers
            down_in_channels = dimensions * (2**i)
            down_out_channels = 2 * down_in_channels
            pooling_layer = torch.nn.AvgPool2d(2)
            double_convolution_layer = double_convolution(
                input_channels=down_in_channels,
                mid_channels=down_in_channels,
                output_channels=down_in_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            sam = spatially_adaptive_module(
                input_channels=down_in_channels,
                output_channels=down_out_channels,
                kernel_size=kernel_size,
                bias=bias,
                activation=activation,
            )
            self.encoder.append(
                torch.nn.ModuleList([pooling_layer, double_convolution_layer, sam])
            )
            logger.debug(f"Added encoder block {i}: {down_in_channels} -> {down_out_channels}")
        # Bottleneck: a double convolution followed by a global feature layer,
        # both at the widest channel count, dimensions * 2**(depth + 1).
        self.global_feature_module = torch.nn.ModuleList()
        double_convolution_layer = double_convolution(
            input_channels=dimensions * (2 ** (depth + 1)),
            mid_channels=dimensions * (2 ** (depth + 1)),
            output_channels=dimensions * (2 ** (depth + 1)),
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        global_feature_layer = global_feature_module(
            input_channels=dimensions * (2 ** (depth + 1)),
            mid_channels=dimensions * (2 ** (depth + 1)),
            output_channels=dimensions * (2 ** (depth + 1)),
            kernel_size=kernel_size,
            bias=bias,
            activation=torch.nn.LeakyReLU(0.2, inplace=True),
        )
        self.global_feature_module.append(
            torch.nn.ModuleList([double_convolution_layer, global_feature_layer])
        )
        logger.debug("Added global feature module")

        # Decoder: transposed-convolution upsampling stages. The final stage
        # (i == 0) maps to `out_channels` through a 1x1 convolution with no
        # activation; every other stage halves the channel count.
        self.decoder = torch.nn.ModuleList()
        for i in range(depth, -1, -1):
            up_in_channels = dimensions * (2 ** (i + 1))
            up_mid_channels = up_in_channels // 2
            if i == 0:
                up_out_channels = self.out_channels
                upsample_layer = upsample_convtranspose2d_layer(
                    input_channels=up_in_channels,
                    output_channels=up_mid_channels,
                    kernel_size=2,
                    stride=2,
                    bias=bias,
                )
                conv_layer = torch.nn.Sequential(
                    convolution_layer(
                        input_channels=up_mid_channels,
                        output_channels=up_mid_channels,
                        kernel_size=kernel_size,
                        bias=bias,
                        normalization=normalization,
                        activation=activation,
                    ),
                    convolution_layer(
                        input_channels=up_mid_channels,
                        output_channels=up_out_channels,
                        kernel_size=1,
                        bias=bias,
                        normalization=normalization,
                        activation=None,
                    ),
                )
                self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
                logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
            else:
                up_out_channels = up_in_channels // 2
                upsample_layer = upsample_convtranspose2d_layer(
                    input_channels=up_in_channels,
                    output_channels=up_mid_channels,
                    kernel_size=2,
                    stride=2,
                    bias=bias,
                )
                conv_layer = double_convolution(
                    input_channels=up_mid_channels,
                    mid_channels=up_mid_channels,
                    output_channels=up_out_channels,
                    kernel_size=kernel_size,
                    bias=bias,
                    normalization=normalization,
                    activation=activation,
                )
                self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
                logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
        logger.info("spatially_adaptive_unet initialization completed")

    def forward(self, sv_kernel, field):
        """
        Forward pass of the spatially adaptive U-Net.

        Parameters
        ----------
        sv_kernel : list of torch.Tensor
            Learned spatially varying kernels.
            Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
            where C_i, H_i, and W_i represent the channel, height, and width
            of each feature at a certain scale.

        field : torch.Tensor
            Input field data.
            Dimension: (1, 6, H, W)

        Returns
        -------
        target_field : torch.Tensor
            Estimated output.
            Dimension: (1, 6, H, W)
        """
        x = self.inc(field)
        # downsampling_outputs holds, in order: the stem output (index 0),
        # then for each encoder stage its pooled tensor followed by its
        # spatially adaptive module output (SAM outputs at even indices >= 2).
        downsampling_outputs = [x]
        for i, down_layer in enumerate(self.encoder):
            x_down = down_layer[0](downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
            # sv_kernel is indexed back-to-front: encoder stage i consumes
            # sv_kernel[self.depth - i].
            sam_output = down_layer[2](
                x_down + down_layer[1](x_down), sv_kernel[self.depth - i]
            )
            downsampling_outputs.append(sam_output)
        global_feature = self.global_feature_module[0][0](downsampling_outputs[-1])
        global_feature = self.global_feature_module[0][1](
            downsampling_outputs[-1], global_feature
        )
        downsampling_outputs.append(global_feature)
        x_up = downsampling_outputs[-1]
        for i, up_layer in enumerate(self.decoder):
            # Skip connections: 2 * (self.depth - i) addresses the even
            # entries of downsampling_outputs (SAM outputs, then the stem
            # output at index 0 for the last decoder stage).
            x_up = up_layer[0](x_up, downsampling_outputs[2 * (self.depth - i)])
            x_up = up_layer[1](x_up)
        result = x_up
        return result

__init__(depth=3, dimensions=8, input_channels=6, out_channels=6, kernel_size=3, bias=True, normalization=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initialize the spatially adaptive U-Net model.

Parameters:

  • depth (int, default: 3 ) –

    Number of upsampling and downsampling layers. Default is 3.

  • dimensions (int, default: 8 ) –

    Number of dimensions. Default is 8.

  • input_channels (int, default: 6 ) –

    Number of input channels. Default is 6.

  • out_channels (int, default: 6 ) –

    Number of output channels. Default is 6.

  • kernel_size (int, default: 3 ) –

    Kernel size for convolutional layers. Default is 3.

  • bias (bool, default: True ) –

    Set to True to let convolutional layers learn a bias term. Default is True.

  • normalization (bool, default: False ) –

    If True, adds a Batch Normalization layer after the convolutional layer. Default is False.

  • activation (Module, default: LeakyReLU(0.2, inplace=True) ) –

    Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).

Source code in odak/learn/models/models.py
def __init__(
    self,
    depth=3,
    dimensions=8,
    input_channels=6,
    out_channels=6,
    kernel_size=3,
    bias=True,
    normalization=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initialize the spatially adaptive U-Net model.

    Parameters
    ----------
    depth : int, optional
        Number of upsampling and downsampling layers. Default is 3.
    dimensions : int, optional
        Number of dimensions. Default is 8.
    input_channels : int, optional
        Number of input channels. Default is 6.
    out_channels : int, optional
        Number of output channels. Default is 6.
    kernel_size : int, optional
        Kernel size for convolutional layers. Default is 3.
    bias : bool, optional
        Set to True to let convolutional layers learn a bias term. Default is True.
    normalization : bool, optional
        If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
    activation : torch.nn.Module, optional
        Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
    """
    super().__init__()
    self.depth = depth
    self.out_channels = out_channels
    logger.info(
        f"Initializing spatially_adaptive_unet: "
        f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
        f"out_channels={out_channels}, kernel_size={kernel_size}, "
        f"bias={bias}, normalization={normalization}"
    )
    # Stem: lift the raw input field to the base feature width (`dimensions`).
    self.inc = convolution_layer(
        input_channels=input_channels,
        output_channels=dimensions,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )

    # Each encoder entry is a ModuleList of
    # [AvgPool2d, double_convolution, spatially_adaptive_module];
    # the spatially adaptive module doubles the channel count per stage.
    self.encoder = torch.nn.ModuleList()
    for i in range(self.depth + 1):  # Downsampling layers
        down_in_channels = dimensions * (2**i)
        down_out_channels = 2 * down_in_channels
        pooling_layer = torch.nn.AvgPool2d(2)
        double_convolution_layer = double_convolution(
            input_channels=down_in_channels,
            mid_channels=down_in_channels,
            output_channels=down_in_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        sam = spatially_adaptive_module(
            input_channels=down_in_channels,
            output_channels=down_out_channels,
            kernel_size=kernel_size,
            bias=bias,
            activation=activation,
        )
        self.encoder.append(
            torch.nn.ModuleList([pooling_layer, double_convolution_layer, sam])
        )
        logger.debug(f"Added encoder block {i}: {down_in_channels} -> {down_out_channels}")
    # Bottleneck: a double convolution followed by a global feature layer,
    # both at the widest channel count, dimensions * 2**(depth + 1).
    self.global_feature_module = torch.nn.ModuleList()
    double_convolution_layer = double_convolution(
        input_channels=dimensions * (2 ** (depth + 1)),
        mid_channels=dimensions * (2 ** (depth + 1)),
        output_channels=dimensions * (2 ** (depth + 1)),
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )
    global_feature_layer = global_feature_module(
        input_channels=dimensions * (2 ** (depth + 1)),
        mid_channels=dimensions * (2 ** (depth + 1)),
        output_channels=dimensions * (2 ** (depth + 1)),
        kernel_size=kernel_size,
        bias=bias,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    )
    self.global_feature_module.append(
        torch.nn.ModuleList([double_convolution_layer, global_feature_layer])
    )
    logger.debug("Added global feature module")

    # Decoder: transposed-convolution upsampling stages. The final stage
    # (i == 0) maps to `out_channels` through a 1x1 convolution with no
    # activation; every other stage halves the channel count.
    self.decoder = torch.nn.ModuleList()
    for i in range(depth, -1, -1):
        up_in_channels = dimensions * (2 ** (i + 1))
        up_mid_channels = up_in_channels // 2
        if i == 0:
            up_out_channels = self.out_channels
            upsample_layer = upsample_convtranspose2d_layer(
                input_channels=up_in_channels,
                output_channels=up_mid_channels,
                kernel_size=2,
                stride=2,
                bias=bias,
            )
            conv_layer = torch.nn.Sequential(
                convolution_layer(
                    input_channels=up_mid_channels,
                    output_channels=up_mid_channels,
                    kernel_size=kernel_size,
                    bias=bias,
                    normalization=normalization,
                    activation=activation,
                ),
                convolution_layer(
                    input_channels=up_mid_channels,
                    output_channels=up_out_channels,
                    kernel_size=1,
                    bias=bias,
                    normalization=normalization,
                    activation=None,
                ),
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
        else:
            up_out_channels = up_in_channels // 2
            upsample_layer = upsample_convtranspose2d_layer(
                input_channels=up_in_channels,
                output_channels=up_mid_channels,
                kernel_size=2,
                stride=2,
                bias=bias,
            )
            conv_layer = double_convolution(
                input_channels=up_mid_channels,
                mid_channels=up_mid_channels,
                output_channels=up_out_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
    logger.info("spatially_adaptive_unet initialization completed")

forward(sv_kernel, field)

Forward pass of the spatially adaptive U-Net.

Parameters:

  • sv_kernel (list of torch.Tensor) –

    Learned spatially varying kernels. Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i), where C_i, H_i, and W_i represent the channel, height, and width of each feature at a certain scale.

  • field (Tensor) –

    Input field data. Dimension: (1, 6, H, W)

Returns:

  • target_field ( Tensor ) –

    Estimated output. Dimension: (1, 6, H, W)

Source code in odak/learn/models/models.py
def forward(self, sv_kernel, field):
    """
    Run the spatially adaptive U-Net on an input field.

    Parameters
    ----------
    sv_kernel : list of torch.Tensor
        Learned spatially varying kernels, one per encoder scale.
        Each element has dimension (1, C_i * kernel_size * kernel_size, H_i, W_i),
        where C_i, H_i, and W_i are the channel count, height, and width
        of the feature map at that scale.

    field : torch.Tensor
        Input field data of dimension (1, 6, H, W).

    Returns
    -------
    target_field : torch.Tensor
        Estimated output of dimension (1, 6, H, W).
    """
    # Stem features; skip_stack collects every intermediate the decoder
    # may need: [stem, pooled_0, sam_0, pooled_1, sam_1, ...].
    features = self.inc(field)
    skip_stack = [features]
    for level, (pool, refine, adapt) in enumerate(self.encoder):
        pooled = pool(skip_stack[-1])
        skip_stack.append(pooled)
        # Residual refinement, then spatially adaptive convolution with the
        # kernel list walked back-to-front.
        adapted = adapt(pooled + refine(pooled), sv_kernel[self.depth - level])
        skip_stack.append(adapted)
    # Bottleneck: double convolution followed by the global feature layer.
    bottleneck = self.global_feature_module[0][0](skip_stack[-1])
    bottleneck = self.global_feature_module[0][1](skip_stack[-1], bottleneck)
    skip_stack.append(bottleneck)
    # Decode, fusing the even-indexed skip entries (SAM outputs, then the
    # stem output for the final stage).
    decoded = skip_stack[-1]
    for level, (upsample, convolve) in enumerate(self.decoder):
        decoded = upsample(decoded, skip_stack[2 * (self.depth - level)])
        decoded = convolve(decoded)
    return decoded

spatially_varying_kernel_generation_model

Bases: Module

Spatially varying kernel generation model, revised from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Refer to: J. Huang, P. Zhu, M. Geng et al., "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/models.py
class spatially_varying_kernel_generation_model(torch.nn.Module):
    """
    Spatially varying kernel generation model, revised from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Refer to:
    J. Huang, P. Zhu, M. Geng et al. Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices.
    """

    def __init__(
        self,
        depth=3,
        dimensions=8,
        input_channels=7,
        kernel_size=3,
        bias=True,
        normalization=False,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    ):
        """
        Initialize the spatially varying kernel generation model.

        Parameters
        ----------
        depth : int, optional
            Number of upsampling and downsampling layers. Default is 3.
        dimensions : int, optional
            Number of dimensions. Default is 8.
        input_channels : int, optional
            Number of input channels. Default is 7.
        kernel_size : int, optional
            Kernel size for convolutional layers. Default is 3.
        bias : bool, optional
            Set to True to let convolutional layers learn a bias term. Default is True.
        normalization : bool, optional
            If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
        activation : torch.nn.Module, optional
            Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
        """
        super().__init__()
        self.depth = depth
        logger.info(
            f"Initializing spatially_varying_kernel_generation_model: "
            f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
            f"kernel_size={kernel_size}, bias={bias}, normalization={normalization}"
        )
        # Stem: lift the concatenated focal surface + field to `dimensions` channels.
        self.inc = convolution_layer(
            input_channels=input_channels,
            output_channels=dimensions,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )

        # Encoder is a flat ModuleList alternating AvgPool2d and
        # double_convolution layers. The first (i == 0) and last (i == depth)
        # stages keep their channel width; intermediate stages double it.
        self.encoder = torch.nn.ModuleList()
        for i in range(depth + 1):  # downsampling layers
            if i == 0:
                in_channels = dimensions * (2**i)
                out_channels = dimensions * (2**i)
            elif i == depth:
                in_channels = dimensions * (2 ** (i - 1))
                out_channels = dimensions * (2 ** (i - 1))
            else:
                in_channels = dimensions * (2 ** (i - 1))
                out_channels = 2 * in_channels
            pooling_layer = torch.nn.AvgPool2d(2)
            double_convolution_layer = double_convolution(
                input_channels=in_channels,
                mid_channels=in_channels,
                output_channels=out_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            self.encoder.append(pooling_layer)
            self.encoder.append(double_convolution_layer)
            logger.debug(f"Added encoder block {i}: {in_channels} -> {out_channels}")

        # One kernel-generation branch per output scale. Each branch is a
        # ModuleList of AvgPool2d layers (to bring the focal surface and the
        # pooled encoder features to matching resolution) followed by a
        # three-convolution head emitting (2**(depth+i)) * kernel_size**2
        # kernel channels. The "+ 1" in svf_in_channels accounts for the
        # single-channel pooled focal surface; the extra `dimensions` term at
        # i == 1 matches the stem-width features concatenated at that scale
        # in forward().
        self.spatially_varying_feature = torch.nn.ModuleList()  # for kernel generation
        for i in range(depth, -1, -1):
            if i == 1:
                svf_in_channels = dimensions + 2 ** (self.depth + i) + 1
            else:
                svf_in_channels = 2 ** (self.depth + i) + 1
            svf_out_channels = (2 ** (self.depth + i)) * (kernel_size * kernel_size)
            svf_mid_channels = dimensions * (2 ** (self.depth - 1))
            spatially_varying_kernel_generation = torch.nn.ModuleList()
            for j in range(i, -1, -1):
                pooling_layer = torch.nn.AvgPool2d(2 ** (j + 1))
                spatially_varying_kernel_generation.append(pooling_layer)
            kernel_generation_block = torch.nn.Sequential(
                torch.nn.Conv2d(
                    in_channels=svf_in_channels,
                    out_channels=svf_mid_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
                activation,
                torch.nn.Conv2d(
                    in_channels=svf_mid_channels,
                    out_channels=svf_mid_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
                activation,
                torch.nn.Conv2d(
                    in_channels=svf_mid_channels,
                    out_channels=svf_out_channels,
                    kernel_size=kernel_size,
                    padding=kernel_size // 2,
                    bias=bias,
                ),
            )
            spatially_varying_kernel_generation.append(kernel_generation_block)
            self.spatially_varying_feature.append(spatially_varying_kernel_generation)
            logger.debug(f"Added SVF block {i}: {svf_in_channels} -> {svf_out_channels}")

        # Decoder: the first entry is the global feature layer (consumed at
        # i == 0 in forward()); the remaining entries are
        # [upsample, double_convolution] pairs.
        self.decoder = torch.nn.ModuleList()
        global_feature_layer = global_feature_module(  # global feature layer
            input_channels=dimensions * (2 ** (depth - 1)),
            mid_channels=dimensions * (2 ** (depth - 1)),
            output_channels=dimensions * (2 ** (depth - 1)),
            kernel_size=kernel_size,
            bias=bias,
            activation=torch.nn.LeakyReLU(0.2, inplace=True),
        )
        self.decoder.append(global_feature_layer)
        for i in range(depth, 0, -1):
            if i == 2:
                up_in_channels = (dimensions // 2) * (2**i)
                up_out_channels = up_in_channels
                up_mid_channels = up_in_channels
            elif i == 1:
                up_in_channels = dimensions * 2
                up_out_channels = dimensions
                up_mid_channels = up_out_channels
            else:
                up_in_channels = (dimensions // 2) * (2**i)
                up_out_channels = up_in_channels // 2
                up_mid_channels = up_in_channels
            upsample_layer = upsample_convtranspose2d_layer(
                input_channels=up_in_channels,
                output_channels=up_mid_channels,
                kernel_size=2,
                stride=2,
                bias=bias,
            )
            # NOTE(review): mid_channels is left at double_convolution's
            # default here, unlike the encoder — confirm this is intended.
            conv_layer = double_convolution(
                input_channels=up_mid_channels,
                output_channels=up_out_channels,
                kernel_size=kernel_size,
                bias=bias,
                normalization=normalization,
                activation=activation,
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
        logger.info("spatially_varying_kernel_generation_model initialization completed")

    def forward(self, focal_surface, field):
        """
        Forward pass of the spatially varying kernel generation model.

        Parameters
        ----------
        focal_surface : torch.Tensor
            Input focal surface data.
            Dimension: (1, 1, H, W)

        field : torch.Tensor
            Input field data.
            Dimension: (1, 6, H, W)

        Returns
        -------
        sv_kernel : list of torch.Tensor
            Learned spatially varying kernels.
            Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
            where C_i, H_i, and W_i represent the channel, height, and width
            of each feature at a certain scale.
        """
        x = self.inc(torch.cat((focal_surface, field), dim=1))
        # downsampling_outputs[0] is the raw focal surface and [1] the stem
        # features; the encoder loop then appends every pooling/convolution
        # output, so pooled tensors land at even indices >= 2 and convolution
        # outputs at odd indices >= 3.
        downsampling_outputs = [focal_surface]
        downsampling_outputs.append(x)
        for i, down_layer in enumerate(self.encoder):
            x_down = down_layer(downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
        sv_kernels = []
        for i, (up_layer, svf_layer) in enumerate(
            zip(self.decoder, self.spatially_varying_feature)
        ):
            if i == 0:
                # Coarsest scale: apply the global feature layer rather than
                # an upsample/convolution pair.
                global_feature = up_layer(
                    downsampling_outputs[-2], downsampling_outputs[-1]
                )
                downsampling_outputs[-1] = global_feature
                sv_feature = [global_feature, downsampling_outputs[0]]
                # Cumulatively pool the focal surface with this branch's
                # finest pooling layer while collecting pooled encoder
                # features (even indices) from the other pyramid levels.
                for j in range(self.depth - i + 1):
                    sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                    if j > 0:
                        sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
                # Reorder the collected features into the channel order the
                # kernel-generation head was built for.
                # NOTE(review): index 4 only exists when depth >= 3, so this
                # permutation assumes the default depth — confirm before
                # using other depths.
                sv_feature = [
                    sv_feature[0],
                    sv_feature[1],
                    sv_feature[4],
                    sv_feature[2],
                    sv_feature[3],
                ]
                sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
                sv_kernels.append(sv_kernel)
            else:
                # Finer scales: upsample, merge with the matching encoder
                # convolution output (odd index), then generate kernels.
                x_up = up_layer[0](
                    downsampling_outputs[-1],
                    downsampling_outputs[2 * (self.depth + 1 - i) + 1],
                )
                x_up = up_layer[1](x_up)
                downsampling_outputs[-1] = x_up
                sv_feature = [x_up, downsampling_outputs[0]]
                for j in range(self.depth - i + 1):
                    sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                    if j > 0:
                        sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
                if i == 1:
                    sv_feature = [
                        sv_feature[0],
                        sv_feature[1],
                        sv_feature[3],
                        sv_feature[2],
                    ]
                sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
                sv_kernels.append(sv_kernel)
        return sv_kernels

__init__(depth=3, dimensions=8, input_channels=7, kernel_size=3, bias=True, normalization=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initialize the spatially varying kernel generation model.

Parameters:

  • depth (int, default: 3 ) –

    Number of upsampling and downsampling layers. Default is 3.

  • dimensions (int, default: 8 ) –

    Number of dimensions. Default is 8.

  • input_channels (int, default: 7 ) –

    Number of input channels. Default is 7.

  • kernel_size (int, default: 3 ) –

    Kernel size for convolutional layers. Default is 3.

  • bias (bool, default: True ) –

    Set to True to let convolutional layers learn a bias term. Default is True.

  • normalization (bool, default: False ) –

    If True, adds a Batch Normalization layer after the convolutional layer. Default is False.

  • activation (Module, default: LeakyReLU(0.2, inplace=True) ) –

    Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).

Source code in odak/learn/models/models.py
def __init__(
    self,
    depth=3,
    dimensions=8,
    input_channels=7,
    kernel_size=3,
    bias=True,
    normalization=False,
    activation=torch.nn.LeakyReLU(0.2, inplace=True),
):
    """
    Initialize the spatially varying kernel generation model.

    Parameters
    ----------
    depth : int, optional
        Number of upsampling and downsampling layers. Default is 3.
    dimensions : int, optional
        Number of dimensions. Default is 8.
    input_channels : int, optional
        Number of input channels. Default is 7.
    kernel_size : int, optional
        Kernel size for convolutional layers. Default is 3.
    bias : bool, optional
        Set to True to let convolutional layers learn a bias term. Default is True.
    normalization : bool, optional
        If True, adds a Batch Normalization layer after the convolutional layer. Default is False.
    activation : torch.nn.Module, optional
        Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.LeakyReLU(0.2, inplace=True).
    """
    # NOTE(review): the default `activation` module is instantiated once at
    # definition time and shared wherever the default is used — harmless for
    # a stateless activation, but confirm this sharing is intended.
    super().__init__()
    self.depth = depth
    logger.info(
        f"Initializing spatially_varying_kernel_generation_model: "
        f"depth={depth}, dimensions={dimensions}, input_channels={input_channels}, "
        f"kernel_size={kernel_size}, bias={bias}, normalization={normalization}"
    )
    # Stem: projects the concatenated input to `dimensions` channels.
    self.inc = convolution_layer(
        input_channels=input_channels,
        output_channels=dimensions,
        kernel_size=kernel_size,
        bias=bias,
        normalization=normalization,
        activation=activation,
    )

    # Encoder: (depth + 1) blocks, each an average pooling followed by a
    # double convolution. Channel counts stay flat at the first and the last
    # block and double at every block in between.
    self.encoder = torch.nn.ModuleList()
    for i in range(depth + 1):  # downsampling layers
        if i == 0:
            in_channels = dimensions * (2**i)
            out_channels = dimensions * (2**i)
        elif i == depth:
            in_channels = dimensions * (2 ** (i - 1))
            out_channels = dimensions * (2 ** (i - 1))
        else:
            in_channels = dimensions * (2 ** (i - 1))
            out_channels = 2 * in_channels
        pooling_layer = torch.nn.AvgPool2d(2)
        double_convolution_layer = double_convolution(
            input_channels=in_channels,
            mid_channels=in_channels,
            output_channels=out_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.encoder.append(pooling_layer)
        self.encoder.append(double_convolution_layer)
        logger.debug(f"Added encoder block {i}: {in_channels} -> {out_channels}")

    # Kernel-generation branches, one per scale (i = depth .. 0). Each branch
    # stores (i + 1) pooling layers followed by a three-convolution block that
    # emits kernel_size * kernel_size weights per output channel.
    self.spatially_varying_feature = torch.nn.ModuleList()  # for kernel generation
    for i in range(depth, -1, -1):
        # The extra "+ 1" input channel accounts for the pooled focal surface
        # concatenated in forward(); the i == 1 scale additionally receives a
        # `dimensions`-channel feature — TODO confirm against forward().
        if i == 1:
            svf_in_channels = dimensions + 2 ** (self.depth + i) + 1
        else:
            svf_in_channels = 2 ** (self.depth + i) + 1
        svf_out_channels = (2 ** (self.depth + i)) * (kernel_size * kernel_size)
        svf_mid_channels = dimensions * (2 ** (self.depth - 1))
        spatially_varying_kernel_generation = torch.nn.ModuleList()
        for j in range(i, -1, -1):
            pooling_layer = torch.nn.AvgPool2d(2 ** (j + 1))
            spatially_varying_kernel_generation.append(pooling_layer)
        kernel_generation_block = torch.nn.Sequential(
            torch.nn.Conv2d(
                in_channels=svf_in_channels,
                out_channels=svf_mid_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            activation,
            torch.nn.Conv2d(
                in_channels=svf_mid_channels,
                out_channels=svf_mid_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
            activation,
            torch.nn.Conv2d(
                in_channels=svf_mid_channels,
                out_channels=svf_out_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                bias=bias,
            ),
        )
        spatially_varying_kernel_generation.append(kernel_generation_block)
        self.spatially_varying_feature.append(spatially_varying_kernel_generation)
        logger.debug(f"Added SVF block {i}: {svf_in_channels} -> {svf_out_channels}")

    # Decoder: one global-feature block at the deepest scale, then
    # (upsample, double convolution) pairs back toward the input resolution.
    # NOTE(review): the i == 2 and i == 1 special cases adjust channel counts;
    # verify them against the encoder's channel schedule above.
    self.decoder = torch.nn.ModuleList()
    global_feature_layer = global_feature_module(  # global feature layer
        input_channels=dimensions * (2 ** (depth - 1)),
        mid_channels=dimensions * (2 ** (depth - 1)),
        output_channels=dimensions * (2 ** (depth - 1)),
        kernel_size=kernel_size,
        bias=bias,
        activation=torch.nn.LeakyReLU(0.2, inplace=True),
    )
    self.decoder.append(global_feature_layer)
    for i in range(depth, 0, -1):
        if i == 2:
            up_in_channels = (dimensions // 2) * (2**i)
            up_out_channels = up_in_channels
            up_mid_channels = up_in_channels
        elif i == 1:
            up_in_channels = dimensions * 2
            up_out_channels = dimensions
            up_mid_channels = up_out_channels
        else:
            up_in_channels = (dimensions // 2) * (2**i)
            up_out_channels = up_in_channels // 2
            up_mid_channels = up_in_channels
        upsample_layer = upsample_convtranspose2d_layer(
            input_channels=up_in_channels,
            output_channels=up_mid_channels,
            kernel_size=2,
            stride=2,
            bias=bias,
        )
        conv_layer = double_convolution(
            input_channels=up_mid_channels,
            output_channels=up_out_channels,
            kernel_size=kernel_size,
            bias=bias,
            normalization=normalization,
            activation=activation,
        )
        self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
        logger.debug(f"Added decoder block {i}: {up_in_channels} -> {up_out_channels}")
    logger.info("spatially_varying_kernel_generation_model initialization completed")

forward(focal_surface, field)

Forward pass of the spatially varying kernel generation model.

Parameters:

  • focal_surface (Tensor) –

    Input focal surface data. Dimension: (1, 1, H, W)

  • field (Tensor) –

    Input field data. Dimension: (1, 6, H, W)

Returns:

  • sv_kernel ( list of torch.Tensor ) –

    Learned spatially varying kernels. Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i), where C_i, H_i, and W_i represent the channel, height, and width of each feature at a certain scale.

Source code in odak/learn/models/models.py
def forward(self, focal_surface, field):
    """
    Forward pass of the spatially varying kernel generation model.

    Parameters
    ----------
    focal_surface : torch.Tensor
        Input focal surface data.
        Dimension: (1, 1, H, W)

    field : torch.Tensor
        Input field data.
        Dimension: (1, 6, H, W)

    Returns
    -------
    sv_kernel : list of torch.Tensor
        Learned spatially varying kernels.
        Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
        where C_i, H_i, and W_i represent the channel, height, and width
        of each feature at a certain scale.
    """
    x = self.inc(torch.cat((focal_surface, field), dim=1))
    # downsampling_outputs layout: index 0 holds the raw focal surface,
    # index 1 the stem output, and each following entry the output of one
    # encoder module in order (pooling and convolution outputs interleave,
    # since the encoder alternates AvgPool2d and double_convolution modules).
    downsampling_outputs = [focal_surface]
    downsampling_outputs.append(x)
    for i, down_layer in enumerate(self.encoder):
        x_down = down_layer(downsampling_outputs[-1])
        downsampling_outputs.append(x_down)
    sv_kernels = []
    for i, (up_layer, svf_layer) in enumerate(
        zip(self.decoder, self.spatially_varying_feature)
    ):
        if i == 0:
            # Deepest scale: fuse the last two encoder outputs through the
            # global feature module before generating kernels.
            global_feature = up_layer(
                downsampling_outputs[-2], downsampling_outputs[-1]
            )
            downsampling_outputs[-1] = global_feature
            sv_feature = [global_feature, downsampling_outputs[0]]
            for j in range(self.depth - i + 1):
                # Repeatedly pool the focal surface down to this scale; from
                # j == 1 on, also pool intermediate encoder features.
                sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                if j > 0:
                    sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
            # Reorder so channels match the concatenation order expected by
            # the kernel-generation block — TODO confirm against the channel
            # bookkeeping in __init__.
            sv_feature = [
                sv_feature[0],
                sv_feature[1],
                sv_feature[4],
                sv_feature[2],
                sv_feature[3],
            ]
            sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
            sv_kernels.append(sv_kernel)
        else:
            # Shallower scales: upsample the running feature and fuse it with
            # the encoder output at matching resolution (the index expression
            # 2 * (depth + 1 - i) + 1 selects that skip feature — verify).
            x_up = up_layer[0](
                downsampling_outputs[-1],
                downsampling_outputs[2 * (self.depth + 1 - i) + 1],
            )
            x_up = up_layer[1](x_up)
            downsampling_outputs[-1] = x_up
            sv_feature = [x_up, downsampling_outputs[0]]
            for j in range(self.depth - i + 1):
                sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                if j > 0:
                    sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
            # Only the i == 1 scale needs its last two features swapped to
            # match the expected channel order — see the i == 0 branch.
            if i == 1:
                sv_feature = [
                    sv_feature[0],
                    sv_feature[1],
                    sv_feature[3],
                    sv_feature[2],
                ]
            sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim=1))
            sv_kernels.append(sv_kernel)
    return sv_kernels

unet

Bases: Module

A U-Net model, heavily inspired from https://github.com/milesial/Pytorch-UNet/tree/master/unet and more can be read from Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. "U-net: Convolutional networks for biomedical image segmentation." Medical Image Computing and Computer-Assisted Intervention–MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18. Springer International Publishing, 2015.

Source code in odak/learn/models/models.py
class unet(torch.nn.Module):
    """
    A U-Net model. Heavily inspired by `https://github.com/milesial/Pytorch-UNet/tree/master/unet`;
    see Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. "U-net: Convolutional networks
    for biomedical image segmentation." MICCAI 2015, Part III 18, Springer, 2015 for details.
    """

    def __init__(
        self,
        depth=4,
        dimensions=64,
        input_channels=2,
        output_channels=1,
        bilinear=False,
        kernel_size=3,
        bias=False,
        activation=torch.nn.ReLU(inplace=True),
    ):
        """
        Initialize the U-Net model.

        Parameters
        ----------
        depth : int, optional
            Number of upsampling and downsampling layers. Default is 4.
        dimensions : int, optional
            Number of dimensions. Default is 64.
        input_channels : int, optional
            Number of input channels. Default is 2.
        output_channels : int, optional
            Number of output channels. Default is 1.
        bilinear : bool, optional
            Uses bilinear upsampling in upsampling layers when set True. Default is False.
        kernel_size : int, optional
            Kernel size for convolutional layers. Default is 3.
        bias : bool, optional
            Set True to let convolutional layers learn a bias term. Default is False.
        activation : torch.nn.Module, optional
            Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.ReLU(inplace=True).
        """
        super().__init__()
        logger.info(
            f"Initializing U-Net: depth={depth}, dimensions={dimensions}, "
            f"input_channels={input_channels}, output_channels={output_channels}, "
            f"bilinear={bilinear}, kernel_size={kernel_size}"
        )
        # Stem: project the input to the base channel width.
        self.inc = double_convolution(
            input_channels=input_channels,
            mid_channels=dimensions,
            output_channels=dimensions,
            kernel_size=kernel_size,
            bias=bias,
            activation=activation,
        )
        self.downsampling_layers = torch.nn.ModuleList()
        self.upsampling_layers = torch.nn.ModuleList()
        # Encoder: channel count doubles at every downsampling stage.
        for i in range(depth):
            in_channels = dimensions * (2 ** i)
            out_channels = in_channels * 2
            self.downsampling_layers.append(
                downsample_layer(
                    in_channels,
                    out_channels,
                    kernel_size=kernel_size,
                    bias=bias,
                    activation=activation,
                )
            )
            logger.debug(f"Added downsampling layer {i}: {in_channels} -> {out_channels}")
        # Decoder: mirrors the encoder, halving channels at every stage.
        for i in reversed(range(depth)):
            up_in_channels = dimensions * (2 ** (i + 1))
            up_out_channels = dimensions * (2 ** i)
            self.upsampling_layers.append(
                upsample_layer(
                    up_in_channels,
                    up_out_channels,
                    kernel_size=kernel_size,
                    bias=bias,
                    activation=activation,
                    bilinear=bilinear,
                )
            )
            logger.debug(f"Added upsampling layer: {up_in_channels} -> {up_out_channels}")
        # Final convolution maps the base channel width to the requested
        # output channels at the input resolution ("same" padding).
        self.outc = torch.nn.Conv2d(
            dimensions,
            output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=bias,
        )
        logger.info("U-Net initialization completed")

    def forward(self, x):
        """
        Forward pass of the U-Net.

        Parameters
        ----------
        x : torch.Tensor
            Input data.

        Returns
        -------
        result : torch.Tensor
            Estimated output.
        """
        features = [self.inc(x)]
        for stage in self.downsampling_layers:
            features.append(stage(features[-1]))
        decoded = features[-1]
        # Skip connections are consumed from the deepest encoder feature outwards.
        for stage, skip in zip(self.upsampling_layers, reversed(features[:-1])):
            decoded = stage(decoded, skip)
        result = self.outc(decoded)
        return result

__init__(depth=4, dimensions=64, input_channels=2, output_channels=1, bilinear=False, kernel_size=3, bias=False, activation=torch.nn.ReLU(inplace=True))

Initialize the U-Net model.

Parameters:

  • depth (int, default: 4 ) –

    Number of upsampling and downsampling layers. Default is 4.

  • dimensions (int, default: 64 ) –

    Number of dimensions. Default is 64.

  • input_channels (int, default: 2 ) –

    Number of input channels. Default is 2.

  • output_channels (int, default: 1 ) –

    Number of output channels. Default is 1.

  • bilinear (bool, default: False ) –

    Uses bilinear upsampling in upsampling layers when set True. Default is False.

  • kernel_size (int, default: 3 ) –

    Kernel size for convolutional layers. Default is 3.

  • bias (bool, default: False ) –

    Set True to let convolutional layers learn a bias term. Default is False.

  • activation (Module, default: ReLU(inplace=True) ) –

    Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.ReLU(inplace=True).

Source code in odak/learn/models/models.py
def __init__(
    self,
    depth=4,
    dimensions=64,
    input_channels=2,
    output_channels=1,
    bilinear=False,
    kernel_size=3,
    bias=False,
    activation=torch.nn.ReLU(inplace=True),
):
    """
    Initialize the U-Net model.

    Parameters
    ----------
    depth : int, optional
        Number of upsampling and downsampling layers. Default is 4.
    dimensions : int, optional
        Number of dimensions. Default is 64.
    input_channels : int, optional
        Number of input channels. Default is 2.
    output_channels : int, optional
        Number of output channels. Default is 1.
    bilinear : bool, optional
        Uses bilinear upsampling in upsampling layers when set True. Default is False.
    kernel_size : int, optional
        Kernel size for convolutional layers. Default is 3.
    bias : bool, optional
        Set True to let convolutional layers learn a bias term. Default is False.
    activation : torch.nn.Module, optional
        Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()). Default is torch.nn.ReLU(inplace=True).
    """
    super(unet, self).__init__()
    logger.info(
        f"Initializing U-Net: depth={depth}, dimensions={dimensions}, "
        f"input_channels={input_channels}, output_channels={output_channels}, "
        f"bilinear={bilinear}, kernel_size={kernel_size}"
    )
    # Stem: project the input to the base channel width.
    self.inc = double_convolution(
        input_channels=input_channels,
        mid_channels=dimensions,
        output_channels=dimensions,
        kernel_size=kernel_size,
        bias=bias,
        activation=activation,
    )

    self.downsampling_layers = torch.nn.ModuleList()
    self.upsampling_layers = torch.nn.ModuleList()
    # Encoder: channel count doubles at every downsampling stage.
    for i in range(depth):  # downsampling layers
        in_channels = dimensions * (2**i)
        out_channels = dimensions * (2 ** (i + 1))
        down_layer = downsample_layer(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            bias=bias,
            activation=activation,
        )
        self.downsampling_layers.append(down_layer)
        logger.debug(f"Added downsampling layer {i}: {in_channels} -> {out_channels}")

    # Decoder: mirrors the encoder, halving channels at every stage.
    for i in range(depth - 1, -1, -1):  # upsampling layers
        up_in_channels = dimensions * (2 ** (i + 1))
        up_out_channels = dimensions * (2**i)
        up_layer = upsample_layer(
            up_in_channels,
            up_out_channels,
            kernel_size=kernel_size,
            bias=bias,
            activation=activation,
            bilinear=bilinear,
        )
        self.upsampling_layers.append(up_layer)
        logger.debug(f"Added upsampling layer: {up_in_channels} -> {up_out_channels}")
    # Final convolution maps to output_channels at the input resolution
    # ("same" padding).
    self.outc = torch.nn.Conv2d(
        dimensions,
        output_channels,
        kernel_size=kernel_size,
        padding=kernel_size // 2,
        bias=bias,
    )
    logger.info("U-Net initialization completed")

forward(x)

Forward pass of the U-Net.

Parameters:

  • x (Tensor) –

    Input data.

Returns:

  • result ( Tensor ) –

    Estimated output.

Source code in odak/learn/models/models.py
def forward(self, x):
    """
    Forward pass of the U-Net.

    Parameters
    ----------
    x : torch.Tensor
        Input data.

    Returns
    -------
    result : torch.Tensor
        Estimated output.
    """
    features = [self.inc(x)]
    for stage in self.downsampling_layers:
        features.append(stage(features[-1]))
    decoded = features[-1]
    # Skip connections are consumed from the deepest encoder feature outwards.
    for stage, skip in zip(self.upsampling_layers, reversed(features[:-1])):
        decoded = stage(decoded, skip)
    result = self.outc(decoded)
    return result

upsample_convtranspose2d_layer

Bases: Module

An upsampling convtranspose2d layer.

Source code in odak/learn/models/components.py
class upsample_convtranspose2d_layer(torch.nn.Module):
    """
    An upsampling layer based on a transposed convolution.
    Upsamples its first input, zero-pads it to the spatial size of the
    second input, and returns their element-wise sum.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=2,
        stride=2,
        bias=False,
    ):
        """
        An upsampling component based on a transposed convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size of the transposed convolution.
        stride          : int
                          Stride of the transposed convolution (upsampling factor).
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        """
        super().__init__()
        self.up = torch.nn.ConvTranspose2d(
            in_channels=input_channels,
            out_channels=output_channels,
            kernel_size=kernel_size,
            stride=stride,
            bias=bias,
        )

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         Tensor to be upsampled.
        x2             : torch.tensor
                         Skip tensor defining the target spatial size.


        Returns
        ----------
        result        : torch.tensor
                        Sum of the upsampled `x1` and `x2`.
        """
        upsampled = self.up(x1)
        height_gap = x2.size(2) - upsampled.size(2)
        width_gap = x2.size(3) - upsampled.size(3)
        # torch.nn.functional.pad takes [left, right, top, bottom].
        padding = [
            width_gap // 2,
            width_gap - width_gap // 2,
            height_gap // 2,
            height_gap - height_gap // 2,
        ]
        result = torch.nn.functional.pad(upsampled, padding) + x2
        return result

__init__(input_channels, output_channels, kernel_size=2, stride=2, bias=False)

An upsampling component based on a transposed convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=2,
    stride=2,
    bias=False,
):
    """
    An upsampling component based on a transposed convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size of the transposed convolution.
    stride          : int
                      Stride of the transposed convolution (upsampling factor).
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    """
    super().__init__()
    self.up = torch.nn.ConvTranspose2d(
        in_channels=input_channels,
        out_channels=output_channels,
        bias=bias,
        kernel_size=kernel_size,
        stride=stride,
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     Tensor to be upsampled.
    x2             : torch.tensor
                     Skip tensor defining the target spatial size.


    Returns
    ----------
    result        : torch.tensor
                    Sum of the upsampled `x1` and `x2`.
    """
    upsampled = self.up(x1)
    height_gap = x2.size(2) - upsampled.size(2)
    width_gap = x2.size(3) - upsampled.size(3)
    # torch.nn.functional.pad takes [left, right, top, bottom].
    padding = [
        width_gap // 2,
        width_gap - width_gap // 2,
        height_gap // 2,
        height_gap - height_gap // 2,
    ]
    result = torch.nn.functional.pad(upsampled, padding) + x2
    return result

upsample_layer

Bases: Module

An upsampling convolutional layer.

Source code in odak/learn/models/components.py
class upsample_layer(torch.nn.Module):
    """
    An upsampling convolutional layer.
    Upsamples its first input (bilinearly or with a transposed convolution),
    pads it to the spatial size of the second input, concatenates the two
    along the channel dimension, and refines the result with a double
    convolution.
    """

    def __init__(
        self,
        input_channels,
        output_channels,
        kernel_size=3,
        bias=False,
        normalization=False,
        activation=torch.nn.ReLU(),
        bilinear=True,
    ):
        """
        An upsampling component followed by a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used (default torch.nn.ReLU()).
        bilinear        : bool
                          If set to True, bilinear sampling is used.
        """
        super().__init__()
        if bilinear:
            # Parameter-free upsampling; the double convolution consumes the
            # concatenated (skip + upsampled) channels.
            self.up = torch.nn.Upsample(
                scale_factor=2, mode="bilinear", align_corners=True
            )
            conv_input_channels = input_channels + output_channels
            conv_mid_channels = input_channels // 2
        else:
            # Learned upsampling: the transposed convolution halves the
            # channel count while doubling the spatial resolution.
            self.up = torch.nn.ConvTranspose2d(
                input_channels, input_channels // 2, kernel_size=2, stride=2
            )
            conv_input_channels = input_channels
            conv_mid_channels = output_channels
        self.conv = double_convolution(
            input_channels=conv_input_channels,
            mid_channels=conv_mid_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         Tensor to be upsampled.
        x2             : torch.tensor
                         Skip tensor concatenated with the upsampled `x1`.


        Returns
        ----------
        result        : torch.tensor
                        Result of the forward operation
        """
        upsampled = self.up(x1)
        height_gap = x2.size(2) - upsampled.size(2)
        width_gap = x2.size(3) - upsampled.size(3)
        # torch.nn.functional.pad takes [left, right, top, bottom].
        upsampled = torch.nn.functional.pad(
            upsampled,
            [
                width_gap // 2,
                width_gap - width_gap // 2,
                height_gap // 2,
                height_gap - height_gap // 2,
            ],
        )
        result = self.conv(torch.cat([x2, upsampled], dim=1))
        return result

__init__(input_channels, output_channels, kernel_size=3, bias=False, normalization=False, activation=torch.nn.ReLU(), bilinear=True)

An upsampling component followed by a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
  • bilinear
              If set to True, bilinear sampling is used.
    
Source code in odak/learn/models/components.py
def __init__(
    self,
    input_channels,
    output_channels,
    kernel_size=3,
    bias=False,
    normalization=False,
    activation=torch.nn.ReLU(),
    bilinear=True,
):
    """
    An upsampling component followed by a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used (default torch.nn.ReLU()).
    bilinear        : bool
                      If set to True, bilinear sampling is used.
    """
    super(upsample_layer, self).__init__()
    if bilinear:
        # Parameter-free upsampling; the double convolution consumes the
        # concatenated (skip + upsampled) channels.
        self.up = torch.nn.Upsample(
            scale_factor=2, mode="bilinear", align_corners=True
        )
        self.conv = double_convolution(
            input_channels=input_channels + output_channels,
            mid_channels=input_channels // 2,
            output_channels=output_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )
    else:
        # Learned upsampling: the transposed convolution halves the channel
        # count while doubling the spatial resolution.
        self.up = torch.nn.ConvTranspose2d(
            input_channels, input_channels // 2, kernel_size=2, stride=2
        )
        self.conv = double_convolution(
            input_channels=input_channels,
            mid_channels=output_channels,
            output_channels=output_channels,
            kernel_size=kernel_size,
            normalization=normalization,
            bias=bias,
            activation=activation,
        )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     Tensor to be upsampled.
    x2             : torch.tensor
                     Skip tensor concatenated with the upsampled `x1`.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """
    upsampled = self.up(x1)
    height_gap = x2.size(2) - upsampled.size(2)
    width_gap = x2.size(3) - upsampled.size(3)
    # torch.nn.functional.pad takes [left, right, top, bottom].
    upsampled = torch.nn.functional.pad(
        upsampled,
        [
            width_gap // 2,
            width_gap - width_gap // 2,
            height_gap // 2,
            height_gap - height_gap // 2,
        ],
    )
    result = self.conv(torch.cat([x2, upsampled], dim=1))
    return result

gaussian(x, multiplier=1.0)

A Gaussian non-linear activation. For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

Parameters:

  • x
           Input data.
    
  • multiplier
           Multiplier.
    

Returns:

  • result ( float or tensor ) –

    Output data.

Source code in odak/learn/models/components.py
def gaussian(x, multiplier=1.0):
    """
    A Gaussian non-linear activation.
    For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

    Parameters
    ----------
    x            : float or torch.tensor
                   Input data.
    multiplier   : float or torch.tensor
                   Scale factor applied to the input before squaring.

    Returns
    -------
    result       : float or torch.tensor
                   Output data.
    """
    scaled = multiplier * x
    result = torch.exp(-(scaled ** 2))
    return result

swish(x)

A swish non-linear activation. For more details: https://en.wikipedia.org/wiki/Swish_function

Parameters:

  • x
             Input.
    

Returns:

  • out ( float or tensor ) –

    Output.

Source code in odak/learn/models/components.py
def swish(x):
    """
    A swish non-linear activation.
    For more details: https://en.wikipedia.org/wiki/Swish_function

    Parameters
    -----------
    x              : float or torch.tensor
                     Input.

    Returns
    -------
    out            : float or torch.tensor
                     Output.
    """
    gate = torch.sigmoid(x)
    out = x * gate
    return out