| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
|
|
|
|
class Conv3x3GNReLU(nn.Module):
    """3x3 convolution -> GroupNorm(32 groups) -> ReLU, with optional 2x upsampling.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution. It is
            passed to ``nn.GroupNorm(32, ...)``, which requires it to be
            divisible by 32.
        upsample: When true, the activated output is bilinearly upsampled by a
            factor of 2 (``align_corners=True``) before being returned.
    """

    def __init__(self, in_channels, out_channels, upsample=False):
        super().__init__()
        self.upsample = upsample

        # stride 1 + padding 1 keeps the spatial size; the bias is dropped
        # because the normalization layer that follows has its own shift.
        conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        norm = nn.GroupNorm(32, out_channels)
        activation = nn.ReLU(inplace=True)
        self.block = nn.Sequential(conv, norm, activation)

    def forward(self, x):
        """Apply conv/norm/ReLU and, if enabled, double the spatial resolution."""
        features = self.block(x)
        if not self.upsample:
            return features
        return F.interpolate(
            features, scale_factor=2, mode="bilinear", align_corners=True
        )
|
|
|
|
class FPNBlock(nn.Module):
    """Top-down FPN merge step: upsample the coarser map and add a projected skip.

    Args:
        pyramid_channels: Channel count of the pyramid feature maps (the
            output channels of the 1x1 skip projection).
        skip_channels: Channel count of the encoder skip feature map.
    """

    def __init__(self, pyramid_channels, skip_channels):
        super().__init__()
        self.skip_conv = nn.Conv2d(skip_channels, pyramid_channels, kernel_size=1)

    def forward(self, x, skip=None):
        """Merge the coarser pyramid level ``x`` with the encoder feature ``skip``.

        Args:
            x: Coarser pyramid feature map with ``pyramid_channels`` channels.
            skip: Encoder feature map with ``skip_channels`` channels. Although
                the signature gives it a ``None`` default, it is required.

        Returns:
            ``x`` resized (nearest neighbour) to the spatial size of ``skip``,
            plus the 1x1-projected ``skip``.

        Raises:
            TypeError: If ``skip`` is ``None``.
        """
        if skip is None:
            # Fail early with a readable message instead of the opaque
            # argument-mismatch error conv2d raises when handed None.
            raise TypeError("FPNBlock.forward() requires a `skip` tensor, got None")

        # Resize to the skip's exact spatial size rather than a fixed 2x
        # factor: identical when the skip is exactly twice as large, and it
        # also works when the encoder rounded an odd size, where a fixed 2x
        # upsample would make the addition below fail on a shape mismatch.
        x = F.interpolate(x, size=skip.shape[-2:], mode="nearest")
        return x + self.skip_conv(skip)
|
|
|
|
class SegmentationBlock(nn.Module):
    """Stack of ``Conv3x3GNReLU`` layers that upsamples by 2x ``n_upsamples`` times.

    Args:
        in_channels: Channel count of the input feature map.
        out_channels: Channel count of every layer's output.
        n_upsamples: Number of 2x upsampling stages. With 0 the block is a
            single conv layer without upsampling; with 1 that single layer
            upsamples; each additional stage appends one more upsampling layer.
    """

    def __init__(self, in_channels, out_channels, n_upsamples=0):
        super().__init__()

        # The first layer always exists and is the only one that changes the
        # channel count; it upsamples only if any upsampling was requested.
        stages = [
            Conv3x3GNReLU(in_channels, out_channels, upsample=bool(n_upsamples))
        ]

        # Any remaining stages keep the channel count and always upsample.
        if n_upsamples > 1:
            stages.extend(
                Conv3x3GNReLU(out_channels, out_channels, upsample=True)
                for _ in range(n_upsamples - 1)
            )

        self.block = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the stacked layers."""
        return self.block(x)
|
|
|
|
class MergeBlock(nn.Module):
    """Combine a sequence of feature maps by summation or channel concatenation.

    Args:
        policy: ``"add"`` to sum the inputs, ``"cat"`` to concatenate them
            along dimension 1.

    Raises:
        ValueError: If ``policy`` is neither ``"add"`` nor ``"cat"``.
    """

    def __init__(self, policy):
        super().__init__()
        supported = ("add", "cat")
        if policy in supported:
            self.policy = policy
            return
        raise ValueError(
            "`merge_policy` must be one of: ['add', 'cat'], got {}".format(policy)
        )

    def forward(self, x):
        """Merge the iterable of tensors ``x`` according to the configured policy.

        Raises:
            ValueError: If ``self.policy`` was changed to an unsupported value
                after construction.
        """
        if self.policy == "cat":
            return torch.cat(x, dim=1)
        if self.policy == "add":
            return sum(x)
        raise ValueError(
            "`merge_policy` must be one of: ['add', 'cat'], got {}".format(self.policy)
        )
|
|
|
|
class FPNDecoder(nn.Module):
    """Feature Pyramid Network decoder producing one merged segmentation feature map.

    Args:
        encoder_channels: Sequence of encoder channel counts. It is reversed
            and cut to ``encoder_depth + 1`` entries; the first four entries of
            the result size the pyramid layers.
        encoder_depth: Number of encoder stages; must be at least 3.
        pyramid_channels: Channel count of every pyramid level.
        segmentation_channels: Channel count of every segmentation head output.
        dropout: Drop probability of the final ``nn.Dropout2d``.
        merge_policy: ``"add"`` or ``"cat"``; how the four head outputs are merged.

    Attributes:
        out_channels: ``segmentation_channels`` for the ``"add"`` policy,
            otherwise ``segmentation_channels * 4``.

    Raises:
        ValueError: If ``encoder_depth`` is less than 3.
    """

    def __init__(
        self,
        encoder_channels,
        encoder_depth=5,
        pyramid_channels=256,
        segmentation_channels=128,
        dropout=0.2,
        merge_policy="add",
    ):
        super().__init__()

        # Four head outputs are merged, so any non-"add" policy is sized as a
        # four-way concatenation.
        if merge_policy == "add":
            self.out_channels = segmentation_channels
        else:
            self.out_channels = segmentation_channels * 4

        if encoder_depth < 3:
            raise ValueError(
                "Encoder depth for FPN decoder cannot be less than 3, got {}.".format(
                    encoder_depth
                )
            )

        # Reverse the channel list, then keep the first encoder_depth + 1 entries.
        reversed_channels = encoder_channels[::-1][: encoder_depth + 1]

        # First pyramid level is a plain 1x1 projection; the others merge an
        # upsampled level with a skip.
        self.p5 = nn.Conv2d(reversed_channels[0], pyramid_channels, kernel_size=1)
        self.p4 = FPNBlock(pyramid_channels, reversed_channels[1])
        self.p3 = FPNBlock(pyramid_channels, reversed_channels[2])
        self.p2 = FPNBlock(pyramid_channels, reversed_channels[3])

        # One head per pyramid level, in the order p5, p4, p3, p2, with
        # 3, 2, 1 and 0 upsampling stages respectively.
        upsample_counts = (3, 2, 1, 0)
        self.seg_blocks = nn.ModuleList(
            [
                SegmentationBlock(
                    pyramid_channels, segmentation_channels, n_upsamples=count
                )
                for count in upsample_counts
            ]
        )

        self.merge = MergeBlock(merge_policy)
        self.dropout = nn.Dropout2d(p=dropout, inplace=True)

    def forward(self, *features):
        """Decode the last four encoder feature maps into one merged feature map.

        Args:
            *features: Encoder feature maps; only the last four are used, and
                exactly four must be available.

        Returns:
            The merged head outputs after dropout.
        """
        c2, c3, c4, c5 = features[-4:]

        # Top-down pathway: start from the last feature and merge in each
        # earlier skip in turn.
        pyramid = [self.p5(c5)]
        for lateral, skip in zip((self.p4, self.p3, self.p2), (c4, c3, c2)):
            pyramid.append(lateral(pyramid[-1], skip))

        head_outputs = [
            head(level) for head, level in zip(self.seg_blocks, pyramid)
        ]
        merged = self.merge(head_outputs)
        return self.dropout(merged)
|
|