Constructs DeepLabV3 semantic segmentation models with a ResNet backbone as described in Rethinking Atrous Convolution for Semantic Image Segmentation. These models employ atrous spatial pyramid pooling to capture multi-scale context.
model_deeplabv3_resnet50(
pretrained = FALSE,
progress = TRUE,
num_classes = 21,
aux_loss = NULL,
pretrained_backbone = FALSE,
...
)
model_deeplabv3_resnet101(
pretrained = FALSE,
progress = TRUE,
num_classes = 21,
aux_loss = NULL,
pretrained_backbone = FALSE,
...
)
pretrained: (bool) If TRUE, returns a model pre-trained on COCO (see Details).
progress: (bool) If TRUE, displays a progress bar of the download to stderr.
num_classes: Number of output classes.
aux_loss: Logical or NULL. If TRUE, includes an auxiliary classifier branch. If NULL (default), the presence of the auxiliary classifier is inferred from the pretrained weights.
pretrained_backbone: If TRUE and pretrained = FALSE, loads ImageNet weights for the ResNet backbone (see the sketch after this list).
...: Other parameters passed to the model implementation.
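A minimal sketch (not part of the Examples; torchvision assumed attached) of how these arguments combine to build an untrained segmentation head with a custom class count on top of an ImageNet-initialized backbone:

model <- model_deeplabv3_resnet50(
  pretrained = FALSE,          # no pre-trained segmentation weights
  pretrained_backbone = TRUE,  # ImageNet weights for the ResNet-50 backbone only
  num_classes = 3,             # e.g. background + 2 foreground classes
  aux_loss = TRUE              # keep the auxiliary classifier branch for training
)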
model_deeplabv3_resnet50(): DeepLabV3 with a ResNet-50 backbone
model_deeplabv3_resnet101(): DeepLabV3 with a ResNet-101 backbone
Semantic image segmentation with 21 output classes by default (COCO).
The models expect input tensors of shape (batch_size, 3, H, W). Typical training uses 520x520 images.
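A minimal sketch for checking these shapes with random data, assuming (as in the Examples below) that the forward pass returns a named list with an out element:

x <- torch::torch_randn(1, 3, 520, 520)            # (batch_size, 3, H, W)
m <- model_deeplabv3_resnet50(pretrained = FALSE)  # untrained, 21 classes by default
m$eval()
y <- torch::with_no_grad(m(x))
y$out$shape                                        # expected: 1 21 520 520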
Other semantic_segmentation_model:
model_fcn_resnet
if (FALSE) { # \dontrun{
library(magrittr)
norm_mean <- c(0.485, 0.456, 0.406) # ImageNet normalization constants, see
# https://pytorch.org/vision/stable/models.html
norm_std <- c(0.229, 0.224, 0.225)
# Use a publicly available image of an animal
wmc <- "https://upload.wikimedia.org/wikipedia/commons/thumb/"
url <- "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg"
img <- base_loader(paste0(wmc,url))
input <- img %>%
transform_to_tensor() %>%
transform_resize(c(520, 520)) %>%
transform_normalize(norm_mean, norm_std)
batch <- input$unsqueeze(1) # Add batch dimension (1, 3, H, W)
# DeepLabV3 with ResNet-50
model <- model_deeplabv3_resnet50(pretrained = TRUE)
model$eval()
output <- model(batch)
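# Inspect the output structure: `out` holds the per-class logits with shape
# (1, num_classes, H, W); depending on the loaded weights, an `aux` element from
# the auxiliary classifier may also be present.
names(output)      # e.g. "out" (and possibly "aux")
output$out$shape   # 1 21 520 520 for this 520x520 input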
# visualize the result
# `draw_segmentation_masks()` turns the torch_float output into a boolean mask internally:
segmented <- draw_segmentation_masks(input, output$out$squeeze(1))
tensor_image_display(segmented)
# Show most frequent class
mask_id <- output$out$argmax(dim = 2) # (1, H, W)
class_contingency_with_background <- mask_id$view(-1)$bincount()
class_contingency_with_background[1] <- 0L # zero out the count for the background class (id 1)
top_class_index <- class_contingency_with_background$argmax()$item()
cli::cli_inform("Majority class {.pkg ResNet-50}: {.emph {model$classes[top_class_index]}}")
# DeepLabV3 with ResNet-101 (same steps)
model <- model_deeplabv3_resnet101(pretrained = TRUE)
model$eval()
output <- model(batch)
segmented <- draw_segmentation_masks(input, output$out$squeeze(1))
tensor_image_display(segmented)
mask_id <- output$out$argmax(dim = 2)
class_contingency_with_background <- mask_id$view(-1)$bincount()
class_contingency_with_background[1] <- 0L # zero out the count for the background class (id 1)
top_class_index <- class_contingency_with_background$argmax()$item()
cli::cli_inform("Majority class {.pkg ResNet-101}: {.emph {model$classes[top_class_index]}}")
} # }