Instructions to use p1atdev/MangaLineExtraction-hf with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use p1atdev/MangaLineExtraction-hf with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-to-image", model="p1atdev/MangaLineExtraction-hf", trust_remote_code=True)

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("p1atdev/MangaLineExtraction-hf", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
| """PyTorch MLE (Manga Line Extraction) model""" | |
| from dataclasses import dataclass | |
| import torch | |
| import torch.nn as nn | |
| from transformers import PreTrainedModel | |
| from transformers.modeling_outputs import ModelOutput, BaseModelOutput | |
| from transformers.activations import ACT2FN | |
| from .configuration_mle import MLEConfig | |
@dataclass
class MLEModelOutput(ModelOutput):
    """Output container for the base MLE model.

    ``ModelOutput`` subclasses must be decorated with ``@dataclass``;
    without it the declared fields are not turned into constructor
    arguments and keyword construction fails.

    NOTE(review): this class is not referenced anywhere in the visible part
    of this file (``MLEModel.forward`` returns a plain tensor).
    """

    # Tensor produced by the model; optional so the output can be built empty.
    last_hidden_state: torch.FloatTensor | None = None
@dataclass
class MLEForAnimeLineExtractionOutput(ModelOutput):
    """Output container returned by ``MLEForAnimeLineExtraction.forward``.

    ``ModelOutput`` subclasses must be decorated with ``@dataclass``;
    without it the keyword construction used in ``forward`` fails.
    """

    # Raw tensor returned by the wrapped model, before any post-processing.
    last_hidden_state: torch.FloatTensor | None = None
    # Post-processed result: first channel of the raw output, clipped to
    # [0, 255] and cropped to the input height and width.
    pixel_values: torch.Tensor | None = None
class MLEBatchNorm(nn.Module):
    """2D batch normalisation immediately followed by an activation.

    Args:
        config: Model configuration. Reads ``batch_norm_eps``, ``hidden_act``
            and, when ``hidden_act`` is ``"leaky_relu"``, ``negative_slope``.
        in_features: Number of channels to normalise.
    """

    def __init__(
        self,
        config: MLEConfig,
        in_features: int,
    ):
        super().__init__()

        self.norm = nn.BatchNorm2d(in_features, eps=config.batch_norm_eps)

        # The original model uses leaky_relu; it is built directly so that
        # the configured negative slope is applied. Any other activation
        # name is looked up in the Transformers activation registry.
        uses_leaky_relu = config.hidden_act == "leaky_relu"
        self.act_fn = (
            nn.LeakyReLU(negative_slope=config.negative_slope)
            if uses_leaky_relu
            else ACT2FN[config.hidden_act]
        )

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Normalise ``hidden_states`` and apply the activation."""
        return self.act_fn(self.norm(hidden_states))
class MLEResBlock(nn.Module):
    """Residual block made of two (norm + activation -> convolution) stages.

    The first convolution may change the channel count and apply a stride;
    the second keeps both. When the channel count or the stride would make
    the input incompatible with the output, the skip path goes through a
    1x1 convolution with the same stride; otherwise the input is added
    unchanged.

    Args:
        config: Model configuration. Reads ``block_kernel_size`` (plus
            whatever ``MLEBatchNorm`` reads).
        in_channels: Channels of the incoming tensor.
        out_channels: Channels produced by the block.
        stride_size: Stride of the first convolution and of the skip
            projection.
    """

    def __init__(
        self,
        config: MLEConfig,
        in_channels: int,
        out_channels: int,
        stride_size: int,
    ):
        super().__init__()

        kernel = config.block_kernel_size
        half_kernel = kernel // 2

        self.norm1 = MLEBatchNorm(config, in_channels)
        self.conv1 = nn.Conv2d(
            in_channels,
            out_channels,
            kernel,
            stride=stride_size,
            padding=half_kernel,
        )

        self.norm2 = MLEBatchNorm(config, out_channels)
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            kernel,
            stride=1,
            padding=half_kernel,
        )

        # A projection is only needed when the skip path cannot be added
        # to the main path as-is.
        shape_is_preserved = in_channels == out_channels and stride_size == 1
        self.resize = (
            None
            if shape_is_preserved
            else nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride_size,
            )
        )

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Run the two stages and add the (possibly projected) input."""
        output = self.conv1(self.norm1(hidden_states))
        output = self.conv2(self.norm2(output))

        skip = hidden_states if self.resize is None else self.resize(hidden_states)
        output += skip
        return output
class MLEEncoderLayer(nn.Module):
    """A stack of ``MLEResBlock`` modules applied one after another.

    Args:
        config: Model configuration, forwarded to every block.
        in_features: Input channels of the first block.
        out_features: Output channels of every block (and input channels
            of every block after the first).
        num_layers: Number of residual blocks.
        stride_sizes: Stride of each block; indexed by block position, so
            it needs at least ``num_layers`` entries.
    """

    def __init__(
        self,
        config: MLEConfig,
        in_features: int,
        out_features: int,
        num_layers: int,
        stride_sizes: list[int],
    ):
        super().__init__()

        stack = []
        for position in range(num_layers):
            # Only the first block sees the layer's input width.
            block_input = in_features if position == 0 else out_features
            stack.append(
                MLEResBlock(
                    config,
                    in_channels=block_input,
                    out_channels=out_features,
                    stride_size=stride_sizes[position],
                )
            )
        self.blocks = nn.ModuleList(stack)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Feed ``hidden_states`` through every block in order."""
        result = hidden_states
        for res_block in self.blocks:
            result = res_block(result)
        return result
class MLEEncoder(nn.Module):
    """Encoder built from one ``MLEEncoderLayer`` per entry of
    ``config.num_encoder_layers``.

    Stage ``i`` outputs
    ``in_channels * block_patch_size * upsample_ratio ** i`` channels and
    takes the previous stage's output width as input (stage 0 takes
    ``config.in_channels``). Stage 0 uses stride 1 everywhere; every later
    stage uses stride 2 in its last block and stride 1 in the others.
    """

    def __init__(
        self,
        config: MLEConfig,
    ):
        super().__init__()

        def stage_width(level: int) -> int:
            # Channel count produced by the stage at position ``level``.
            return (
                config.in_channels
                * config.block_patch_size
                * (config.upsample_ratio**level)
            )

        stages = []
        for stage_index, depth in enumerate(config.num_encoder_layers):
            is_first_stage = stage_index == 0

            strides = [1] * depth
            if not is_first_stage and depth > 0:
                # Later stages reduce the resolution in their final block.
                strides[-1] = 2

            stages.append(
                MLEEncoderLayer(
                    config,
                    in_features=(
                        config.in_channels
                        if is_first_stage
                        else stage_width(stage_index - 1)
                    ),
                    out_features=stage_width(stage_index),
                    num_layers=depth,
                    stride_sizes=strides,
                )
            )
        self.layers = nn.ModuleList(stages)

    def forward(
        self, hidden_states: torch.Tensor
    ) -> tuple[torch.Tensor, tuple[torch.Tensor, ...]]:
        """Run every stage and keep each stage's output.

        Returns:
            A pair ``(final_output, per_stage_outputs)``, where
            ``per_stage_outputs`` holds the output of every stage in order
            (its last element is ``final_output``).
        """
        collected = []
        for stage in self.layers:
            hidden_states = stage(hidden_states)
            collected.append(hidden_states)
        return hidden_states, tuple(collected)
class MLEUpsampleBlock(nn.Module):
    """Norm + activation, convolution, then spatial upsampling.

    Args:
        config: Model configuration. Reads ``block_kernel_size`` and
            ``upsample_ratio`` (plus whatever ``MLEBatchNorm`` reads).
        in_features: Channels of the incoming tensor.
        out_features: Channels produced by the convolution.
    """

    def __init__(self, config: MLEConfig, in_features: int, out_features: int):
        super().__init__()

        kernel = config.block_kernel_size

        self.norm = MLEBatchNorm(config, in_features=in_features)
        self.conv = nn.Conv2d(
            in_features,
            out_features,
            kernel,
            stride=1,
            padding=kernel // 2,
        )
        # Uses nn.Upsample's default interpolation mode.
        self.upsample = nn.Upsample(scale_factor=config.upsample_ratio)

    def forward(self, hidden_states: torch.Tensor):
        """Apply norm, convolution and upsampling in that order."""
        return self.upsample(self.conv(self.norm(hidden_states)))
class MLEUpsampleResBlock(nn.Module):
    """Upsampling block with an optional projected skip connection.

    The main path is ``MLEUpsampleBlock`` -> norm + activation ->
    convolution. When the channel count changes, the input is also sent
    through a 1x1 convolution followed by the same upsampling factor and
    added to the main path. When ``in_features == out_features`` no skip
    connection is added at all.

    Args:
        config: Model configuration. Reads ``block_kernel_size`` and
            ``upsample_ratio`` (plus whatever the sub-blocks read).
        in_features: Channels of the incoming tensor.
        out_features: Channels produced by the block.
    """

    def __init__(self, config: MLEConfig, in_features: int, out_features: int):
        super().__init__()

        kernel = config.block_kernel_size

        self.upsample = MLEUpsampleBlock(
            config, in_features=in_features, out_features=out_features
        )
        self.norm = MLEBatchNorm(config, in_features=out_features)
        self.conv = nn.Conv2d(
            out_features,
            out_features,
            kernel,
            stride=1,
            padding=kernel // 2,
        )

        if in_features == out_features:
            self.resize = None
        else:
            # Bring the input to the output's channel count and resolution
            # so it can be added to the main path.
            self.resize = nn.Sequential(
                nn.Conv2d(
                    in_features,
                    out_features,
                    kernel_size=1,
                    stride=1,
                ),
                nn.Upsample(scale_factor=config.upsample_ratio),
            )

    def forward(self, hidden_states: torch.Tensor):
        """Upsample ``hidden_states`` and add the projected input if any."""
        output = self.conv(self.norm(self.upsample(hidden_states)))

        if self.resize is None:
            return output

        output += self.resize(hidden_states)
        return output
class MLEDecoderLayer(nn.Module):
    """Decoder stage: one upsampling block, then plain residual blocks,
    then addition of an encoder shortcut.

    Args:
        config: Model configuration, forwarded to every block.
        in_features: Input channels of the leading upsampling block.
        out_features: Output channels of every block in the stage.
        num_layers: Total number of blocks (the upsampling block counts as
            the first one).
    """

    def __init__(
        self,
        config: MLEConfig,
        in_features: int,
        out_features: int,
        num_layers: int,
    ):
        super().__init__()

        stack = []
        for position in range(num_layers):
            if position == 0:
                # The first block changes resolution and channel count.
                stack.append(
                    MLEUpsampleResBlock(
                        config,
                        in_features=in_features,
                        out_features=out_features,
                    )
                )
            else:
                stack.append(
                    MLEResBlock(
                        config,
                        in_channels=out_features,
                        out_channels=out_features,
                        stride_size=1,
                    )
                )
        self.blocks = nn.ModuleList(stack)

    def forward(
        self, hidden_states: torch.Tensor, shortcut_states: torch.Tensor
    ) -> torch.Tensor:
        """Run every block, then add ``shortcut_states`` in place."""
        for stage_block in self.blocks:
            hidden_states = stage_block(hidden_states)

        hidden_states += shortcut_states
        return hidden_states
class MLEDecoderHead(nn.Module):
    """Final decoder stage that maps features to a single output channel.

    It runs a stride-1 ``MLEEncoderLayer`` going from
    ``config.block_patch_size`` to ``config.last_hidden_channels`` channels,
    then norm + activation, then a 1x1 convolution with one output channel.

    Args:
        config: Model configuration. Reads ``block_patch_size`` and
            ``last_hidden_channels`` (plus whatever the sub-blocks read).
        num_layers: Number of residual blocks in the inner layer.
    """

    def __init__(self, config: MLEConfig, num_layers: int):
        super().__init__()

        head_channels = config.last_hidden_channels

        self.layer = MLEEncoderLayer(
            config,
            in_features=config.block_patch_size,
            out_features=head_channels,
            stride_sizes=[1] * num_layers,
            num_layers=num_layers,
        )
        self.norm = MLEBatchNorm(config, in_features=head_channels)
        self.conv = nn.Conv2d(
            head_channels,
            out_channels=1,
            kernel_size=1,
            stride=1,
        )

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Return the single-channel prediction for ``hidden_states``."""
        features = self.norm(self.layer(hidden_states))
        return self.conv(features)
class MLEDecoder(nn.Module):
    """Decoder with one stage per entry of ``config.num_decoder_layers``.

    Every stage except the last is an ``MLEDecoderLayer`` that divides the
    channel count by ``config.upsample_ratio``, starting from the width of
    the last encoder stage. The last stage is an ``MLEDecoderHead``.
    """

    def __init__(
        self,
        config: MLEConfig,
    ):
        super().__init__()

        ratio = config.upsample_ratio
        depths = config.num_decoder_layers

        # Channel count produced by the last encoder stage.
        top_channels = (
            config.in_channels
            * config.block_patch_size
            * (ratio ** (len(config.num_encoder_layers) - 1))
        )

        head_position = len(depths) - 1
        stages = []
        for position, depth in enumerate(depths):
            if position == head_position:
                stages.append(
                    MLEDecoderHead(
                        config,
                        num_layers=depth,
                    )
                )
            else:
                stages.append(
                    MLEDecoderLayer(
                        config,
                        in_features=top_channels // (ratio**position),
                        out_features=top_channels // (ratio ** (position + 1)),
                        num_layers=depth,
                    )
                )
        self.layers = nn.ModuleList(stages)

    def forward(
        self,
        last_hidden_states: torch.Tensor,
        encoder_hidden_states: tuple[torch.Tensor, ...],
    ) -> torch.Tensor:
        """Decode the encoder output using the encoder's per-stage outputs.

        Args:
            last_hidden_states: Output of the final encoder stage.
            encoder_hidden_states: Outputs of all encoder stages, in order.

        Returns:
            The output of the decoder head.
        """
        head_position = len(self.layers) - 1
        # Decoder stage k is paired with encoder stage (count - 2 - k): the
        # final encoder output is the decoder input itself, so shortcuts
        # start one stage earlier and walk back towards the first stage.
        first_shortcut = len(encoder_hidden_states) - 2

        hidden_states = last_hidden_states
        for position, stage in enumerate(self.layers):
            if position == head_position:
                # The head takes no shortcut.
                hidden_states = stage(hidden_states)
                continue

            hidden_states = stage(
                hidden_states,
                encoder_hidden_states[first_shortcut - position],
            )
        return hidden_states
class MLEPretrainedModel(PreTrainedModel):
    """Shared ``PreTrainedModel`` base for the MLE models in this file.

    It only declares class-level settings: the configuration class, the
    attribute name under which wrapper models store the base model, and
    the gradient-checkpointing support flag.
    """

    supports_gradient_checkpointing = True
    base_model_prefix = "model"
    config_class = MLEConfig
class MLEModel(MLEPretrainedModel):
    """Base MLE network: an ``MLEEncoder`` followed by an ``MLEDecoder``."""

    def __init__(self, config: MLEConfig):
        super().__init__(config)
        self.config = config

        self.encoder = MLEEncoder(config)
        self.decoder = MLEDecoder(config)

        # Initialize weights and apply final processing
        self.post_init()

    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
        """Encode ``pixel_values`` and decode them with encoder shortcuts.

        Returns:
            The decoder output tensor.
        """
        final_state, stage_states = self.encoder(pixel_values)
        return self.decoder(final_state, stage_states)
class MLEForAnimeLineExtraction(MLEPretrainedModel):
    """MLE model wrapper that also returns a post-processed line image.

    The wrapped ``MLEModel`` is stored as ``self.model``, matching
    ``base_model_prefix`` on the base class.
    """

    def __init__(self, config: MLEConfig):
        super().__init__(config)
        self.model = MLEModel(config)

        # Initialize weights and apply final processing. Transformers
        # expects every PreTrainedModel subclass to finish __init__ with
        # post_init(), as MLEModel already does.
        self.post_init()

    def postprocess(
        self, output_tensor: torch.Tensor, input_shape: tuple[int, int]
    ) -> torch.Tensor:
        """Turn the raw model output into a clipped, cropped image tensor.

        Args:
            output_tensor: Raw model output; indexed as a 4-D tensor whose
                second dimension is the channel.
            input_shape: ``(height, width)`` of the model input.

        Returns:
            The first channel of ``output_tensor``, clipped to ``[0, 255]``
            and cropped to at most ``height`` x ``width``.
        """
        height, width = input_shape[0], input_shape[1]

        pixel_values = output_tensor[:, 0, :, :]
        pixel_values = torch.clip(pixel_values, 0, 255)
        return pixel_values[:, 0:height, 0:width]

    def forward(
        self, pixel_values: torch.Tensor, return_dict: bool = True
    ) -> tuple[torch.Tensor, ...] | MLEForAnimeLineExtractionOutput:
        """Run the model and post-process its output.

        Args:
            pixel_values: Input tensor; dimensions 2 and 3 are read as
                height and width.
            return_dict: When true, return an
                ``MLEForAnimeLineExtractionOutput``; otherwise return the
                tuple ``(raw_output, post_processed_output)``.
        """
        # height, width
        input_image_size = (pixel_values.shape[2], pixel_values.shape[3])

        model_output = self.model(pixel_values)
        # Computed once and shared by both return forms.
        line_image = self.postprocess(model_output, input_image_size)

        if not return_dict:
            return (model_output, line_image)

        return MLEForAnimeLineExtractionOutput(
            last_hidden_state=model_output,
            pixel_values=line_image,
        )