Zero-Shot Image Classification
Transformers
ONNX
Chinese
English
m2_encoder
feature-extraction
multimodal
image-text-retrieval
bilingual
chinese
english
vision-language
custom-code
custom_code
Eval Results (legacy)
Instructions to use malusama/M2-Encoder-0.4B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use malusama/M2-Encoder-0.4B with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("zero-shot-image-classification", model="malusama/M2-Encoder-0.4B", trust_remote_code=True)
pipe(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parrots.png",
    candidate_labels=["animals", "humans", "landscape"],
)

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("malusama/M2-Encoder-0.4B", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
File size: 1,891 Bytes
f471fb4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | from typing import Optional
from transformers import AutoTokenizer
from transformers.processing_utils import ProcessorMixin
from .image_processing_m2_encoder import M2EncoderImageProcessor
class M2EncoderProcessor(ProcessorMixin):
    """Pair an M2-Encoder image processor with a tokenizer behind one callable.

    Calling an instance tokenizes ``text`` and/or preprocesses ``images`` and
    merges both results into a single plain ``dict``.
    """

    attributes = ["image_processor", "tokenizer"]
    image_processor_class = "M2EncoderImageProcessor"
    tokenizer_class = ("GLMChineseTokenizer", None)

    def __init__(self, image_processor, tokenizer):
        """Store the two components as instance attributes.

        NOTE(review): the parent ``ProcessorMixin.__init__`` is not invoked
        here; presumably intentional for the custom tokenizer class --
        confirm before changing.
        """
        self.tokenizer = tokenizer
        self.image_processor = image_processor

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        """Load both components from one location and wrap them in a processor.

        Args:
            pretrained_model_name_or_path: Location handed unchanged to both
                ``M2EncoderImageProcessor.from_pretrained`` and
                ``AutoTokenizer.from_pretrained``.
            **kwargs: Forwarded to both loaders. ``trust_remote_code`` is
                removed first and passed to the tokenizer loader only.

        Returns:
            A new instance of ``cls`` holding the loaded components.
        """
        # NOTE(review): remote code is trusted unless the caller opts out
        # explicitly -- confirm this default is acceptable for your
        # deployment.
        allow_remote_code = kwargs.pop("trust_remote_code", True)

        # The image processor is loaded first, without the trust flag.
        loaded_image_processor = M2EncoderImageProcessor.from_pretrained(
            pretrained_model_name_or_path, **kwargs
        )
        loaded_tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=allow_remote_code,
            **kwargs,
        )
        return cls(
            image_processor=loaded_image_processor,
            tokenizer=loaded_tokenizer,
        )

    def __call__(
        self,
        text=None,
        images=None,
        padding="max_length",
        truncation=True,
        max_length: Optional[int] = 52,
        return_tensors=None,
        **kwargs,
    ):
        """Encode text and/or images into one merged dictionary.

        Args:
            text: Input for the tokenizer; skipped when ``None``.
            images: Input for the image processor; skipped when ``None``.
            padding: Passed to the tokenizer as ``padding``.
            truncation: Passed to the tokenizer as ``truncation``.
            max_length: Passed to the tokenizer as ``max_length``.
            return_tensors: Passed to both the tokenizer and the image
                processor.
            **kwargs: Extra keyword arguments, forwarded unchanged to BOTH the
                tokenizer and the image processor.

        Returns:
            A plain ``dict`` holding the tokenizer output followed by the
            image-processor output. It is empty when both ``text`` and
            ``images`` are ``None``.
        """
        merged = {}

        if text is not None:
            # The special-tokens mask is always requested from the tokenizer.
            text_features = self.tokenizer(
                text,
                padding=padding,
                truncation=truncation,
                max_length=max_length,
                return_special_tokens_mask=True,
                return_tensors=return_tensors,
                **kwargs,
            )
            merged.update(text_features)

        if images is not None:
            # Image entries are merged last, so they win on any key collision.
            image_features = self.image_processor(
                images, return_tensors=return_tensors, **kwargs
            )
            merged.update(image_features)

        return merged
|