| | from typing import Dict, List, Any |
| | from optimum.onnxruntime import ORTModelForSequenceClassification |
| | from transformers import AutoTokenizer |
| | from optimum.pipelines import pipeline |
| |
|
| |
|
| | import torch |
| |
|
# Report the cuDNN version once, at import time, when torch says cuDNN is usable.
_cudnn = torch.backends.cudnn
if _cudnn.is_available():
    print("cudnn:", _cudnn.version())
| |
|
class EndpointHandler():
    """Callable handler wrapping an ONNX Runtime text-classification pipeline."""

    def __init__(self, path=""):
        """Load the model and tokenizer from ``path`` and build the pipeline.

        Args:
            path: Location handed to ``from_pretrained`` for both the ONNX
                model and the tokenizer.
        """
        on_cuda = torch.cuda.is_available()

        # Derive the ONNX Runtime provider and the pipeline device from the
        # same flag so the two can never disagree.
        provider = "CUDAExecutionProvider" if on_cuda else "CPUExecutionProvider"
        device = 0 if on_cuda else -1

        model = ORTModelForSequenceClassification.from_pretrained(
            path,
            export=False,
            provider=provider,
        )
        tokenizer = AutoTokenizer.from_pretrained(path)

        self.pipeline = pipeline(
            "text-classification",
            model=model,
            tokenizer=tokenizer,
            device=device,
        )

    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
        """Run the pipeline on a request payload and return its prediction.

        Args:
            data: The request payload. When it is a dict, the value under
                ``"inputs"`` is classified and the value under
                ``"parameters"`` (if any) is forwarded to the pipeline as
                keyword arguments. A dict without an ``"inputs"`` key is
                itself passed as the inputs (minus ``"parameters"``). Any
                non-dict payload is passed to the pipeline unchanged.

        Returns:
            Whatever ``self.pipeline`` returns for the given inputs; for text
            classification this is expected to be label/score dictionaries
            (exact nesting depends on the pipeline parameters).
        """
        if not isinstance(data, dict):
            # Nothing to unpack: treat the payload itself as the inputs.
            return self.pipeline(data)

        # Work on a shallow copy so the caller's request dict is not mutated.
        payload = dict(data)
        # An explicit ``"parameters": None`` must behave like "no parameters"
        # instead of failing on ``**None``.
        parameters = payload.pop("parameters", None) or {}
        inputs = payload.pop("inputs", payload)

        return self.pipeline(inputs, **parameters)