Instructions to use sadiqj/camlcoder with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use sadiqj/camlcoder with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="sadiqj/camlcoder", trust_remote_code=True)

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("sadiqj/camlcoder", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use sadiqj/camlcoder with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "sadiqj/camlcoder"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "sadiqj/camlcoder", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
Use Docker
docker model run hf.co/sadiqj/camlcoder
- SGLang
How to use sadiqj/camlcoder with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "sadiqj/camlcoder" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "sadiqj/camlcoder", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "sadiqj/camlcoder" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "sadiqj/camlcoder", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
- Docker Model Runner
How to use sadiqj/camlcoder with Docker Model Runner:
docker model run hf.co/sadiqj/camlcoder
| # Copyright 2022 MosaicML Examples authors | |
| # SPDX-License-Identifier: Apache-2.0 | |
| """GPT Blocks used for the GPT Model.""" | |
| from typing import Optional, Tuple | |
| import torch | |
| import torch.nn as nn | |
| from .attention import MultiheadAttention | |
| from .low_precision_layernorm import LPLayerNorm | |
class GPTMLP(nn.Module):
    """Feed-forward sublayer: linear up-projection, GELU, linear down-projection.

    The hidden width is ``mlp_ratio * d_model``; the output width equals the
    input width ``d_model``.

    Args:
        d_model: Feature size of the input and of the output.
        mlp_ratio: Integer multiplier that sets the hidden feature size.
        device: Forwarded to both ``nn.Linear`` constructors.
    """

    def __init__(self,
                 d_model: int,
                 mlp_ratio: int,
                 device: Optional[str] = None):
        super().__init__()
        hidden_size = mlp_ratio * d_model
        self.mlp_up = nn.Linear(d_model, hidden_size, device=device)
        self.mlp_act = nn.GELU(approximate='none')
        self.mlp_down = nn.Linear(hidden_size, d_model, device=device)
        # Tag the output projection; the consumer of this flag is not in this
        # file (presumably a weight-init routine -- confirm).
        setattr(self.mlp_down, '_is_residual', True)

    def forward(self, x):
        """Apply up-projection, activation and down-projection to ``x``."""
        expanded = self.mlp_up(x)
        activated = self.mlp_act(expanded)
        return self.mlp_down(activated)
class GPTBlock(nn.Module):
    """One transformer block: normalized attention and MLP, each with a residual.

    Each sublayer is preceded by its own layer norm, and its (dropout-ed)
    output is added back onto the un-normalized input of that sublayer.

    Args:
        attn_impl: Forwarded to ``MultiheadAttention``.
        d_model: Feature size used for the norms, the attention and the MLP.
        n_heads: Forwarded to ``MultiheadAttention``.
        mlp_ratio: Forwarded to ``GPTMLP``.
        attn_clip_qkv: Forwarded to ``MultiheadAttention``.
        attn_qk_ln: Forwarded to ``MultiheadAttention``.
        softmax_scale: Forwarded to ``MultiheadAttention``.
        attn_pdrop: Forwarded to ``MultiheadAttention``.
        alibi: Accepted but not read by this block.
        resid_pdrop: Dropout probability for both residual branches.
        low_precision_layernorm: Use ``LPLayerNorm`` instead of
            ``nn.LayerNorm`` for both norms when true.
        device: Forwarded to every sub-module that takes a device.
        **kwargs: Extra config entries; accepted and discarded.
    """

    def __init__(self,
                 attn_impl: str,
                 d_model: int,
                 n_heads: int,
                 mlp_ratio: int,
                 attn_clip_qkv: Optional[float] = None,
                 attn_qk_ln: bool = False,
                 softmax_scale: Optional[float] = None,
                 attn_pdrop: float = 0.0,
                 alibi: bool = False,
                 resid_pdrop: float = 0.0,
                 low_precision_layernorm: bool = False,
                 device: Optional[str] = None,
                 **kwargs):
        # Surplus config arguments are swallowed here on purpose.
        del kwargs
        super().__init__()

        if low_precision_layernorm:
            norm_cls = LPLayerNorm
        else:
            norm_cls = nn.LayerNorm

        # Attention branch.
        self.ln_1 = norm_cls(d_model, device=device)
        self.attn = MultiheadAttention(
            attn_impl=attn_impl,
            attn_clip_qkv=attn_clip_qkv,
            attn_qk_ln=attn_qk_ln,
            softmax_scale=softmax_scale,
            attn_pdrop=attn_pdrop,
            d_model=d_model,
            n_heads=n_heads,
            device=device,
        )

        # Feed-forward branch.
        self.ln_2 = norm_cls(d_model, device=device)
        self.mlp = GPTMLP(d_model=d_model, mlp_ratio=mlp_ratio, device=device)

        # Separate dropout modules for the two residual connections.
        self.resid_attn_dropout = nn.Dropout(resid_pdrop)
        self.resid_mlp_dropout = nn.Dropout(resid_pdrop)

    def forward(
        self,
        x: torch.Tensor,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        attn_bias: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.ByteTensor] = None,
        is_causal: bool = True,
    ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor]]]:
        """Run the block and return ``(output, past_key_value)``.

        ``past_key_value``, ``attn_bias``, ``attention_mask`` and ``is_causal``
        are handed to the attention module unchanged. The returned
        ``past_key_value`` is whatever the attention module gives back.
        """
        normed_input = self.ln_1(x)
        # The attention module returns three values; the middle one is unused
        # here (presumably attention weights -- confirm in MultiheadAttention).
        attn_out, _, past_key_value = self.attn(
            normed_input,
            past_key_value=past_key_value,
            attn_bias=attn_bias,
            attention_mask=attention_mask,
            is_causal=is_causal,
        )
        after_attn = x + self.resid_attn_dropout(attn_out)

        mlp_out = self.mlp(self.ln_2(after_attn))
        after_mlp = after_attn + self.resid_mlp_dropout(mlp_out)
        return after_mlp, past_key_value