| --- |
| license: apache-2.0 |
| --- |
| |
| Here is the code used to create this tiny model: |
|
|
| ```python |
| import os |
| |
| from transformers import AutoTokenizer |
| from transformers import Zamba2Config, Zamba2ForCausalLM |
| |
| # === Step 1: Define tiny model config === |
| config = Zamba2Config( |
| d_model=16, |
| n_layer=46, # Match number of Mamba/Hybrid blocks |
| d_state=32, |
| expand=2, |
| conv_kernel=3, |
| vocab_size=50280, |
| hidden_size=16 |
| ) |
| |
| # === Step 2: Create model from config === |
| model = Zamba2ForCausalLM(config) |
| |
| # === Step 3: Load or create tokenizer === |
| # If tokenizer is not specific to Zamba2, reuse any tokenizer (e.g., from Mamba) |
| tokenizer = AutoTokenizer.from_pretrained("Zyphra/Zamba2-2.7B") |
| |
| # === Step 4: Save model and tokenizer === |
| output_dir = "./tiny-zamba2" |
| os.makedirs(output_dir, exist_ok=True) |
| model.save_pretrained(output_dir, safe_serialization=False) |
| tokenizer.save_pretrained(output_dir) |
| ``` |