yujiepan commited on
Commit
453391c
·
verified ·
1 Parent(s): 47b9f0a

Upload folder using huggingface_hub

Browse files
.meta.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "torch": "2.11.0+cu126",
3
+ "transformers": "5.7.0.dev0"
4
+ }
README.md ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ base_model:
4
+ - tencent/Hy3-preview
5
+ ---
6
+
7
+ This tiny model is intended for debugging. It is randomly initialized using the configuration adapted from [tencent/Hy3-preview](https://huggingface.co/tencent/Hy3-preview).
8
+
9
+ | File path | Size |
10
+ |------|------|
11
+ | model.safetensors | 5.4MB |
12
+
13
+
14
+ ### Example usage:
15
+
16
+ - vLLM
17
+
18
+ ```bash
19
+ # Multi-token prediction is supported
20
+ model_id=tiny-random/hy3
21
+ vllm serve $model_id \
22
+ --tensor-parallel-size 2 \
23
+ --speculative-config.method mtp \
24
+ --speculative-config.num_speculative_tokens 1 \
25
+ --tool-call-parser hy_v3 \
26
+ --reasoning-parser hy_v3 \
27
+ --enable-auto-tool-choice
28
+ ```
29
+
30
+ - SGLang
31
+
32
+ ```bash
33
+ # Multi-token prediction is supported
34
+ model_id=tiny-random/hy3
35
+ python3 -m sglang.launch_server \
36
+ --model $model_id \
37
+ --tp 2 \
38
+ --tool-call-parser hunyuan \
39
+ --reasoning-parser hunyuan \
40
+ --speculative-num-steps 1 \
41
+ --speculative-eagle-topk 1 \
42
+ --speculative-num-draft-tokens 2 \
43
+ --speculative-algorithm EAGLE
44
+ ```
45
+
46
+ - Transformers
47
+
48
+ ```python
49
+ from transformers import AutoModelForCausalLM, AutoTokenizer
50
+
51
+ model_id = "tiny-random/hy3"
52
+
53
+ tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
54
+ model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
55
+ messages = [
56
+ {"role": "user", "content": "Write a short poem about AI."},
57
+ ]
58
+ inputs = tokenizer.apply_chat_template(
59
+ messages,
60
+ tokenize=True,
61
+ return_tensors="pt",
62
+ add_generation_prompt=True,
63
+ reasoning_effort='high',
64
+ )
65
+ print(inputs)
66
+ outputs = model.generate(**inputs.to(model.device), max_new_tokens=32)
67
+ output_text = tokenizer.decode(outputs[0])
68
+ print(output_text)
69
+ ```
70
+
71
+ ### Code to create this repo:
72
+
73
+ <details>
74
+ <summary>Click to expand</summary>
75
+
76
+ ```python
77
+ import json
78
+ from copy import deepcopy
79
+ from pathlib import Path
80
+
81
+ import torch
82
+ import torch.nn as nn
83
+
84
+ from huggingface_hub import file_exists, hf_hub_download
85
+ from transformers import (
86
+ AutoConfig,
87
+ AutoModelForCausalLM,
88
+ AutoTokenizer,
89
+ GenerationConfig,
90
+ set_seed,
91
+ )
92
+
93
+ source_model_id = "tencent/Hy3-preview"
94
+ save_folder = "/tmp/tiny-random/hy3"
95
+
96
+ processor = AutoTokenizer.from_pretrained(source_model_id, trust_remote_code=True)
97
+ processor.save_pretrained(save_folder)
98
+
99
+ with open(hf_hub_download(source_model_id, filename='config.json', repo_type='model'), 'r', encoding='utf-8') as f:
100
+ config_json = json.load(f)
101
+ config_json.update({
102
+ 'expert_hidden_dim': 32,
103
+ 'moe_intermediate_size': 32,
104
+ 'head_dim': 32,
105
+ 'hidden_size': 8,
106
+ 'intermediate_size': 32,
107
+ 'num_attention_heads': 8,
108
+ 'num_hidden_layers': 4,
109
+ 'num_key_value_heads': 4,
110
+ })
111
+ with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
112
+ json.dump(config_json, f, indent=2)
113
+
114
+ config = AutoConfig.from_pretrained(
115
+ save_folder,
116
+ trust_remote_code=True,
117
+ )
118
+ print(config)
119
+ torch.set_default_dtype(torch.bfloat16)
120
+ set_seed(42)
121
+ model = AutoModelForCausalLM.from_config(config, trust_remote_code=True).eval().cpu()
122
+ if file_exists(filename="generation_config.json", repo_id=source_model_id, repo_type='model'):
123
+ model.generation_config = GenerationConfig.from_pretrained(
124
+ source_model_id, trust_remote_code=True,
125
+ )
126
+ model.generation_config.top_k = 40  # original value in source model is -1, which is invalid
127
+
128
+ # mtp
129
+ mtp = deepcopy(model.model.layers[-1])
130
+ mtp.eh_proj = nn.Linear(config.hidden_size * 2, config.hidden_size, bias=False)
131
+ mtp.enorm = nn.RMSNorm(config.hidden_size)
132
+ mtp.hnorm = nn.RMSNorm(config.hidden_size)
133
+ mtp.final_layernorm = nn.RMSNorm(config.hidden_size)
134
+ model.model.layers.append(mtp)
135
+
136
+ # init weights
137
+ set_seed(42)
138
+ model = model.cpu().eval()
139
+ n_params = sum(p.numel() for p in model.parameters())
140
+ with torch.no_grad():
141
+ for name, p in sorted(model.named_parameters()):
142
+ torch.nn.init.normal_(p, 0, 0.2)
143
+ print(name, p.shape, p.dtype, f'{p.numel() / n_params * 100: .2f}%')
144
+
145
+ # expert bias is in float32
146
+ for i in range(config.first_k_dense_replace, config.num_hidden_layers + 1, 1):
147
+ model.model.layers[i].mlp.e_score_correction_bias = nn.Parameter(torch.randn_like(
148
+ model.model.layers[i].mlp.e_score_correction_bias
149
+ ).float() * 0.002)
150
+
151
+ model.save_pretrained(save_folder)
152
+ print(model)
153
+ torch.set_default_dtype(torch.float32)
154
+ ```
155
+
156
+ </details>
157
+
158
+ ### Printing the model:
159
+
160
+ <details><summary>Click to expand</summary>
161
+
162
+ ```text
163
+ HYV3ForCausalLM(
164
+ (model): HYV3Model(
165
+ (embed_tokens): Embedding(120832, 8, padding_idx=120002)
166
+ (layers): ModuleList(
167
+ (0): HYV3DecoderLayer(
168
+ (self_attn): HYV3Attention(
169
+ (q_proj): Linear(in_features=8, out_features=256, bias=False)
170
+ (k_proj): Linear(in_features=8, out_features=128, bias=False)
171
+ (v_proj): Linear(in_features=8, out_features=128, bias=False)
172
+ (o_proj): Linear(in_features=256, out_features=8, bias=False)
173
+ (q_norm): HYV3RMSNorm((32,), eps=1e-05)
174
+ (k_norm): HYV3RMSNorm((32,), eps=1e-05)
175
+ )
176
+ (mlp): HYV3MLP(
177
+ (gate_proj): Linear(in_features=8, out_features=32, bias=False)
178
+ (up_proj): Linear(in_features=8, out_features=32, bias=False)
179
+ (down_proj): Linear(in_features=32, out_features=8, bias=False)
180
+ (act_fn): SiLUActivation()
181
+ )
182
+ (input_layernorm): HYV3RMSNorm((8,), eps=1e-05)
183
+ (post_attention_layernorm): HYV3RMSNorm((8,), eps=1e-05)
184
+ )
185
+ (1-3): 3 x HYV3DecoderLayer(
186
+ (self_attn): HYV3Attention(
187
+ (q_proj): Linear(in_features=8, out_features=256, bias=False)
188
+ (k_proj): Linear(in_features=8, out_features=128, bias=False)
189
+ (v_proj): Linear(in_features=8, out_features=128, bias=False)
190
+ (o_proj): Linear(in_features=256, out_features=8, bias=False)
191
+ (q_norm): HYV3RMSNorm((32,), eps=1e-05)
192
+ (k_norm): HYV3RMSNorm((32,), eps=1e-05)
193
+ )
194
+ (mlp): HYV3MoE(
195
+ (gate): HYV3TopKRouter()
196
+ (experts): HYV3Experts(
197
+ (act_fn): SiLUActivation()
198
+ )
199
+ (shared_experts): HYV3MLP(
200
+ (gate_proj): Linear(in_features=8, out_features=32, bias=False)
201
+ (up_proj): Linear(in_features=8, out_features=32, bias=False)
202
+ (down_proj): Linear(in_features=32, out_features=8, bias=False)
203
+ (act_fn): SiLUActivation()
204
+ )
205
+ )
206
+ (input_layernorm): HYV3RMSNorm((8,), eps=1e-05)
207
+ (post_attention_layernorm): HYV3RMSNorm((8,), eps=1e-05)
208
+ )
209
+ (4): HYV3DecoderLayer(
210
+ (self_attn): HYV3Attention(
211
+ (q_proj): Linear(in_features=8, out_features=256, bias=False)
212
+ (k_proj): Linear(in_features=8, out_features=128, bias=False)
213
+ (v_proj): Linear(in_features=8, out_features=128, bias=False)
214
+ (o_proj): Linear(in_features=256, out_features=8, bias=False)
215
+ (q_norm): HYV3RMSNorm((32,), eps=1e-05)
216
+ (k_norm): HYV3RMSNorm((32,), eps=1e-05)
217
+ )
218
+ (mlp): HYV3MoE(
219
+ (gate): HYV3TopKRouter()
220
+ (experts): HYV3Experts(
221
+ (act_fn): SiLUActivation()
222
+ )
223
+ (shared_experts): HYV3MLP(
224
+ (gate_proj): Linear(in_features=8, out_features=32, bias=False)
225
+ (up_proj): Linear(in_features=8, out_features=32, bias=False)
226
+ (down_proj): Linear(in_features=32, out_features=8, bias=False)
227
+ (act_fn): SiLUActivation()
228
+ )
229
+ )
230
+ (input_layernorm): HYV3RMSNorm((8,), eps=1e-05)
231
+ (post_attention_layernorm): HYV3RMSNorm((8,), eps=1e-05)
232
+ (eh_proj): Linear(in_features=16, out_features=8, bias=False)
233
+ (enorm): RMSNorm((8,), eps=None, elementwise_affine=True)
234
+ (hnorm): RMSNorm((8,), eps=None, elementwise_affine=True)
235
+ (final_layernorm): RMSNorm((8,), eps=None, elementwise_affine=True)
236
+ )
237
+ )
238
+ (norm): HYV3RMSNorm((8,), eps=1e-05)
239
+ (rotary_emb): HYV3RotaryEmbedding()
240
+ )
241
+ (lm_head): Linear(in_features=8, out_features=120832, bias=False)
242
+ )
243
+ ```
244
+
245
+ </details>
246
+
247
+ ### Test environment:
248
+
249
+ - torch: 2.11.0+cu126
250
+ - transformers: 5.7.0.dev0
chat_template.jinja ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- ---------- special token variables ---------- -#}
2
+ {%- set bos_token = '<|hy_begin▁of▁sentence|>' %}
3
+ {%- set pad_token = '<|hy_▁pad▁|>' %}
4
+ {%- set user_token = '<|hy_User|>' %}
5
+ {%- set assistant_token = '<|hy_Assistant|>' %}
6
+ {%- set eos_token = '<|hy_eos|>' %}
7
+ {%- set think_begin_token = '<think>' %}
8
+ {%- set think_end_token = '</think>' %}
9
+ {%- set toolcalls_begin_token = '<tool_calls>' %}
10
+ {%- set toolcalls_end_token = '</tool_calls>' %}
11
+ {%- set toolcall_begin_token = '<tool_call>' %}
12
+ {%- set toolcall_end_token = '</tool_call>' %}
13
+ {%- set toolsep_token = '<tool_sep>' %}
14
+ {%- set argkey_begin_token = '<arg_key>' %}
15
+ {%- set argkey_end_token = '</arg_key>' %}
16
+ {%- set argvalue_begin_token = '<arg_value>' %}
17
+ {%- set argvalue_end_token = '</arg_value>' %}
18
+ {%- set toolresponses_begin_token = '<tool_responses>' %}
19
+ {%- set toolresponses_end_token = '</tool_responses>' %}
20
+ {%- set toolresponse_begin_token = '<tool_response>' %}
21
+ {%- set toolresponse_end_token = '</tool_response>' %}
22
+ {%- set reasoning_mode_token = '<|reasoning_mode|>' %}
23
+ {#- ---------- hyperparameters variables ---------- -#}
24
+ {%- if not add_generation_prompt is defined %}
25
+ {%- set add_generation_prompt = false %}
26
+ {%- endif %}
27
+ {%- if not interleaved_thinking is defined %}
28
+ {%- set interleaved_thinking = false %}
29
+ {%- endif %}
30
+ {%- if not tools %}
31
+ {%- set interleaved_thinking = false %}
32
+ {%- endif %}
33
+ {%- if not is_training is defined %}
34
+ {%- set is_training = false %}
35
+ {%- endif %}
36
+ {%- if not reasoning_effort is defined or reasoning_effort not in ['high', 'low', 'no_think'] %}
37
+ {%- set reasoning_effort = 'no_think' %}
38
+ {%- endif %}
39
+
40
+ {%- macro visible_text(content) -%}
41
+ {%- if content is string -%}
42
+ {{- content }}
43
+ {%- elif content is iterable and content is not mapping -%}
44
+ {%- for item in content -%}
45
+ {%- if item is mapping and item.type == 'text' -%}
46
+ {{- item.text }}
47
+ {%- elif item is string -%}
48
+ {{- item }}
49
+ {%- endif -%}
50
+ {%- endfor -%}
51
+ {%- elif content is none -%}
52
+ {{- '' }}
53
+ {%- else -%}
54
+ {{- content }}
55
+ {%- endif -%}
56
+ {%- endmacro -%}
57
+
58
+ {%- set ns = namespace(last_user_index=-1) %}
59
+ {%- set sp_ns = namespace(system_prompt='', is_first_sp=true) %}
60
+ {%- for message in messages %}
61
+ {%- if message['role'] == 'system' %}
62
+ {%- set sp_ns.system_prompt = sp_ns.system_prompt + visible_text(message['content']) %}
63
+ {%- endif %}
64
+ {%- if message['role'] == 'user' %}
65
+ {%- set ns.last_user_index = loop.index0 %}
66
+ {%- endif %}
67
+ {%- endfor %}
68
+ {%- if reasoning_effort is defined and reasoning_effort is string and reasoning_effort != '' and not tools %}
69
+ {%- set sp_ns.system_prompt = sp_ns.system_prompt + reasoning_mode_token + 'reasoning_effort:' + reasoning_effort %}
70
+ {%- endif %}
71
+ {{- bos_token }}
72
+ {{- sp_ns.system_prompt }}
73
+ {%- if tools %}
74
+ {%- if sp_ns.system_prompt != '' %}
75
+ {{- '\n\n# Tools\n\nYou may call one or more functions to assist with the user query.' }}
76
+ {%- else %}
77
+ {{- '# Tools\n\nYou may call one or more functions to assist with the user query.' }}
78
+ {%- endif %}
79
+ {{- '\n\nYou are provided with function signatures within <tools></tools> XML tags:' }}
80
+ {{- '\n<tools>\n' }}
81
+ {%- for tool in tools %}
82
+ {%- if loop.index0 > 0 %}
83
+ {{- '\n' }}
84
+ {%- endif %}
85
+ {{- tool | tojson }}
86
+ {%- endfor %}
87
+ {{- '\n</tools>\n\n' }}
88
+ {{- 'For function call returns, you should first print ' + toolcalls_begin_token + '\n' }}
89
+ {{- 'For each function call, you should return object like:\n' }}
90
+ {{- toolcall_begin_token + '{function-name}' + toolsep_token + '\n' }}
91
+ {{- argkey_begin_token + '{arg-key-1}' + argkey_end_token + '\n' }}
92
+ {{- argvalue_begin_token + '{arg-value-1}' + argvalue_end_token + '\n' }}
93
+ {{- argkey_begin_token + '{arg-key-2}' + argkey_end_token + '\n' }}
94
+ {{- argvalue_begin_token + '{arg-value-2}' + argvalue_end_token + '\n' }}
95
+ {{- '...\n' }}
96
+ {{- toolcall_end_token + '\n' }}
97
+ {%- if reasoning_effort is defined and reasoning_effort is string and reasoning_effort != '' %}
98
+ {{- 'At the end of function call returns, you should print ' + toolcalls_end_token + reasoning_mode_token + 'reasoning_effort:' + reasoning_effort }}
99
+ {%- else %}
100
+ {{- 'At the end of function call returns, you should print ' + toolcalls_end_token }}
101
+ {%- endif %}
102
+ {%- endif %}
103
+
104
+ {%- set prev_ns = namespace(is_tool=false, is_tool_first=true) %}
105
+ {%- set last_ns = namespace(last_is_assistant=false) %}
106
+ {%- for message in messages %}
107
+ {%- if message['role'] == 'user' %}
108
+ {%- if prev_ns.is_tool %}
109
+ {{- toolresponses_end_token }}
110
+ {%- endif %}
111
+ {{- user_token + visible_text(message['content']) }}
112
+ {%- set prev_ns.is_tool = false %}
113
+ {%- endif %}
114
+ {%- if message['role'] == 'assistant' %}
115
+ {%- if 'reasoning_content' in message and message['reasoning_content'] is string %}
116
+ {%- set rc = message['reasoning_content'] %}
117
+ {%- elif 'reasoning' in message and message['reasoning'] is string %}
118
+ {%- set rc = message['reasoning'] %}
119
+ {%- else %}
120
+ {%- set rc = none %}
121
+ {%- endif %}
122
+ {%- if is_training %}
123
+ {%- if rc is not none %}
124
+ {%- set content = think_begin_token + rc + think_end_token + visible_text(message['content']) %}
125
+ {%- else %}
126
+ {%- set content = think_begin_token + think_end_token + visible_text(message['content']) %}
127
+ {%- endif %}
128
+ {%- else %}
129
+ {%- if interleaved_thinking %}
130
+ {%- if loop.index0 > ns.last_user_index and rc is not none %}
131
+ {%- set content = think_begin_token + rc + think_end_token + visible_text(message['content']) %}
132
+ {%- else %}
133
+ {%- set content = think_begin_token + think_end_token + visible_text(message['content']) %}
134
+ {%- endif %}
135
+ {%- else %}
136
+ {%- set content = think_begin_token + think_end_token + visible_text(message['content']) %}
137
+ {%- endif %}
138
+ {%- endif %}
139
+ {%- if prev_ns.is_tool %}
140
+ {{- toolresponses_end_token }}
141
+ {%- endif %}
142
+ {{- assistant_token }}
143
+ {%- if message['tool_calls'] is defined and message['tool_calls'] %}
144
+ {%- set prev_ns.is_tool_first = true %}
145
+ {{- content }}
146
+ {{- toolcalls_begin_token + '\n' }}
147
+ {%- for tool in message['tool_calls'] %}
148
+ {%- set arguments = tool['function']['arguments'] %}
149
+ {{- toolcall_begin_token + tool['function']['name'] + toolsep_token + '\n' }}
150
+ {%- for key, value in arguments.items() %}
151
+ {{- argkey_begin_token + key + argkey_end_token + '\n' }}
152
+ {%- if value is not string %}
153
+ {%- set value = value | tojson(ensure_ascii=False) %}
154
+ {%- endif %}
155
+ {{- argvalue_begin_token + value + argvalue_end_token + '\n' }}
156
+ {%- endfor %}
157
+ {{- toolcall_end_token + '\n' }}
158
+ {%- endfor %}
159
+ {{- toolcalls_end_token + eos_token }}
160
+ {%- else %}
161
+ {%- if not loop.last or is_training %}
162
+ {{- content + eos_token }}
163
+ {%- else %}
164
+ {{- content }}
165
+ {%- endif %}
166
+ {%- endif %}
167
+ {%- set prev_ns.is_tool = false %}
168
+ {%- endif %}
169
+ {%- if message['role'] == 'tool' %}
170
+ {%- set prev_ns.is_tool = true %}
171
+ {%- if prev_ns.is_tool_first %}
172
+ {{- toolresponses_begin_token + '\n' }}
173
+ {%- set prev_ns.is_tool_first = false %}
174
+ {%- endif %}
175
+ {{- toolresponse_begin_token + '\n' + visible_text(message['content']) + '\n' + toolresponse_end_token + '\n' }}
176
+ {%- endif %}
177
+ {%- if loop.last and message['role'] == 'assistant' %}
178
+ {%- set last_ns.last_is_assistant = true %}
179
+ {%- endif %}
180
+
181
+ {%- endfor %}
182
+ {%- if prev_ns.is_tool %}
183
+ {{- toolresponses_end_token }}
184
+ {%- endif %}
185
+ {%- if add_generation_prompt %}
186
+ {%- if not last_ns.last_is_assistant %}
187
+ {%- if reasoning_effort is defined and reasoning_effort in ['low', 'high'] %}
188
+ {{- assistant_token + think_begin_token }}
189
+ {%- elif reasoning_effort is defined and reasoning_effort == 'no_think' %}
190
+ {{- assistant_token + think_begin_token + think_end_token }}
191
+ {%- else %}
192
+ {{- assistant_token }}
193
+ {%- endif %}
194
+ {%- endif %}
195
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "HYV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 120000,
8
+ "dtype": "bfloat16",
9
+ "enable_attention_fp32_softmax": false,
10
+ "enable_lm_head_fp32": true,
11
+ "enable_moe_fp32_combine": false,
12
+ "eod_token_id": 120026,
13
+ "eos_token_id": 120025,
14
+ "expert_hidden_dim": 32,
15
+ "first_k_dense_replace": 1,
16
+ "head_dim": 32,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 8,
19
+ "initializer_range": 0.006,
20
+ "intermediate_size": 32,
21
+ "max_position_embeddings": 262144,
22
+ "mlp_bias": false,
23
+ "mlp_layer_types": [
24
+ "dense",
25
+ "sparse",
26
+ "sparse",
27
+ "sparse"
28
+ ],
29
+ "model_type": "hy_v3",
30
+ "moe_intermediate_size": 32,
31
+ "moe_router_enable_expert_bias": true,
32
+ "moe_router_use_sigmoid": true,
33
+ "num_attention_heads": 8,
34
+ "num_experts": 192,
35
+ "num_experts_per_tok": 8,
36
+ "num_hidden_layers": 4,
37
+ "num_key_value_heads": 4,
38
+ "num_nextn_predict_layers": 1,
39
+ "num_shared_experts": 1,
40
+ "output_router_logits": true,
41
+ "pad_token_id": 120002,
42
+ "qk_norm": true,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_parameters": {
45
+ "rope_theta": 11158840.0,
46
+ "rope_type": "default"
47
+ },
48
+ "route_norm": true,
49
+ "router_scaling_factor": 2.826,
50
+ "sep_token_id": 120007,
51
+ "tie_word_embeddings": false,
52
+ "transformers_version": "5.7.0.dev0",
53
+ "use_cache": true,
54
+ "use_grouped_mm": false,
55
+ "vocab_size": 120832
56
+ }
generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 120000,
3
+ "do_sample": true,
4
+ "eos_token_id": 120025,
5
+ "pad_token_id": 120002,
6
+ "temperature": 0.9,
7
+ "top_k": 40,
8
+ "top_p": 1,
9
+ "transformers_version": "5.7.0.dev0",
10
+ "trust_remote_code": true
11
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d14912ca0a2c0a4487ff365b54caf64ef7885fc06059c225bc0668e7013055f8
3
+ size 5401352
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|hy_begin▁of▁sentence|>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<|hy_eos|>",
6
+ "is_local": false,
7
+ "local_files_only": false,
8
+ "model_max_length": 1000000000000000019884624838656,
9
+ "pad_token": "<|hy_▁pad▁|>",
10
+ "tokenizer_class": "TokenizersBackend"
11
+ }