| --- |
| base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B |
| library_name: transformers |
| license: mit |
| tags: |
| - safe |
| language: |
| - en |
| - zh |
| pipeline_tag: text-generation |
| --- |
| |
| # RealSafe-R1-1.5B |
|
|
| This repository contains the model card for RealSafe-R1-1.5B, based on the paper [RealSafe-R1 (arXiv:2504.10081)](https://huggingface.co/papers/2504.10081). |
|
|
| # File information |
|
|
| The repository contains the following files, shown with their contents: |
|
|
| Filename: tokenizer.json |
| Content: "Content of the file is larger than 50 KB, too long to display." |
|
|
| Filename: all_results.json |
| Content: { |
| "epoch": 0.9978021978021978, |
| "total_flos": 7339342036992.0, |
| "train_loss": 1.2485807309591823, |
| "train_runtime": 995.4655, |
| "train_samples_per_second": 14.624, |
| "train_steps_per_second": 0.228 |
| } |
|
|
| Filename: generation_config.json |
| Content: { |
| "_from_model_config": true, |
| "bos_token_id": 151646, |
| "do_sample": true, |
| "eos_token_id": 151643, |
| "temperature": 0.6, |
| "top_p": 0.95, |
| "transformers_version": "4.45.2" |
| } |
| |
| Filename: train_results.json |
| Content: { |
| "epoch": 0.9978021978021978, |
| "total_flos": 7339342036992.0, |
| "train_loss": 1.2485807309591823, |
| "train_runtime": 995.4655, |
| "train_samples_per_second": 14.624, |
| "train_steps_per_second": 0.228 |
| } |
| |
| Filename: special_tokens_map.json |
| Content: { |
| "bos_token": { |
| "content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false |
| }, |
| "eos_token": { |
| "content": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false |
| }, |
| "pad_token": { |
| "content": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false |
| } |
| } |
| |
| Filename: trainer_state.json |
| Content: { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9978021978021978, |
| "eval_steps": 500, |
| "global_step": 227, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.9978021978021978, |
| "step": 227, |
| "total_flos": 7339342036992.0, |
| "train_loss": 1.2485807309591823, |
| "train_runtime": 995.4655, |
| "train_samples_per_second": 14.624, |
| "train_steps_per_second": 0.228 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 227, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7339342036992.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
| |
| Filename: tokenizer_config.json |
| Content: { |
| "add_bos_token": true, |
| "add_eos_token": false, |
| "add_prefix_space": null, |
| "added_tokens_decoder": { |
| "151643": { |
| "content": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "151644": { |
| "content": "<\uff5cUser\uff5c>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| }, |
| "151645": { |
| "content": "<\uff5cAssistant\uff5c>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| }, |
| "151646": { |
| "content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "151647": { |
| "content": "<|EOT|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| }, |
| "151648": { |
| "content": "<think>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| }, |
| "151649": { |
| "content": "</think>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| }, |
| "151650": { |
| "content": "<|quad_start|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "151651": { |
| "content": "<|quad_end|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "151652": { |
| "content": "<|vision_start|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "151653": { |
| "content": "<|vision_end|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "151654": { |
| "content": "<|vision_pad|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "151655": { |
| "content": "<|image_pad|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "151656": { |
| "content": "<|video_pad|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "151657": { |
| "content": "<tool_call>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| }, |
| "151658": { |
| "content": "</tool_call>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| }, |
| "151659": { |
| "content": "<|fim_prefix|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| }, |
| "151660": { |
| "content": "<|fim_middle|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| }, |
| "151661": { |
| "content": "<|fim_suffix|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| }, |
| "151662": { |
| "content": "<|fim_pad|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| }, |
| "151663": { |
| "content": "<|repo_name|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| }, |
| "151664": { |
| "content": "<|file_sep|>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": false |
| } |
| }, |
| "bos_token": "<\uff5cbegin\u2581of\u2581sentence\uff5c>", |
| "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<\uff5cUser\uff5c>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<\uff5cAssistant\uff5c><\uff5ctool\u2581calls\u2581begin\uff5c><\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{{'<\uff5ctool\u2581calls\u2581end\uff5c><\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>' + message['content'] + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<\uff5cAssistant\uff5c>' + content + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<\uff5ctool\u2581outputs\u2581begin\uff5c><\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<\uff5cAssistant\uff5c><think>\\n'}}{% endif %}", |
| "clean_up_tokenization_spaces": false, |
| "eos_token": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
| "legacy": true, |
| "model_max_length": 4096, |
| "pad_token": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
| "padding_side": "right", |
| "sp_model_kwargs": {}, |
| "split_special_tokens": false, |
| "tokenizer_class": "LlamaTokenizer", |
| "unk_token": null, |
| "use_default_system_prompt": false |
| } |
| |
| Filename: config.json |
| Content: { |
| "_name_or_path": "/nfs2/models/DeepSeek-R1-Distill-Qwen-1.5B/", |
| "architectures": [ |
| "Qwen2ForCausalLM" |
| ], |
| "attention_dropout": 0.0, |
| "bos_token_id": 151646, |
| "eos_token_id": 151643, |
| "hidden_act": "silu", |
| "hidden_size": 1536, |
| "initializer_range": 0.02, |
| "intermediate_size": 8960, |
| "max_position_embeddings": 131072, |
| "max_window_layers": 21, |
| "model_type": "qwen2", |
| "num_attention_heads": 12, |
| "num_hidden_layers": 28, |
| "num_key_value_heads": 2, |
| "rms_norm_eps": 1e-06, |
| "rope_scaling": null, |
| "rope_theta": 10000, |
| "sliding_window": null, |
| "tie_word_embeddings": false, |
| "torch_dtype": "bfloat16", |
| "transformers_version": "4.45.2", |
| "use_cache": false, |
| "use_mrope": false, |
| "use_sliding_window": false, |
| "vocab_size": 151936 |
| } |