gyung committed on
Commit
98fd2bf
·
verified ·
1 Parent(s): a13cf72

Add files using upload-large-folder tool

Browse files
Files changed (23) hide show
  1. .gitattributes +9 -0
  2. stage1d-hrm-fastcap-repeat3-step750000/all_config.yaml +46 -0
  3. stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.0.pt +3 -0
  4. stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.1.pt +3 -0
  5. stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.2.pt +3 -0
  6. stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.3.pt +3 -0
  7. stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.4.pt +3 -0
  8. stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.5.pt +3 -0
  9. stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.6.pt +3 -0
  10. stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.7.pt +3 -0
  11. stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/.metadata +3 -0
  12. stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__0_0.distcp +3 -0
  13. stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__1_0.distcp +3 -0
  14. stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__2_0.distcp +3 -0
  15. stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__3_0.distcp +3 -0
  16. stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__4_0.distcp +3 -0
  17. stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__5_0.distcp +3 -0
  18. stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__6_0.distcp +3 -0
  19. stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__7_0.distcp +3 -0
  20. stage1d-hrm-fastcap-repeat3-step750000/latest_checkpoint.txt +1 -0
  21. stage1d-hrm-fastcap-repeat3-step750000/step_750000_info.json +8 -0
  22. stage1d-hrm-fastcap-repeat3-step750000/train_metadata.yaml +13 -0
  23. stage1d-hrm-fastcap-repeat3-step750000/upload_manifest.json +1 -0
.gitattributes CHANGED
@@ -906,3 +906,12 @@ stage1d-hrm-fastcap-repeat3-step740000/fsdp2_step_740000/__6_0.distcp filter=lfs
906
  stage1d-hrm-fastcap-repeat3-step740000/fsdp2_step_740000/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
907
  stage1d-hrm-fastcap-repeat3-step740000/fsdp2_step_740000/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
908
  stage1d-hrm-fastcap-repeat3-step740000/fsdp2_step_740000/.metadata filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
906
  stage1d-hrm-fastcap-repeat3-step740000/fsdp2_step_740000/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
907
  stage1d-hrm-fastcap-repeat3-step740000/fsdp2_step_740000/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
908
  stage1d-hrm-fastcap-repeat3-step740000/fsdp2_step_740000/.metadata filter=lfs diff=lfs merge=lfs -text
909
+ stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
910
+ stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/.metadata filter=lfs diff=lfs merge=lfs -text
911
+ stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
912
+ stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
913
+ stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
914
+ stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
915
+ stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
916
+ stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
917
+ stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
stage1d-hrm-fastcap-repeat3-step750000/all_config.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ arch:
2
+ H_cycles: 2
3
+ H_override: {}
4
+ L_cycles: 3
5
+ bp_max_steps: 5
6
+ bp_warmup_ratio: 0.2
7
+ expansion: 4
8
+ half_layers: true
9
+ head: lm_head@LMHead
10
+ hidden_size: 1536
11
+ init_type: lecun_normal
12
+ n_layers: 32
13
+ name: baselines.hrm_nocarry_bp_warmup@HierarchicalReasoningModel
14
+ norm_eps: 1.0e-06
15
+ norm_type: pre
16
+ num_heads: 12
17
+ pos_emb_type: rope
18
+ rope_theta: 10000.0
19
+ beta1: 0.9
20
+ beta2: 0.95
21
+ checkpoint_interval: 1
22
+ checkpoint_keep_last: 2
23
+ checkpoint_path: /home/work/.data/hrm_text_checkpoints/KoHRM-Text-1.4B-stage1d-hrm-fastcap-repeat3-gbs180
24
+ checkpoint_step_interval: 10000
25
+ data:
26
+ path: /home/work/.data/hrm_text_prepared/koterm_hrm_cleaned_fastcap_stage1_v1
27
+ target_only: true
28
+ ema: 0.9999
29
+ epochs: 1
30
+ fwd_bwd_dtype: bfloat16
31
+ global_batch_size: 180224
32
+ log_interval: 5
33
+ lr: 0.00022
34
+ lr_min_ratio: 1.0
35
+ lr_warmup_steps: 2000
36
+ project_name: KoHRM-Text
37
+ resume_epoch: null
38
+ resume_from: /home/work/.data/hrm_text_checkpoints/KoHRM-Text-1.4B-stage4c-korean-tool-finance-repeat2-gbs180
39
+ resume_step: null
40
+ resume_step_offset: 702956
41
+ run_name: KoHRM-Text-1.4B-stage1d-hrm-fastcap-repeat3
42
+ seed: 0
43
+ skip_batches: 0
44
+ total_steps_override: 934306
45
+ weight_decay: 0.1
46
+ weights_only_resume_from_ema: false
stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b247efd27c5d7e9b011044aa8b173ea5b936afbe33eed2086a1b5cdc855bdca9
3
+ size 1333
stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cb13dd000f56320fa80aa39cd1f019b994d95c55572bfa9ba54376197d237be
3
+ size 1333
stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64e5070bfe759199f05a9a1515e7f050c0a64b2c39ddd224f5044064f14d445c
3
+ size 1333
stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f7c5a9a7b7e9d6a9437206335761736fb77c33f629582419545410082fa6ae0
3
+ size 1333
stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ce4a7411df02960a1db8e9b4ce54abe70d5694c2b5eaca22eadde7f0696a376
3
+ size 1333
stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a5a460ef346ff4d58c362d21cbe0d2351a3b73f8737068356841b7566ce9301
3
+ size 1333
stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a117e0a53f2bd2fa957c69de8aebd4f7867eb840f65c26af5d4e4ea0b20f5b6
3
+ size 1333
stage1d-hrm-fastcap-repeat3-step750000/carry_step_750000.7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a258dd4a0adeeeec293147c306aee9d77586f87908fad7c9540dafcb8e1a9bc
3
+ size 1333
stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7954f0b94cc99906d1a01f8bf38a300f38725c4aaa99e21601441cc36bf528de
3
+ size 983812
stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26fb11ca08197ac75da1833d010f625589743f68315f2be818a340fc0722f043
3
+ size 2769065329
stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef84a446c9d28ac52f9b3445192b6e196fac417abb3adfa7b7baf9e69f3bfb5b
3
+ size 2769090643
stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456d21edb5761ec854ffb84c976bacbf492318f2d70f7faf78a18c0f1a3cf246
3
+ size 2769090643
stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e6bf713d5be85ec182037db503a9eb32c0bb619a1479ef0c41e8054e7dd2e6f
3
+ size 2769090643
stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:690a8f7415951fe7f35d007a8ae6b4440da35c77e06b74c0e66bf7085bfbcab3
3
+ size 2769090643
stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3d336f36e2531125377d3575f9959b66c3535861f51fd9e8cb2045e4d5fbcbc
3
+ size 2769090643
stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f7a9b27301048d6b29da8c577e0699ef754f787cfa68aaeee5cb47cd0d51a3c
3
+ size 2769091588
stage1d-hrm-fastcap-repeat3-step750000/fsdp2_step_750000/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8bb1fa8ad7669b81926be94abba7dff94e605d87ab0e24843c8b52be7d85235
3
+ size 2769098756
stage1d-hrm-fastcap-repeat3-step750000/latest_checkpoint.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ step_750000
stage1d-hrm-fastcap-repeat3-step750000/step_750000_info.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tag": "step_750000",
3
+ "global_step": 750000,
4
+ "stage_start_step": 702956,
5
+ "skip_batches_hint": 47044,
6
+ "data_path": "/home/work/.data/hrm_text_prepared/koterm_hrm_cleaned_fastcap_stage1_v1",
7
+ "global_batch_size": 180224
8
+ }
stage1d-hrm-fastcap-repeat3-step750000/train_metadata.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ max_seq_len: 4096
2
+ tokenizer_info:
3
+ boq: <|im_start|>
4
+ condition_mapping:
5
+ cot: <|object_ref_end|>
6
+ direct: <|object_ref_start|>
7
+ noisy: <|quad_start|>
8
+ synth: <|quad_end|>
9
+ eoa: <|box_end|>
10
+ eoq: <|im_end|>
11
+ tokenizer_path: /home/work/.data/huggingface/trained_tokenizers/hrm-ko-terminal-131k-v1/tokenizer.json
12
+ total_length: 14554291763
13
+ vocab_size: 131072
stage1d-hrm-fastcap-repeat3-step750000/upload_manifest.json ADDED
@@ -0,0 +1 @@
 
 
1
+ stage1d-hrm-fastcap-repeat3 step_750000 raw resume checkpoint