| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.009327923137913344, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 982.3750610351562, |
| "epoch": 1.865584627582669e-05, |
| "grad_norm": 0.06827232986688614, |
| "kl": 0.0, |
| "learning_rate": 3.3333333333333335e-07, |
| "loss": 0.0163, |
| "num_tokens": 39753.0, |
| "reward": 0.2668055621907115, |
| "reward_std": 0.258365576621145, |
| "rewards/code_reward": 0.2638888955116272, |
| "rewards/format_reward": 0.02916666679084301, |
| "step": 1 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "epoch": 3.731169255165338e-05, |
| "grad_norm": 0.06825702637434006, |
| "kl": 0.0, |
| "learning_rate": 6.666666666666667e-07, |
| "loss": 0.0163, |
| "step": 2 |
| }, |
| { |
| "clip_ratio": 0.0010449413384776562, |
| "epoch": 5.596753882748006e-05, |
| "grad_norm": 0.06731275469064713, |
| "kl": 0.00015044212341308594, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.0165, |
| "step": 3 |
| }, |
| { |
| "clip_ratio": 0.0009167591924779117, |
| "completion_length": 1241.1250610351562, |
| "epoch": 7.462338510330675e-05, |
| "grad_norm": 0.06577128916978836, |
| "kl": 0.00014638900756835938, |
| "learning_rate": 1.3333333333333334e-06, |
| "loss": -0.0213, |
| "num_tokens": 87072.0, |
| "reward": 0.1686033308506012, |
| "reward_std": 0.2493794783949852, |
| "rewards/code_reward": 0.1686033308506012, |
| "rewards/format_reward": 0.0, |
| "step": 4 |
| }, |
| { |
| "clip_ratio": 0.0010596313513815403, |
| "epoch": 9.327923137913344e-05, |
| "grad_norm": 0.06366928666830063, |
| "kl": 0.00015687942504882812, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": -0.0213, |
| "step": 5 |
| }, |
| { |
| "clip_ratio": 0.00101625599199906, |
| "epoch": 0.00011193507765496012, |
| "grad_norm": 0.06929203122854233, |
| "kl": 0.00015020370483398438, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": -0.0213, |
| "step": 6 |
| }, |
| { |
| "clip_ratio": 0.0005019755335524678, |
| "completion_length": 607.7500305175781, |
| "epoch": 0.0001305909239307868, |
| "grad_norm": 0.054728299379348755, |
| "kl": 0.00021600723266601562, |
| "learning_rate": 2.3333333333333336e-06, |
| "loss": 0.0112, |
| "num_tokens": 110922.0, |
| "reward": 0.4583333432674408, |
| "reward_std": 0.14433756470680237, |
| "rewards/code_reward": 0.4583333432674408, |
| "rewards/format_reward": 0.0, |
| "step": 7 |
| }, |
| { |
| "clip_ratio": 0.0005628917133435607, |
| "epoch": 0.0001492467702066135, |
| "grad_norm": 0.05518824979662895, |
| "kl": 0.00023031234741210938, |
| "learning_rate": 2.666666666666667e-06, |
| "loss": 0.011, |
| "step": 8 |
| }, |
| { |
| "clip_ratio": 0.0006851793150417507, |
| "epoch": 0.00016790261648244018, |
| "grad_norm": 0.05371546372771263, |
| "kl": 0.00021839141845703125, |
| "learning_rate": 3e-06, |
| "loss": 0.0113, |
| "step": 9 |
| }, |
| { |
| "clip_ratio": 0.0009316161158494651, |
| "completion_length": 763.25, |
| "epoch": 0.00018655846275826687, |
| "grad_norm": 0.06738901138305664, |
| "kl": 0.00017595291137695312, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": -0.017, |
| "num_tokens": 140040.0, |
| "reward": 0.29250000417232513, |
| "reward_std": 0.4735611826181412, |
| "rewards/code_reward": 0.2916666716337204, |
| "rewards/format_reward": 0.008333333767950535, |
| "step": 10 |
| }, |
| { |
| "clip_ratio": 0.0005913192144362256, |
| "epoch": 0.00020521430903409357, |
| "grad_norm": 0.07004611194133759, |
| "kl": 0.0001888275146484375, |
| "learning_rate": 3.6666666666666666e-06, |
| "loss": -0.0171, |
| "step": 11 |
| }, |
| { |
| "clip_ratio": 0.000849212781758979, |
| "epoch": 0.00022387015530992023, |
| "grad_norm": 0.069913350045681, |
| "kl": 0.00017833709716796875, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": -0.0172, |
| "step": 12 |
| }, |
| { |
| "clip_ratio": 0.0005434846971184015, |
| "completion_length": 622.1250305175781, |
| "epoch": 0.00024252600158574693, |
| "grad_norm": 0.061467599123716354, |
| "kl": 0.00020265579223632812, |
| "learning_rate": 4.333333333333334e-06, |
| "loss": 0.0123, |
| "num_tokens": 165519.0, |
| "reward": 0.31522059440612793, |
| "reward_std": 0.22002992033958435, |
| "rewards/code_reward": 0.3139705955982208, |
| "rewards/format_reward": 0.012500000186264515, |
| "step": 13 |
| }, |
| { |
| "clip_ratio": 0.0004921667859889567, |
| "epoch": 0.0002611818478615736, |
| "grad_norm": 0.0641019269824028, |
| "kl": 0.000213623046875, |
| "learning_rate": 4.666666666666667e-06, |
| "loss": 0.0124, |
| "step": 14 |
| }, |
| { |
| "clip_ratio": 0.0007367254002019763, |
| "epoch": 0.0002798376941374003, |
| "grad_norm": 0.13394594192504883, |
| "kl": 0.00019359588623046875, |
| "learning_rate": 5e-06, |
| "loss": 0.0122, |
| "step": 15 |
| }, |
| { |
| "clip_ratio": 0.0015055734547786415, |
| "completion_length": 809.2500305175781, |
| "epoch": 0.000298493540413227, |
| "grad_norm": 0.08201699703931808, |
| "kl": 0.00023174285888671875, |
| "learning_rate": 4.999952797253148e-06, |
| "loss": 0.009, |
| "num_tokens": 200625.0, |
| "reward": 0.363928597420454, |
| "reward_std": 0.2377874432131648, |
| "rewards/code_reward": 0.3526785671710968, |
| "rewards/format_reward": 0.11249998956918716, |
| "step": 16 |
| }, |
| { |
| "clip_ratio": 0.0011205525370314717, |
| "epoch": 0.0003171493866890537, |
| "grad_norm": 0.08107728511095047, |
| "kl": 0.00022745132446289062, |
| "learning_rate": 4.9998111909931225e-06, |
| "loss": 0.0088, |
| "step": 17 |
| }, |
| { |
| "clip_ratio": 0.0010780769807752222, |
| "epoch": 0.00033580523296488035, |
| "grad_norm": 0.08552956581115723, |
| "kl": 0.00022220611572265625, |
| "learning_rate": 4.999575187161439e-06, |
| "loss": 0.0088, |
| "step": 18 |
| }, |
| { |
| "clip_ratio": 0.0009002784790936857, |
| "completion_length": 777.0416870117188, |
| "epoch": 0.0003544610792407071, |
| "grad_norm": 0.08441054821014404, |
| "kl": 0.00020885467529296875, |
| "learning_rate": 4.9992447956603455e-06, |
| "loss": -0.0108, |
| "num_tokens": 234130.0, |
| "reward": 0.6849345862865448, |
| "reward_std": 0.30150339007377625, |
| "rewards/code_reward": 0.6828512400388718, |
| "rewards/format_reward": 0.02083333395421505, |
| "step": 19 |
| }, |
| { |
| "clip_ratio": 0.0011175140971317887, |
| "epoch": 0.00037311692551653374, |
| "grad_norm": 0.08343806117773056, |
| "kl": 0.0002079010009765625, |
| "learning_rate": 4.998820030352409e-06, |
| "loss": -0.0108, |
| "step": 20 |
| }, |
| { |
| "clip_ratio": 0.0009022637677844614, |
| "epoch": 0.0003917727717923604, |
| "grad_norm": 0.0833282396197319, |
| "kl": 0.00020360946655273438, |
| "learning_rate": 4.998300909059929e-06, |
| "loss": -0.0111, |
| "step": 21 |
| }, |
| { |
| "clip_ratio": 0.0015675597824156284, |
| "completion_length": 630.5833435058594, |
| "epoch": 0.00041042861806818713, |
| "grad_norm": 0.08936360478401184, |
| "kl": 0.00019788742065429688, |
| "learning_rate": 4.997687453564198e-06, |
| "loss": 0.0142, |
| "num_tokens": 257484.0, |
| "reward": 0.3370833722874522, |
| "reward_std": 0.252491211052984, |
| "rewards/code_reward": 0.3333333432674408, |
| "rewards/format_reward": 0.037500000558793545, |
| "step": 22 |
| }, |
| { |
| "clip_ratio": 0.001083480688976124, |
| "epoch": 0.0004290844643440138, |
| "grad_norm": 0.08784651011228561, |
| "kl": 0.000202178955078125, |
| "learning_rate": 4.9969796896045775e-06, |
| "loss": 0.014, |
| "step": 23 |
| }, |
| { |
| "clip_ratio": 0.0011372591252438724, |
| "epoch": 0.00044774031061984047, |
| "grad_norm": 0.08748355507850647, |
| "kl": 0.000209808349609375, |
| "learning_rate": 4.996177646877426e-06, |
| "loss": 0.014, |
| "step": 24 |
| }, |
| { |
| "clip_ratio": 0.0014254440320655704, |
| "completion_length": 834.6250305175781, |
| "epoch": 0.0004663961568956672, |
| "grad_norm": 0.07256177812814713, |
| "kl": 0.00018978118896484375, |
| "learning_rate": 4.995281359034851e-06, |
| "loss": 0.0032, |
| "num_tokens": 297543.0, |
| "reward": 0.20448413118720055, |
| "reward_std": 0.30803024023771286, |
| "rewards/code_reward": 0.20198413357138634, |
| "rewards/format_reward": 0.025000001303851604, |
| "step": 25 |
| }, |
| { |
| "clip_ratio": 0.0013226708979345858, |
| "epoch": 0.00048505200317149386, |
| "grad_norm": 0.07238035649061203, |
| "kl": 0.00017786026000976562, |
| "learning_rate": 4.994290863683296e-06, |
| "loss": 0.0033, |
| "step": 26 |
| }, |
| { |
| "clip_ratio": 0.0009347930317744613, |
| "epoch": 0.0005037078494473206, |
| "grad_norm": 0.07114218175411224, |
| "kl": 0.00019788742065429688, |
| "learning_rate": 4.99320620238196e-06, |
| "loss": 0.0033, |
| "step": 27 |
| }, |
| { |
| "clip_ratio": 0.0008377690683118999, |
| "completion_length": 899.4166870117188, |
| "epoch": 0.0005223636957231473, |
| "grad_norm": 0.12414078414440155, |
| "kl": 0.00019073486328125, |
| "learning_rate": 4.99202742064106e-06, |
| "loss": 0.0106, |
| "num_tokens": 331453.0, |
| "reward": 0.15678573166951537, |
| "reward_std": 0.17976204119622707, |
| "rewards/code_reward": 0.1517857164144516, |
| "rewards/format_reward": 0.05000000074505806, |
| "step": 28 |
| }, |
| { |
| "clip_ratio": 0.0005996464460622519, |
| "epoch": 0.0005410195419989739, |
| "grad_norm": 0.07990814745426178, |
| "kl": 0.00018262863159179688, |
| "learning_rate": 4.990754567919917e-06, |
| "loss": 0.0103, |
| "step": 29 |
| }, |
| { |
| "clip_ratio": 0.0008573587401770055, |
| "epoch": 0.0005596753882748006, |
| "grad_norm": 0.0788721814751625, |
| "kl": 0.0001926422119140625, |
| "learning_rate": 4.989387697624881e-06, |
| "loss": 0.0104, |
| "step": 30 |
| }, |
| { |
| "clip_ratio": 0.0007707980694249272, |
| "completion_length": 873.875, |
| "epoch": 0.0005783312345506273, |
| "grad_norm": 0.07595472037792206, |
| "kl": 0.0002765655517578125, |
| "learning_rate": 4.987926867107095e-06, |
| "loss": 0.0066, |
| "num_tokens": 368998.0, |
| "reward": 0.4440600574016571, |
| "reward_std": 0.30831071734428406, |
| "rewards/code_reward": 0.43614333122968674, |
| "rewards/format_reward": 0.07916666753590107, |
| "step": 31 |
| }, |
| { |
| "clip_ratio": 0.0011201543966308236, |
| "epoch": 0.000596987080826454, |
| "grad_norm": 0.0760602131485939, |
| "kl": 0.0002522468566894531, |
| "learning_rate": 4.986372137660078e-06, |
| "loss": 0.0069, |
| "step": 32 |
| }, |
| { |
| "clip_ratio": 0.0011669989908114076, |
| "epoch": 0.0006156429271022807, |
| "grad_norm": 0.07646892964839935, |
| "kl": 0.00030231475830078125, |
| "learning_rate": 4.984723574517165e-06, |
| "loss": 0.0063, |
| "step": 33 |
| }, |
| { |
| "clip_ratio": 0.0003657292982097715, |
| "completion_length": 751.3333435058594, |
| "epoch": 0.0006342987733781074, |
| "grad_norm": 0.05740216001868248, |
| "kl": 0.00026035308837890625, |
| "learning_rate": 4.9829812468487655e-06, |
| "loss": -0.0053, |
| "num_tokens": 402006.0, |
| "reward": 0.7125000059604645, |
| "reward_std": 0.20762184262275696, |
| "rewards/code_reward": 0.7083333283662796, |
| "rewards/format_reward": 0.0416666679084301, |
| "step": 34 |
| }, |
| { |
| "clip_ratio": 0.00030940253054723144, |
| "epoch": 0.000652954619653934, |
| "grad_norm": 0.0546778179705143, |
| "kl": 0.00029468536376953125, |
| "learning_rate": 4.981145227759457e-06, |
| "loss": -0.0054, |
| "step": 35 |
| }, |
| { |
| "clip_ratio": 0.00020408157433848828, |
| "epoch": 0.0006716104659297607, |
| "grad_norm": 0.05456351861357689, |
| "kl": 0.0002956390380859375, |
| "learning_rate": 4.979215594284924e-06, |
| "loss": -0.0054, |
| "step": 36 |
| }, |
| { |
| "clip_ratio": 0.0005608744686469436, |
| "completion_length": 875.6667175292969, |
| "epoch": 0.0006902663122055874, |
| "grad_norm": 0.06957173347473145, |
| "kl": 0.00028324127197265625, |
| "learning_rate": 4.977192427388722e-06, |
| "loss": 0.0157, |
| "num_tokens": 444430.0, |
| "reward": 0.515416668727994, |
| "reward_std": 0.0217901524156332, |
| "rewards/code_reward": 0.5, |
| "rewards/format_reward": 0.15416665375232697, |
| "step": 37 |
| }, |
| { |
| "clip_ratio": 0.0005198217695578933, |
| "epoch": 0.0007089221584814141, |
| "grad_norm": 0.06720055639743805, |
| "kl": 0.0002918243408203125, |
| "learning_rate": 4.9750758119588824e-06, |
| "loss": 0.0157, |
| "step": 38 |
| }, |
| { |
| "clip_ratio": 0.0003698411746881902, |
| "epoch": 0.0007275780047572408, |
| "grad_norm": 0.06269616633653641, |
| "kl": 0.00034332275390625, |
| "learning_rate": 4.972865836804349e-06, |
| "loss": 0.0156, |
| "step": 39 |
| }, |
| { |
| "clip_ratio": 0.0008287752571050078, |
| "completion_length": 668.0416870117188, |
| "epoch": 0.0007462338510330675, |
| "grad_norm": 0.10391846299171448, |
| "kl": 0.00041675567626953125, |
| "learning_rate": 4.970562594651254e-06, |
| "loss": 0.066, |
| "num_tokens": 469319.0, |
| "reward": 0.3462119158357382, |
| "reward_std": 0.24788545817136765, |
| "rewards/code_reward": 0.32662858441472054, |
| "rewards/format_reward": 0.1958333309739828, |
| "step": 40 |
| }, |
| { |
| "clip_ratio": 0.0010252603678964078, |
| "epoch": 0.0007648896973088942, |
| "grad_norm": 0.10473919659852982, |
| "kl": 0.00048065185546875, |
| "learning_rate": 4.968166182139026e-06, |
| "loss": 0.0657, |
| "step": 41 |
| }, |
| { |
| "clip_ratio": 0.0008386686386074871, |
| "epoch": 0.0007835455435847208, |
| "grad_norm": 0.10585241764783859, |
| "kl": 0.0005102157592773438, |
| "learning_rate": 4.9656766998163306e-06, |
| "loss": 0.0657, |
| "step": 42 |
| }, |
| { |
| "clip_ratio": 0.0009505777561571449, |
| "completion_length": 773.8750305175781, |
| "epoch": 0.0008022013898605476, |
| "grad_norm": 0.08152071386575699, |
| "kl": 0.0006093978881835938, |
| "learning_rate": 4.963094252136865e-06, |
| "loss": 0.0317, |
| "num_tokens": 500552.0, |
| "reward": 0.46135416626930237, |
| "reward_std": 0.2722689062356949, |
| "rewards/code_reward": 0.44843751192092896, |
| "rewards/format_reward": 0.12916667759418488, |
| "step": 43 |
| }, |
| { |
| "clip_ratio": 0.001228984270710498, |
| "epoch": 0.0008208572361363743, |
| "grad_norm": 0.08072768896818161, |
| "kl": 0.0006775856018066406, |
| "learning_rate": 4.960418947454958e-06, |
| "loss": 0.0318, |
| "step": 44 |
| }, |
| { |
| "clip_ratio": 0.001452101394534111, |
| "epoch": 0.0008395130824122009, |
| "grad_norm": 0.07877793163061142, |
| "kl": 0.0007939338684082031, |
| "learning_rate": 4.957650898021038e-06, |
| "loss": 0.0316, |
| "step": 45 |
| }, |
| { |
| "clip_ratio": 0.0013901306956540793, |
| "completion_length": 523.6666870117188, |
| "epoch": 0.0008581689286880276, |
| "grad_norm": 0.09457489103078842, |
| "kl": 0.0012969970703125, |
| "learning_rate": 4.954790219976915e-06, |
| "loss": -0.0098, |
| "num_tokens": 523440.0, |
| "reward": 0.4345560818910599, |
| "reward_std": 0.32685674726963043, |
| "rewards/code_reward": 0.4170560836791992, |
| "rewards/format_reward": 0.17500000074505806, |
| "step": 46 |
| }, |
| { |
| "clip_ratio": 0.0014728345850016922, |
| "epoch": 0.0008768247749638543, |
| "grad_norm": 0.09064733982086182, |
| "kl": 0.00167083740234375, |
| "learning_rate": 4.95183703335091e-06, |
| "loss": -0.0102, |
| "step": 47 |
| }, |
| { |
| "clip_ratio": 0.0014083714631851763, |
| "epoch": 0.0008954806212396809, |
| "grad_norm": 0.09516453742980957, |
| "kl": 0.002044677734375, |
| "learning_rate": 4.948791462052819e-06, |
| "loss": -0.0104, |
| "step": 48 |
| }, |
| { |
| "clip_ratio": 0.0019310088246129453, |
| "completion_length": 755.7083740234375, |
| "epoch": 0.0009141364675155077, |
| "grad_norm": 0.07378407567739487, |
| "kl": 0.0006542205810546875, |
| "learning_rate": 4.945653633868716e-06, |
| "loss": 0.0031, |
| "num_tokens": 552221.0, |
| "reward": 0.5968180000782013, |
| "reward_std": 0.36423343420028687, |
| "rewards/code_reward": 0.5947346538305283, |
| "rewards/format_reward": 0.02083333395421505, |
| "step": 49 |
| }, |
| { |
| "clip_ratio": 0.0012188139371573925, |
| "epoch": 0.0009327923137913344, |
| "grad_norm": 0.07792558521032333, |
| "kl": 0.0006256103515625, |
| "learning_rate": 4.942423680455584e-06, |
| "loss": 0.0033, |
| "step": 50 |
| }, |
| { |
| "clip_ratio": 0.0013152067258488387, |
| "epoch": 0.000951448160067161, |
| "grad_norm": 0.07279552519321442, |
| "kl": 0.000690460205078125, |
| "learning_rate": 4.939101737335802e-06, |
| "loss": 0.003, |
| "step": 51 |
| }, |
| { |
| "clip_ratio": 0.0012177950120531023, |
| "completion_length": 1433.25, |
| "epoch": 0.0009701040063429877, |
| "grad_norm": 0.0953638032078743, |
| "kl": 0.0007143020629882812, |
| "learning_rate": 4.935687943891447e-06, |
| "loss": -0.0997, |
| "num_tokens": 610451.0, |
| "reward": 0.01875000074505806, |
| "reward_std": 0.020104273222386837, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.1874999888241291, |
| "step": 52 |
| }, |
| { |
| "clip_ratio": 0.0008826107368804514, |
| "epoch": 0.0009887598526188144, |
| "grad_norm": 0.07305457442998886, |
| "kl": 0.0007915496826171875, |
| "learning_rate": 4.932182443358458e-06, |
| "loss": -0.0999, |
| "step": 53 |
| }, |
| { |
| "clip_ratio": 0.0009481910965405405, |
| "epoch": 0.0010074156988946412, |
| "grad_norm": 0.1027429923415184, |
| "kl": 0.00098419189453125, |
| "learning_rate": 4.928585382820616e-06, |
| "loss": -0.0996, |
| "step": 54 |
| }, |
| { |
| "clip_ratio": 0.0013086027465760708, |
| "completion_length": 899.5416870117188, |
| "epoch": 0.0010260715451704677, |
| "grad_norm": 0.07541598379611969, |
| "kl": 0.00274658203125, |
| "learning_rate": 4.924896913203376e-06, |
| "loss": -0.0237, |
| "num_tokens": 648072.0, |
| "reward": 0.2216584151610732, |
| "reward_std": 0.22969772201031446, |
| "rewards/code_reward": 0.20915842056274414, |
| "rewards/format_reward": 0.125, |
| "step": 55 |
| }, |
| { |
| "clip_ratio": 0.0009090554085560143, |
| "epoch": 0.0010447273914462945, |
| "grad_norm": 0.07311614602804184, |
| "kl": 0.003662109375, |
| "learning_rate": 4.921117189267535e-06, |
| "loss": -0.0236, |
| "step": 56 |
| }, |
| { |
| "clip_ratio": 0.0012668125564232469, |
| "epoch": 0.0010633832377221213, |
| "grad_norm": 0.07483425736427307, |
| "kl": 0.00545501708984375, |
| "learning_rate": 4.917246369602742e-06, |
| "loss": -0.0237, |
| "step": 57 |
| }, |
| { |
| "clip_ratio": 0.0015675598988309503, |
| "completion_length": 691.2916870117188, |
| "epoch": 0.0010820390839979478, |
| "grad_norm": 0.09265889972448349, |
| "kl": 0.0068817138671875, |
| "learning_rate": 4.9132846166208355e-06, |
| "loss": -0.0068, |
| "num_tokens": 678547.0, |
| "reward": 0.2671003444120288, |
| "reward_std": 0.2649538954719901, |
| "rewards/code_reward": 0.2508503496646881, |
| "rewards/format_reward": 0.16249999776482582, |
| "step": 58 |
| }, |
| { |
| "clip_ratio": 0.0011916961520910263, |
| "epoch": 0.0011006949302737746, |
| "grad_norm": 0.07879021763801575, |
| "kl": 0.010772705078125, |
| "learning_rate": 4.9092320965490365e-06, |
| "loss": -0.0072, |
| "step": 59 |
| }, |
| { |
| "clip_ratio": 0.0013531986624002457, |
| "epoch": 0.0011193507765496012, |
| "grad_norm": 0.08185919374227524, |
| "kl": 0.013824462890625, |
| "learning_rate": 4.905088979422971e-06, |
| "loss": -0.0072, |
| "step": 60 |
| }, |
| { |
| "clip_ratio": 0.001054943015333265, |
| "completion_length": 816.0833740234375, |
| "epoch": 0.001138006622825428, |
| "grad_norm": 0.09033277630805969, |
| "kl": 0.011138916015625, |
| "learning_rate": 4.900855439079536e-06, |
| "loss": 0.0434, |
| "num_tokens": 708969.0, |
| "reward": 0.13236112147569656, |
| "reward_std": 0.17511780560016632, |
| "rewards/code_reward": 0.1111111119389534, |
| "rewards/format_reward": 0.21250000223517418, |
| "step": 61 |
| }, |
| { |
| "clip_ratio": 0.001191994990222156, |
| "epoch": 0.0011566624691012545, |
| "grad_norm": 0.11616651713848114, |
| "kl": 0.0153656005859375, |
| "learning_rate": 4.8965316531496055e-06, |
| "loss": 0.0433, |
| "step": 62 |
| }, |
| { |
| "clip_ratio": 0.0013809394440613687, |
| "epoch": 0.0011753183153770813, |
| "grad_norm": 0.08028258383274078, |
| "kl": 0.0198516845703125, |
| "learning_rate": 4.892117803050578e-06, |
| "loss": 0.0432, |
| "step": 63 |
| }, |
| { |
| "clip_ratio": 0.0014273470733314753, |
| "completion_length": 478.12501525878906, |
| "epoch": 0.001193974161652908, |
| "grad_norm": 0.12240626662969589, |
| "kl": 0.02911376953125, |
| "learning_rate": 4.887614073978761e-06, |
| "loss": 0.0251, |
| "num_tokens": 727836.0, |
| "reward": 0.5829170048236847, |
| "reward_std": 0.42754843831062317, |
| "rewards/code_reward": 0.5400003343820572, |
| "rewards/format_reward": 0.42916665971279144, |
| "step": 64 |
| }, |
| { |
| "clip_ratio": 0.001149984309449792, |
| "epoch": 0.0012126300079287346, |
| "grad_norm": 0.11828020960092545, |
| "kl": 0.0277099609375, |
| "learning_rate": 4.883020654901609e-06, |
| "loss": 0.0248, |
| "step": 65 |
| }, |
| { |
| "clip_ratio": 0.001388654694892466, |
| "epoch": 0.0012312858542045614, |
| "grad_norm": 0.15795664489269257, |
| "kl": 0.02325439453125, |
| "learning_rate": 4.878337738549785e-06, |
| "loss": 0.0245, |
| "step": 66 |
| }, |
| { |
| "clip_ratio": 0.0012379828258417547, |
| "completion_length": 1035.7083740234375, |
| "epoch": 0.001249941700480388, |
| "grad_norm": 0.07949040085077286, |
| "kl": 0.0186614990234375, |
| "learning_rate": 4.873565521409082e-06, |
| "loss": 0.0077, |
| "num_tokens": 767177.0, |
| "reward": 0.07208333350718021, |
| "reward_std": 0.15239802561700344, |
| "rewards/code_reward": 0.0416666679084301, |
| "rewards/format_reward": 0.30416667461395264, |
| "step": 67 |
| }, |
| { |
| "clip_ratio": 0.0013320732396095991, |
| "epoch": 0.0012685975467562147, |
| "grad_norm": 0.0713808611035347, |
| "kl": 0.014373779296875, |
| "learning_rate": 4.868704203712173e-06, |
| "loss": 0.0076, |
| "step": 68 |
| }, |
| { |
| "clip_ratio": 0.0018569266539998353, |
| "epoch": 0.0012872533930320415, |
| "grad_norm": 0.08412781357765198, |
| "kl": 0.01141357421875, |
| "learning_rate": 4.86375398943021e-06, |
| "loss": 0.0076, |
| "step": 69 |
| }, |
| { |
| "clip_ratio": 0.0012622788199223578, |
| "completion_length": 733.0416870117188, |
| "epoch": 0.001305909239307868, |
| "grad_norm": 0.08328709751367569, |
| "kl": 0.01409912109375, |
| "learning_rate": 4.858715086264274e-06, |
| "loss": 0.0612, |
| "num_tokens": 795078.0, |
| "reward": 0.9353921413421631, |
| "reward_std": 0.17414550855755806, |
| "rewards/code_reward": 0.9053921699523926, |
| "rewards/format_reward": 0.30000001192092896, |
| "step": 70 |
| }, |
| { |
| "clip_ratio": 0.001602432457730174, |
| "epoch": 0.0013245650855836948, |
| "grad_norm": 0.08351202309131622, |
| "kl": 0.015625, |
| "learning_rate": 4.853587705636646e-06, |
| "loss": 0.0614, |
| "step": 71 |
| }, |
| { |
| "clip_ratio": 0.0013681039854418486, |
| "epoch": 0.0013432209318595214, |
| "grad_norm": 0.08263003826141357, |
| "kl": 0.01739501953125, |
| "learning_rate": 4.84837206268195e-06, |
| "loss": 0.0613, |
| "step": 72 |
| }, |
| { |
| "clip_ratio": 0.0006202208460308611, |
| "completion_length": 748.2083435058594, |
| "epoch": 0.0013618767781353482, |
| "grad_norm": 0.08475489169359207, |
| "kl": 0.01568603515625, |
| "learning_rate": 4.8430683762381195e-06, |
| "loss": 0.0156, |
| "num_tokens": 823439.0, |
| "reward": 0.46898147463798523, |
| "reward_std": 0.3545081913471222, |
| "rewards/code_reward": 0.4293981343507767, |
| "rewards/format_reward": 0.3958333432674408, |
| "step": 73 |
| }, |
| { |
| "clip_ratio": 0.000859813007991761, |
| "epoch": 0.0013805326244111747, |
| "grad_norm": 0.08259416371583939, |
| "kl": 0.016510009765625, |
| "learning_rate": 4.837676868837213e-06, |
| "loss": 0.0154, |
| "step": 74 |
| }, |
| { |
| "clip_ratio": 0.00038046142435632646, |
| "epoch": 0.0013991884706870015, |
| "grad_norm": 0.08513620495796204, |
| "kl": 0.01690673828125, |
| "learning_rate": 4.832197766696085e-06, |
| "loss": 0.0151, |
| "step": 75 |
| }, |
| { |
| "clip_ratio": 0.0009400486887898296, |
| "completion_length": 746.0833435058594, |
| "epoch": 0.0014178443169628283, |
| "grad_norm": 0.08578525483608246, |
| "kl": 0.009674072265625, |
| "learning_rate": 4.826631299706887e-06, |
| "loss": -0.0026, |
| "num_tokens": 857425.0, |
| "reward": 0.7170831263065338, |
| "reward_std": 0.29470987617969513, |
| "rewards/code_reward": 0.6824997961521149, |
| "rewards/format_reward": 0.34583334624767303, |
| "step": 76 |
| }, |
| { |
| "clip_ratio": 0.000981206467258744, |
| "epoch": 0.0014365001632386549, |
| "grad_norm": 0.08752106130123138, |
| "kl": 0.0097503662109375, |
| "learning_rate": 4.820977701427424e-06, |
| "loss": -0.0028, |
| "step": 77 |
| }, |
| { |
| "clip_ratio": 0.001091863785404712, |
| "epoch": 0.0014551560095144816, |
| "grad_norm": 0.08650875091552734, |
| "kl": 0.010467529296875, |
| "learning_rate": 4.81523720907136e-06, |
| "loss": -0.0027, |
| "step": 78 |
| }, |
| { |
| "clip_ratio": 0.0008625364207546227, |
| "completion_length": 843.0417175292969, |
| "epoch": 0.0014738118557903082, |
| "grad_norm": 0.08371932804584503, |
| "kl": 0.01336669921875, |
| "learning_rate": 4.809410063498254e-06, |
| "loss": -0.0214, |
| "num_tokens": 890714.0, |
| "reward": 0.28875000309199095, |
| "reward_std": 0.26341581624001265, |
| "rewards/code_reward": 0.25, |
| "rewards/format_reward": 0.38750001788139343, |
| "step": 79 |
| }, |
| { |
| "clip_ratio": 0.0010699788690544665, |
| "epoch": 0.001492467702066135, |
| "grad_norm": 0.08200695365667343, |
| "kl": 0.0144500732421875, |
| "learning_rate": 4.8034965092034656e-06, |
| "loss": -0.0214, |
| "step": 80 |
| }, |
| { |
| "clip_ratio": 0.001069951627869159, |
| "epoch": 0.0015111235483419617, |
| "grad_norm": 0.08111701160669327, |
| "kl": 0.013519287109375, |
| "learning_rate": 4.797496794307889e-06, |
| "loss": -0.0216, |
| "step": 81 |
| }, |
| { |
| "clip_ratio": 0.0005499664548551664, |
| "completion_length": 688.2083435058594, |
| "epoch": 0.0015297793946177883, |
| "grad_norm": 0.09052547812461853, |
| "kl": 0.0286865234375, |
| "learning_rate": 4.791411170547545e-06, |
| "loss": 0.0283, |
| "num_tokens": 923131.0, |
| "reward": 0.6477343738079071, |
| "reward_std": 0.3908188194036484, |
| "rewards/code_reward": 0.6152343600988388, |
| "rewards/format_reward": 0.32500000298023224, |
| "step": 82 |
| }, |
| { |
| "clip_ratio": 0.0005515173252206296, |
| "epoch": 0.001548435240893615, |
| "grad_norm": 0.08914868533611298, |
| "kl": 0.0316162109375, |
| "learning_rate": 4.785239893263017e-06, |
| "loss": 0.0282, |
| "step": 83 |
| }, |
| { |
| "clip_ratio": 0.0004736810806207359, |
| "epoch": 0.0015670910871694416, |
| "grad_norm": 0.08998391777276993, |
| "kl": 0.0325927734375, |
| "learning_rate": 4.778983221388742e-06, |
| "loss": 0.0282, |
| "step": 84 |
| }, |
| { |
| "clip_ratio": 0.0016970738070085645, |
| "completion_length": 1049.75, |
| "epoch": 0.0015857469334452684, |
| "grad_norm": 0.07974597066640854, |
| "kl": 0.0087890625, |
| "learning_rate": 4.77264141744214e-06, |
| "loss": -0.0355, |
| "num_tokens": 968365.0, |
| "reward": 0.19875000324100256, |
| "reward_std": 0.2349306456744671, |
| "rewards/code_reward": 0.1666666716337204, |
| "rewards/format_reward": 0.32083334028720856, |
| "step": 85 |
| }, |
| { |
| "clip_ratio": 0.001736409030854702, |
| "epoch": 0.0016044027797210952, |
| "grad_norm": 0.0792006105184555, |
| "kl": 0.009368896484375, |
| "learning_rate": 4.766214747512603e-06, |
| "loss": -0.0355, |
| "step": 86 |
| }, |
| { |
| "clip_ratio": 0.0015154886059463024, |
| "epoch": 0.0016230586259969218, |
| "grad_norm": 0.10796553641557693, |
| "kl": 0.009796142578125, |
| "learning_rate": 4.759703481250331e-06, |
| "loss": -0.036, |
| "step": 87 |
| }, |
| { |
| "clip_ratio": 0.0013903837534599006, |
| "completion_length": 792.4583435058594, |
| "epoch": 0.0016417144722727485, |
| "grad_norm": 0.19102805852890015, |
| "kl": 0.0703125, |
| "learning_rate": 4.753107891855015e-06, |
| "loss": 0.0435, |
| "num_tokens": 999360.0, |
| "reward": 0.3778717517852783, |
| "reward_std": 0.32771627604961395, |
| "rewards/code_reward": 0.3512050434947014, |
| "rewards/format_reward": 0.2666666656732559, |
| "step": 88 |
| }, |
| { |
| "clip_ratio": 0.0016154100885614753, |
| "epoch": 0.001660370318548575, |
| "grad_norm": 0.09603425115346909, |
| "kl": 0.0440673828125, |
| "learning_rate": 4.746428256064375e-06, |
| "loss": 0.0434, |
| "step": 89 |
| }, |
| { |
| "clip_ratio": 0.0018210893031209707, |
| "epoch": 0.0016790261648244019, |
| "grad_norm": 0.09434056282043457, |
| "kl": 0.0350341796875, |
| "learning_rate": 4.7396648541425534e-06, |
| "loss": 0.0433, |
| "step": 90 |
| }, |
| { |
| "clip_ratio": 0.0014776080497540534, |
| "completion_length": 766.6666870117188, |
| "epoch": 0.0016976820111002284, |
| "grad_norm": 0.08751969784498215, |
| "kl": 0.012847900390625, |
| "learning_rate": 4.732817969868348e-06, |
| "loss": -0.0055, |
| "num_tokens": 1032760.0, |
| "reward": 0.503094419836998, |
| "reward_std": 0.4136483669281006, |
| "rewards/code_reward": 0.4914277046918869, |
| "rewards/format_reward": 0.11666666716337204, |
| "step": 91 |
| }, |
| { |
| "clip_ratio": 0.0013046148815192282, |
| "epoch": 0.0017163378573760552, |
| "grad_norm": 0.08883315324783325, |
| "kl": 0.010772705078125, |
| "learning_rate": 4.7258878905233095e-06, |
| "loss": -0.0056, |
| "step": 92 |
| }, |
| { |
| "clip_ratio": 0.000994603120489046, |
| "epoch": 0.001734993703651882, |
| "grad_norm": 0.10364013910293579, |
| "kl": 0.010162353515625, |
| "learning_rate": 4.718874906879688e-06, |
| "loss": -0.0057, |
| "step": 93 |
| }, |
| { |
| "clip_ratio": 0.0011711710831150413, |
| "completion_length": 1132.4583740234375, |
| "epoch": 0.0017536495499277085, |
| "grad_norm": 0.07417519390583038, |
| "kl": 0.0053558349609375, |
| "learning_rate": 4.711779313188231e-06, |
| "loss": 0.0018, |
| "num_tokens": 1080639.0, |
| "reward": 0.04583333432674408, |
| "reward_std": 0.026794558390975, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.4583333432674408, |
| "step": 94 |
| }, |
| { |
| "clip_ratio": 0.00107975909486413, |
| "epoch": 0.0017723053962035353, |
| "grad_norm": 0.07358817011117935, |
| "kl": 0.005706787109375, |
| "learning_rate": 4.70460140716584e-06, |
| "loss": 0.0019, |
| "step": 95 |
| }, |
| { |
| "clip_ratio": 0.0008912903140299022, |
| "epoch": 0.0017909612424793619, |
| "grad_norm": 0.08615133166313171, |
| "kl": 0.0057830810546875, |
| "learning_rate": 4.697341489983076e-06, |
| "loss": 0.0019, |
| "step": 96 |
| }, |
| { |
| "clip_ratio": 0.002020051470026374, |
| "completion_length": 727.7500305175781, |
| "epoch": 0.0018096170887551887, |
| "grad_norm": 0.0899001881480217, |
| "kl": 0.01202392578125, |
| "learning_rate": 4.6899998662515215e-06, |
| "loss": -0.001, |
| "num_tokens": 1111077.0, |
| "reward": 0.29144802689552307, |
| "reward_std": 0.33243465423583984, |
| "rewards/code_reward": 0.2614480257034302, |
| "rewards/format_reward": 0.30000001192092896, |
| "step": 97 |
| }, |
| { |
| "clip_ratio": 0.0011579456622712314, |
| "epoch": 0.0018282729350310154, |
| "grad_norm": 0.09250804036855698, |
| "kl": 0.013916015625, |
| "learning_rate": 4.682576844011007e-06, |
| "loss": -0.0011, |
| "step": 98 |
| }, |
| { |
| "clip_ratio": 0.0017355763702653348, |
| "epoch": 0.001846928781306842, |
| "grad_norm": 0.0963953509926796, |
| "kl": 0.015106201171875, |
| "learning_rate": 4.675072734716678e-06, |
| "loss": -0.0013, |
| "step": 99 |
| }, |
| { |
| "clip_ratio": 0.0009290422021877021, |
| "completion_length": 1036.0833740234375, |
| "epoch": 0.0018655846275826688, |
| "grad_norm": 0.09167575091123581, |
| "kl": 0.015167236328125, |
| "learning_rate": 4.667487853225931e-06, |
| "loss": -0.0103, |
| "num_tokens": 1155731.0, |
| "reward": 0.11254538409411907, |
| "reward_std": 0.16347683407366276, |
| "rewards/code_reward": 0.07879538089036942, |
| "rewards/format_reward": 0.3374999910593033, |
| "step": 100 |
| }, |
| { |
| "clip_ratio": 0.0006831295904703438, |
| "epoch": 0.0018842404738584953, |
| "grad_norm": 0.08466409146785736, |
| "kl": 0.016815185546875, |
| "learning_rate": 4.659822517785203e-06, |
| "loss": -0.0103, |
| "step": 101 |
| }, |
| { |
| "clip_ratio": 0.000901765946764499, |
| "epoch": 0.001902896320134322, |
| "grad_norm": 0.09180773049592972, |
| "kl": 0.017242431640625, |
| "learning_rate": 4.6520770500166165e-06, |
| "loss": -0.0105, |
| "step": 102 |
| }, |
| { |
| "clip_ratio": 0.0008878828666638583, |
| "completion_length": 771.9583435058594, |
| "epoch": 0.0019215521664101489, |
| "grad_norm": 0.11708436161279678, |
| "kl": 0.029144287109375, |
| "learning_rate": 4.644251774904487e-06, |
| "loss": 0.0148, |
| "num_tokens": 1188838.0, |
| "reward": 0.03181159868836403, |
| "reward_std": 0.02184708882123232, |
| "rewards/code_reward": 0.0018115942366421223, |
| "rewards/format_reward": 0.29999998211860657, |
| "step": 103 |
| }, |
| { |
| "clip_ratio": 0.0006120741018094122, |
| "epoch": 0.0019402080126859754, |
| "grad_norm": 0.10810239613056183, |
| "kl": 0.03369140625, |
| "learning_rate": 4.636347020781684e-06, |
| "loss": 0.0148, |
| "step": 104 |
| }, |
| { |
| "clip_ratio": 0.0008727995736990124, |
| "epoch": 0.0019588638589618022, |
| "grad_norm": 0.11280914396047592, |
| "kl": 0.0435791015625, |
| "learning_rate": 4.6283631193158605e-06, |
| "loss": 0.0145, |
| "step": 105 |
| }, |
| { |
| "clip_ratio": 0.0009058912692125887, |
| "completion_length": 914.5417175292969, |
| "epoch": 0.0019775197052376288, |
| "grad_norm": 0.07587506622076035, |
| "kl": 0.0174560546875, |
| "learning_rate": 4.620300405495532e-06, |
| "loss": 0.0523, |
| "num_tokens": 1222319.0, |
| "reward": 0.15707794576883316, |
| "reward_std": 0.2763161137700081, |
| "rewards/code_reward": 0.12624460831284523, |
| "rewards/format_reward": 0.3083333447575569, |
| "step": 106 |
| }, |
| { |
| "clip_ratio": 0.0009446362673770636, |
| "epoch": 0.0019961755515134553, |
| "grad_norm": 0.07568401843309402, |
| "kl": 0.01898193359375, |
| "learning_rate": 4.612159217616022e-06, |
| "loss": 0.0523, |
| "step": 107 |
| }, |
| { |
| "clip_ratio": 0.0011543052969500422, |
| "epoch": 0.0020148313977892823, |
| "grad_norm": 0.07626564055681229, |
| "kl": 0.02130126953125, |
| "learning_rate": 4.603939897265268e-06, |
| "loss": 0.0524, |
| "step": 108 |
| }, |
| { |
| "clip_ratio": 0.0008169686479959637, |
| "completion_length": 897.7500305175781, |
| "epoch": 0.002033487244065109, |
| "grad_norm": 0.08059542626142502, |
| "kl": 0.02227783203125, |
| "learning_rate": 4.595642789309492e-06, |
| "loss": 0.0107, |
| "num_tokens": 1259393.0, |
| "reward": 0.4133833199739456, |
| "reward_std": 0.4549361765384674, |
| "rewards/code_reward": 0.3917166441679001, |
| "rewards/format_reward": 0.21666666865348816, |
| "step": 109 |
| }, |
| { |
| "clip_ratio": 0.0010060299537144601, |
| "epoch": 0.0020521430903409354, |
| "grad_norm": 0.08048728853464127, |
| "kl": 0.0216064453125, |
| "learning_rate": 4.587268241878724e-06, |
| "loss": 0.0106, |
| "step": 110 |
| }, |
| { |
| "clip_ratio": 0.0012289943406358361, |
| "epoch": 0.0020707989366167624, |
| "grad_norm": 0.07647143304347992, |
| "kl": 0.019775390625, |
| "learning_rate": 4.578816606352205e-06, |
| "loss": 0.0107, |
| "step": 111 |
| }, |
| { |
| "clip_ratio": 0.0013880816695746034, |
| "completion_length": 1007.6667175292969, |
| "epoch": 0.002089454782892589, |
| "grad_norm": 0.07475873827934265, |
| "kl": 0.01617431640625, |
| "learning_rate": 4.570288237343632e-06, |
| "loss": 0.0233, |
| "num_tokens": 1298889.0, |
| "reward": 0.16264833509922028, |
| "reward_std": 0.22893914580345154, |
| "rewards/code_reward": 0.14723165333271027, |
| "rewards/format_reward": 0.15416666120290756, |
| "step": 112 |
| }, |
| { |
| "clip_ratio": 0.0013335386465769261, |
| "epoch": 0.0021081106291684156, |
| "grad_norm": 0.07221872359514236, |
| "kl": 0.01495361328125, |
| "learning_rate": 4.561683492686289e-06, |
| "loss": 0.0231, |
| "step": 113 |
| }, |
| { |
| "clip_ratio": 0.0011128429905511439, |
| "epoch": 0.0021267664754442426, |
| "grad_norm": 0.11232966184616089, |
| "kl": 0.013580322265625, |
| "learning_rate": 4.5530027334180285e-06, |
| "loss": 0.0232, |
| "step": 114 |
| }, |
| { |
| "clip_ratio": 0.0007180829998105764, |
| "completion_length": 760.375, |
| "epoch": 0.002145422321720069, |
| "grad_norm": 0.08500614017248154, |
| "kl": 0.0218505859375, |
| "learning_rate": 4.544246323766122e-06, |
| "loss": 0.0176, |
| "num_tokens": 1329738.0, |
| "reward": 0.42050473392009735, |
| "reward_std": 0.4651963710784912, |
| "rewards/code_reward": 0.37967143952846527, |
| "rewards/format_reward": 0.40833334624767303, |
| "step": 115 |
| }, |
| { |
| "clip_ratio": 0.0013084964302834123, |
| "epoch": 0.0021640781679958957, |
| "grad_norm": 0.09566804766654968, |
| "kl": 0.01910400390625, |
| "learning_rate": 4.535414631131983e-06, |
| "loss": 0.0174, |
| "step": 116 |
| }, |
| { |
| "clip_ratio": 0.0012152747949585319, |
| "epoch": 0.0021827340142717222, |
| "grad_norm": 0.09403965622186661, |
| "kl": 0.01824951171875, |
| "learning_rate": 4.526508026075746e-06, |
| "loss": 0.0175, |
| "step": 117 |
| }, |
| { |
| "clip_ratio": 0.00110370441689156, |
| "completion_length": 913.9583740234375, |
| "epoch": 0.0022013898605475492, |
| "grad_norm": 0.09698626399040222, |
| "kl": 0.022411346435546875, |
| "learning_rate": 4.517526882300721e-06, |
| "loss": -0.0229, |
| "num_tokens": 1372757.0, |
| "reward": 0.3174999728798866, |
| "reward_std": 0.27699745260179043, |
| "rewards/code_reward": 0.2916666567325592, |
| "rewards/format_reward": 0.25833334028720856, |
| "step": 118 |
| }, |
| { |
| "clip_ratio": 0.0013115269830450416, |
| "epoch": 0.002220045706823376, |
| "grad_norm": 0.09875179827213287, |
| "kl": 0.019748687744140625, |
| "learning_rate": 4.508471576637713e-06, |
| "loss": -0.0228, |
| "step": 119 |
| }, |
| { |
| "clip_ratio": 0.0011575823882594705, |
| "epoch": 0.0022387015530992023, |
| "grad_norm": 0.10077529400587082, |
| "kl": 0.01830291748046875, |
| "learning_rate": 4.499342489029211e-06, |
| "loss": -0.0231, |
| "step": 120 |
| }, |
| { |
| "clip_ratio": 0.0006889864162076265, |
| "completion_length": 498.3750305175781, |
| "epoch": 0.0022573573993750293, |
| "grad_norm": 0.11392726749181747, |
| "kl": 0.025848388671875, |
| "learning_rate": 4.490140002513449e-06, |
| "loss": 0.027, |
| "num_tokens": 1393730.0, |
| "reward": 0.9718478322029114, |
| "reward_std": 0.2110026255249977, |
| "rewards/code_reward": 0.9293478429317474, |
| "rewards/format_reward": 0.42499999701976776, |
| "step": 121 |
| }, |
| { |
| "clip_ratio": 0.0005561001598834991, |
| "epoch": 0.002276013245650856, |
| "grad_norm": 0.10952252149581909, |
| "kl": 0.025726318359375, |
| "learning_rate": 4.48086450320833e-06, |
| "loss": 0.0267, |
| "step": 122 |
| }, |
| { |
| "clip_ratio": 0.0006027935305610299, |
| "epoch": 0.0022946690919266825, |
| "grad_norm": 0.1085001602768898, |
| "kl": 0.0263671875, |
| "learning_rate": 4.4715163802952266e-06, |
| "loss": 0.0269, |
| "step": 123 |
| }, |
| { |
| "clip_ratio": 0.001191554736578837, |
| "completion_length": 732.75, |
| "epoch": 0.002313324938202509, |
| "grad_norm": 0.08821776509284973, |
| "kl": 0.031219482421875, |
| "learning_rate": 4.462096026002655e-06, |
| "loss": 0.0062, |
| "num_tokens": 1422824.0, |
| "reward": 0.03973765671253204, |
| "reward_std": 0.03781243972480297, |
| "rewards/code_reward": 0.00848765391856432, |
| "rewards/format_reward": 0.3125000074505806, |
| "step": 124 |
| }, |
| { |
| "clip_ratio": 0.001063963572960347, |
| "epoch": 0.002331980784478336, |
| "grad_norm": 0.09551744163036346, |
| "kl": 0.0391845703125, |
| "learning_rate": 4.4526038355898144e-06, |
| "loss": 0.0061, |
| "step": 125 |
| }, |
| { |
| "clip_ratio": 0.0010462789214216173, |
| "epoch": 0.0023506366307541626, |
| "grad_norm": 0.08740315586328506, |
| "kl": 0.03314208984375, |
| "learning_rate": 4.4430402073300035e-06, |
| "loss": 0.0058, |
| "step": 126 |
| }, |
| { |
| "clip_ratio": 0.0009691159357316792, |
| "completion_length": 1037.7500610351562, |
| "epoch": 0.002369292477029989, |
| "grad_norm": 0.07823047786951065, |
| "kl": 0.0126953125, |
| "learning_rate": 4.433405542493909e-06, |
| "loss": -0.0015, |
| "num_tokens": 1459130.0, |
| "reward": 0.6776870489120483, |
| "reward_std": 0.10291638784110546, |
| "rewards/code_reward": 0.6343537420034409, |
| "rewards/format_reward": 0.4333333447575569, |
| "step": 127 |
| }, |
| { |
| "clip_ratio": 0.001145587011706084, |
| "epoch": 0.002387948323305816, |
| "grad_norm": 0.07594677060842514, |
| "kl": 0.012725830078125, |
| "learning_rate": 4.4237002453327734e-06, |
| "loss": -0.0017, |
| "step": 128 |
| }, |
| { |
| "clip_ratio": 0.0008910182514227927, |
| "epoch": 0.0024066041695816427, |
| "grad_norm": 0.07640928775072098, |
| "kl": 0.0137939453125, |
| "learning_rate": 4.4139247230614245e-06, |
| "loss": -0.0019, |
| "step": 129 |
| }, |
| { |
| "clip_ratio": 0.0011884173727594316, |
| "completion_length": 1120.7916870117188, |
| "epoch": 0.0024252600158574692, |
| "grad_norm": 0.07030566036701202, |
| "kl": 0.014190673828125, |
| "learning_rate": 4.404079385841201e-06, |
| "loss": 0.0265, |
| "num_tokens": 1506729.0, |
| "reward": 0.7509399354457855, |
| "reward_std": 0.3766524940729141, |
| "rewards/code_reward": 0.7180232405662537, |
| "rewards/format_reward": 0.3291666656732559, |
| "step": 130 |
| }, |
| { |
| "clip_ratio": 0.0011372754233889282, |
| "epoch": 0.002443915862133296, |
| "grad_norm": 0.07563882321119308, |
| "kl": 0.015655517578125, |
| "learning_rate": 4.394164646762734e-06, |
| "loss": 0.0264, |
| "step": 131 |
| }, |
| { |
| "clip_ratio": 0.0009669371065683663, |
| "epoch": 0.002462571708409123, |
| "grad_norm": 0.07320787757635117, |
| "kl": 0.014190673828125, |
| "learning_rate": 4.384180921828618e-06, |
| "loss": 0.0263, |
| "step": 132 |
| }, |
| { |
| "clip_ratio": 0.0012862688163295388, |
| "completion_length": 733.4166870117188, |
| "epoch": 0.0024812275546849494, |
| "grad_norm": 0.09955161809921265, |
| "kl": 0.013763427734375, |
| "learning_rate": 4.374128629935955e-06, |
| "loss": 0.0072, |
| "num_tokens": 1537483.0, |
| "reward": 0.5765306204557419, |
| "reward_std": 0.14132879488170147, |
| "rewards/code_reward": 0.5348639450967312, |
| "rewards/format_reward": 0.4166666716337204, |
| "step": 133 |
| }, |
| { |
| "clip_ratio": 0.0008828279678709805, |
| "epoch": 0.002499883400960776, |
| "grad_norm": 0.10158013552427292, |
| "kl": 0.014129638671875, |
| "learning_rate": 4.364008192858781e-06, |
| "loss": 0.0072, |
| "step": 134 |
| }, |
| { |
| "clip_ratio": 0.0012895396212115884, |
| "epoch": 0.002518539247236603, |
| "grad_norm": 0.08935025334358215, |
| "kl": 0.01409912109375, |
| "learning_rate": 4.353820035230366e-06, |
| "loss": 0.007, |
| "step": 135 |
| }, |
| { |
| "clip_ratio": 0.000699867494404316, |
| "completion_length": 921.0833435058594, |
| "epoch": 0.0025371950935124295, |
| "grad_norm": 0.07964594662189484, |
| "kl": 0.00946044921875, |
| "learning_rate": 4.3435645845254e-06, |
| "loss": 0.0365, |
| "num_tokens": 1577169.0, |
| "reward": 0.04625000339001417, |
| "reward_std": 0.0623414209112525, |
| "rewards/code_reward": 0.02083333395421505, |
| "rewards/format_reward": 0.2541666626930237, |
| "step": 136 |
| }, |
| { |
| "clip_ratio": 0.0003799215774051845, |
| "epoch": 0.002555850939788256, |
| "grad_norm": 0.08275993913412094, |
| "kl": 0.010284423828125, |
| "learning_rate": 4.333242271042054e-06, |
| "loss": 0.0363, |
| "step": 137 |
| }, |
| { |
| "clip_ratio": 0.00039069188642315567, |
| "epoch": 0.002574506786064083, |
| "grad_norm": 0.08476851880550385, |
| "kl": 0.0107574462890625, |
| "learning_rate": 4.32285352788393e-06, |
| "loss": 0.0363, |
| "step": 138 |
| }, |
| { |
| "clip_ratio": 0.0006457642884925008, |
| "completion_length": 873.1250305175781, |
| "epoch": 0.0025931626323399096, |
| "grad_norm": 0.11129351705312729, |
| "kl": 0.015472412109375, |
| "learning_rate": 4.312398790941882e-06, |
| "loss": 0.0132, |
| "num_tokens": 1613100.0, |
| "reward": 0.2220833506435156, |
| "reward_std": 0.12337806262075901, |
| "rewards/code_reward": 0.17291666567325592, |
| "rewards/format_reward": 0.49166665971279144, |
| "step": 139 |
| }, |
| { |
| "clip_ratio": 0.0009015932446345687, |
| "epoch": 0.002611818478615736, |
| "grad_norm": 0.09786652773618698, |
| "kl": 0.01611328125, |
| "learning_rate": 4.301878498875735e-06, |
| "loss": 0.0132, |
| "step": 140 |
| }, |
| { |
| "clip_ratio": 0.000771370017901063, |
| "epoch": 0.0026304743248915627, |
| "grad_norm": 0.09219111502170563, |
| "kl": 0.017333984375, |
| "learning_rate": 4.291293093095873e-06, |
| "loss": 0.0128, |
| "step": 141 |
| }, |
| { |
| "clip_ratio": 0.00134377705398947, |
| "completion_length": 758.8333740234375, |
| "epoch": 0.0026491301711673897, |
| "grad_norm": 0.09211676567792892, |
| "kl": 0.048370361328125, |
| "learning_rate": 4.280643017744723e-06, |
| "loss": 0.0087, |
| "num_tokens": 1640192.0, |
| "reward": 0.4495016485452652, |
| "reward_std": 0.13479312881827354, |
| "rewards/code_reward": 0.3999183177947998, |
| "rewards/format_reward": 0.4958333224058151, |
| "step": 142 |
| }, |
| { |
| "clip_ratio": 0.0010106293557328172, |
| "epoch": 0.0026677860174432163, |
| "grad_norm": 0.0978454202413559, |
| "kl": 0.055938720703125, |
| "learning_rate": 4.269928719678117e-06, |
| "loss": 0.0087, |
| "step": 143 |
| }, |
| { |
| "clip_ratio": 0.0008834551408654079, |
| "epoch": 0.002686441863719043, |
| "grad_norm": 0.09563044458627701, |
| "kl": 0.056365966796875, |
| "learning_rate": 4.2591506484465426e-06, |
| "loss": 0.0087, |
| "step": 144 |
| }, |
| { |
| "clip_ratio": 0.0010417406738270074, |
| "completion_length": 811.8333740234375, |
| "epoch": 0.00270509770999487, |
| "grad_norm": 0.09586314111948013, |
| "kl": 0.0595703125, |
| "learning_rate": 4.248309256276283e-06, |
| "loss": 0.0134, |
| "num_tokens": 1674328.0, |
| "reward": 0.4281481206417084, |
| "reward_std": 0.351884625852108, |
| "rewards/code_reward": 0.39814815670251846, |
| "rewards/format_reward": 0.29999999701976776, |
| "step": 145 |
| }, |
| { |
| "clip_ratio": 0.0013275437522679567, |
| "epoch": 0.0027237535562706964, |
| "grad_norm": 0.08591298758983612, |
| "kl": 0.050048828125, |
| "learning_rate": 4.23740499805044e-06, |
| "loss": 0.0134, |
| "step": 146 |
| }, |
| { |
| "clip_ratio": 0.0016137410420924425, |
| "epoch": 0.002742409402546523, |
| "grad_norm": 0.08595118671655655, |
| "kl": 0.03662109375, |
| "learning_rate": 4.22643833128985e-06, |
| "loss": 0.0129, |
| "step": 147 |
| }, |
| { |
| "clip_ratio": 0.00041602051351219416, |
| "completion_length": 819.7083740234375, |
| "epoch": 0.0027610652488223495, |
| "grad_norm": 0.09021999686956406, |
| "kl": 0.03216552734375, |
| "learning_rate": 4.215409716133885e-06, |
| "loss": 0.0092, |
| "num_tokens": 1707417.0, |
| "reward": 0.42956283316016197, |
| "reward_std": 0.21959564834833145, |
| "rewards/code_reward": 0.38414610736072063, |
| "rewards/format_reward": 0.4541666805744171, |
| "step": 148 |
| }, |
| { |
| "clip_ratio": 0.0007702489383518696, |
| "epoch": 0.0027797210950981765, |
| "grad_norm": 0.08686275035142899, |
| "kl": 0.023193359375, |
| "learning_rate": 4.204319615321151e-06, |
| "loss": 0.0092, |
| "step": 149 |
| }, |
| { |
| "clip_ratio": 0.0006736835566698574, |
| "epoch": 0.002798376941374003, |
| "grad_norm": 0.08748073130846024, |
| "kl": 0.01898193359375, |
| "learning_rate": 4.193168494170065e-06, |
| "loss": 0.009, |
| "step": 150 |
| }, |
| { |
| "clip_ratio": 0.0005513402575161308, |
| "completion_length": 924.9166870117188, |
| "epoch": 0.0028170327876498296, |
| "grad_norm": 0.08234385401010513, |
| "kl": 0.008636474609375, |
| "learning_rate": 4.181956820559339e-06, |
| "loss": -0.0217, |
| "num_tokens": 1751527.0, |
| "reward": 0.5591950714588165, |
| "reward_std": 0.18445394933223724, |
| "rewards/code_reward": 0.5187783911824226, |
| "rewards/format_reward": 0.40416665375232697, |
| "step": 151 |
| }, |
| { |
| "clip_ratio": 0.0006712005124427378, |
| "epoch": 0.0028356886339256566, |
| "grad_norm": 0.0800977274775505, |
| "kl": 0.006988525390625, |
| "learning_rate": 4.170685064908342e-06, |
| "loss": -0.0219, |
| "step": 152 |
| }, |
| { |
| "clip_ratio": 0.0005624309997074306, |
| "epoch": 0.002854344480201483, |
| "grad_norm": 0.08224301040172577, |
| "kl": 0.0066986083984375, |
| "learning_rate": 4.159353700157365e-06, |
| "loss": -0.022, |
| "step": 153 |
| }, |
| { |
| "clip_ratio": 0.00046220331569202244, |
| "completion_length": 557.3333435058594, |
| "epoch": 0.0028730003264773097, |
| "grad_norm": 0.09233321249485016, |
| "kl": 0.028167724609375, |
| "learning_rate": 4.14796320174778e-06, |
| "loss": -0.013, |
| "num_tokens": 1781007.0, |
| "reward": 0.6308333575725555, |
| "reward_std": 0.449993371963501, |
| "rewards/code_reward": 0.5833333283662796, |
| "rewards/format_reward": 0.4750000089406967, |
| "step": 154 |
| }, |
| { |
| "clip_ratio": 0.0002166997583117336, |
| "epoch": 0.0028916561727531367, |
| "grad_norm": 0.0909167230129242, |
| "kl": 0.02734375, |
| "learning_rate": 4.136514047602087e-06, |
| "loss": -0.0133, |
| "step": 155 |
| }, |
| { |
| "clip_ratio": 0.00029903331596869975, |
| "epoch": 0.0029103120190289633, |
| "grad_norm": 0.09201790392398834, |
| "kl": 0.0225372314453125, |
| "learning_rate": 4.1250067181038635e-06, |
| "loss": -0.0133, |
| "step": 156 |
| }, |
| { |
| "clip_ratio": 0.000859909545397386, |
| "completion_length": 1166.7083740234375, |
| "epoch": 0.00292896786530479, |
| "grad_norm": 0.12645356357097626, |
| "kl": 0.0063934326171875, |
| "learning_rate": 4.113441696077608e-06, |
| "loss": 0.0052, |
| "num_tokens": 1827500.0, |
| "reward": 0.04448845051229, |
| "reward_std": 0.028481798246502876, |
| "rewards/code_reward": 0.009488449431955814, |
| "rewards/format_reward": 0.3499999940395355, |
| "step": 157 |
| }, |
| { |
| "clip_ratio": 0.0006146969681140035, |
| "epoch": 0.0029476237115806164, |
| "grad_norm": 0.09026872366666794, |
| "kl": 0.00640869140625, |
| "learning_rate": 4.101819466768484e-06, |
| "loss": 0.0051, |
| "step": 158 |
| }, |
| { |
| "clip_ratio": 0.0006830913771409541, |
| "epoch": 0.0029662795578564434, |
| "grad_norm": 0.09458412230014801, |
| "kl": 0.007568359375, |
| "learning_rate": 4.0901405178219535e-06, |
| "loss": 0.005, |
| "step": 159 |
| }, |
| { |
| "clip_ratio": 0.0009271332528442144, |
| "completion_length": 902.75, |
| "epoch": 0.00298493540413227, |
| "grad_norm": 0.09097299724817276, |
| "kl": 0.0082550048828125, |
| "learning_rate": 4.078405339263326e-06, |
| "loss": 0.0325, |
| "num_tokens": 1863974.0, |
| "reward": 0.3194986581802368, |
| "reward_std": 0.23043666034936905, |
| "rewards/code_reward": 0.28074865974485874, |
| "rewards/format_reward": 0.38750001788139343, |
| "step": 160 |
| }, |
| { |
| "clip_ratio": 0.001312960172072053, |
| "epoch": 0.0030035912504080965, |
| "grad_norm": 0.08883295208215714, |
| "kl": 0.00946044921875, |
| "learning_rate": 4.06661442347719e-06, |
| "loss": 0.0322, |
| "step": 161 |
| }, |
| { |
| "clip_ratio": 0.00119069468928501, |
| "epoch": 0.0030222470966839235, |
| "grad_norm": 0.08946386724710464, |
| "kl": 0.010009765625, |
| "learning_rate": 4.054768265186758e-06, |
| "loss": 0.0322, |
| "step": 162 |
| }, |
| { |
| "clip_ratio": 0.0016622182447463274, |
| "completion_length": 831.4583435058594, |
| "epoch": 0.00304090294295975, |
| "grad_norm": 0.07739204913377762, |
| "kl": 0.0092010498046875, |
| "learning_rate": 4.0428673614331036e-06, |
| "loss": -0.0206, |
| "num_tokens": 1901017.0, |
| "reward": 0.39075271785259247, |
| "reward_std": 0.4852019101381302, |
| "rewards/code_reward": 0.36116939783096313, |
| "rewards/format_reward": 0.2958333343267441, |
| "step": 163 |
| }, |
| { |
| "clip_ratio": 0.0012105648347642273, |
| "epoch": 0.0030595587892355766, |
| "grad_norm": 0.07764855027198792, |
| "kl": 0.00982666015625, |
| "learning_rate": 4.030912211554316e-06, |
| "loss": -0.0211, |
| "step": 164 |
| }, |
| { |
| "clip_ratio": 0.001361995586194098, |
| "epoch": 0.003078214635511403, |
| "grad_norm": 0.0787842720746994, |
| "kl": 0.0101318359375, |
| "learning_rate": 4.018903317164539e-06, |
| "loss": -0.0208, |
| "step": 165 |
| }, |
| { |
| "clip_ratio": 0.00047827488742768764, |
| "completion_length": 970.7916870117188, |
| "epoch": 0.00309687048178723, |
| "grad_norm": 0.09784507006406784, |
| "kl": 0.010040283203125, |
| "learning_rate": 4.006841182132932e-06, |
| "loss": 0.0032, |
| "num_tokens": 1933292.0, |
| "reward": 0.7487499713897705, |
| "reward_std": 0.4054149240255356, |
| "rewards/code_reward": 0.7083333432674408, |
| "rewards/format_reward": 0.40416666865348816, |
| "step": 166 |
| }, |
| { |
| "clip_ratio": 0.0005554754170589149, |
| "epoch": 0.0031155263280630567, |
| "grad_norm": 0.09923358261585236, |
| "kl": 0.01019287109375, |
| "learning_rate": 3.9947263125625195e-06, |
| "loss": 0.0032, |
| "step": 167 |
| }, |
| { |
| "clip_ratio": 0.00030202302150428295, |
| "epoch": 0.0031341821743388833, |
| "grad_norm": 0.09393054246902466, |
| "kl": 0.0109100341796875, |
| "learning_rate": 3.982559216768967e-06, |
| "loss": 0.0029, |
| "step": 168 |
| }, |
| { |
| "clip_ratio": 0.0007342632743529975, |
| "completion_length": 645.7500305175781, |
| "epoch": 0.0031528380206147103, |
| "grad_norm": 0.09612008929252625, |
| "kl": 0.01934814453125, |
| "learning_rate": 3.970340405259245e-06, |
| "loss": -0.0236, |
| "num_tokens": 1956074.0, |
| "reward": 0.6863030195236206, |
| "reward_std": 0.3077765703201294, |
| "rewards/code_reward": 0.6533863097429276, |
| "rewards/format_reward": 0.3291666731238365, |
| "step": 169 |
| }, |
| { |
| "clip_ratio": 0.0007756684790365398, |
| "epoch": 0.003171493866890537, |
| "grad_norm": 0.0964791551232338, |
| "kl": 0.02117919921875, |
| "learning_rate": 3.958070390710214e-06, |
| "loss": -0.0234, |
| "step": 170 |
| }, |
| { |
| "clip_ratio": 0.0010250475606881082, |
| "epoch": 0.0031901497131663634, |
| "grad_norm": 0.0937555730342865, |
| "kl": 0.02099609375, |
| "learning_rate": 3.945749687947109e-06, |
| "loss": -0.0239, |
| "step": 171 |
| }, |
| { |
| "clip_ratio": 0.001571565808262676, |
| "completion_length": 856.1666870117188, |
| "epoch": 0.0032088055594421904, |
| "grad_norm": 0.09850820899009705, |
| "kl": 0.014801025390625, |
| "learning_rate": 3.933378813921942e-06, |
| "loss": 0.0194, |
| "num_tokens": 1987470.0, |
| "reward": 0.048796952702105045, |
| "reward_std": 0.04604018107056618, |
| "rewards/code_reward": 0.021713614463806152, |
| "rewards/format_reward": 0.2708333358168602, |
| "step": 172 |
| }, |
| { |
| "clip_ratio": 0.0017044495907612145, |
| "epoch": 0.003227461405718017, |
| "grad_norm": 0.10293813049793243, |
| "kl": 0.01483154296875, |
| "learning_rate": 3.920958287691811e-06, |
| "loss": 0.0193, |
| "step": 173 |
| }, |
| { |
| "clip_ratio": 0.0016478158067911863, |
| "epoch": 0.0032461172519938435, |
| "grad_norm": 0.09644638746976852, |
| "kl": 0.017425537109375, |
| "learning_rate": 3.908488630397121e-06, |
| "loss": 0.0189, |
| "step": 174 |
| }, |
| { |
| "clip_ratio": 0.0009133272105827928, |
| "completion_length": 797.375, |
| "epoch": 0.00326477309826967, |
| "grad_norm": 0.08966498076915741, |
| "kl": 0.018890380859375, |
| "learning_rate": 3.8959703652397175e-06, |
| "loss": 0.0182, |
| "num_tokens": 2022255.0, |
| "reward": 0.4206944704055786, |
| "reward_std": 0.16624127328395844, |
| "rewards/code_reward": 0.3861111178994179, |
| "rewards/format_reward": 0.34583333134651184, |
| "step": 175 |
| }, |
| { |
| "clip_ratio": 0.0008382981468457729, |
| "epoch": 0.003283428944545497, |
| "grad_norm": 0.09205059707164764, |
| "kl": 0.019439697265625, |
| "learning_rate": 3.883404017460935e-06, |
| "loss": 0.0181, |
| "step": 176 |
| }, |
| { |
| "clip_ratio": 0.000650079753540922, |
| "epoch": 0.0033020847908213236, |
| "grad_norm": 0.10440210998058319, |
| "kl": 0.0201416015625, |
| "learning_rate": 3.870790114319559e-06, |
| "loss": 0.0182, |
| "step": 177 |
| }, |
| { |
| "clip_ratio": 0.00037280093238223344, |
| "completion_length": 798.9166870117188, |
| "epoch": 0.00332074063709715, |
| "grad_norm": 0.09877095371484756, |
| "kl": 0.0088043212890625, |
| "learning_rate": 3.858129185069701e-06, |
| "loss": 0.001, |
| "num_tokens": 2058181.0, |
| "reward": 0.6779166460037231, |
| "reward_std": 0.4303756207227707, |
| "rewards/code_reward": 0.625, |
| "rewards/format_reward": 0.5291666686534882, |
| "step": 178 |
| }, |
| { |
| "clip_ratio": 0.0005341586947906762, |
| "epoch": 0.003339396483372977, |
| "grad_norm": 0.09430207312107086, |
| "kl": 0.0088958740234375, |
| "learning_rate": 3.845421760938597e-06, |
| "loss": 0.0011, |
| "step": 179 |
| }, |
| { |
| "clip_ratio": 0.00040165129757951945, |
| "epoch": 0.0033580523296488037, |
| "grad_norm": 0.08880569040775299, |
| "kl": 0.0091552734375, |
| "learning_rate": 3.832668375104312e-06, |
| "loss": 0.0008, |
| "step": 180 |
| }, |
| { |
| "clip_ratio": 0.0010204909485764802, |
| "completion_length": 789.0833435058594, |
| "epoch": 0.0033767081759246303, |
| "grad_norm": 0.0856408104300499, |
| "kl": 0.0240478515625, |
| "learning_rate": 3.8198695626733725e-06, |
| "loss": 0.0168, |
| "num_tokens": 2085147.0, |
| "reward": 0.0904554259032011, |
| "reward_std": 0.16545533016324043, |
| "rewards/code_reward": 0.050872091203927994, |
| "rewards/format_reward": 0.3958333283662796, |
| "step": 181 |
| }, |
| { |
| "clip_ratio": 0.0013127815909683704, |
| "epoch": 0.003395364022200457, |
| "grad_norm": 0.08718743920326233, |
| "kl": 0.02423095703125, |
| "learning_rate": 3.8070258606583156e-06, |
| "loss": 0.0167, |
| "step": 182 |
| }, |
| { |
| "clip_ratio": 0.0010590656311251223, |
| "epoch": 0.003414019868476284, |
| "grad_norm": 0.08608557283878326, |
| "kl": 0.02398681640625, |
| "learning_rate": 3.7941378079551544e-06, |
| "loss": 0.0163, |
| "step": 183 |
| }, |
| { |
| "clip_ratio": 0.00044808804523199797, |
| "completion_length": 838.3333435058594, |
| "epoch": 0.0034326757147521104, |
| "grad_norm": 0.08882498741149902, |
| "kl": 0.04205322265625, |
| "learning_rate": 3.7812059453207677e-06, |
| "loss": -0.0138, |
| "num_tokens": 2118659.0, |
| "reward": 0.5362499505281448, |
| "reward_std": 0.020687240175902843, |
| "rewards/code_reward": 0.5, |
| "rewards/format_reward": 0.36249999701976776, |
| "step": 184 |
| }, |
| { |
| "clip_ratio": 0.0009204872185364366, |
| "epoch": 0.003451331561027937, |
| "grad_norm": 0.08916648477315903, |
| "kl": 0.044921875, |
| "learning_rate": 3.768230815350213e-06, |
| "loss": -0.0138, |
| "step": 185 |
| }, |
| { |
| "clip_ratio": 0.0004569297598209232, |
| "epoch": 0.003469987407303764, |
| "grad_norm": 0.08778577297925949, |
| "kl": 0.0452880859375, |
| "learning_rate": 3.7552129624539557e-06, |
| "loss": -0.0139, |
| "step": 186 |
| }, |
| { |
| "clip_ratio": 0.0006422713631764054, |
| "completion_length": 820.6666870117188, |
| "epoch": 0.0034886432535795905, |
| "grad_norm": 0.07521483302116394, |
| "kl": 0.02374267578125, |
| "learning_rate": 3.7421529328350316e-06, |
| "loss": -0.0011, |
| "num_tokens": 2154951.0, |
| "reward": 0.3243994116783142, |
| "reward_std": 0.16106001287698746, |
| "rewards/code_reward": 0.26398275047540665, |
| "rewards/format_reward": 0.6041666567325592, |
| "step": 187 |
| }, |
| { |
| "clip_ratio": 0.0007444258662872016, |
| "epoch": 0.003507299099855417, |
| "grad_norm": 0.07639120519161224, |
| "kl": 0.022552490234375, |
| "learning_rate": 3.7290512744661274e-06, |
| "loss": -0.0014, |
| "step": 188 |
| }, |
| { |
| "clip_ratio": 0.0008128004556056112, |
| "epoch": 0.003525954946131244, |
| "grad_norm": 0.07584098726511002, |
| "kl": 0.02203369140625, |
| "learning_rate": 3.715908537066589e-06, |
| "loss": -0.0016, |
| "step": 189 |
| }, |
| { |
| "clip_ratio": 0.0006247111014090478, |
| "completion_length": 782.2500610351562, |
| "epoch": 0.0035446107924070706, |
| "grad_norm": 0.09555865079164505, |
| "kl": 0.02532958984375, |
| "learning_rate": 3.7027252720793538e-06, |
| "loss": -0.0372, |
| "num_tokens": 2188473.0, |
| "reward": 0.640339195728302, |
| "reward_std": 0.48519615828990936, |
| "rewards/code_reward": 0.5903392136096954, |
| "rewards/format_reward": 0.5, |
| "step": 190 |
| }, |
| { |
| "clip_ratio": 0.0004575684142764658, |
| "epoch": 0.003563266638682897, |
| "grad_norm": 0.09690264612436295, |
| "kl": 0.0252685546875, |
| "learning_rate": 3.689502032647817e-06, |
| "loss": -0.0374, |
| "step": 191 |
| }, |
| { |
| "clip_ratio": 0.0003101055626757443, |
| "epoch": 0.0035819224849587238, |
| "grad_norm": 0.08903127908706665, |
| "kl": 0.02032470703125, |
| "learning_rate": 3.6762393735926245e-06, |
| "loss": -0.0377, |
| "step": 192 |
| }, |
| { |
| "clip_ratio": 0.00029177512624301016, |
| "completion_length": 791.9583435058594, |
| "epoch": 0.0036005783312345508, |
| "grad_norm": 0.10319360345602036, |
| "kl": 0.0065460205078125, |
| "learning_rate": 3.6629378513883852e-06, |
| "loss": 0.0565, |
| "num_tokens": 2221952.0, |
| "reward": 0.5221759006381035, |
| "reward_std": 0.40614573657512665, |
| "rewards/code_reward": 0.4759259298443794, |
| "rewards/format_reward": 0.4624999910593033, |
| "step": 193 |
| }, |
| { |
| "clip_ratio": 0.0005471540498547256, |
| "epoch": 0.0036192341775103773, |
| "grad_norm": 0.07854603230953217, |
| "kl": 0.00677490234375, |
| "learning_rate": 3.6495980241403307e-06, |
| "loss": 0.0563, |
| "step": 194 |
| }, |
| { |
| "clip_ratio": 0.0007658963440917432, |
| "epoch": 0.003637890023786204, |
| "grad_norm": 0.1044902577996254, |
| "kl": 0.0064697265625, |
| "learning_rate": 3.636220451560896e-06, |
| "loss": 0.0565, |
| "step": 195 |
| }, |
| { |
| "clip_ratio": 0.001431856129784137, |
| "completion_length": 845.7916870117188, |
| "epoch": 0.003656545870062031, |
| "grad_norm": 0.08788370341062546, |
| "kl": 0.015106201171875, |
| "learning_rate": 3.622805694946235e-06, |
| "loss": 0.0278, |
| "num_tokens": 2256867.0, |
| "reward": 0.55375000461936, |
| "reward_std": 0.04919072613120079, |
| "rewards/code_reward": 0.5, |
| "rewards/format_reward": 0.5374999940395355, |
| "step": 196 |
| }, |
| { |
| "clip_ratio": 0.0012035187100991607, |
| "epoch": 0.0036752017163378574, |
| "grad_norm": 0.08841948956251144, |
| "kl": 0.014556884765625, |
| "learning_rate": 3.609354317152667e-06, |
| "loss": 0.028, |
| "step": 197 |
| }, |
| { |
| "clip_ratio": 0.0011701789626386017, |
| "epoch": 0.003693857562613684, |
| "grad_norm": 0.08651836961507797, |
| "kl": 0.015625, |
| "learning_rate": 3.595866882573063e-06, |
| "loss": 0.0275, |
| "step": 198 |
| }, |
| { |
| "clip_ratio": 0.0006608768162550405, |
| "completion_length": 812.9583435058594, |
| "epoch": 0.0037125134088895105, |
| "grad_norm": 0.09646130353212357, |
| "kl": 0.010162353515625, |
| "learning_rate": 3.5823439571131675e-06, |
| "loss": -0.0052, |
| "num_tokens": 2286290.0, |
| "reward": 0.09254807978868484, |
| "reward_std": 0.06599834933876991, |
| "rewards/code_reward": 0.030048077926039696, |
| "rewards/format_reward": 0.625, |
| "step": 199 |
| }, |
| { |
| "clip_ratio": 0.000630842725513503, |
| "epoch": 0.0037311692551653375, |
| "grad_norm": 0.09436467289924622, |
| "kl": 0.0110015869140625, |
| "learning_rate": 3.5687861081678477e-06, |
| "loss": -0.0052, |
| "step": 200 |
| }, |
| { |
| "clip_ratio": 0.0007917208422441036, |
| "epoch": 0.003749825101441164, |
| "grad_norm": 0.09504929929971695, |
| "kl": 0.012542724609375, |
| "learning_rate": 3.555193904597291e-06, |
| "loss": -0.0053, |
| "step": 201 |
| }, |
| { |
| "clip_ratio": 0.0005567904736381024, |
| "completion_length": 846.1250305175781, |
| "epoch": 0.0037684809477169907, |
| "grad_norm": 0.08655253797769547, |
| "kl": 0.0111083984375, |
| "learning_rate": 3.541567916703138e-06, |
| "loss": 0.0168, |
| "num_tokens": 2316833.0, |
| "reward": 0.46897655725479126, |
| "reward_std": 0.23874285072088242, |
| "rewards/code_reward": 0.4368932172656059, |
| "rewards/format_reward": 0.32083334028720856, |
| "step": 202 |
| }, |
| { |
| "clip_ratio": 0.0003927100624423474, |
| "epoch": 0.0037871367939928176, |
| "grad_norm": 0.08548427373170853, |
| "kl": 0.01123046875, |
| "learning_rate": 3.5279087162045517e-06, |
| "loss": 0.0168, |
| "step": 203 |
| }, |
| { |
| "clip_ratio": 0.0006460061413235962, |
| "epoch": 0.003805792640268644, |
| "grad_norm": 0.08546783775091171, |
| "kl": 0.011199951171875, |
| "learning_rate": 3.5142168762142265e-06, |
| "loss": 0.0165, |
| "step": 204 |
| }, |
| { |
| "clip_ratio": 0.0005783264059573412, |
| "completion_length": 671.1666870117188, |
| "epoch": 0.0038244484865444708, |
| "grad_norm": 0.2734091579914093, |
| "kl": 0.020843505859375, |
| "learning_rate": 3.500492971214347e-06, |
| "loss": 0.008, |
| "num_tokens": 2344797.0, |
| "reward": 0.5388242453336716, |
| "reward_std": 0.3791409581899643, |
| "rewards/code_reward": 0.4925742968916893, |
| "rewards/format_reward": 0.4625000059604645, |
| "step": 205 |
| }, |
| { |
| "clip_ratio": 0.0003238898643758148, |
| "epoch": 0.0038431043328202978, |
| "grad_norm": 0.08638158440589905, |
| "kl": 0.021759033203125, |
| "learning_rate": 3.48673757703248e-06, |
| "loss": 0.0078, |
| "step": 206 |
| }, |
| { |
| "clip_ratio": 0.00025367678608745337, |
| "epoch": 0.0038617601790961243, |
| "grad_norm": 0.08271009474992752, |
| "kl": 0.02239990234375, |
| "learning_rate": 3.472951270817418e-06, |
| "loss": 0.0077, |
| "step": 207 |
| }, |
| { |
| "clip_ratio": 0.0011410006263758987, |
| "completion_length": 863.6250610351562, |
| "epoch": 0.003880416025371951, |
| "grad_norm": 0.0871393084526062, |
| "kl": 0.009246826171875, |
| "learning_rate": 3.4591346310149578e-06, |
| "loss": 0.0031, |
| "num_tokens": 2380560.0, |
| "reward": 0.07268582284450531, |
| "reward_std": 0.057141270488500595, |
| "rewards/code_reward": 0.028519157320261, |
| "rewards/format_reward": 0.4416666626930237, |
| "step": 208 |
| }, |
| { |
| "clip_ratio": 0.0012201684294268489, |
| "epoch": 0.0038990718716477774, |
| "grad_norm": 0.08084117621183395, |
| "kl": 0.00958251953125, |
| "learning_rate": 3.445288237343632e-06, |
| "loss": 0.0031, |
| "step": 209 |
| }, |
| { |
| "clip_ratio": 0.0008913867350202054, |
| "epoch": 0.0039177277179236044, |
| "grad_norm": 0.08293391019105911, |
| "kl": 0.009307861328125, |
| "learning_rate": 3.4314126707703895e-06, |
| "loss": 0.003, |
| "step": 210 |
| }, |
| { |
| "clip_ratio": 0.0013628143933601677, |
| "completion_length": 695.0000305175781, |
| "epoch": 0.003936383564199431, |
| "grad_norm": 0.10922668874263763, |
| "kl": 0.0301513671875, |
| "learning_rate": 3.4175085134862128e-06, |
| "loss": 0.0343, |
| "num_tokens": 2409312.0, |
| "reward": 0.2725326791405678, |
| "reward_std": 0.31870152056217194, |
| "rewards/code_reward": 0.24836601875722408, |
| "rewards/format_reward": 0.24166666716337204, |
| "step": 211 |
| }, |
| { |
| "clip_ratio": 0.0010863369097933173, |
| "epoch": 0.0039550394104752576, |
| "grad_norm": 0.09670348465442657, |
| "kl": 0.024658203125, |
| "learning_rate": 3.4035763488816953e-06, |
| "loss": 0.0341, |
| "step": 212 |
| }, |
| { |
| "clip_ratio": 0.0009245816036127508, |
| "epoch": 0.003973695256751084, |
| "grad_norm": 0.0888768658041954, |
| "kl": 0.02496337890625, |
| "learning_rate": 3.3896167615225594e-06, |
| "loss": 0.0339, |
| "step": 213 |
| }, |
| { |
| "clip_ratio": 0.0007956430781632662, |
| "completion_length": 803.4583435058594, |
| "epoch": 0.003992351103026911, |
| "grad_norm": 0.08351018279790878, |
| "kl": 0.010467529296875, |
| "learning_rate": 3.375630337125133e-06, |
| "loss": -0.0005, |
| "num_tokens": 2439047.0, |
| "reward": 0.2963353507220745, |
| "reward_std": 0.2816299609839916, |
| "rewards/code_reward": 0.25050200801342726, |
| "rewards/format_reward": 0.4583333432674408, |
| "step": 214 |
| }, |
| { |
| "clip_ratio": 0.0009151631966233253, |
| "epoch": 0.004011006949302738, |
| "grad_norm": 0.0963514894247055, |
| "kl": 0.010101318359375, |
| "learning_rate": 3.361617662531772e-06, |
| "loss": -0.0006, |
| "step": 215 |
| }, |
| { |
| "clip_ratio": 0.0005119523120811209, |
| "epoch": 0.004029662795578565, |
| "grad_norm": 0.08659033477306366, |
| "kl": 0.0107421875, |
| "learning_rate": 3.347579325686237e-06, |
| "loss": -0.0011, |
| "step": 216 |
| }, |
| { |
| "clip_ratio": 0.0005694587889593095, |
| "completion_length": 548.8750305175781, |
| "epoch": 0.004048318641854391, |
| "grad_norm": 0.09764964878559113, |
| "kl": 0.015594482421875, |
| "learning_rate": 3.333515915609027e-06, |
| "loss": 0.0127, |
| "num_tokens": 2463224.0, |
| "reward": 0.8187499940395355, |
| "reward_std": 0.4159911423921585, |
| "rewards/code_reward": 0.75, |
| "rewards/format_reward": 0.6875, |
| "step": 217 |
| }, |
| { |
| "clip_ratio": 0.0006374841468641534, |
| "epoch": 0.004066974488130218, |
| "grad_norm": 0.09726841002702713, |
| "kl": 0.01580810546875, |
| "learning_rate": 3.3194280223726616e-06, |
| "loss": 0.0127, |
| "step": 218 |
| }, |
| { |
| "clip_ratio": 0.0008805101679172367, |
| "epoch": 0.004085630334406044, |
| "grad_norm": 0.09693842381238937, |
| "kl": 0.016326904296875, |
| "learning_rate": 3.305316237076927e-06, |
| "loss": 0.0126, |
| "step": 219 |
| }, |
| { |
| "clip_ratio": 0.001065359654603526, |
| "completion_length": 682.2083435058594, |
| "epoch": 0.004104286180681871, |
| "grad_norm": 0.11235346645116806, |
| "kl": 0.00555419921875, |
| "learning_rate": 3.291181151824071e-06, |
| "loss": 0.0219, |
| "num_tokens": 2487829.0, |
| "reward": 0.851570725440979, |
| "reward_std": 0.13773459196090698, |
| "rewards/code_reward": 0.7932373583316803, |
| "rewards/format_reward": 0.5833333134651184, |
| "step": 220 |
| }, |
| { |
| "clip_ratio": 0.0010035349405370653, |
| "epoch": 0.0041229420269576975, |
| "grad_norm": 0.10121559351682663, |
| "kl": 0.0056304931640625, |
| "learning_rate": 3.27702335969396e-06, |
| "loss": 0.0218, |
| "step": 221 |
| }, |
| { |
| "clip_ratio": 0.0007600956014357507, |
| "epoch": 0.004141597873233525, |
| "grad_norm": 0.09749292582273483, |
| "kl": 0.0055694580078125, |
| "learning_rate": 3.2628434547191985e-06, |
| "loss": 0.0217, |
| "step": 222 |
| }, |
| { |
| "clip_ratio": 0.00039983812894206494, |
| "completion_length": 822.5416870117188, |
| "epoch": 0.0041602537195093514, |
| "grad_norm": 0.08266626298427582, |
| "kl": 0.0115966796875, |
| "learning_rate": 3.2486420318601973e-06, |
| "loss": -0.0072, |
| "num_tokens": 2524514.0, |
| "reward": 0.765346884727478, |
| "reward_std": 0.41189485788345337, |
| "rewards/code_reward": 0.6945134997367859, |
| "rewards/format_reward": 0.7083333283662796, |
| "step": 223 |
| }, |
| { |
| "clip_ratio": 0.00040171146974898875, |
| "epoch": 0.004178909565785178, |
| "grad_norm": 0.08238179981708527, |
| "kl": 0.0108184814453125, |
| "learning_rate": 3.2344196869802187e-06, |
| "loss": -0.0073, |
| "step": 224 |
| }, |
| { |
| "clip_ratio": 0.0007332674576900899, |
| "epoch": 0.0041975654120610046, |
| "grad_norm": 0.08556170016527176, |
| "kl": 0.0115203857421875, |
| "learning_rate": 3.2201770168203694e-06, |
| "loss": -0.0072, |
| "step": 225 |
| }, |
| { |
| "clip_ratio": 0.0010465582308825105, |
| "completion_length": 897.4583435058594, |
| "epoch": 0.004216221258336831, |
| "grad_norm": 0.0795939639210701, |
| "kl": 0.010223388671875, |
| "learning_rate": 3.205914618974563e-06, |
| "loss": 0.0444, |
| "num_tokens": 2561713.0, |
| "reward": 0.5436052978038788, |
| "reward_std": 0.36193977296352386, |
| "rewards/code_reward": 0.49235527217388153, |
| "rewards/format_reward": 0.5125000178813934, |
| "step": 226 |
| }, |
| { |
| "clip_ratio": 0.001006987877190113, |
| "epoch": 0.004234877104612658, |
| "grad_norm": 0.0830455869436264, |
| "kl": 0.00970458984375, |
| "learning_rate": 3.1916330918644496e-06, |
| "loss": 0.0443, |
| "step": 227 |
| }, |
| { |
| "clip_ratio": 0.0006825106975156814, |
| "epoch": 0.004253532950888485, |
| "grad_norm": 0.07931126654148102, |
| "kl": 0.009521484375, |
| "learning_rate": 3.177333034714303e-06, |
| "loss": 0.0445, |
| "step": 228 |
| }, |
| { |
| "clip_ratio": 0.0010472153662703931, |
| "completion_length": 1060.8333740234375, |
| "epoch": 0.004272188797164312, |
| "grad_norm": 0.08189205825328827, |
| "kl": 0.01904296875, |
| "learning_rate": 3.1630150475258813e-06, |
| "loss": 0.0354, |
| "num_tokens": 2602557.0, |
| "reward": 0.5575000084936619, |
| "reward_std": 0.09106174670159817, |
| "rewards/code_reward": 0.520833333954215, |
| "rewards/format_reward": 0.36666667461395264, |
| "step": 229 |
| }, |
| { |
| "clip_ratio": 0.0012223481899127364, |
| "epoch": 0.004290844643440138, |
| "grad_norm": 0.07629529386758804, |
| "kl": 0.01898193359375, |
| "learning_rate": 3.148679731053252e-06, |
| "loss": 0.0355, |
| "step": 230 |
| }, |
| { |
| "clip_ratio": 0.0009530899405945092, |
| "epoch": 0.004309500489715965, |
| "grad_norm": 0.07555654644966125, |
| "kl": 0.01959228515625, |
| "learning_rate": 3.1343276867775805e-06, |
| "loss": 0.0352, |
| "step": 231 |
| }, |
| { |
| "clip_ratio": 0.000721571734175086, |
| "completion_length": 631.0, |
| "epoch": 0.004328156335991791, |
| "grad_norm": 0.08854309469461441, |
| "kl": 0.01287841796875, |
| "learning_rate": 3.1199595168819043e-06, |
| "loss": -0.014, |
| "num_tokens": 2627097.0, |
| "reward": 0.7598794400691986, |
| "reward_std": 0.4630916863679886, |
| "rewards/code_reward": 0.6998794674873352, |
| "rewards/format_reward": 0.6000000238418579, |
| "step": 232 |
| }, |
| { |
| "clip_ratio": 0.0004376633296487853, |
| "epoch": 0.004346812182267618, |
| "grad_norm": 0.1049780622124672, |
| "kl": 0.01348876953125, |
| "learning_rate": 3.105575824225852e-06, |
| "loss": -0.0143, |
| "step": 233 |
| }, |
| { |
| "clip_ratio": 0.00047352669935207814, |
| "epoch": 0.0043654680285434445, |
| "grad_norm": 0.08706912398338318, |
| "kl": 0.01336669921875, |
| "learning_rate": 3.091177212320363e-06, |
| "loss": -0.0145, |
| "step": 234 |
| }, |
| { |
| "clip_ratio": 0.00033213794813491404, |
| "completion_length": 1132.4166870117188, |
| "epoch": 0.004384123874819272, |
| "grad_norm": 0.08825133740901947, |
| "kl": 0.01216888427734375, |
| "learning_rate": 3.0767642853023538e-06, |
| "loss": -0.0049, |
| "num_tokens": 2672143.0, |
| "reward": 0.09307292103767395, |
| "reward_std": 0.049834271892905235, |
| "rewards/code_reward": 0.039322917349636555, |
| "rewards/format_reward": 0.5375000089406967, |
| "step": 235 |
| }, |
| { |
| "clip_ratio": 0.0003840103163383901, |
| "epoch": 0.0044027797210950985, |
| "grad_norm": 0.09255920350551605, |
| "kl": 0.01175689697265625, |
| "learning_rate": 3.062337647909376e-06, |
| "loss": -0.0051, |
| "step": 236 |
| }, |
| { |
| "clip_ratio": 0.00038397236494347453, |
| "epoch": 0.004421435567370925, |
| "grad_norm": 0.08827126771211624, |
| "kl": 0.010589599609375, |
| "learning_rate": 3.04789790545424e-06, |
| "loss": -0.0052, |
| "step": 237 |
| }, |
| { |
| "clip_ratio": 0.0007554197800345719, |
| "completion_length": 795.25, |
| "epoch": 0.004440091413646752, |
| "grad_norm": 0.09881194680929184, |
| "kl": 0.02008056640625, |
| "learning_rate": 3.033445663799621e-06, |
| "loss": 0.0047, |
| "num_tokens": 2704009.0, |
| "reward": 0.5524518974125385, |
| "reward_std": 0.056896304711699486, |
| "rewards/code_reward": 0.5012019231216982, |
| "rewards/format_reward": 0.5125000029802322, |
| "step": 238 |
| }, |
| { |
| "clip_ratio": 0.0010150580055778846, |
| "epoch": 0.004458747259922578, |
| "grad_norm": 0.09783502668142319, |
| "kl": 0.021484375, |
| "learning_rate": 3.018981529332633e-06, |
| "loss": 0.0046, |
| "step": 239 |
| }, |
| { |
| "clip_ratio": 0.0011725955409929156, |
| "epoch": 0.004477403106198405, |
| "grad_norm": 0.09466077387332916, |
| "kl": 0.0211181640625, |
| "learning_rate": 3.00450610893939e-06, |
| "loss": 0.0045, |
| "step": 240 |
| }, |
| { |
| "clip_ratio": 0.0011200214212294668, |
| "completion_length": 709.3750152587891, |
| "epoch": 0.004496058952474231, |
| "grad_norm": 0.11234831809997559, |
| "kl": 0.014251708984375, |
| "learning_rate": 2.9900200099795396e-06, |
| "loss": -0.001, |
| "num_tokens": 2735914.0, |
| "reward": 0.33983870036900043, |
| "reward_std": 0.2842044336721301, |
| "rewards/code_reward": 0.29233869910240173, |
| "rewards/format_reward": 0.4750000089406967, |
| "step": 241 |
| }, |
| { |
| "clip_ratio": 0.0012173219583928585, |
| "epoch": 0.004514714798750059, |
| "grad_norm": 0.10929913818836212, |
| "kl": 0.015838623046875, |
| "learning_rate": 2.9755238402607826e-06, |
| "loss": -0.0009, |
| "step": 242 |
| }, |
| { |
| "clip_ratio": 0.0010618427768349648, |
| "epoch": 0.004533370645025885, |
| "grad_norm": 0.1097099706530571, |
| "kl": 0.017913818359375, |
| "learning_rate": 2.961018208013367e-06, |
| "loss": -0.0014, |
| "step": 243 |
| }, |
| { |
| "clip_ratio": 0.0015606118831783533, |
| "completion_length": 740.5000305175781, |
| "epoch": 0.004552026491301712, |
| "grad_norm": 0.1023816242814064, |
| "kl": 0.010467529296875, |
| "learning_rate": 2.9465037218645694e-06, |
| "loss": -0.0155, |
| "num_tokens": 2764042.0, |
| "reward": 0.062079211696982384, |
| "reward_std": 0.04148745723068714, |
| "rewards/code_reward": 0.000412541237892583, |
| "rewards/format_reward": 0.6166666448116302, |
| "step": 244 |
| }, |
| { |
| "clip_ratio": 0.0012834252556785941, |
| "epoch": 0.004570682337577538, |
| "grad_norm": 0.09794943034648895, |
| "kl": 0.01129150390625, |
| "learning_rate": 2.9319809908131604e-06, |
| "loss": -0.0154, |
| "step": 245 |
| }, |
| { |
| "clip_ratio": 0.0014966255985200405, |
| "epoch": 0.004589338183853365, |
| "grad_norm": 0.09920386970043182, |
| "kl": 0.011505126953125, |
| "learning_rate": 2.917450624203847e-06, |
| "loss": -0.0159, |
| "step": 246 |
| }, |
| { |
| "clip_ratio": 0.0009015223768074065, |
| "completion_length": 675.5416870117188, |
| "epoch": 0.0046079940301291915, |
| "grad_norm": 0.08201450854539871, |
| "kl": 0.02886962890625, |
| "learning_rate": 2.9029132317017118e-06, |
| "loss": 0.0237, |
| "num_tokens": 2792831.0, |
| "reward": 0.4770751856267452, |
| "reward_std": 0.21596147678792477, |
| "rewards/code_reward": 0.41707515716552734, |
| "rewards/format_reward": 0.5999999940395355, |
| "step": 247 |
| }, |
| { |
| "clip_ratio": 0.0009108328085858375, |
| "epoch": 0.004626649876405018, |
| "grad_norm": 0.0822167694568634, |
| "kl": 0.0335693359375, |
| "learning_rate": 2.888369423266629e-06, |
| "loss": 0.0241, |
| "step": 248 |
| }, |
| { |
| "clip_ratio": 0.0011067874147556722, |
| "epoch": 0.0046453057226808455, |
| "grad_norm": 0.08312199264764786, |
| "kl": 0.0335693359375, |
| "learning_rate": 2.8738198091276712e-06, |
| "loss": 0.0238, |
| "step": 249 |
| }, |
| { |
| "clip_ratio": 0.0007156841747928411, |
| "completion_length": 705.1666870117188, |
| "epoch": 0.004663961568956672, |
| "grad_norm": 0.09860284626483917, |
| "kl": 0.033416748046875, |
| "learning_rate": 2.859264999757509e-06, |
| "loss": 0.0451, |
| "num_tokens": 2822631.0, |
| "reward": 0.5245833247900009, |
| "reward_std": 0.4078162908554077, |
| "rewards/code_reward": 0.4583333358168602, |
| "rewards/format_reward": 0.6624999940395355, |
| "step": 250 |
| }, |
| { |
| "clip_ratio": 0.0007305167673621327, |
| "epoch": 0.004682617415232499, |
| "grad_norm": 0.10016681998968124, |
| "kl": 0.0286865234375, |
| "learning_rate": 2.8447056058467928e-06, |
| "loss": 0.0451, |
| "step": 251 |
| }, |
| { |
| "clip_ratio": 0.0007170947210397571, |
| "epoch": 0.004701273261508325, |
| "grad_norm": 0.09779990464448929, |
| "kl": 0.029632568359375, |
| "learning_rate": 2.830142238278531e-06, |
| "loss": 0.045, |
| "step": 252 |
| }, |
| { |
| "clip_ratio": 0.000481658848002553, |
| "completion_length": 590.2916870117188, |
| "epoch": 0.004719929107784152, |
| "grad_norm": 154.7192840576172, |
| "kl": 92.0189208984375, |
| "learning_rate": 2.81557550810246e-06, |
| "loss": 0.9582, |
| "num_tokens": 2852470.0, |
| "reward": 0.5958333238959312, |
| "reward_std": 0.3366524577140808, |
| "rewards/code_reward": 0.541666679084301, |
| "rewards/format_reward": 0.5416666716337204, |
| "step": 253 |
| }, |
| { |
| "clip_ratio": 0.0004959633661201224, |
| "epoch": 0.004738584954059978, |
| "grad_norm": 25.69236946105957, |
| "kl": 15.9564208984375, |
| "learning_rate": 2.8010060265094026e-06, |
| "loss": 0.1926, |
| "step": 254 |
| }, |
| { |
| "clip_ratio": 0.0010467918764334172, |
| "epoch": 0.004757240800335805, |
| "grad_norm": 2.245051383972168, |
| "kl": 1.70458984375, |
| "learning_rate": 2.786434404805629e-06, |
| "loss": 0.0514, |
| "step": 255 |
| }, |
| { |
| "clip_ratio": 0.0009520899475319311, |
| "completion_length": 900.1250305175781, |
| "epoch": 0.004775896646611632, |
| "grad_norm": 0.0822591632604599, |
| "kl": 0.01397705078125, |
| "learning_rate": 2.771861254387199e-06, |
| "loss": -0.0279, |
| "num_tokens": 2891485.0, |
| "reward": 0.3816172480583191, |
| "reward_std": 0.25385507196187973, |
| "rewards/code_reward": 0.3220338970422745, |
| "rewards/format_reward": 0.5958333313465118, |
| "step": 256 |
| }, |
| { |
| "clip_ratio": 0.001093231316190213, |
| "epoch": 0.004794552492887459, |
| "grad_norm": 0.08108334243297577, |
| "kl": 0.01324462890625, |
| "learning_rate": 2.7572871867143204e-06, |
| "loss": -0.0278, |
| "step": 257 |
| }, |
| { |
| "clip_ratio": 0.0013529035350074992, |
| "epoch": 0.004813208339163285, |
| "grad_norm": 0.07985206693410873, |
| "kl": 0.01263427734375, |
| "learning_rate": 2.742712813285681e-06, |
| "loss": -0.0277, |
| "step": 258 |
| }, |
| { |
| "clip_ratio": 0.001162135595222935, |
| "completion_length": 1206.5833740234375, |
| "epoch": 0.004831864185439112, |
| "grad_norm": 0.06389827281236649, |
| "kl": 0.0082855224609375, |
| "learning_rate": 2.7281387456128017e-06, |
| "loss": -0.0507, |
| "num_tokens": 2938767.0, |
| "reward": 0.10169872269034386, |
| "reward_std": 0.16118879988789558, |
| "rewards/code_reward": 0.051282052882015705, |
| "rewards/format_reward": 0.5041666701436043, |
| "step": 259 |
| }, |
| { |
| "clip_ratio": 0.0009780275577213615, |
| "epoch": 0.0048505200317149385, |
| "grad_norm": 0.06325838714838028, |
| "kl": 0.008026123046875, |
| "learning_rate": 2.7135655951943716e-06, |
| "loss": -0.0507, |
| "step": 260 |
| }, |
| { |
| "clip_ratio": 0.0008303393551614136, |
| "epoch": 0.004869175877990765, |
| "grad_norm": 0.06411907821893692, |
| "kl": 0.00754547119140625, |
| "learning_rate": 2.698993973490598e-06, |
| "loss": -0.0507, |
| "step": 261 |
| }, |
| { |
| "clip_ratio": 0.0009404766315128654, |
| "completion_length": 799.3333435058594, |
| "epoch": 0.004887831724266592, |
| "grad_norm": 0.07781538367271423, |
| "kl": 0.00923919677734375, |
| "learning_rate": 2.6844244918975416e-06, |
| "loss": 0.0603, |
| "num_tokens": 2969987.0, |
| "reward": 0.532083310186863, |
| "reward_std": 0.1838482804596424, |
| "rewards/code_reward": 0.4583333432674408, |
| "rewards/format_reward": 0.737500011920929, |
| "step": 262 |
| }, |
| { |
| "clip_ratio": 0.0007310473010875285, |
| "epoch": 0.004906487570542419, |
| "grad_norm": 0.08621031045913696, |
| "kl": 0.0091094970703125, |
| "learning_rate": 2.66985776172147e-06, |
| "loss": 0.0604, |
| "step": 263 |
| }, |
| { |
| "clip_ratio": 0.0009290947928093374, |
| "epoch": 0.004925143416818246, |
| "grad_norm": 0.07831931114196777, |
| "kl": 0.00958251953125, |
| "learning_rate": 2.6552943941532088e-06, |
| "loss": 0.0602, |
| "step": 264 |
| }, |
| { |
| "clip_ratio": 0.0010500443167984486, |
| "completion_length": 902.4167175292969, |
| "epoch": 0.004943799263094072, |
| "grad_norm": 0.09909704327583313, |
| "kl": 0.013946533203125, |
| "learning_rate": 2.6407350002424927e-06, |
| "loss": -0.0336, |
| "num_tokens": 3008697.0, |
| "reward": 0.30541668087244034, |
| "reward_std": 0.2829319443553686, |
| "rewards/code_reward": 0.25, |
| "rewards/format_reward": 0.5541666597127914, |
| "step": 265 |
| }, |
| { |
| "clip_ratio": 0.0010122012172359973, |
| "epoch": 0.004962455109369899, |
| "grad_norm": 0.07800783216953278, |
| "kl": 0.014617919921875, |
| "learning_rate": 2.626180190872329e-06, |
| "loss": -0.0339, |
| "step": 266 |
| }, |
| { |
| "clip_ratio": 0.0009586416417732835, |
| "epoch": 0.004981110955645725, |
| "grad_norm": 0.08270706981420517, |
| "kl": 0.013427734375, |
| "learning_rate": 2.611630576733372e-06, |
| "loss": -0.034, |
| "step": 267 |
| }, |
| { |
| "clip_ratio": 0.0006794014479964972, |
| "completion_length": 852.8750610351562, |
| "epoch": 0.004999766801921552, |
| "grad_norm": 0.0892496407032013, |
| "kl": 0.01239013671875, |
| "learning_rate": 2.5970867682982885e-06, |
| "loss": -0.0121, |
| "num_tokens": 3046734.0, |
| "reward": 0.6499094069004059, |
| "reward_std": 0.21050915122032166, |
| "rewards/code_reward": 0.5765760540962219, |
| "rewards/format_reward": 0.7333333194255829, |
| "step": 268 |
| }, |
| { |
| "clip_ratio": 0.0004855144943576306, |
| "epoch": 0.005018422648197379, |
| "grad_norm": 0.08327967673540115, |
| "kl": 0.011993408203125, |
| "learning_rate": 2.582549375796154e-06, |
| "loss": -0.0124, |
| "step": 269 |
| }, |
| { |
| "clip_ratio": 0.000626907596597448, |
| "epoch": 0.005037078494473206, |
| "grad_norm": 0.08495430648326874, |
| "kl": 0.0126953125, |
| "learning_rate": 2.568019009186841e-06, |
| "loss": -0.0121, |
| "step": 270 |
| }, |
| { |
| "clip_ratio": 0.0007202730048447847, |
| "completion_length": 977.8333435058594, |
| "epoch": 0.005055734340749032, |
| "grad_norm": 0.09023862332105637, |
| "kl": 0.024383544921875, |
| "learning_rate": 2.5534962781354317e-06, |
| "loss": 0.0087, |
| "num_tokens": 3088262.0, |
| "reward": 0.26893792301416397, |
| "reward_std": 0.38880206644535065, |
| "rewards/code_reward": 0.19893791526556015, |
| "rewards/format_reward": 0.7000000178813934, |
| "step": 271 |
| }, |
| { |
| "clip_ratio": 0.0009908578940667212, |
| "epoch": 0.005074390187024859, |
| "grad_norm": 0.09044025838375092, |
| "kl": 0.0234375, |
| "learning_rate": 2.538981791986634e-06, |
| "loss": 0.0088, |
| "step": 272 |
| }, |
| { |
| "clip_ratio": 0.0006488285725936294, |
| "epoch": 0.0050930460333006855, |
| "grad_norm": 0.0915515273809433, |
| "kl": 0.022735595703125, |
| "learning_rate": 2.524476159739218e-06, |
| "loss": 0.0086, |
| "step": 273 |
| }, |
| { |
| "clip_ratio": 0.00046395487152040005, |
| "completion_length": 707.9583740234375, |
| "epoch": 0.005111701879576512, |
| "grad_norm": 0.08589129149913788, |
| "kl": 0.02691650390625, |
| "learning_rate": 2.5099799900204607e-06, |
| "loss": -0.013, |
| "num_tokens": 3119713.0, |
| "reward": 0.7045832872390747, |
| "reward_std": 0.39258189499378204, |
| "rewards/code_reward": 0.6250000149011612, |
| "rewards/format_reward": 0.7958333194255829, |
| "step": 274 |
| }, |
| { |
| "clip_ratio": 0.0006349159230012447, |
| "epoch": 0.005130357725852339, |
| "grad_norm": 0.08367536962032318, |
| "kl": 0.02716064453125, |
| "learning_rate": 2.4954938910606108e-06, |
| "loss": -0.013, |
| "step": 275 |
| }, |
| { |
| "clip_ratio": 0.0007932094158604741, |
| "epoch": 0.005149013572128166, |
| "grad_norm": 0.08294366300106049, |
| "kl": 0.024322509765625, |
| "learning_rate": 2.481018470667368e-06, |
| "loss": -0.0131, |
| "step": 276 |
| }, |
| { |
| "clip_ratio": 0.0012548710801638663, |
| "completion_length": 581.9166870117188, |
| "epoch": 0.005167669418403993, |
| "grad_norm": 0.09755374491214752, |
| "kl": 0.02392578125, |
| "learning_rate": 2.4665543362003802e-06, |
| "loss": 0.0183, |
| "num_tokens": 3144827.0, |
| "reward": 0.7179166376590729, |
| "reward_std": 0.27089906856417656, |
| "rewards/code_reward": 0.6666666716337204, |
| "rewards/format_reward": 0.5124999731779099, |
| "step": 277 |
| }, |
| { |
| "clip_ratio": 0.001595219480805099, |
| "epoch": 0.005186325264679819, |
| "grad_norm": 0.09835170209407806, |
| "kl": 0.0220947265625, |
| "learning_rate": 2.4521020945457615e-06, |
| "loss": 0.0181, |
| "step": 278 |
| }, |
| { |
| "clip_ratio": 0.0019812395912595093, |
| "epoch": 0.005204981110955646, |
| "grad_norm": 0.0999624952673912, |
| "kl": 0.0208740234375, |
| "learning_rate": 2.4376623520906255e-06, |
| "loss": 0.0182, |
| "step": 279 |
| }, |
| { |
| "clip_ratio": 0.0011003862309735268, |
| "completion_length": 1171.5833435058594, |
| "epoch": 0.005223636957231472, |
| "grad_norm": 0.07472976297140121, |
| "kl": 0.009613037109375, |
| "learning_rate": 2.4232357146976478e-06, |
| "loss": 0.0011, |
| "num_tokens": 3189013.0, |
| "reward": 0.5404510945081711, |
| "reward_std": 0.15828495100140572, |
| "rewards/code_reward": 0.4721178039908409, |
| "rewards/format_reward": 0.6833333224058151, |
| "step": 280 |
| }, |
| { |
| "clip_ratio": 0.0007794810517225415, |
| "epoch": 0.005242292803507299, |
| "grad_norm": 0.07738126814365387, |
| "kl": 0.009857177734375, |
| "learning_rate": 2.408822787679637e-06, |
| "loss": 0.0013, |
| "step": 281 |
| }, |
| { |
| "clip_ratio": 0.0008492782071698457, |
| "epoch": 0.005260948649783125, |
| "grad_norm": 0.07533033192157745, |
| "kl": 0.009613037109375, |
| "learning_rate": 2.3944241757741475e-06, |
| "loss": 0.001, |
| "step": 282 |
| }, |
| { |
| "clip_ratio": 0.0003291434477432631, |
| "completion_length": 672.625, |
| "epoch": 0.005279604496058953, |
| "grad_norm": 0.07457376271486282, |
| "kl": 0.012908935546875, |
| "learning_rate": 2.380040483118097e-06, |
| "loss": -0.0139, |
| "num_tokens": 3212848.0, |
| "reward": 1.0279166400432587, |
| "reward_std": 0.17488877102732658, |
| "rewards/code_reward": 0.9583333432674408, |
| "rewards/format_reward": 0.6958333253860474, |
| "step": 283 |
| }, |
| { |
| "clip_ratio": 5.457324004964903e-05, |
| "epoch": 0.005298260342334779, |
| "grad_norm": 0.07336500287055969, |
| "kl": 0.0133056640625, |
| "learning_rate": 2.365672313222419e-06, |
| "loss": -0.0141, |
| "step": 284 |
| }, |
| { |
| "clip_ratio": 0.0002531533937144559, |
| "epoch": 0.005316916188610606, |
| "grad_norm": 0.07265036553144455, |
| "kl": 0.0135498046875, |
| "learning_rate": 2.351320268946749e-06, |
| "loss": -0.014, |
| "step": 285 |
| }, |
| { |
| "clip_ratio": 0.0013167554279789329, |
| "completion_length": 909.7083740234375, |
| "epoch": 0.0053355720348864325, |
| "grad_norm": 0.11532127857208252, |
| "kl": 0.01422119140625, |
| "learning_rate": 2.336984952474119e-06, |
| "loss": -0.0194, |
| "num_tokens": 3247833.0, |
| "reward": 0.09151961281895638, |
| "reward_std": 0.09498994797468185, |
| "rewards/code_reward": 0.04901960864663124, |
| "rewards/format_reward": 0.42499999701976776, |
| "step": 286 |
| }, |
| { |
| "clip_ratio": 0.0011718517343979329, |
| "epoch": 0.005354227881162259, |
| "grad_norm": 0.10550913959741592, |
| "kl": 0.013946533203125, |
| "learning_rate": 2.322666965285697e-06, |
| "loss": -0.0197, |
| "step": 287 |
| }, |
| { |
| "clip_ratio": 0.0012868313351646066, |
| "epoch": 0.005372883727438086, |
| "grad_norm": 0.10481560975313187, |
| "kl": 0.015716552734375, |
| "learning_rate": 2.3083669081355507e-06, |
| "loss": -0.0198, |
| "step": 288 |
| }, |
| { |
| "clip_ratio": 0.00034923299972433597, |
| "completion_length": 1332.0833740234375, |
| "epoch": 0.005391539573713912, |
| "grad_norm": 0.06407570093870163, |
| "kl": 0.0085906982421875, |
| "learning_rate": 2.2940853810254377e-06, |
| "loss": 0.0465, |
| "num_tokens": 3299579.0, |
| "reward": 0.05910714715719223, |
| "reward_std": 0.04962867684662342, |
| "rewards/code_reward": 0.0053571430034935474, |
| "rewards/format_reward": 0.5374999940395355, |
| "step": 289 |
| }, |
| { |
| "clip_ratio": 0.0005735456070397049, |
| "epoch": 0.00541019541998974, |
| "grad_norm": 0.08031246811151505, |
| "kl": 0.00830078125, |
| "learning_rate": 2.2798229831796313e-06, |
| "loss": 0.0464, |
| "step": 290 |
| }, |
| { |
| "clip_ratio": 0.0007879029144532979, |
| "epoch": 0.005428851266265566, |
| "grad_norm": 0.0649939477443695, |
| "kl": 0.0087432861328125, |
| "learning_rate": 2.2655803130197816e-06, |
| "loss": 0.0464, |
| "step": 291 |
| }, |
| { |
| "clip_ratio": 0.00107748550362885, |
| "completion_length": 1028.0833740234375, |
| "epoch": 0.005447507112541393, |
| "grad_norm": 0.10151123255491257, |
| "kl": 0.0071563720703125, |
| "learning_rate": 2.2513579681398034e-06, |
| "loss": 0.0503, |
| "num_tokens": 3348829.0, |
| "reward": 0.1927850916981697, |
| "reward_std": 0.3257390707731247, |
| "rewards/code_reward": 0.13903508707880974, |
| "rewards/format_reward": 0.5375000089406967, |
| "step": 292 |
| }, |
| { |
| "clip_ratio": 0.0008358334598597139, |
| "epoch": 0.005466162958817219, |
| "grad_norm": 0.09055967628955841, |
| "kl": 0.0071868896484375, |
| "learning_rate": 2.237156545280803e-06, |
| "loss": 0.0504, |
| "step": 293 |
| }, |
| { |
| "clip_ratio": 0.0009988098172470927, |
| "epoch": 0.005484818805093046, |
| "grad_norm": 0.08410565555095673, |
| "kl": 0.0072174072265625, |
| "learning_rate": 2.2229766403060403e-06, |
| "loss": 0.0501, |
| "step": 294 |
| }, |
| { |
| "clip_ratio": 0.001458263781387359, |
| "completion_length": 681.2083740234375, |
| "epoch": 0.005503474651368872, |
| "grad_norm": 0.09818808734416962, |
| "kl": 0.01514434814453125, |
| "learning_rate": 2.2088188481759305e-06, |
| "loss": 0.0187, |
| "num_tokens": 3378258.0, |
| "reward": 0.5579166747629642, |
| "reward_std": 0.045613983646035194, |
| "rewards/code_reward": 0.5, |
| "rewards/format_reward": 0.5791666805744171, |
| "step": 295 |
| }, |
| { |
| "clip_ratio": 0.0013955137401353568, |
| "epoch": 0.005522130497644699, |
| "grad_norm": 0.09592823684215546, |
| "kl": 0.017864227294921875, |
| "learning_rate": 2.194683762923073e-06, |
| "loss": 0.0188, |
| "step": 296 |
| }, |
| { |
| "clip_ratio": 0.0017046461580321193, |
| "epoch": 0.005540786343920526, |
| "grad_norm": 0.09802346676588058, |
| "kl": 0.01699066162109375, |
| "learning_rate": 2.1805719776273387e-06, |
| "loss": 0.0187, |
| "step": 297 |
| }, |
| { |
| "clip_ratio": 0.0009031847293954343, |
| "completion_length": 919.2917175292969, |
| "epoch": 0.005559442190196353, |
| "grad_norm": 0.08558425307273865, |
| "kl": 0.012786865234375, |
| "learning_rate": 2.166484084390974e-06, |
| "loss": -0.0303, |
| "num_tokens": 3419881.0, |
| "reward": 0.5293938517570496, |
| "reward_std": 0.3867318332195282, |
| "rewards/code_reward": 0.4823105186223984, |
| "rewards/format_reward": 0.47083334624767303, |
| "step": 298 |
| }, |
| { |
| "clip_ratio": 0.0009096206922549754, |
| "epoch": 0.0055780980364721795, |
| "grad_norm": 0.0866839662194252, |
| "kl": 0.0133056640625, |
| "learning_rate": 2.1524206743137636e-06, |
| "loss": -0.0303, |
| "step": 299 |
| }, |
| { |
| "clip_ratio": 0.00072934762283694, |
| "epoch": 0.005596753882748006, |
| "grad_norm": 0.08566750586032867, |
| "kl": 0.0135498046875, |
| "learning_rate": 2.1383823374682287e-06, |
| "loss": -0.0304, |
| "step": 300 |
| }, |
| { |
| "clip_ratio": 0.0010719166893977672, |
| "completion_length": 849.1666870117188, |
| "epoch": 0.005615409729023833, |
| "grad_norm": 0.09118139743804932, |
| "kl": 0.0116424560546875, |
| "learning_rate": 2.124369662874868e-06, |
| "loss": 0.0616, |
| "num_tokens": 3455405.0, |
| "reward": 0.5117070078849792, |
| "reward_std": 0.4537804424762726, |
| "rewards/code_reward": 0.43629033863544464, |
| "rewards/format_reward": 0.7541666626930237, |
| "step": 301 |
| }, |
| { |
| "clip_ratio": 0.0009527777438051999, |
| "epoch": 0.005634065575299659, |
| "grad_norm": 0.09360919892787933, |
| "kl": 0.0117034912109375, |
| "learning_rate": 2.110383238477441e-06, |
| "loss": 0.0618, |
| "step": 302 |
| }, |
| { |
| "clip_ratio": 0.0010622217669151723, |
| "epoch": 0.005652721421575487, |
| "grad_norm": 0.09148411452770233, |
| "kl": 0.0118560791015625, |
| "learning_rate": 2.096423651118305e-06, |
| "loss": 0.0616, |
| "step": 303 |
| }, |
| { |
| "clip_ratio": 0.0005411781894508749, |
| "completion_length": 1098.7083740234375, |
| "epoch": 0.005671377267851313, |
| "grad_norm": 0.08525364845991135, |
| "kl": 0.014556884765625, |
| "learning_rate": 2.082491486513788e-06, |
| "loss": 0.0016, |
| "num_tokens": 3502546.0, |
| "reward": 0.3728175610303879, |
| "reward_std": 0.3381016403436661, |
| "rewards/code_reward": 0.321984238922596, |
| "rewards/format_reward": 0.5083333253860474, |
| "step": 304 |
| }, |
| { |
| "clip_ratio": 0.0007919937779661268, |
| "epoch": 0.00569003311412714, |
| "grad_norm": 0.40698713064193726, |
| "kl": 0.01470947265625, |
| "learning_rate": 2.0685873292296116e-06, |
| "loss": 0.0018, |
| "step": 305 |
| }, |
| { |
| "clip_ratio": 0.0005383995739975944, |
| "epoch": 0.005708688960402966, |
| "grad_norm": 0.49188467860221863, |
| "kl": 0.014678955078125, |
| "learning_rate": 2.054711762656369e-06, |
| "loss": 0.0018, |
| "step": 306 |
| }, |
| { |
| "clip_ratio": 0.0010625849245116115, |
| "completion_length": 683.9583435058594, |
| "epoch": 0.005727344806678793, |
| "grad_norm": 0.10668221861124039, |
| "kl": 0.014434814453125, |
| "learning_rate": 2.040865368985044e-06, |
| "loss": -0.0085, |
| "num_tokens": 3530901.0, |
| "reward": 0.776462584733963, |
| "reward_std": 0.17944780550897121, |
| "rewards/code_reward": 0.7243793159723282, |
| "rewards/format_reward": 0.5208333432674408, |
| "step": 307 |
| }, |
| { |
| "clip_ratio": 0.0016164338449016213, |
| "epoch": 0.005746000652954619, |
| "grad_norm": 0.10843304544687271, |
| "kl": 0.0142822265625, |
| "learning_rate": 2.027048729182583e-06, |
| "loss": -0.0086, |
| "step": 308 |
| }, |
| { |
| "clip_ratio": 0.001167797192465514, |
| "epoch": 0.005764656499230446, |
| "grad_norm": 0.10622025281190872, |
| "kl": 0.014434814453125, |
| "learning_rate": 2.0132624229675205e-06, |
| "loss": -0.0089, |
| "step": 309 |
| }, |
| { |
| "clip_ratio": 0.0011982419237028807, |
| "completion_length": 876.9166870117188, |
| "epoch": 0.005783312345506273, |
| "grad_norm": 0.09164712578058243, |
| "kl": 0.021636962890625, |
| "learning_rate": 1.9995070287856546e-06, |
| "loss": -0.0053, |
| "num_tokens": 3564103.0, |
| "reward": 0.5786574631929398, |
| "reward_std": 0.41006001830101013, |
| "rewards/code_reward": 0.5207407623529434, |
| "rewards/format_reward": 0.5791666507720947, |
| "step": 310 |
| }, |
| { |
| "clip_ratio": 0.001371737860608846, |
| "epoch": 0.0058019681917821, |
| "grad_norm": 0.09242158383131027, |
| "kl": 0.022064208984375, |
| "learning_rate": 1.985783123785774e-06, |
| "loss": -0.0055, |
| "step": 311 |
| }, |
| { |
| "clip_ratio": 0.001422962493961677, |
| "epoch": 0.0058206240380579265, |
| "grad_norm": 0.09199241548776627, |
| "kl": 0.021728515625, |
| "learning_rate": 1.9720912837954486e-06, |
| "loss": -0.0054, |
| "step": 312 |
| }, |
| { |
| "clip_ratio": 0.0006902147724758834, |
| "completion_length": 927.7500305175781, |
| "epoch": 0.005839279884333753, |
| "grad_norm": 0.09657740592956543, |
| "kl": 0.0142822265625, |
| "learning_rate": 1.958432083296862e-06, |
| "loss": -0.0334, |
| "num_tokens": 3595285.0, |
| "reward": 0.4076350927352905, |
| "reward_std": 0.3373851850628853, |
| "rewards/code_reward": 0.3297183997929096, |
| "rewards/format_reward": 0.7791666686534882, |
| "step": 313 |
| }, |
| { |
| "clip_ratio": 0.0006798989197704941, |
| "epoch": 0.00585793573060958, |
| "grad_norm": 0.09554401785135269, |
| "kl": 0.013671875, |
| "learning_rate": 1.9448060954027093e-06, |
| "loss": -0.0335, |
| "step": 314 |
| }, |
| { |
| "clip_ratio": 0.0008473133784718812, |
| "epoch": 0.005876591576885406, |
| "grad_norm": 0.09273131936788559, |
| "kl": 0.014678955078125, |
| "learning_rate": 1.931213891832153e-06, |
| "loss": -0.0337, |
| "step": 315 |
| }, |
| { |
| "clip_ratio": 0.0005323913064785302, |
| "completion_length": 834.8333740234375, |
| "epoch": 0.005895247423161233, |
| "grad_norm": 0.10354404151439667, |
| "kl": 0.025360107421875, |
| "learning_rate": 1.9176560428868336e-06, |
| "loss": -0.0085, |
| "num_tokens": 3639501.0, |
| "reward": 0.64698725938797, |
| "reward_std": 0.4041699767112732, |
| "rewards/code_reward": 0.5749039202928543, |
| "rewards/format_reward": 0.7208333015441895, |
| "step": 316 |
| }, |
| { |
| "clip_ratio": 0.0008116684039123356, |
| "epoch": 0.00591390326943706, |
| "grad_norm": 0.10160063952207565, |
| "kl": 0.025299072265625, |
| "learning_rate": 1.9041331174269373e-06, |
| "loss": -0.0088, |
| "step": 317 |
| }, |
| { |
| "clip_ratio": 0.0005085803277324885, |
| "epoch": 0.005932559115712887, |
| "grad_norm": 0.09548759460449219, |
| "kl": 0.0244140625, |
| "learning_rate": 1.8906456828473341e-06, |
| "loss": -0.009, |
| "step": 318 |
| }, |
| { |
| "clip_ratio": 0.0008387799316551536, |
| "completion_length": 1620.8750915527344, |
| "epoch": 0.005951214961988713, |
| "grad_norm": 0.07884962111711502, |
| "kl": 0.0074615478515625, |
| "learning_rate": 1.8771943050537656e-06, |
| "loss": 0.0221, |
| "num_tokens": 3698802.0, |
| "reward": 0.38234737887978554, |
| "reward_std": 0.20284880511462688, |
| "rewards/code_reward": 0.30693069100379944, |
| "rewards/format_reward": 0.7541666626930237, |
| "step": 319 |
| }, |
| { |
| "clip_ratio": 0.0007986939453985542, |
| "epoch": 0.00596987080826454, |
| "grad_norm": 0.07618702203035355, |
| "kl": 0.007415771484375, |
| "learning_rate": 1.8637795484391046e-06, |
| "loss": 0.0221, |
| "step": 320 |
| }, |
| { |
| "clip_ratio": 0.0007583920087199658, |
| "epoch": 0.0059885266545403664, |
| "grad_norm": 0.0829911157488823, |
| "kl": 0.0075225830078125, |
| "learning_rate": 1.8504019758596698e-06, |
| "loss": 0.0221, |
| "step": 321 |
| }, |
| { |
| "clip_ratio": 0.001169055758509785, |
| "completion_length": 795.7916717529297, |
| "epoch": 0.006007182500816193, |
| "grad_norm": 0.14611609280109406, |
| "kl": 0.019012451171875, |
| "learning_rate": 1.8370621486116163e-06, |
| "loss": 0.023, |
| "num_tokens": 3734797.0, |
| "reward": 0.645012378692627, |
| "reward_std": 0.2191050536930561, |
| "rewards/code_reward": 0.5820957124233246, |
| "rewards/format_reward": 0.6291666924953461, |
| "step": 322 |
| }, |
| { |
| "clip_ratio": 0.0009497803403064609, |
| "epoch": 0.0060258383470920196, |
| "grad_norm": 0.10970307886600494, |
| "kl": 0.02032470703125, |
| "learning_rate": 1.823760626407377e-06, |
| "loss": 0.0227, |
| "step": 323 |
| }, |
| { |
| "clip_ratio": 0.0009026691550388932, |
| "epoch": 0.006044494193367847, |
| "grad_norm": 0.11829708516597748, |
| "kl": 0.0189208984375, |
| "learning_rate": 1.8104979673521838e-06, |
| "loss": 0.023, |
| "step": 324 |
| }, |
| { |
| "clip_ratio": 0.0008327370742335916, |
| "completion_length": 874.0416870117188, |
| "epoch": 0.0060631500396436736, |
| "grad_norm": 0.09914888441562653, |
| "kl": 0.02264404296875, |
| "learning_rate": 1.7972747279206482e-06, |
| "loss": -0.0067, |
| "num_tokens": 3769226.0, |
| "reward": 0.4188089519739151, |
| "reward_std": 0.19916179403662682, |
| "rewards/code_reward": 0.36964227352291346, |
| "rewards/format_reward": 0.49166665971279144, |
| "step": 325 |
| }, |
| { |
| "clip_ratio": 0.0008456225623376667, |
| "epoch": 0.0060818058859195, |
| "grad_norm": 0.09671460092067719, |
| "kl": 0.02288818359375, |
| "learning_rate": 1.7840914629334122e-06, |
| "loss": -0.0064, |
| "step": 326 |
| }, |
| { |
| "clip_ratio": 0.0010657160892151296, |
| "epoch": 0.006100461732195327, |
| "grad_norm": 0.09105102717876434, |
| "kl": 0.02294921875, |
| "learning_rate": 1.7709487255338731e-06, |
| "loss": -0.0066, |
| "step": 327 |
| }, |
| { |
| "clip_ratio": 0.0010840718168765306, |
| "completion_length": 422.5833435058594, |
| "epoch": 0.006119117578471153, |
| "grad_norm": 0.13927879929542542, |
| "kl": 0.019073486328125, |
| "learning_rate": 1.7578470671649684e-06, |
| "loss": 0.0094, |
| "num_tokens": 3789448.0, |
| "reward": 0.5294444337487221, |
| "reward_std": 0.17376149259507656, |
| "rewards/code_reward": 0.4444444179534912, |
| "rewards/format_reward": 0.8499999940395355, |
| "step": 328 |
| }, |
| { |
| "clip_ratio": 0.000780029222369194, |
| "epoch": 0.00613777342474698, |
| "grad_norm": 0.13586357235908508, |
| "kl": 0.021209716796875, |
| "learning_rate": 1.744787037546045e-06, |
| "loss": 0.0092, |
| "step": 329 |
| }, |
| { |
| "clip_ratio": 0.001488116628024727, |
| "epoch": 0.006156429271022806, |
| "grad_norm": 0.16369539499282837, |
| "kl": 0.02215576171875, |
| "learning_rate": 1.731769184649788e-06, |
| "loss": 0.0091, |
| "step": 330 |
| }, |
| { |
| "clip_ratio": 0.0005380379006965086, |
| "completion_length": 776.1250305175781, |
| "epoch": 0.006175085117298634, |
| "grad_norm": 0.0828891322016716, |
| "kl": 0.02215576171875, |
| "learning_rate": 1.7187940546792325e-06, |
| "loss": -0.0154, |
| "num_tokens": 3825619.0, |
| "reward": 0.6713247746229172, |
| "reward_std": 0.09931758604943752, |
| "rewards/code_reward": 0.5779914557933807, |
| "rewards/format_reward": 0.9333333075046539, |
| "step": 331 |
| }, |
| { |
| "clip_ratio": 0.0008244940836448222, |
| "epoch": 0.00619374096357446, |
| "grad_norm": 0.08281542360782623, |
| "kl": 0.02386474609375, |
| "learning_rate": 1.7058621920448465e-06, |
| "loss": -0.0155, |
| "step": 332 |
| }, |
| { |
| "clip_ratio": 0.0006863593007437885, |
| "epoch": 0.006212396809850287, |
| "grad_norm": 0.08620591461658478, |
| "kl": 0.027679443359375, |
| "learning_rate": 1.6929741393416855e-06, |
| "loss": -0.0155, |
| "step": 333 |
| }, |
| { |
| "clip_ratio": 0.0011899169767275453, |
| "completion_length": 866.6250305175781, |
| "epoch": 0.0062310526561261135, |
| "grad_norm": 0.09219539165496826, |
| "kl": 0.034088134765625, |
| "learning_rate": 1.6801304373266286e-06, |
| "loss": 0.0009, |
| "num_tokens": 3860470.0, |
| "reward": 0.526757076382637, |
| "reward_std": 0.33820731937885284, |
| "rewards/code_reward": 0.45467372983694077, |
| "rewards/format_reward": 0.7208333313465118, |
| "step": 334 |
| }, |
| { |
| "clip_ratio": 0.0009668752900324762, |
| "epoch": 0.00624970850240194, |
| "grad_norm": 0.09143849462270737, |
| "kl": 0.030029296875, |
| "learning_rate": 1.667331624895689e-06, |
| "loss": 0.0004, |
| "step": 335 |
| }, |
| { |
| "clip_ratio": 0.0009758240194059908, |
| "epoch": 0.006268364348677767, |
| "grad_norm": 0.09234081953763962, |
| "kl": 0.03564453125, |
| "learning_rate": 1.6545782390614037e-06, |
| "loss": 0.0005, |
| "step": 336 |
| }, |
| { |
| "clip_ratio": 0.0009786576265469193, |
| "completion_length": 767.2083740234375, |
| "epoch": 0.006287020194953594, |
| "grad_norm": 0.11128581315279007, |
| "kl": 0.0589599609375, |
| "learning_rate": 1.6418708149302992e-06, |
| "loss": 0.0293, |
| "num_tokens": 3900099.0, |
| "reward": 0.5688367038965225, |
| "reward_std": 0.1789155676960945, |
| "rewards/code_reward": 0.49841997027397156, |
| "rewards/format_reward": 0.7041666805744171, |
| "step": 337 |
| }, |
| { |
| "clip_ratio": 0.0011234881239943206, |
| "epoch": 0.0063056760412294206, |
| "grad_norm": 0.11240074783563614, |
| "kl": 0.0528564453125, |
| "learning_rate": 1.6292098856804423e-06, |
| "loss": 0.029, |
| "step": 338 |
| }, |
| { |
| "clip_ratio": 0.0006711997266393155, |
| "epoch": 0.006324331887505247, |
| "grad_norm": 0.10572442412376404, |
| "kl": 0.05133056640625, |
| "learning_rate": 1.6165959825390661e-06, |
| "loss": 0.0289, |
| "step": 339 |
| }, |
| { |
| "clip_ratio": 0.0007651688065379858, |
| "completion_length": 662.3333587646484, |
| "epoch": 0.006342987733781074, |
| "grad_norm": 0.09474088996648788, |
| "kl": 0.0198974609375, |
| "learning_rate": 1.604029634760284e-06, |
| "loss": 0.0135, |
| "num_tokens": 3926987.0, |
| "reward": 0.8191105425357819, |
| "reward_std": 0.3644227534532547, |
| "rewards/code_reward": 0.7049438655376434, |
| "rewards/format_reward": 1.1416666507720947, |
| "step": 340 |
| }, |
| { |
| "clip_ratio": 0.0006743039994034916, |
| "epoch": 0.0063616435800569, |
| "grad_norm": 0.09659333527088165, |
| "kl": 0.020263671875, |
| "learning_rate": 1.59151136960288e-06, |
| "loss": 0.0139, |
| "step": 341 |
| }, |
| { |
| "clip_ratio": 0.0006922298634890467, |
| "epoch": 0.006380299426332727, |
| "grad_norm": 0.09575100243091583, |
| "kl": 0.01959228515625, |
| "learning_rate": 1.5790417123081903e-06, |
| "loss": 0.0135, |
| "step": 342 |
| }, |
| { |
| "clip_ratio": 0.0005754130106652156, |
| "completion_length": 964.3333740234375, |
| "epoch": 0.006398955272608553, |
| "grad_norm": 0.08601634949445724, |
| "kl": 0.012359619140625, |
| "learning_rate": 1.5666211860780583e-06, |
| "loss": -0.0431, |
| "num_tokens": 3969595.0, |
| "reward": 0.43568913638591766, |
| "reward_std": 0.2704745829105377, |
| "rewards/code_reward": 0.3594391196966171, |
| "rewards/format_reward": 0.762499988079071, |
| "step": 343 |
| }, |
| { |
| "clip_ratio": 0.0005838508659508079, |
| "epoch": 0.006417611118884381, |
| "grad_norm": 0.08448081463575363, |
| "kl": 0.01171875, |
| "learning_rate": 1.5542503120528918e-06, |
| "loss": -0.043, |
| "step": 344 |
| }, |
| { |
| "clip_ratio": 0.0007864527578931302, |
| "epoch": 0.006436266965160207, |
| "grad_norm": 0.08665861189365387, |
| "kl": 0.011932373046875, |
| "learning_rate": 1.5419296092897866e-06, |
| "loss": -0.0432, |
| "step": 345 |
| }, |
| { |
| "clip_ratio": 0.0012224867241457105, |
| "completion_length": 830.8750305175781, |
| "epoch": 0.006454922811436034, |
| "grad_norm": 0.08804712444543839, |
| "kl": 0.03411865234375, |
| "learning_rate": 1.529659594740755e-06, |
| "loss": 0.0054, |
| "num_tokens": 3999184.0, |
| "reward": 0.12666667252779007, |
| "reward_std": 0.1735054012387991, |
| "rewards/code_reward": 0.0416666679084301, |
| "rewards/format_reward": 0.8499999642372131, |
| "step": 346 |
| }, |
| { |
| "clip_ratio": 0.0012100348249077797, |
| "epoch": 0.0064735786577118605, |
| "grad_norm": 0.08527618646621704, |
| "kl": 0.03369140625, |
| "learning_rate": 1.5174407832310338e-06, |
| "loss": 0.0053, |
| "step": 347 |
| }, |
| { |
| "clip_ratio": 0.0010465410014148802, |
| "epoch": 0.006492234503987687, |
| "grad_norm": 0.08590537309646606, |
| "kl": 0.03118896484375, |
| "learning_rate": 1.5052736874374815e-06, |
| "loss": 0.0055, |
| "step": 348 |
| }, |
| { |
| "clip_ratio": 0.001526809181086719, |
| "completion_length": 865.7500610351562, |
| "epoch": 0.006510890350263514, |
| "grad_norm": 0.09376516193151474, |
| "kl": 0.0206298828125, |
| "learning_rate": 1.4931588178670695e-06, |
| "loss": 0.0089, |
| "num_tokens": 4031674.0, |
| "reward": 0.2808653935790062, |
| "reward_std": 0.27811707369983196, |
| "rewards/code_reward": 0.21794871985912323, |
| "rewards/format_reward": 0.6291666775941849, |
| "step": 349 |
| }, |
| { |
| "clip_ratio": 0.000961648824159056, |
| "epoch": 0.00652954619653934, |
| "grad_norm": 0.10522089153528214, |
| "kl": 0.02142333984375, |
| "learning_rate": 1.4810966828354605e-06, |
| "loss": 0.0089, |
| "step": 350 |
| }, |
| { |
| "clip_ratio": 0.0010438500612508506, |
| "epoch": 0.006548202042815168, |
| "grad_norm": 0.09141746908426285, |
| "kl": 0.0203857421875, |
| "learning_rate": 1.469087788445684e-06, |
| "loss": 0.0084, |
| "step": 351 |
| }, |
| { |
| "clip_ratio": 0.0010631512850522995, |
| "completion_length": 1097.8333740234375, |
| "epoch": 0.006566857889090994, |
| "grad_norm": 0.08537644892930984, |
| "kl": 0.014739990234375, |
| "learning_rate": 1.4571326385668965e-06, |
| "loss": -0.0225, |
| "num_tokens": 4071702.0, |
| "reward": 0.3495173156261444, |
| "reward_std": 0.350162148475647, |
| "rewards/code_reward": 0.312017310410738, |
| "rewards/format_reward": 0.3749999925494194, |
| "step": 352 |
| }, |
| { |
| "clip_ratio": 0.0007791414973326027, |
| "epoch": 0.006585513735366821, |
| "grad_norm": 0.08616505563259125, |
| "kl": 0.015045166015625, |
| "learning_rate": 1.4452317348132434e-06, |
| "loss": -0.0226, |
| "step": 353 |
| }, |
| { |
| "clip_ratio": 0.0008178194111678749, |
| "epoch": 0.006604169581642647, |
| "grad_norm": 0.08682885020971298, |
| "kl": 0.014923095703125, |
| "learning_rate": 1.4333855765228104e-06, |
| "loss": -0.0226, |
| "step": 354 |
| }, |
| { |
| "clip_ratio": 0.0010338413121644408, |
| "completion_length": 677.5, |
| "epoch": 0.006622825427918474, |
| "grad_norm": 0.08901394158601761, |
| "kl": 0.0438232421875, |
| "learning_rate": 1.421594660736675e-06, |
| "loss": 0.0347, |
| "num_tokens": 4095762.0, |
| "reward": 0.6164166405797005, |
| "reward_std": 0.12990031018853188, |
| "rewards/code_reward": 0.539749950170517, |
| "rewards/format_reward": 0.7666666805744171, |
| "step": 355 |
| }, |
| { |
| "clip_ratio": 0.0012019489658996463, |
| "epoch": 0.0066414812741943, |
| "grad_norm": 0.08767423033714294, |
| "kl": 0.03948974609375, |
| "learning_rate": 1.4098594821780476e-06, |
| "loss": 0.0344, |
| "step": 356 |
| }, |
| { |
| "clip_ratio": 0.001214035670273006, |
| "epoch": 0.006660137120470127, |
| "grad_norm": 0.09001511335372925, |
| "kl": 0.038970947265625, |
| "learning_rate": 1.3981805332315174e-06, |
| "loss": 0.0346, |
| "step": 357 |
| }, |
| { |
| "clip_ratio": 0.0010124409454874694, |
| "completion_length": 1344.5417175292969, |
| "epoch": 0.006678792966745954, |
| "grad_norm": 0.07684619724750519, |
| "kl": 0.02001190185546875, |
| "learning_rate": 1.3865583039223929e-06, |
| "loss": 0.0339, |
| "num_tokens": 4142671.0, |
| "reward": 0.7462417930364609, |
| "reward_std": 0.22497505508363247, |
| "rewards/code_reward": 0.6674917489290237, |
| "rewards/format_reward": 0.7874999940395355, |
| "step": 358 |
| }, |
| { |
| "clip_ratio": 0.0007824611384421587, |
| "epoch": 0.006697448813021781, |
| "grad_norm": 0.0674465075135231, |
| "kl": 0.0182037353515625, |
| "learning_rate": 1.374993281896137e-06, |
| "loss": 0.0337, |
| "step": 359 |
| }, |
| { |
| "clip_ratio": 0.0010956324986182153, |
| "epoch": 0.0067161046592976075, |
| "grad_norm": 0.0694521889090538, |
| "kl": 0.01786041259765625, |
| "learning_rate": 1.3634859523979134e-06, |
| "loss": 0.034, |
| "step": 360 |
| }, |
| { |
| "clip_ratio": 0.0011445771087892354, |
| "completion_length": 637.5833740234375, |
| "epoch": 0.006734760505573434, |
| "grad_norm": 0.11650668829679489, |
| "kl": 0.012542724609375, |
| "learning_rate": 1.3520367982522208e-06, |
| "loss": -0.0048, |
| "num_tokens": 4168125.0, |
| "reward": 0.3813568465411663, |
| "reward_std": 0.24762929044663906, |
| "rewards/code_reward": 0.29594019055366516, |
| "rewards/format_reward": 0.8541666567325592, |
| "step": 361 |
| }, |
| { |
| "clip_ratio": 0.0014873360050842166, |
| "epoch": 0.006753416351849261, |
| "grad_norm": 0.11154331266880035, |
| "kl": 0.01220703125, |
| "learning_rate": 1.3406462998426358e-06, |
| "loss": -0.0047, |
| "step": 362 |
| }, |
| { |
| "clip_ratio": 0.0012732939212583005, |
| "epoch": 0.006772072198125087, |
| "grad_norm": 0.11064095795154572, |
| "kl": 0.01312255859375, |
| "learning_rate": 1.3293149350916595e-06, |
| "loss": -0.0045, |
| "step": 363 |
| }, |
| { |
| "clip_ratio": 0.0010022299247793853, |
| "completion_length": 581.25, |
| "epoch": 0.006790728044400914, |
| "grad_norm": 0.20353944599628448, |
| "kl": 0.017822265625, |
| "learning_rate": 1.3180431794406623e-06, |
| "loss": 0.0111, |
| "num_tokens": 4198479.0, |
| "reward": 0.10175329819321632, |
| "reward_std": 0.05031761899590492, |
| "rewards/code_reward": 0.033003296703100204, |
| "rewards/format_reward": 0.6875, |
| "step": 364 |
| }, |
| { |
| "clip_ratio": 0.0008599580323789269, |
| "epoch": 0.006809383890676741, |
| "grad_norm": 0.1254553347826004, |
| "kl": 0.0179443359375, |
| "learning_rate": 1.3068315058299358e-06, |
| "loss": 0.0109, |
| "step": 365 |
| }, |
| { |
| "clip_ratio": 0.0007180549946497194, |
| "epoch": 0.006828039736952568, |
| "grad_norm": 0.11537165939807892, |
| "kl": 0.01995849609375, |
| "learning_rate": 1.2956803846788503e-06, |
| "loss": 0.011, |
| "step": 366 |
| }, |
| { |
| "clip_ratio": 0.0006652901647612453, |
| "completion_length": 934.2917175292969, |
| "epoch": 0.006846695583228394, |
| "grad_norm": 0.09258420765399933, |
| "kl": 0.011566162109375, |
| "learning_rate": 1.284590283866116e-06, |
| "loss": 0.0124, |
| "num_tokens": 4241782.0, |
| "reward": 0.21692460775375366, |
| "reward_std": 0.24195446819067, |
| "rewards/code_reward": 0.14484127657487988, |
| "rewards/format_reward": 0.7208333313465118, |
| "step": 367 |
| }, |
| { |
| "clip_ratio": 0.0009541537729091942, |
| "epoch": 0.006865351429504221, |
| "grad_norm": 0.09379203617572784, |
| "kl": 0.011322021484375, |
| "learning_rate": 1.2735616687101518e-06, |
| "loss": 0.0125, |
| "step": 368 |
| }, |
| { |
| "clip_ratio": 0.0010697944671846926, |
| "epoch": 0.006884007275780047, |
| "grad_norm": 0.09176654368638992, |
| "kl": 0.01129150390625, |
| "learning_rate": 1.2625950019495614e-06, |
| "loss": 0.0124, |
| "step": 369 |
| }, |
| { |
| "clip_ratio": 0.000756454566726461, |
| "completion_length": 1099.1250610351562, |
| "epoch": 0.006902663122055874, |
| "grad_norm": 0.07225238531827927, |
| "kl": 0.0067138671875, |
| "learning_rate": 1.251690743723718e-06, |
| "loss": 0.016, |
| "num_tokens": 4282729.0, |
| "reward": 0.1934523843228817, |
| "reward_std": 0.11385035887360573, |
| "rewards/code_reward": 0.10303572192788124, |
| "rewards/format_reward": 0.9041666388511658, |
| "step": 370 |
| }, |
| { |
| "clip_ratio": 0.000768135127145797, |
| "epoch": 0.006921318968331701, |
| "grad_norm": 0.0729527473449707, |
| "kl": 0.0072021484375, |
| "learning_rate": 1.2408493515534581e-06, |
| "loss": 0.0163, |
| "step": 371 |
| }, |
| { |
| "clip_ratio": 0.0006400739657692611, |
| "epoch": 0.006939974814607528, |
| "grad_norm": 0.07251974940299988, |
| "kl": 0.007110595703125, |
| "learning_rate": 1.2300712803218834e-06, |
| "loss": 0.0159, |
| "step": 372 |
| }, |
| { |
| "clip_ratio": 0.0016507117543369532, |
| "completion_length": 431.79168701171875, |
| "epoch": 0.0069586306608833545, |
| "grad_norm": 0.11167676746845245, |
| "kl": 0.014739990234375, |
| "learning_rate": 1.2193569822552772e-06, |
| "loss": -0.0044, |
| "num_tokens": 4303508.0, |
| "reward": 0.47044751420617104, |
| "reward_std": 0.23989063128829002, |
| "rewards/code_reward": 0.3850308656692505, |
| "rewards/format_reward": 0.8541666567325592, |
| "step": 373 |
| }, |
| { |
| "clip_ratio": 0.001415064267348498, |
| "epoch": 0.006977286507159181, |
| "grad_norm": 0.10820432007312775, |
| "kl": 0.016204833984375, |
| "learning_rate": 1.2087069069041268e-06, |
| "loss": -0.004, |
| "step": 374 |
| }, |
| { |
| "clip_ratio": 0.0017915331991389394, |
| "epoch": 0.006995942353435008, |
| "grad_norm": 0.10194279253482819, |
| "kl": 0.01751708984375, |
| "learning_rate": 1.1981215011242654e-06, |
| "loss": -0.0037, |
| "step": 375 |
| }, |
| { |
| "clip_ratio": 0.0009616998431738466, |
| "completion_length": 945.7500610351562, |
| "epoch": 0.007014598199710834, |
| "grad_norm": 0.06673284620046616, |
| "kl": 0.0225830078125, |
| "learning_rate": 1.1876012090581184e-06, |
| "loss": -0.0215, |
| "num_tokens": 4339982.0, |
| "reward": 0.4850490316748619, |
| "reward_std": 0.451216384768486, |
| "rewards/code_reward": 0.39338236302137375, |
| "rewards/format_reward": 0.9166666567325592, |
| "step": 376 |
| }, |
| { |
| "clip_ratio": 0.0011033907067030668, |
| "epoch": 0.007033254045986661, |
| "grad_norm": 0.06927090138196945, |
| "kl": 0.02490234375, |
| "learning_rate": 1.177146472116071e-06, |
| "loss": -0.0215, |
| "step": 377 |
| }, |
| { |
| "clip_ratio": 0.0009509723749943078, |
| "epoch": 0.007051909892262488, |
| "grad_norm": 0.06608384847640991, |
| "kl": 0.0247802734375, |
| "learning_rate": 1.1667577289579462e-06, |
| "loss": -0.0216, |
| "step": 378 |
| }, |
| { |
| "clip_ratio": 0.0002945388841908425, |
| "completion_length": 615.0000152587891, |
| "epoch": 0.007070565738538315, |
| "grad_norm": 0.09600235521793365, |
| "kl": 0.07568359375, |
| "learning_rate": 1.1564354154746007e-06, |
| "loss": 0.0006, |
| "num_tokens": 4370618.0, |
| "reward": 0.5112499818205833, |
| "reward_std": 0.22198906354606152, |
| "rewards/code_reward": 0.4166666567325592, |
| "rewards/format_reward": 0.9458333551883698, |
| "step": 379 |
| }, |
| { |
| "clip_ratio": 0.0003342999843880534, |
| "epoch": 0.007089221584814141, |
| "grad_norm": 0.09851517528295517, |
| "kl": 0.07568359375, |
| "learning_rate": 1.146179964769635e-06, |
| "loss": 0.0007, |
| "step": 380 |
| }, |
| { |
| "clip_ratio": 0.00041099853115156293, |
| "epoch": 0.007107877431089968, |
| "grad_norm": 0.10035818070173264, |
| "kl": 0.0843505859375, |
| "learning_rate": 1.1359918071412195e-06, |
| "loss": 0.0008, |
| "step": 381 |
| }, |
| { |
| "clip_ratio": 0.0005624304030789062, |
| "completion_length": 937.5417175292969, |
| "epoch": 0.007126533277365794, |
| "grad_norm": 0.09549058228731155, |
| "kl": 0.0206298828125, |
| "learning_rate": 1.1258713700640456e-06, |
| "loss": 0.0096, |
| "num_tokens": 4404219.0, |
| "reward": 0.667364239692688, |
| "reward_std": 0.4190048724412918, |
| "rewards/code_reward": 0.576114222407341, |
| "rewards/format_reward": 0.9124999642372131, |
| "step": 382 |
| }, |
| { |
| "clip_ratio": 0.0006415857642423362, |
| "epoch": 0.007145189123641621, |
| "grad_norm": 0.09549405425786972, |
| "kl": 0.0218505859375, |
| "learning_rate": 1.115819078171383e-06, |
| "loss": 0.0097, |
| "step": 383 |
| }, |
| { |
| "clip_ratio": 0.0007653232023585588, |
| "epoch": 0.0071638449699174475, |
| "grad_norm": 0.09525733441114426, |
| "kl": 0.021331787109375, |
| "learning_rate": 1.1058353532372667e-06, |
| "loss": 0.0096, |
| "step": 384 |
| }, |
| { |
| "clip_ratio": 0.000980669748969376, |
| "completion_length": 1172.1666870117188, |
| "epoch": 0.007182500816193275, |
| "grad_norm": 0.07197841256856918, |
| "kl": 0.01171875, |
| "learning_rate": 1.0959206141587998e-06, |
| "loss": -0.0047, |
| "num_tokens": 4448983.0, |
| "reward": 0.11828883737325668, |
| "reward_std": 0.10133323073387146, |
| "rewards/code_reward": 0.05703882873058319, |
| "rewards/format_reward": 0.6124999821186066, |
| "step": 385 |
| }, |
| { |
| "clip_ratio": 0.0007983763935044408, |
| "epoch": 0.0072011566624691015, |
| "grad_norm": 0.07335319370031357, |
| "kl": 0.01263427734375, |
| "learning_rate": 1.0860752769385766e-06, |
| "loss": -0.0049, |
| "step": 386 |
| }, |
| { |
| "clip_ratio": 0.001325059012742713, |
| "epoch": 0.007219812508744928, |
| "grad_norm": 0.07985582202672958, |
| "kl": 0.011871337890625, |
| "learning_rate": 1.0762997546672279e-06, |
| "loss": -0.005, |
| "step": 387 |
| }, |
| { |
| "clip_ratio": 0.0009500840096734464, |
| "completion_length": 764.8750305175781, |
| "epoch": 0.007238468355020755, |
| "grad_norm": 0.103624626994133, |
| "kl": 0.019287109375, |
| "learning_rate": 1.0665944575060914e-06, |
| "loss": 0.0971, |
| "num_tokens": 4478560.0, |
| "reward": 0.47962963581085205, |
| "reward_std": 0.2753693163394928, |
| "rewards/code_reward": 0.383796289563179, |
| "rewards/format_reward": 0.9583333432674408, |
| "step": 388 |
| }, |
| { |
| "clip_ratio": 0.0007495690369978547, |
| "epoch": 0.007257124201296581, |
| "grad_norm": 0.1009078398346901, |
| "kl": 0.02117919921875, |
| "learning_rate": 1.056959792669997e-06, |
| "loss": 0.0968, |
| "step": 389 |
| }, |
| { |
| "clip_ratio": 0.0008809762366581708, |
| "epoch": 0.007275780047572408, |
| "grad_norm": 0.10338608175516129, |
| "kl": 0.0205078125, |
| "learning_rate": 1.0473961644101856e-06, |
| "loss": 0.0969, |
| "step": 390 |
| }, |
| { |
| "clip_ratio": 0.0008085968729574233, |
| "completion_length": 664.7916870117188, |
| "epoch": 0.007294435893848234, |
| "grad_norm": 0.09412232786417007, |
| "kl": 0.02545166015625, |
| "learning_rate": 1.037903973997345e-06, |
| "loss": 0.0, |
| "num_tokens": 4507655.0, |
| "reward": 0.5595833584666252, |
| "reward_std": 0.18141867592930794, |
| "rewards/code_reward": 0.4583333432674408, |
| "rewards/format_reward": 1.012499988079071, |
| "step": 391 |
| }, |
| { |
| "clip_ratio": 0.0006834749365225434, |
| "epoch": 0.007313091740124062, |
| "grad_norm": 0.13709917664527893, |
| "kl": 0.0255126953125, |
| "learning_rate": 1.0284836197047737e-06, |
| "loss": 0.0001, |
| "step": 392 |
| }, |
| { |
| "clip_ratio": 0.0006806632154621184, |
| "epoch": 0.007331747586399888, |
| "grad_norm": 0.09327536076307297, |
| "kl": 0.0245361328125, |
| "learning_rate": 1.0191354967916712e-06, |
| "loss": 0.0001, |
| "step": 393 |
| }, |
| { |
| "clip_ratio": 0.0008892881742212921, |
| "completion_length": 666.8333435058594, |
| "epoch": 0.007350403432675715, |
| "grad_norm": 0.09906277060508728, |
| "kl": 0.02410888671875, |
| "learning_rate": 1.0098599974865515e-06, |
| "loss": 0.0111, |
| "num_tokens": 4542367.0, |
| "reward": 0.771384060382843, |
| "reward_std": 0.17679990455508232, |
| "rewards/code_reward": 0.6901340931653976, |
| "rewards/format_reward": 0.8125, |
| "step": 394 |
| }, |
| { |
| "clip_ratio": 0.0008279377470898908, |
| "epoch": 0.007369059278951541, |
| "grad_norm": 0.10521873086690903, |
| "kl": 0.02398681640625, |
| "learning_rate": 1.0006575109707898e-06, |
| "loss": 0.0108, |
| "step": 395 |
| }, |
| { |
| "clip_ratio": 0.0009161636990029365, |
| "epoch": 0.007387715125227368, |
| "grad_norm": 0.09651351720094681, |
| "kl": 0.023193359375, |
| "learning_rate": 9.915284233622877e-07, |
| "loss": 0.011, |
| "step": 396 |
| }, |
| { |
| "clip_ratio": 0.00039033169741742313, |
| "completion_length": 786.7500457763672, |
| "epoch": 0.0074063709715031945, |
| "grad_norm": 0.09486052393913269, |
| "kl": 0.02703857421875, |
| "learning_rate": 9.824731176992796e-07, |
| "loss": 0.0268, |
| "num_tokens": 4569841.0, |
| "reward": 0.4910014681518078, |
| "reward_std": 0.2589704990386963, |
| "rewards/code_reward": 0.37975146202370524, |
| "rewards/format_reward": 1.1124999523162842, |
| "step": 397 |
| }, |
| { |
| "clip_ratio": 0.00043224109685979784, |
| "epoch": 0.007425026817779021, |
| "grad_norm": 0.09329359233379364, |
| "kl": 0.03033447265625, |
| "learning_rate": 9.734919739242543e-07, |
| "loss": 0.0269, |
| "step": 398 |
| }, |
| { |
| "clip_ratio": 0.0005088059406261891, |
| "epoch": 0.0074436826640548485, |
| "grad_norm": 0.09744512289762497, |
| "kl": 0.0316162109375, |
| "learning_rate": 9.645853688680177e-07, |
| "loss": 0.027, |
| "step": 399 |
| }, |
| { |
| "clip_ratio": 0.0007269750349223614, |
| "completion_length": 853.0833435058594, |
| "epoch": 0.007462338510330675, |
| "grad_norm": 0.07783529907464981, |
| "kl": 0.029083251953125, |
| "learning_rate": 9.557536762338786e-07, |
| "loss": 0.0011, |
| "num_tokens": 4600515.0, |
| "reward": 0.4749346524477005, |
| "reward_std": 0.22702517919242382, |
| "rewards/code_reward": 0.39910128712654114, |
| "rewards/format_reward": 0.7583333253860474, |
| "step": 400 |
| }, |
| { |
| "clip_ratio": 0.0009095004643313587, |
| "epoch": 0.007480994356606502, |
| "grad_norm": 0.07656848430633545, |
| "kl": 0.029296875, |
| "learning_rate": 9.46997266581973e-07, |
| "loss": 0.0013, |
| "step": 401 |
| }, |
| { |
| "clip_ratio": 0.0006659157224930823, |
| "epoch": 0.007499650202882328, |
| "grad_norm": 0.07752339541912079, |
| "kl": 0.03289794921875, |
| "learning_rate": 9.383165073137115e-07, |
| "loss": 0.001, |
| "step": 402 |
| }, |
| { |
| "clip_ratio": 0.0015702053788118064, |
| "completion_length": 578.1666717529297, |
| "epoch": 0.007518306049158155, |
| "grad_norm": 0.11208923161029816, |
| "kl": 0.0478515625, |
| "learning_rate": 9.297117626563687e-07, |
| "loss": -0.0263, |
| "num_tokens": 4624963.0, |
| "reward": 0.8968709111213684, |
| "reward_std": 0.3033272698521614, |
| "rewards/code_reward": 0.78228759765625, |
| "rewards/format_reward": 1.1458333134651184, |
| "step": 403 |
| }, |
| { |
| "clip_ratio": 0.0014707182417623699, |
| "epoch": 0.007536961895433981, |
| "grad_norm": 0.11273815482854843, |
| "kl": 0.048583984375, |
| "learning_rate": 9.211833936477957e-07, |
| "loss": -0.0262, |
| "step": 404 |
| }, |
| { |
| "clip_ratio": 0.0014846965787000954, |
| "epoch": 0.007555617741709809, |
| "grad_norm": 0.11112023890018463, |
| "kl": 0.0447998046875, |
| "learning_rate": 9.127317581212753e-07, |
| "loss": -0.0261, |
| "step": 405 |
| }, |
| { |
| "clip_ratio": 0.000766730576287955, |
| "completion_length": 860.0000305175781, |
| "epoch": 0.007574273587985635, |
| "grad_norm": 0.07397118955850601, |
| "kl": 0.03369140625, |
| "learning_rate": 9.043572106905084e-07, |
| "loss": 0.0091, |
| "num_tokens": 4658083.0, |
| "reward": 0.6170833520591259, |
| "reward_std": 0.16897812485694885, |
| "rewards/code_reward": 0.5416666567325592, |
| "rewards/format_reward": 0.7541666999459267, |
| "step": 406 |
| }, |
| { |
| "clip_ratio": 0.0006420460704248399, |
| "epoch": 0.007592929434261462, |
| "grad_norm": 0.0758887380361557, |
| "kl": 0.03009033203125, |
| "learning_rate": 8.960601027347321e-07, |
| "loss": 0.0092, |
| "step": 407 |
| }, |
| { |
| "clip_ratio": 0.0007185917929746211, |
| "epoch": 0.007611585280537288, |
| "grad_norm": 0.07315798103809357, |
| "kl": 0.0299072265625, |
| "learning_rate": 8.878407823839788e-07, |
| "loss": 0.0091, |
| "step": 408 |
| }, |
| { |
| "clip_ratio": 0.0008533093787264079, |
| "completion_length": 1027.0000305175781, |
| "epoch": 0.007630241126813115, |
| "grad_norm": 0.08570308238267899, |
| "kl": 0.03460693359375, |
| "learning_rate": 8.796995945044689e-07, |
| "loss": 0.0127, |
| "num_tokens": 4699639.0, |
| "reward": 0.06000000238418579, |
| "reward_std": 0.05202655866742134, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.5999999940395355, |
| "step": 409 |
| }, |
| { |
| "clip_ratio": 0.0009294236078858376, |
| "epoch": 0.0076488969730889415, |
| "grad_norm": 0.07877162843942642, |
| "kl": 0.0330810546875, |
| "learning_rate": 8.716368806841405e-07, |
| "loss": 0.0129, |
| "step": 410 |
| }, |
| { |
| "clip_ratio": 0.0008056100341491401, |
| "epoch": 0.007667552819364768, |
| "grad_norm": 0.07954779267311096, |
| "kl": 0.031005859375, |
| "learning_rate": 8.636529792183171e-07, |
| "loss": 0.0125, |
| "step": 411 |
| }, |
| { |
| "clip_ratio": 0.0007956986082717776, |
| "completion_length": 737.4166870117188, |
| "epoch": 0.0076862086656405955, |
| "grad_norm": 0.08732157200574875, |
| "kl": 0.027008056640625, |
| "learning_rate": 8.557482250955144e-07, |
| "loss": 0.0307, |
| "num_tokens": 4734041.0, |
| "reward": 0.7656373083591461, |
| "reward_std": 0.2801913693547249, |
| "rewards/code_reward": 0.6764705926179886, |
| "rewards/format_reward": 0.8916666507720947, |
| "step": 412 |
| }, |
| { |
| "clip_ratio": 0.0005478582752402872, |
| "epoch": 0.007704864511916422, |
| "grad_norm": 0.0853348970413208, |
| "kl": 0.027374267578125, |
| "learning_rate": 8.479229499833844e-07, |
| "loss": 0.0308, |
| "step": 413 |
| }, |
| { |
| "clip_ratio": 0.0008438964432571083, |
| "epoch": 0.007723520358192249, |
| "grad_norm": 0.08625921607017517, |
| "kl": 0.02655029296875, |
| "learning_rate": 8.401774822147976e-07, |
| "loss": 0.0306, |
| "step": 414 |
| }, |
| { |
| "clip_ratio": 0.0019030745315831155, |
| "completion_length": 749.3750152587891, |
| "epoch": 0.007742176204468075, |
| "grad_norm": 0.09860255569219589, |
| "kl": 0.041900634765625, |
| "learning_rate": 8.325121467740695e-07, |
| "loss": -0.0314, |
| "num_tokens": 4770374.0, |
| "reward": 0.16708937659859657, |
| "reward_std": 0.18778712674975395, |
| "rewards/code_reward": 0.11292270570993423, |
| "rewards/format_reward": 0.5416666716337204, |
| "step": 415 |
| }, |
| { |
| "clip_ratio": 0.0017334477743133903, |
| "epoch": 0.007760832050743902, |
| "grad_norm": 0.09663218259811401, |
| "kl": 0.039215087890625, |
| "learning_rate": 8.249272652833226e-07, |
| "loss": -0.0314, |
| "step": 416 |
| }, |
| { |
| "clip_ratio": 0.0020351834245957434, |
| "epoch": 0.007779487897019728, |
| "grad_norm": 0.10066119581460953, |
| "kl": 0.0433349609375, |
| "learning_rate": 8.174231559889931e-07, |
| "loss": -0.0313, |
| "step": 417 |
| }, |
| { |
| "clip_ratio": 0.0017151011270470917, |
| "completion_length": 631.6666870117188, |
| "epoch": 0.007798143743295555, |
| "grad_norm": 0.09030763059854507, |
| "kl": 0.046661376953125, |
| "learning_rate": 8.100001337484787e-07, |
| "loss": 0.0406, |
| "num_tokens": 4798446.0, |
| "reward": 0.9333169162273407, |
| "reward_std": 0.1303430050611496, |
| "rewards/code_reward": 0.8399835228919983, |
| "rewards/format_reward": 0.9333333075046539, |
| "step": 418 |
| }, |
| { |
| "clip_ratio": 0.0014318364555947483, |
| "epoch": 0.007816799589571382, |
| "grad_norm": 0.1113353744149208, |
| "kl": 0.051666259765625, |
| "learning_rate": 8.026585100169251e-07, |
| "loss": 0.0409, |
| "step": 419 |
| }, |
| { |
| "clip_ratio": 0.0014859928633086383, |
| "epoch": 0.007835455435847209, |
| "grad_norm": 0.09183201938867569, |
| "kl": 0.055267333984375, |
| "learning_rate": 7.953985928341601e-07, |
| "loss": 0.0409, |
| "step": 420 |
| }, |
| { |
| "clip_ratio": 0.0009874806564766914, |
| "completion_length": 722.5833435058594, |
| "epoch": 0.007854111282123035, |
| "grad_norm": 0.301491379737854, |
| "kl": 0.03594970703125, |
| "learning_rate": 7.882206868117693e-07, |
| "loss": 0.0123, |
| "num_tokens": 4831364.0, |
| "reward": 0.46648313850164413, |
| "reward_std": 0.24989983439445496, |
| "rewards/code_reward": 0.39314980432391167, |
| "rewards/format_reward": 0.7333333492279053, |
| "step": 421 |
| }, |
| { |
| "clip_ratio": 0.0007859274046495557, |
| "epoch": 0.007872767128398862, |
| "grad_norm": 0.09945830702781677, |
| "kl": 0.03179931640625, |
| "learning_rate": 7.81125093120313e-07, |
| "loss": 0.0124, |
| "step": 422 |
| }, |
| { |
| "clip_ratio": 0.0010357568971812725, |
| "epoch": 0.007891422974674689, |
| "grad_norm": 0.09847865253686905, |
| "kl": 0.03509521484375, |
| "learning_rate": 7.741121094766916e-07, |
| "loss": 0.0124, |
| "step": 423 |
| }, |
| { |
| "clip_ratio": 0.0015808032476343215, |
| "completion_length": 677.6250152587891, |
| "epoch": 0.007910078820950515, |
| "grad_norm": 0.1073075532913208, |
| "kl": 0.05718994140625, |
| "learning_rate": 7.671820301316532e-07, |
| "loss": 0.0359, |
| "num_tokens": 4852811.0, |
| "reward": 0.2411028817296028, |
| "reward_std": 0.08415344171226025, |
| "rewards/code_reward": 0.15360287349903956, |
| "rewards/format_reward": 0.875, |
| "step": 424 |
| }, |
| { |
| "clip_ratio": 0.0017813361482694745, |
| "epoch": 0.007928734667226342, |
| "grad_norm": 0.10579974949359894, |
| "kl": 0.05438232421875, |
| "learning_rate": 7.603351458574474e-07, |
| "loss": 0.0363, |
| "step": 425 |
| }, |
| { |
| "clip_ratio": 0.001651192782446742, |
| "epoch": 0.007947390513502168, |
| "grad_norm": 0.11324102431535721, |
| "kl": 0.05828857421875, |
| "learning_rate": 7.535717439356255e-07, |
| "loss": 0.0361, |
| "step": 426 |
| }, |
| { |
| "clip_ratio": 0.0014180371072143316, |
| "completion_length": 693.3333435058594, |
| "epoch": 0.007966046359777995, |
| "grad_norm": 0.16692698001861572, |
| "kl": 0.0213623046875, |
| "learning_rate": 7.46892108144986e-07, |
| "loss": 0.0654, |
| "num_tokens": 4883755.0, |
| "reward": 0.6448469460010529, |
| "reward_std": 0.49255189299583435, |
| "rewards/code_reward": 0.5340135842561722, |
| "rewards/format_reward": 1.1083332896232605, |
| "step": 427 |
| }, |
| { |
| "clip_ratio": 0.0013092211447656155, |
| "epoch": 0.007984702206053821, |
| "grad_norm": 0.09344299882650375, |
| "kl": 0.02069091796875, |
| "learning_rate": 7.402965187496697e-07, |
| "loss": 0.0651, |
| "step": 428 |
| }, |
| { |
| "clip_ratio": 0.0009931237436830997, |
| "epoch": 0.00800335805232965, |
| "grad_norm": 0.0950753316283226, |
| "kl": 0.02081298828125, |
| "learning_rate": 7.337852524873974e-07, |
| "loss": 0.0652, |
| "step": 429 |
| }, |
| { |
| "clip_ratio": 0.000936219235882163, |
| "completion_length": 608.0000152587891, |
| "epoch": 0.008022013898605476, |
| "grad_norm": 0.10651998966932297, |
| "kl": 0.080657958984375, |
| "learning_rate": 7.273585825578608e-07, |
| "loss": -0.0054, |
| "num_tokens": 4910095.0, |
| "reward": 0.6445792466402054, |
| "reward_std": 0.16696248203516006, |
| "rewards/code_reward": 0.5420792102813721, |
| "rewards/format_reward": 1.024999976158142, |
| "step": 430 |
| }, |
| { |
| "clip_ratio": 0.0005899647367186844, |
| "epoch": 0.008040669744881303, |
| "grad_norm": 0.10619425773620605, |
| "kl": 0.083984375, |
| "learning_rate": 7.21016778611259e-07, |
| "loss": -0.0052, |
| "step": 431 |
| }, |
| { |
| "clip_ratio": 0.0008555436506867409, |
| "epoch": 0.00805932559115713, |
| "grad_norm": 0.10411829501390457, |
| "kl": 0.0823974609375, |
| "learning_rate": 7.147601067369835e-07, |
| "loss": -0.0054, |
| "step": 432 |
| }, |
| { |
| "clip_ratio": 0.0007757125422358513, |
| "completion_length": 393.5833435058594, |
| "epoch": 0.008077981437432956, |
| "grad_norm": 0.11761093884706497, |
| "kl": 0.08935546875, |
| "learning_rate": 7.085888294524561e-07, |
| "loss": -0.0138, |
| "num_tokens": 4927329.0, |
| "reward": 0.6537500023841858, |
| "reward_std": 0.17917166277766228, |
| "rewards/code_reward": 0.5416666679084301, |
| "rewards/format_reward": 1.120833307504654, |
| "step": 433 |
| }, |
| { |
| "clip_ratio": 0.0007667961181141436, |
| "epoch": 0.008096637283708782, |
| "grad_norm": 0.11278140544891357, |
| "kl": 0.076904296875, |
| "learning_rate": 7.025032056921117e-07, |
| "loss": -0.0137, |
| "step": 434 |
| }, |
| { |
| "clip_ratio": 0.000893244257895276, |
| "epoch": 0.008115293129984609, |
| "grad_norm": 0.1142934262752533, |
| "kl": 0.077880859375, |
| "learning_rate": 6.965034907965349e-07, |
| "loss": -0.0139, |
| "step": 435 |
| }, |
| { |
| "clip_ratio": 0.0005022610421292484, |
| "completion_length": 807.9583435058594, |
| "epoch": 0.008133948976260436, |
| "grad_norm": 0.0828719511628151, |
| "kl": 0.0184326171875, |
| "learning_rate": 6.905899365017462e-07, |
| "loss": 0.027, |
| "num_tokens": 4961552.0, |
| "reward": 0.7701181471347809, |
| "reward_std": 0.3888111412525177, |
| "rewards/code_reward": 0.6847014874219894, |
| "rewards/format_reward": 0.8541666865348816, |
| "step": 436 |
| }, |
| { |
| "clip_ratio": 0.0007263242732733488, |
| "epoch": 0.008152604822536262, |
| "grad_norm": 0.08436731994152069, |
| "kl": 0.0183563232421875, |
| "learning_rate": 6.847627909286409e-07, |
| "loss": 0.0272, |
| "step": 437 |
| }, |
| { |
| "clip_ratio": 0.0007911297434475273, |
| "epoch": 0.008171260668812089, |
| "grad_norm": 0.08241833746433258, |
| "kl": 0.0186309814453125, |
| "learning_rate": 6.790222985725761e-07, |
| "loss": 0.027, |
| "step": 438 |
| }, |
| { |
| "clip_ratio": 0.0008176612027455121, |
| "completion_length": 741.7083435058594, |
| "epoch": 0.008189916515087915, |
| "grad_norm": 0.09797824174165726, |
| "kl": 0.02587890625, |
| "learning_rate": 6.733687002931141e-07, |
| "loss": -0.0261, |
| "num_tokens": 4989097.0, |
| "reward": 0.3370833322405815, |
| "reward_std": 0.2753293476998806, |
| "rewards/code_reward": 0.2291666716337204, |
| "rewards/format_reward": 1.0791666209697723, |
| "step": 439 |
| }, |
| { |
| "clip_ratio": 0.0010906177922151983, |
| "epoch": 0.008208572361363742, |
| "grad_norm": 0.10210465639829636, |
| "kl": 0.026519775390625, |
| "learning_rate": 6.678022333039158e-07, |
| "loss": -0.0261, |
| "step": 440 |
| }, |
| { |
| "clip_ratio": 0.0006749743188265711, |
| "epoch": 0.008227228207639568, |
| "grad_norm": 0.09442983567714691, |
| "kl": 0.027008056640625, |
| "learning_rate": 6.623231311627876e-07, |
| "loss": -0.0263, |
| "step": 441 |
| }, |
| { |
| "clip_ratio": 0.0011484751594252884, |
| "completion_length": 866.75, |
| "epoch": 0.008245884053915395, |
| "grad_norm": 0.07345132529735565, |
| "kl": 0.03179931640625, |
| "learning_rate": 6.569316237618811e-07, |
| "loss": 0.0639, |
| "num_tokens": 5026687.0, |
| "reward": 0.2147694230079651, |
| "reward_std": 0.33931225538253784, |
| "rewards/code_reward": 0.132686085999012, |
| "rewards/format_reward": 0.8208333253860474, |
| "step": 442 |
| }, |
| { |
| "clip_ratio": 0.0010752396774478257, |
| "epoch": 0.008264539900191223, |
| "grad_norm": 0.07625681161880493, |
| "kl": 0.0343017578125, |
| "learning_rate": 6.516279373180499e-07, |
| "loss": 0.0637, |
| "step": 443 |
| }, |
| { |
| "clip_ratio": 0.001316602574661374, |
| "epoch": 0.00828319574646705, |
| "grad_norm": 0.07411348074674606, |
| "kl": 0.0333251953125, |
| "learning_rate": 6.464122943633543e-07, |
| "loss": 0.0638, |
| "step": 444 |
| }, |
| { |
| "clip_ratio": 0.0007968830032041296, |
| "completion_length": 739.5000305175781, |
| "epoch": 0.008301851592742876, |
| "grad_norm": 0.09090346097946167, |
| "kl": 0.0362548828125, |
| "learning_rate": 6.412849137357271e-07, |
| "loss": 0.0124, |
| "num_tokens": 5059039.0, |
| "reward": 0.6221906468272209, |
| "reward_std": 0.09545084834098816, |
| "rewards/code_reward": 0.530940592288971, |
| "rewards/format_reward": 0.9124999940395355, |
| "step": 445 |
| }, |
| { |
| "clip_ratio": 0.0005075681256130338, |
| "epoch": 0.008320507439018703, |
| "grad_norm": 0.09324011206626892, |
| "kl": 0.0374755859375, |
| "learning_rate": 6.3624601056979e-07, |
| "loss": 0.0122, |
| "step": 446 |
| }, |
| { |
| "clip_ratio": 0.0002910211042035371, |
| "epoch": 0.00833916328529453, |
| "grad_norm": 0.0913727805018425, |
| "kl": 0.0367431640625, |
| "learning_rate": 6.312957962878278e-07, |
| "loss": 0.0122, |
| "step": 447 |
| }, |
| { |
| "clip_ratio": 0.0009194318408844993, |
| "completion_length": 722.7500305175781, |
| "epoch": 0.008357819131570356, |
| "grad_norm": 0.08921171724796295, |
| "kl": 0.015167236328125, |
| "learning_rate": 6.264344785909181e-07, |
| "loss": -0.0187, |
| "num_tokens": 5088301.0, |
| "reward": 0.3760389983654022, |
| "reward_std": 0.21540092676877975, |
| "rewards/code_reward": 0.25312230736017227, |
| "rewards/format_reward": 1.2291666865348816, |
| "step": 448 |
| }, |
| { |
| "clip_ratio": 0.000731065621948801, |
| "epoch": 0.008376474977846183, |
| "grad_norm": 0.08806627243757248, |
| "kl": 0.0145263671875, |
| "learning_rate": 6.216622614502149e-07, |
| "loss": -0.0183, |
| "step": 449 |
| }, |
| { |
| "clip_ratio": 0.001043492229655385, |
| "epoch": 0.008395130824122009, |
| "grad_norm": 0.08825229853391647, |
| "kl": 0.013092041015625, |
| "learning_rate": 6.169793450983916e-07, |
| "loss": -0.0185, |
| "step": 450 |
| }, |
| { |
| "clip_ratio": 0.0011616212141234428, |
| "completion_length": 723.0416870117188, |
| "epoch": 0.008413786670397836, |
| "grad_norm": 0.10913647711277008, |
| "kl": 0.0169677734375, |
| "learning_rate": 6.123859260212393e-07, |
| "loss": 0.0171, |
| "num_tokens": 5114834.0, |
| "reward": 0.4591666907072067, |
| "reward_std": 0.2421144973486662, |
| "rewards/code_reward": 0.375, |
| "rewards/format_reward": 0.8416666686534882, |
| "step": 451 |
| }, |
| { |
| "clip_ratio": 0.0010601775429677218, |
| "epoch": 0.008432442516673662, |
| "grad_norm": 0.1069192886352539, |
| "kl": 0.0181884765625, |
| "learning_rate": 6.07882196949423e-07, |
| "loss": 0.0172, |
| "step": 452 |
| }, |
| { |
| "clip_ratio": 0.0011184008326381445, |
| "epoch": 0.008451098362949489, |
| "grad_norm": 0.10386621952056885, |
| "kl": 0.017730712890625, |
| "learning_rate": 6.034683468503948e-07, |
| "loss": 0.0172, |
| "step": 453 |
| }, |
| { |
| "clip_ratio": 0.001289009174797684, |
| "completion_length": 979.8333740234375, |
| "epoch": 0.008469754209225315, |
| "grad_norm": 0.08415919542312622, |
| "kl": 0.011962890625, |
| "learning_rate": 5.991445609204641e-07, |
| "loss": -0.0303, |
| "num_tokens": 5149462.0, |
| "reward": 0.5253205597400665, |
| "reward_std": 0.1456993781030178, |
| "rewards/code_reward": 0.4149038642644882, |
| "rewards/format_reward": 1.1041666269302368, |
| "step": 454 |
| }, |
| { |
| "clip_ratio": 0.000828628777526319, |
| "epoch": 0.008488410055501142, |
| "grad_norm": 0.0916297659277916, |
| "kl": 0.011932373046875, |
| "learning_rate": 5.949110205770292e-07, |
| "loss": -0.0305, |
| "step": 455 |
| }, |
| { |
| "clip_ratio": 0.0010398888844065368, |
| "epoch": 0.00850706590177697, |
| "grad_norm": 0.08343882113695145, |
| "kl": 0.0126953125, |
| "learning_rate": 5.90767903450964e-07, |
| "loss": -0.0306, |
| "step": 456 |
| }, |
| { |
| "clip_ratio": 0.0010439236648380756, |
| "completion_length": 754.125, |
| "epoch": 0.008525721748052797, |
| "grad_norm": 0.08621830493211746, |
| "kl": 0.019927978515625, |
| "learning_rate": 5.867153833791652e-07, |
| "loss": 0.0192, |
| "num_tokens": 5181685.0, |
| "reward": 0.3584918677806854, |
| "reward_std": 0.23101448267698288, |
| "rewards/code_reward": 0.2543252221075818, |
| "rewards/format_reward": 1.0416666567325592, |
| "step": 457 |
| }, |
| { |
| "clip_ratio": 0.0011347323888912797, |
| "epoch": 0.008544377594328623, |
| "grad_norm": 0.08586623519659042, |
| "kl": 0.020263671875, |
| "learning_rate": 5.827536303972587e-07, |
| "loss": 0.0193, |
| "step": 458 |
| }, |
| { |
| "clip_ratio": 0.0011065362195950001, |
| "epoch": 0.00856303344060445, |
| "grad_norm": 0.08708977699279785, |
| "kl": 0.018890380859375, |
| "learning_rate": 5.78882810732465e-07, |
| "loss": 0.0192, |
| "step": 459 |
| }, |
| { |
| "clip_ratio": 0.0007787059585098177, |
| "completion_length": 975.9167175292969, |
| "epoch": 0.008581689286880276, |
| "grad_norm": 0.11942039430141449, |
| "kl": 0.025390625, |
| "learning_rate": 5.75103086796625e-07, |
| "loss": 0.0441, |
| "num_tokens": 5223179.0, |
| "reward": 0.0820833370089531, |
| "reward_std": 0.06146477162837982, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.8208333253860474, |
| "step": 460 |
| }, |
| { |
| "clip_ratio": 0.0007732175581622869, |
| "epoch": 0.008600345133156103, |
| "grad_norm": 0.0921056792140007, |
| "kl": 0.02691650390625, |
| "learning_rate": 5.714146171793846e-07, |
| "loss": 0.0444, |
| "step": 461 |
| }, |
| { |
| "clip_ratio": 0.0012547720107249916, |
| "epoch": 0.00861900097943193, |
| "grad_norm": 0.09074374288320541, |
| "kl": 0.02606201171875, |
| "learning_rate": 5.678175566415422e-07, |
| "loss": 0.0445, |
| "step": 462 |
| }, |
| { |
| "clip_ratio": 0.0012275170302018523, |
| "completion_length": 753.1666870117188, |
| "epoch": 0.008637656825707756, |
| "grad_norm": 0.10298697650432587, |
| "kl": 0.0291748046875, |
| "learning_rate": 5.643120561085528e-07, |
| "loss": -0.0121, |
| "num_tokens": 5257791.0, |
| "reward": 0.3049456551671028, |
| "reward_std": 0.2707311660051346, |
| "rewards/code_reward": 0.22036230191588402, |
| "rewards/format_reward": 0.8458333015441895, |
| "step": 463 |
| }, |
| { |
| "clip_ratio": 0.0011390139115974307, |
| "epoch": 0.008656312671983583, |
| "grad_norm": 0.10827159136533737, |
| "kl": 0.02630615234375, |
| "learning_rate": 5.608982626641991e-07, |
| "loss": -0.0124, |
| "step": 464 |
| }, |
| { |
| "clip_ratio": 0.001239880220964551, |
| "epoch": 0.00867496851825941, |
| "grad_norm": 0.10058361291885376, |
| "kl": 0.0302734375, |
| "learning_rate": 5.575763195444166e-07, |
| "loss": -0.0123, |
| "step": 465 |
| }, |
| { |
| "clip_ratio": 0.0006581109191756696, |
| "completion_length": 424.9583435058594, |
| "epoch": 0.008693624364535236, |
| "grad_norm": 0.1420270800590515, |
| "kl": 0.0567169189453125, |
| "learning_rate": 5.543463661312847e-07, |
| "loss": -0.0107, |
| "num_tokens": 5276846.0, |
| "reward": 0.6923423409461975, |
| "reward_std": 0.068264564499259, |
| "rewards/code_reward": 0.5506756789982319, |
| "rewards/format_reward": 1.4166666269302368, |
| "step": 466 |
| }, |
| { |
| "clip_ratio": 0.0005906374426558614, |
| "epoch": 0.008712280210811062, |
| "grad_norm": 0.13097578287124634, |
| "kl": 0.0464019775390625, |
| "learning_rate": 5.512085379471808e-07, |
| "loss": -0.0104, |
| "step": 467 |
| }, |
| { |
| "clip_ratio": 0.0008538902620784938, |
| "epoch": 0.008730936057086889, |
| "grad_norm": 0.13688351213932037, |
| "kl": 0.0464019775390625, |
| "learning_rate": 5.481629666490903e-07, |
| "loss": -0.0102, |
| "step": 468 |
| }, |
| { |
| "clip_ratio": 0.0010946058901026845, |
| "completion_length": 817.5833435058594, |
| "epoch": 0.008749591903362716, |
| "grad_norm": 0.09143988788127899, |
| "kl": 0.03515625, |
| "learning_rate": 5.452097800230853e-07, |
| "loss": -0.0014, |
| "num_tokens": 5311588.0, |
| "reward": 0.29928937181830406, |
| "reward_std": 0.14651047810912132, |
| "rewards/code_reward": 0.21553937532007694, |
| "rewards/format_reward": 0.8374999761581421, |
| "step": 469 |
| }, |
| { |
| "clip_ratio": 0.0012111273827031255, |
| "epoch": 0.008768247749638544, |
| "grad_norm": 0.10869612544775009, |
| "kl": 0.03814697265625, |
| "learning_rate": 5.423491019789623e-07, |
| "loss": -0.0015, |
| "step": 470 |
| }, |
| { |
| "clip_ratio": 0.0006720305973431095, |
| "epoch": 0.00878690359591437, |
| "grad_norm": 0.10290330648422241, |
| "kl": 0.03741455078125, |
| "learning_rate": 5.395810525450425e-07, |
| "loss": -0.0014, |
| "step": 471 |
| }, |
| { |
| "clip_ratio": 0.0014231341774575412, |
| "completion_length": 797.8333740234375, |
| "epoch": 0.008805559442190197, |
| "grad_norm": 0.09828333556652069, |
| "kl": 0.0361328125, |
| "learning_rate": 5.369057478631359e-07, |
| "loss": 0.0535, |
| "num_tokens": 5343864.0, |
| "reward": 0.22396931797266006, |
| "reward_std": 0.12565985321998596, |
| "rewards/code_reward": 0.12938596308231354, |
| "rewards/format_reward": 0.945833295583725, |
| "step": 472 |
| }, |
| { |
| "clip_ratio": 0.0012132310948800296, |
| "epoch": 0.008824215288466023, |
| "grad_norm": 0.10423380881547928, |
| "kl": 0.0355224609375, |
| "learning_rate": 5.343233001836694e-07, |
| "loss": 0.0532, |
| "step": 473 |
| }, |
| { |
| "clip_ratio": 0.001151920121628791, |
| "epoch": 0.00884287113474185, |
| "grad_norm": 0.10031603276729584, |
| "kl": 0.03497314453125, |
| "learning_rate": 5.318338178609754e-07, |
| "loss": 0.0533, |
| "step": 474 |
| }, |
| { |
| "clip_ratio": 0.001284417463466525, |
| "completion_length": 560.7083435058594, |
| "epoch": 0.008861526981017677, |
| "grad_norm": 0.10862758755683899, |
| "kl": 0.05657958984375, |
| "learning_rate": 5.294374053487459e-07, |
| "loss": -0.0221, |
| "num_tokens": 5366837.0, |
| "reward": 0.7270297110080719, |
| "reward_std": 0.4961380064487457, |
| "rewards/code_reward": 0.6303630471229553, |
| "rewards/format_reward": 0.9666666388511658, |
| "step": 475 |
| }, |
| { |
| "clip_ratio": 0.0007715776737313718, |
| "epoch": 0.008880182827293503, |
| "grad_norm": 0.10750824958086014, |
| "kl": 0.05804443359375, |
| "learning_rate": 5.271341631956511e-07, |
| "loss": -0.0221, |
| "step": 476 |
| }, |
| { |
| "clip_ratio": 0.0011225882335565984, |
| "epoch": 0.00889883867356933, |
| "grad_norm": 0.10623571276664734, |
| "kl": 0.05712890625, |
| "learning_rate": 5.249241880411181e-07, |
| "loss": -0.0221, |
| "step": 477 |
| }, |
| { |
| "clip_ratio": 0.0007046593527775258, |
| "completion_length": 761.6250305175781, |
| "epoch": 0.008917494519845156, |
| "grad_norm": 0.10407714545726776, |
| "kl": 0.0296630859375, |
| "learning_rate": 5.228075726112785e-07, |
| "loss": 0.0165, |
| "num_tokens": 5401616.0, |
| "reward": 0.28155867755413055, |
| "reward_std": 0.2833222597837448, |
| "rewards/code_reward": 0.18364198692142963, |
| "rewards/format_reward": 0.9791666567325592, |
| "step": 478 |
| }, |
| { |
| "clip_ratio": 0.0008142671431414783, |
| "epoch": 0.008936150366120983, |
| "grad_norm": 0.10599526017904282, |
| "kl": 0.02685546875, |
| "learning_rate": 5.207844057150768e-07, |
| "loss": 0.0164, |
| "step": 479 |
| }, |
| { |
| "clip_ratio": 0.0007827770750736818, |
| "epoch": 0.00895480621239681, |
| "grad_norm": 0.10547417402267456, |
| "kl": 0.02947998046875, |
| "learning_rate": 5.188547722405437e-07, |
| "loss": 0.0163, |
| "step": 480 |
| }, |
| { |
| "clip_ratio": 0.0009513153345324099, |
| "completion_length": 1125.7500305175781, |
| "epoch": 0.008973462058672636, |
| "grad_norm": 0.10954833775758743, |
| "kl": 0.014617919921875, |
| "learning_rate": 5.170187531512351e-07, |
| "loss": 0.0535, |
| "num_tokens": 5448530.0, |
| "reward": 0.08833334222435951, |
| "reward_std": 0.054226743057370186, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.8833333253860474, |
| "step": 481 |
| }, |
| { |
| "clip_ratio": 0.0012509520165622234, |
| "epoch": 0.008992117904948463, |
| "grad_norm": 0.09105175733566284, |
| "kl": 0.01336669921875, |
| "learning_rate": 5.152764254828348e-07, |
| "loss": 0.0535, |
| "step": 482 |
| }, |
| { |
| "clip_ratio": 0.0008532060019206256, |
| "epoch": 0.009010773751224289, |
| "grad_norm": 0.09203584492206573, |
| "kl": 0.01513671875, |
| "learning_rate": 5.136278623399225e-07, |
| "loss": 0.0533, |
| "step": 483 |
| }, |
| { |
| "clip_ratio": 0.0007692791696172208, |
| "completion_length": 997.0833740234375, |
| "epoch": 0.009029429597500117, |
| "grad_norm": 0.09475655108690262, |
| "kl": 0.03662109375, |
| "learning_rate": 5.120731328929058e-07, |
| "loss": -0.0121, |
| "num_tokens": 5492116.0, |
| "reward": 0.5058333426713943, |
| "reward_std": 0.3466748893260956, |
| "rewards/code_reward": 0.40416665747761726, |
| "rewards/format_reward": 1.0166666209697723, |
| "step": 484 |
| }, |
| { |
| "clip_ratio": 0.000924331892747432, |
| "epoch": 0.009048085443775944, |
| "grad_norm": 0.09625120460987091, |
| "kl": 0.033935546875, |
| "learning_rate": 5.106123023751187e-07, |
| "loss": -0.0121, |
| "step": 485 |
| }, |
| { |
| "clip_ratio": 0.0005081451236037537, |
| "epoch": 0.00906674129005177, |
| "grad_norm": 0.08786183595657349, |
| "kl": 0.03375244140625, |
| "learning_rate": 5.092454320800833e-07, |
| "loss": -0.0123, |
| "step": 486 |
| }, |
| { |
| "clip_ratio": 0.00082587351789698, |
| "completion_length": 691.6250305175781, |
| "epoch": 0.009085397136327597, |
| "grad_norm": 0.08786230534315109, |
| "kl": 0.0277099609375, |
| "learning_rate": 5.079725793589405e-07, |
| "loss": 0.0248, |
| "num_tokens": 5518999.0, |
| "reward": 0.9639226198196411, |
| "reward_std": 0.33985109627246857, |
| "rewards/code_reward": 0.861005961894989, |
| "rewards/format_reward": 1.0291666388511658, |
| "step": 487 |
| }, |
| { |
| "clip_ratio": 0.0007328885840252042, |
| "epoch": 0.009104052982603424, |
| "grad_norm": 0.08928890526294708, |
| "kl": 0.02886962890625, |
| "learning_rate": 5.067937976180407e-07, |
| "loss": 0.0248, |
| "step": 488 |
| }, |
| { |
| "clip_ratio": 0.0006209511193446815, |
| "epoch": 0.00912270882887925, |
| "grad_norm": 0.08875181525945663, |
| "kl": 0.02874755859375, |
| "learning_rate": 5.057091363167046e-07, |
| "loss": 0.0247, |
| "step": 489 |
| }, |
| { |
| "clip_ratio": 0.0006376176825142466, |
| "completion_length": 884.0000610351562, |
| "epoch": 0.009141364675155077, |
| "grad_norm": 0.0868891254067421, |
| "kl": 0.0316162109375, |
| "learning_rate": 5.047186409651489e-07, |
| "loss": 0.0218, |
| "num_tokens": 5557063.0, |
| "reward": 0.3246118724346161, |
| "reward_std": 0.20474953204393387, |
| "rewards/code_reward": 0.25627854466438293, |
| "rewards/format_reward": 0.6833333075046539, |
| "step": 490 |
| }, |
| { |
| "clip_ratio": 0.0008357224578503519, |
| "epoch": 0.009160020521430903, |
| "grad_norm": 0.08658529072999954, |
| "kl": 0.03106689453125, |
| "learning_rate": 5.038223531225742e-07, |
| "loss": 0.022, |
| "step": 491 |
| }, |
| { |
| "clip_ratio": 0.0008412750175921246, |
| "epoch": 0.00917867636770673, |
| "grad_norm": 0.08690661936998367, |
| "kl": 0.0299072265625, |
| "learning_rate": 5.030203103954232e-07, |
| "loss": 0.022, |
| "step": 492 |
| }, |
| { |
| "clip_ratio": 0.000971353001659736, |
| "completion_length": 961.7916870117188, |
| "epoch": 0.009197332213982556, |
| "grad_norm": 0.08556245267391205, |
| "kl": 0.019775390625, |
| "learning_rate": 5.023125464358026e-07, |
| "loss": -0.0011, |
| "num_tokens": 5594090.0, |
| "reward": 0.2715613692998886, |
| "reward_std": 0.2223658785223961, |
| "rewards/code_reward": 0.18156136944890022, |
| "rewards/format_reward": 0.9000000357627869, |
| "step": 493 |
| }, |
| { |
| "clip_ratio": 0.0009858837001957, |
| "epoch": 0.009215988060258383, |
| "grad_norm": 0.08375484496355057, |
| "kl": 0.01922607421875, |
| "learning_rate": 5.016990909400709e-07, |
| "loss": -0.0013, |
| "step": 494 |
| }, |
| { |
| "clip_ratio": 0.000899194972589612, |
| "epoch": 0.00923464390653421, |
| "grad_norm": 0.08476914465427399, |
| "kl": 0.02081298828125, |
| "learning_rate": 5.011799696475915e-07, |
| "loss": -0.0011, |
| "step": 495 |
| }, |
| { |
| "clip_ratio": 0.0013499458436854184, |
| "completion_length": 723.0000305175781, |
| "epoch": 0.009253299752810036, |
| "grad_norm": 0.11245770007371902, |
| "kl": 0.0792236328125, |
| "learning_rate": 5.007552043396547e-07, |
| "loss": 0.0186, |
| "num_tokens": 5628218.0, |
| "reward": 0.19834937155246735, |
| "reward_std": 0.12879429571330547, |
| "rewards/code_reward": 0.11418268829584122, |
| "rewards/format_reward": 0.8416666686534882, |
| "step": 496 |
| }, |
| { |
| "clip_ratio": 0.0017962601850740612, |
| "epoch": 0.009271955599085864, |
| "grad_norm": 0.1085243970155716, |
| "kl": 0.07861328125, |
| "learning_rate": 5.004248128385618e-07, |
| "loss": 0.019, |
| "step": 497 |
| }, |
| { |
| "clip_ratio": 0.001379593217279762, |
| "epoch": 0.009290611445361691, |
| "grad_norm": 0.110112763941288, |
| "kl": 0.0782470703125, |
| "learning_rate": 5.001888090068784e-07, |
| "loss": 0.0188, |
| "step": 498 |
| }, |
| { |
| "clip_ratio": 0.0009661116928327829, |
| "completion_length": 820.2083435058594, |
| "epoch": 0.009309267291637517, |
| "grad_norm": 0.09085725247859955, |
| "kl": 0.030242919921875, |
| "learning_rate": 5.000472027468528e-07, |
| "loss": 0.0247, |
| "num_tokens": 5666887.0, |
| "reward": 0.0845833346247673, |
| "reward_std": 0.05373080633580685, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.8458333313465118, |
| "step": 499 |
| }, |
| { |
| "clip_ratio": 0.0010680440173018724, |
| "epoch": 0.009327923137913344, |
| "grad_norm": 0.08862722665071487, |
| "kl": 0.02752685546875, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 0.0244, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.009327923137913344, |
| "step": 500, |
| "total_flos": 0.0, |
| "train_loss": 0.010072903922897239, |
| "train_runtime": 28596.067, |
| "train_samples_per_second": 0.42, |
| "train_steps_per_second": 0.017 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|