diff --git a/.gitattributes b/.gitattributes index 875997aea7d7335a58d131207af08c5bd4a3a53b..79ae45858a5d8dd00cfd40094ac44bc4fd0c762c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -17414,3 +17414,125 @@ neuronxcc-2.23.6484.0+3b612583/MODULE_17549650398225281442+f7f529f3/model.neff f neuronxcc-2.23.6484.0+3b612583/MODULE_2912675361925083263+f7f529f3/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.23.6484.0+3b612583/MODULE_6870343675213383355+f7f529f3/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.23.6484.0+3b612583/MODULE_9355619343028815470+f7f529f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0cb2d9cedfdb8f619286+2e929b57/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_105c0ff965441eae0a1c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_12e7cddd1a2b1bbf480f+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_15503648c89ebbfa6dc4+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1b43d7c01692b5e7842e+9e8e849c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_213ad9728f93ce707ff0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3164b243bf918aa0b477+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_31af602603367560091b+4070fd2c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_531086d2d1e6a2d23ebd+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7cc268c882c393abfe7c+677eeb9d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_952fafb4c315904dcb0e+f2c40fef/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a02e663f9d5e1913e9e3+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a36debd95d53c8bebd53+f7cce17f/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b681faf194284309cdeb+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c05382aac6cf9c66958b+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ea76254177cf576ffcb2+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ede677b769dff6a4314c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f9154701c36b9388a53d+00ac9e50/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/4ef38fe13fd2c7209a1f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/4ef38fe13fd2c7209a1f.json new file mode 100644 index 0000000000000000000000000000000000000000..f619ae9ca34bdb27ffc8e7855d0a489aeed8d6cb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/4ef38fe13fd2c7209a1f.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/gemma3_text/unsloth/gemma-3-270m-it/4ef38fe13fd2c7209a1f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/gemma3_text/unsloth/gemma-3-270m-it/4ef38fe13fd2c7209a1f.json new file mode 100644 index 0000000000000000000000000000000000000000..f619ae9ca34bdb27ffc8e7855d0a489aeed8d6cb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/gemma3_text/unsloth/gemma-3-270m-it/4ef38fe13fd2c7209a1f.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/70022928da5b1a3a3562.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/70022928da5b1a3a3562.json new file mode 100644 index 0000000000000000000000000000000000000000..f0130b9697754fee793ff6ba25100b6579c30086 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/70022928da5b1a3a3562.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/3dc71b9dfd0bde51256a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/3dc71b9dfd0bde51256a.json new file mode 100644 index 0000000000000000000000000000000000000000..e73fe696387bd48bd473ae7a9b01d477acee105c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/3dc71b9dfd0bde51256a.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/gemma3_text/unsloth/gemma-3-270m-it/70022928da5b1a3a3562.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/gemma3_text/unsloth/gemma-3-270m-it/70022928da5b1a3a3562.json new file mode 100644 index 0000000000000000000000000000000000000000..f0130b9697754fee793ff6ba25100b6579c30086 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/gemma3_text/unsloth/gemma-3-270m-it/70022928da5b1a3a3562.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3dc71b9dfd0bde51256a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3dc71b9dfd0bde51256a.json new file mode 100644 index 0000000000000000000000000000000000000000..e73fe696387bd48bd473ae7a9b01d477acee105c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3dc71b9dfd0bde51256a.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/43349245d62eb2d76d64.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/43349245d62eb2d76d64.json new file mode 100644 index 0000000000000000000000000000000000000000..fddb46b6ab70c7bec1b93fd957e3cdea8799c0fd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/43349245d62eb2d76d64.json @@ -0,0 +1,189 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "_task": "image-text-to-text", + "architectures": [ + "Idefics3ForConditionalGeneration" + ], + "dtype": "bfloat16", + "image_token_id": 49190, + "model_type": "idefics3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "checkpoint_revision": "7e3e67edbbed1bf9888184d9df282b700a323964", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 64, + "image_size": 512, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 2048, + "max_num_images": 17, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "scale_factor": 4, + "text_config": { + "_attn_implementation_autoset": false, + "_flash_attn_2_enabled": true, + "_name_or_path": "None", + "architectures": [ + "VLlama3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 576, + "initializer_range": 0.041666666666666664, + "intermediate_size": 1536, + "is_llama_config": true, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "neftune_noise_alpha": 0.0, + "num_attention_heads": 9, + "num_hidden_layers": 30, + "num_key_value_heads": 3, + "pad_token_id": 2, + "perceiver_config": { + "_attn_implementation_autoset": false, + "_name_or_path": "", + "add_cross_attention": false, + "architectures": null, + "attention_dropout": 0.0, + "bad_words_ids": null, + "begin_suppress_tokens": null, + "bos_token_id": null, + "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": null, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "silu", + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "length_penalty": 1.0, + "max_length": 20, + "min_length": 0, + "model_type": "vllama3", + "no_repeat_ngram_size": 0, + "num_beam_groups": 1, + "num_beams": 1, + "num_key_value_heads": 1, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": null, + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "qk_layer_norms_perceiver": false, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "resampler_depth": 6, + "resampler_head_dim": 96, + "resampler_n_heads": 16, + "resampler_n_latents": 64, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "suppress_tokens": null, + "task_specific_params": null, + "temperature": 1.0, + "tf_legacy_loss": false, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": null, + "torchscript": false, + "transformers_version": "4.46.0", + "typical_p": 1.0, + "use_bfloat16": false + }, + "pixel_shuffle_factor": 4, + "pretraining_tp": 1, + "qk_layer_norms": false, + "rms_norm_eps": 1e-05, + "rope_interleaved": false, + "rope_scaling": null, + "rope_theta": 100000, + "transformers.js_config": { + "kv_cache_dtype": { + "fp16": "float16", + "q4f16": "float16" + } + }, + "use_cache": true, + "use_resampler": false, + "vocab_size": 49280 + }, + "tie_word_embeddings": false, + "transformers.js_config": { + "kv_cache_dtype": { + "fp16": "float16", + "q4f16": "float16" + } + }, + "use_cache": true, + "vision_config": { + "_attn_implementation_autoset": false, + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 768, + "image_size": 512, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-06, + "max_image_size": { + "longest_edge": 512 + }, + "model_type": "idefics3_vision", + "num_attention_heads": 12, + "num_channels": 3, + "num_hidden_layers": 12, + "patch_size": 16, + "size": { + "longest_edge": 2048 + }, + "tie_word_embeddings": false, + "use_base_siglip": true + }, + "vocab_size": 49280 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/566b663bde0d89e24b29.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/566b663bde0d89e24b29.json new file mode 100644 index 0000000000000000000000000000000000000000..2099dbe7b62c3b2112501768bc8e74d8dcd2983c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/566b663bde0d89e24b29.json @@ -0,0 +1,188 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "_task": "image-text-to-text", + "architectures": [ + "Idefics3ForConditionalGeneration" + ], + "dtype": "bfloat16", + "image_token_id": 49190, + "model_type": "idefics3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "checkpoint_revision": "7e3e67edbbed1bf9888184d9df282b700a323964", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 64, + "image_size": 512, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 2048, + "max_num_images": 1, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "scale_factor": 4, + "text_config": { + "_attn_implementation_autoset": false, + "_flash_attn_2_enabled": true, + "_name_or_path": "None", + "architectures": [ + "VLlama3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 576, + "initializer_range": 0.041666666666666664, + "intermediate_size": 1536, + "is_llama_config": true, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "neftune_noise_alpha": 0.0, + "num_attention_heads": 9, + "num_hidden_layers": 30, + "num_key_value_heads": 3, + "pad_token_id": 2, + "perceiver_config": { + "_attn_implementation_autoset": false, + "_name_or_path": "", + "add_cross_attention": false, + "architectures": null, + "attention_dropout": 0.0, + "bad_words_ids": null, + "begin_suppress_tokens": null, + "bos_token_id": null, + "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": null, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "silu", + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "length_penalty": 1.0, + "max_length": 20, + "min_length": 0, + "model_type": "vllama3", + "no_repeat_ngram_size": 0, + "num_beam_groups": 1, + "num_beams": 1, + "num_key_value_heads": 1, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": null, + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "qk_layer_norms_perceiver": false, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "resampler_depth": 6, + "resampler_head_dim": 96, + "resampler_n_heads": 16, + "resampler_n_latents": 64, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "suppress_tokens": null, + "task_specific_params": null, + "temperature": 1.0, + "tf_legacy_loss": false, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": null, + "torchscript": false, + "transformers_version": "4.46.0", + "typical_p": 1.0, + "use_bfloat16": false + }, + "pixel_shuffle_factor": 4, + "pretraining_tp": 1, + "qk_layer_norms": false, + "rms_norm_eps": 1e-05, + "rope_interleaved": false, + "rope_scaling": null, + "rope_theta": 100000, + "transformers.js_config": { + "kv_cache_dtype": { + "fp16": "float16", + "q4f16": "float16" + } + }, + "use_cache": true, + "use_resampler": false, + "vocab_size": 49280 + }, + "tie_word_embeddings": false, + "transformers.js_config": { + "kv_cache_dtype": { + "fp16": "float16", + "q4f16": "float16" + } + }, + "use_cache": true, + "vision_config": { + "_attn_implementation_autoset": false, + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 768, + "image_size": 512, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-06, + "max_image_size": { + "longest_edge": 512 + }, + "model_type": "idefics3_vision", + "num_attention_heads": 12, + "num_channels": 3, + "num_hidden_layers": 12, + "patch_size": 16, + "size": { + "longest_edge": 2048 + }, + "tie_word_embeddings": false, + "use_base_siglip": true + }, + "vocab_size": 49280 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/756910ddabb85196bbca.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/756910ddabb85196bbca.json new file mode 100644 index 0000000000000000000000000000000000000000..ee198b3e64a55b966b8741c75b37c3d76099a817 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/756910ddabb85196bbca.json @@ -0,0 +1,188 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "_task": "image-text-to-text", + "architectures": [ + "Idefics3ForConditionalGeneration" + ], + "dtype": "bfloat16", + "image_token_id": 49190, + "model_type": "idefics3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "checkpoint_revision": "7e3e67edbbed1bf9888184d9df282b700a323964", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 64, + "image_size": 512, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 2048, + "max_num_images": 17, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "scale_factor": 4, + "text_config": { + "_attn_implementation_autoset": false, + "_flash_attn_2_enabled": true, + "_name_or_path": "None", + "architectures": [ + "VLlama3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 576, + "initializer_range": 0.041666666666666664, + "intermediate_size": 1536, + "is_llama_config": true, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "neftune_noise_alpha": 0.0, + "num_attention_heads": 9, + "num_hidden_layers": 30, + "num_key_value_heads": 3, + "pad_token_id": 2, + "perceiver_config": { + "_attn_implementation_autoset": false, + "_name_or_path": "", + "add_cross_attention": false, + "architectures": null, + "attention_dropout": 0.0, + "bad_words_ids": null, + "begin_suppress_tokens": null, + "bos_token_id": null, + "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": null, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "silu", + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "length_penalty": 1.0, + "max_length": 20, + "min_length": 0, + "model_type": "vllama3", + "no_repeat_ngram_size": 0, + "num_beam_groups": 1, + "num_beams": 1, + "num_key_value_heads": 1, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": null, + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "qk_layer_norms_perceiver": false, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "resampler_depth": 6, + "resampler_head_dim": 96, + "resampler_n_heads": 16, + "resampler_n_latents": 64, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "suppress_tokens": null, + "task_specific_params": null, + "temperature": 1.0, + "tf_legacy_loss": false, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": null, + "torchscript": false, + "transformers_version": "4.46.0", + "typical_p": 1.0, + "use_bfloat16": false + }, + "pixel_shuffle_factor": 4, + "pretraining_tp": 1, + "qk_layer_norms": false, + "rms_norm_eps": 1e-05, + "rope_interleaved": false, + "rope_scaling": null, + "rope_theta": 100000, + "transformers.js_config": { + "kv_cache_dtype": { + "fp16": "float16", + "q4f16": "float16" + } + }, + "use_cache": true, + "use_resampler": false, + "vocab_size": 49280 + }, + "tie_word_embeddings": false, + "transformers.js_config": { + "kv_cache_dtype": { + "fp16": "float16", + "q4f16": "float16" + } + }, + "use_cache": true, + "vision_config": { + "_attn_implementation_autoset": false, + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 768, + "image_size": 512, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-06, + "max_image_size": { + "longest_edge": 512 + }, + "model_type": "idefics3_vision", + "num_attention_heads": 12, + "num_channels": 3, + "num_hidden_layers": 12, + "patch_size": 16, + "size": { + "longest_edge": 2048 + }, + "tie_word_embeddings": false, + "use_base_siglip": true + }, + "vocab_size": 49280 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/969ba466803204052129.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/969ba466803204052129.json new file mode 100644 index 0000000000000000000000000000000000000000..d161d7c542206f6f05a664af4ddb990655981d95 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/969ba466803204052129.json @@ -0,0 +1,189 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "_task": "image-text-to-text", + "architectures": [ + "Idefics3ForConditionalGeneration" + ], + "dtype": "bfloat16", + "image_token_id": 49190, + "model_type": "idefics3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "checkpoint_revision": "7e3e67edbbed1bf9888184d9df282b700a323964", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 64, + "image_size": 512, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 2048, + "max_num_images": 17, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "scale_factor": 4, + "text_config": { + "_attn_implementation_autoset": false, + "_flash_attn_2_enabled": true, + "_name_or_path": "None", + "architectures": [ + "VLlama3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 576, + "initializer_range": 0.041666666666666664, + "intermediate_size": 1536, + "is_llama_config": true, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "neftune_noise_alpha": 0.0, + "num_attention_heads": 9, + "num_hidden_layers": 30, + "num_key_value_heads": 3, + "pad_token_id": 2, + "perceiver_config": { + "_attn_implementation_autoset": false, + "_name_or_path": "", + "add_cross_attention": false, + "architectures": null, + "attention_dropout": 0.0, + "bad_words_ids": null, + "begin_suppress_tokens": null, + "bos_token_id": null, + "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": null, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "silu", + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "length_penalty": 1.0, + "max_length": 20, + "min_length": 0, + "model_type": "vllama3", + "no_repeat_ngram_size": 0, + "num_beam_groups": 1, + "num_beams": 1, + "num_key_value_heads": 1, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": null, + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "qk_layer_norms_perceiver": false, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "resampler_depth": 6, + "resampler_head_dim": 96, + "resampler_n_heads": 16, + "resampler_n_latents": 64, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "suppress_tokens": null, + "task_specific_params": null, + "temperature": 1.0, + "tf_legacy_loss": false, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": null, + "torchscript": false, + "transformers_version": "4.46.0", + "typical_p": 1.0, + "use_bfloat16": false + }, + "pixel_shuffle_factor": 4, + "pretraining_tp": 1, + "qk_layer_norms": false, + "rms_norm_eps": 1e-05, + "rope_interleaved": false, + "rope_scaling": null, + "rope_theta": 100000, + "transformers.js_config": { + "kv_cache_dtype": { + "fp16": "float16", + "q4f16": "float16" + } + }, + "use_cache": true, + "use_resampler": false, + "vocab_size": 49280 + }, + "tie_word_embeddings": false, + "transformers.js_config": { + "kv_cache_dtype": { + "fp16": "float16", + "q4f16": "float16" + } + }, + "use_cache": true, + "vision_config": { + "_attn_implementation_autoset": false, + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 768, + "image_size": 512, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-06, + "max_image_size": { + "longest_edge": 512 + }, + "model_type": "idefics3_vision", + "num_attention_heads": 12, + "num_channels": 3, + "num_hidden_layers": 12, + "patch_size": 16, + "size": { + "longest_edge": 2048 + }, + "tie_word_embeddings": false, + "use_base_siglip": true + }, + "vocab_size": 49280 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/efa7f046361aa22fb2b9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/efa7f046361aa22fb2b9.json new file mode 100644 index 0000000000000000000000000000000000000000..81172029b2ae3a962fb9759e64d903783ffbb177 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/efa7f046361aa22fb2b9.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/fa7d24fcf68294ebad29.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/fa7d24fcf68294ebad29.json new file mode 100644 index 0000000000000000000000000000000000000000..fe434f7147133402321ed0d03ac1be244d7ce767 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/fa7d24fcf68294ebad29.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/e5ddd3afb102c02baf93.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/e5ddd3afb102c02baf93.json new file mode 100644 index 0000000000000000000000000000000000000000..7c84929d07a94f5748d3db71a471ad556ca5d869 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/e5ddd3afb102c02baf93.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/51c77185b9832eaebdfc.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/51c77185b9832eaebdfc.json new file mode 100644 index 0000000000000000000000000000000000000000..90a8c3f5a61d7f2ea03dc500de5771afb26bf9cf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/51c77185b9832eaebdfc.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/51eb5f08405da966cefd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/51eb5f08405da966cefd.json new file mode 100644 index 0000000000000000000000000000000000000000..09d956d9caf00958654ad919393f3e3230558f81 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/51eb5f08405da966cefd.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/6109da25218b5116e9b5.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/6109da25218b5116e9b5.json new file mode 100644 index 0000000000000000000000000000000000000000..7a733ee7ddc05d93510a5731601ebffe03dea4eb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/6109da25218b5116e9b5.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/03a64a22d1b885eece61.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/03a64a22d1b885eece61.json new file mode 100644 index 0000000000000000000000000000000000000000..491849bf23560601bb4029a0deb3226d58c1eeaf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/03a64a22d1b885eece61.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/1c8b4a21eb41ff235945.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/1c8b4a21eb41ff235945.json new file mode 100644 index 0000000000000000000000000000000000000000..e3a4ed53c55cf5b6a3e26694d52b562dee6ee224 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/1c8b4a21eb41ff235945.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/b12b8be52c487dcc560f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/b12b8be52c487dcc560f.json new file mode 100644 index 0000000000000000000000000000000000000000..a40abd7fcd67f91db0178256b1581d066f17d13e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/b12b8be52c487dcc560f.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/97e03fcfc7f46ac8836e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/97e03fcfc7f46ac8836e.json new file mode 100644 index 0000000000000000000000000000000000000000..56e1604a7f579df3a3c713967d6f58b68267db94 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/97e03fcfc7f46ac8836e.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/078d41a850db4b8221d6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/078d41a850db4b8221d6.json new file mode 100644 index 0000000000000000000000000000000000000000..6064e52d6ccf0e5c786e134975e9a6f30f4dfc4a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/078d41a850db4b8221d6.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/2dc771f3c35b9af34ce4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/2dc771f3c35b9af34ce4.json new file mode 100644 index 0000000000000000000000000000000000000000..bb0fec7381ac4e4a22ecfa1152f72d171dc05a71 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/2dc771f3c35b9af34ce4.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/d04c2a3f18746af5f901.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/d04c2a3f18746af5f901.json new file mode 100644 index 0000000000000000000000000000000000000000..257c88286ef6d77fef15d888092a2b56ea8450f8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/d04c2a3f18746af5f901.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/29c61ad2f54baaec4c1d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/29c61ad2f54baaec4c1d.json new file mode 100644 index 0000000000000000000000000000000000000000..d5f28f2553ecf66f80d7f9b2fa557e044ea1592b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/29c61ad2f54baaec4c1d.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/7f05bde17c7b0ffeb657897697f23d182f406b76ced7f1b2cd5741dc93fe2e2e/dadf01a9f544218eabdd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/7f05bde17c7b0ffeb657897697f23d182f406b76ced7f1b2cd5741dc93fe2e2e/dadf01a9f544218eabdd.json new file mode 100644 index 0000000000000000000000000000000000000000..1c135e3215a6c0703734fe294f7ff5150f8f2f9f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/7f05bde17c7b0ffeb657897697f23d182f406b76ced7f1b2cd5741dc93fe2e2e/dadf01a9f544218eabdd.json @@ -0,0 +1,125 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "architectures": [ + "Llama4ForConditionalGeneration" + ], + "boi_token_index": 200080, + "dtype": "bfloat16", + "eoi_token_index": 200081, + "image_token_index": 200092, + "model_type": "llama4", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "text_config": { + "_attn_implementation_autoset": true, + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "bos_token_id": 200000, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "eos_token_id": [ + 200001, + 200007, + 200008 + ], + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "pad_token_id": 200018, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 + }, + "tie_word_embeddings": false, + "vision_config": { + "_attn_implementation_autoset": true, + "_vision_feature_layer": -1, + "attention_dropout": 0.0, + "hidden_act": "gelu", + "hidden_size": 32, + "image_size": 336, + "initializer_range": 0.02, + "intermediate_size": 128, + "model_type": "llama4_vision_model", + "multi_modal_projector_bias": false, + "norm_eps": 1e-05, + "num_attention_heads": 1, + "num_channels": 3, + "num_hidden_layers": 2, + "patch_size": 14, + "pixel_shuffle_ratio": 0.5, + "projector_dropout": 0.0, + "projector_input_dim": 32, + "projector_output_dim": 32, + "rope_theta": 10000, + "vision_feature_layer": -1, + "vision_feature_select_strategy": "default", + "vision_output_dim": 32 + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/3c497b71919d297a9da9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/3c497b71919d297a9da9.json new file mode 100644 index 0000000000000000000000000000000000000000..a1e805d40d2917bb4b9cfad43e6658e2f31baca4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/3c497b71919d297a9da9.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/d00ca5f7300e7c2696dd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/d00ca5f7300e7c2696dd.json new file mode 100644 index 0000000000000000000000000000000000000000..4c78a232ee13ec5dd9cf1975158f8beea11ed37f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/d00ca5f7300e7c2696dd.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/b5678f2b1f926f36a4fd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/b5678f2b1f926f36a4fd.json new file mode 100644 index 0000000000000000000000000000000000000000..6c639fd15663ed4abeadf091c0a6da223fc3576c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/b5678f2b1f926f36a4fd.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/21441e8ed07d8b61298d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/21441e8ed07d8b61298d.json new file mode 100644 index 0000000000000000000000000000000000000000..159fa561d5f4334ec53043a5b3e6bc39350f6e3c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/21441e8ed07d8b61298d.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/f65f144a780153ddd757.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/f65f144a780153ddd757.json new file mode 100644 index 0000000000000000000000000000000000000000..96c6a86f10b38f589536f35e19d64662a63b8a99 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/f65f144a780153ddd757.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/4d99c6a74830655285f6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/4d99c6a74830655285f6.json new file mode 100644 index 0000000000000000000000000000000000000000..4bd58e945c5173784100b6a3d52d1d7a2b4047a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/4d99c6a74830655285f6.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/cb2c5b9dc81e576d83aa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/cb2c5b9dc81e576d83aa.json new file mode 100644 index 0000000000000000000000000000000000000000..aded52ed1834120891017b3ebc7c59ab1036414e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/cb2c5b9dc81e576d83aa.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d020f018e0819410feb2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d020f018e0819410feb2.json new file mode 100644 index 0000000000000000000000000000000000000000..2ff127ac676e733998e52586939019b7845cc6c4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d020f018e0819410feb2.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/da4adf3105368a5df618.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/da4adf3105368a5df618.json new file mode 100644 index 0000000000000000000000000000000000000000..172aee4d2423bea0b12d812e8f4c94412f73e32c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/da4adf3105368a5df618.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/0e7a6f2933f99785cba6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/0e7a6f2933f99785cba6.json new file mode 100644 index 0000000000000000000000000000000000000000..d5d4bafd72d9e6e4262728a53f905b34c8fdf5a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/0e7a6f2933f99785cba6.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/gemma3_text/unsloth/gemma-3-270m-it/51c77185b9832eaebdfc.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/gemma3_text/unsloth/gemma-3-270m-it/51c77185b9832eaebdfc.json new file mode 100644 index 0000000000000000000000000000000000000000..90a8c3f5a61d7f2ea03dc500de5771afb26bf9cf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/gemma3_text/unsloth/gemma-3-270m-it/51c77185b9832eaebdfc.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0e7a6f2933f99785cba6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0e7a6f2933f99785cba6.json new file mode 100644 index 0000000000000000000000000000000000000000..d5d4bafd72d9e6e4262728a53f905b34c8fdf5a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0e7a6f2933f99785cba6.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/ibm-granite/granite-3.1-2b-instruct/efa7f046361aa22fb2b9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/ibm-granite/granite-3.1-2b-instruct/efa7f046361aa22fb2b9.json new file mode 100644 index 0000000000000000000000000000000000000000..81172029b2ae3a962fb9759e64d903783ffbb177 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/ibm-granite/granite-3.1-2b-instruct/efa7f046361aa22fb2b9.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/idefics3/HuggingFaceTB/SmolVLM-256M-Instruct/566b663bde0d89e24b29.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/idefics3/HuggingFaceTB/SmolVLM-256M-Instruct/566b663bde0d89e24b29.json new file mode 100644 index 0000000000000000000000000000000000000000..2099dbe7b62c3b2112501768bc8e74d8dcd2983c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/idefics3/HuggingFaceTB/SmolVLM-256M-Instruct/566b663bde0d89e24b29.json @@ -0,0 +1,188 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "_task": "image-text-to-text", + "architectures": [ + "Idefics3ForConditionalGeneration" + ], + "dtype": "bfloat16", + "image_token_id": 49190, + "model_type": "idefics3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "checkpoint_revision": "7e3e67edbbed1bf9888184d9df282b700a323964", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 64, + "image_size": 512, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 2048, + "max_num_images": 1, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "scale_factor": 4, + "text_config": { + "_attn_implementation_autoset": false, + "_flash_attn_2_enabled": true, + "_name_or_path": "None", + "architectures": [ + "VLlama3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 576, + "initializer_range": 0.041666666666666664, + "intermediate_size": 1536, + "is_llama_config": true, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "neftune_noise_alpha": 0.0, + "num_attention_heads": 9, + "num_hidden_layers": 30, + "num_key_value_heads": 3, + "pad_token_id": 2, + "perceiver_config": { + "_attn_implementation_autoset": false, + "_name_or_path": "", + "add_cross_attention": false, + "architectures": null, + "attention_dropout": 0.0, + "bad_words_ids": null, + "begin_suppress_tokens": null, + "bos_token_id": null, + "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": null, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "silu", + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "length_penalty": 1.0, + "max_length": 20, + "min_length": 0, + "model_type": "vllama3", + "no_repeat_ngram_size": 0, + "num_beam_groups": 1, + "num_beams": 1, + "num_key_value_heads": 1, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": null, + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "qk_layer_norms_perceiver": false, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "resampler_depth": 6, + "resampler_head_dim": 96, + "resampler_n_heads": 16, + "resampler_n_latents": 64, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "suppress_tokens": null, + "task_specific_params": null, + "temperature": 1.0, + "tf_legacy_loss": false, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": null, + "torchscript": false, + "transformers_version": "4.46.0", + "typical_p": 1.0, + "use_bfloat16": false + }, + "pixel_shuffle_factor": 4, + "pretraining_tp": 1, + "qk_layer_norms": false, + "rms_norm_eps": 1e-05, + "rope_interleaved": false, + "rope_scaling": null, + "rope_theta": 100000, + "transformers.js_config": { + "kv_cache_dtype": { + "fp16": "float16", + "q4f16": "float16" + } + }, + "use_cache": true, + "use_resampler": false, + "vocab_size": 49280 + }, + "tie_word_embeddings": false, + "transformers.js_config": { + "kv_cache_dtype": { + "fp16": "float16", + "q4f16": "float16" + } + }, + "use_cache": true, + "vision_config": { + "_attn_implementation_autoset": false, + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 768, + "image_size": 512, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-06, + "max_image_size": { + "longest_edge": 512 + }, + "model_type": "idefics3_vision", + "num_attention_heads": 12, + "num_channels": 3, + "num_hidden_layers": 12, + "patch_size": 16, + "size": { + "longest_edge": 2048 + }, + "tie_word_embeddings": false, + "use_base_siglip": true + }, + "vocab_size": 49280 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/llamafactory/tiny-random-Llama-3/97e03fcfc7f46ac8836e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/llamafactory/tiny-random-Llama-3/97e03fcfc7f46ac8836e.json new file mode 100644 index 0000000000000000000000000000000000000000..56e1604a7f579df3a3c713967d6f58b68267db94 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/llamafactory/tiny-random-Llama-3/97e03fcfc7f46ac8836e.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/4d99c6a74830655285f6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/4d99c6a74830655285f6.json new file mode 100644 index 0000000000000000000000000000000000000000..4bd58e945c5173784100b6a3d52d1d7a2b4047a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/4d99c6a74830655285f6.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama4/tiny-random/llama-4/dadf01a9f544218eabdd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama4/tiny-random/llama-4/dadf01a9f544218eabdd.json new file mode 100644 index 0000000000000000000000000000000000000000..1c135e3215a6c0703734fe294f7ff5150f8f2f9f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama4/tiny-random/llama-4/dadf01a9f544218eabdd.json @@ -0,0 +1,125 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "architectures": [ + "Llama4ForConditionalGeneration" + ], + "boi_token_index": 200080, + "dtype": "bfloat16", + "eoi_token_index": 200081, + "image_token_index": 200092, + "model_type": "llama4", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "text_config": { + "_attn_implementation_autoset": true, + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "bos_token_id": 200000, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "eos_token_id": [ + 200001, + 200007, + 200008 + ], + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "pad_token_id": 200018, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 + }, + "tie_word_embeddings": false, + "vision_config": { + "_attn_implementation_autoset": true, + "_vision_feature_layer": -1, + "attention_dropout": 0.0, + "hidden_act": "gelu", + "hidden_size": 32, + "image_size": 336, + "initializer_range": 0.02, + "intermediate_size": 128, + "model_type": "llama4_vision_model", + "multi_modal_projector_bias": false, + "norm_eps": 1e-05, + "num_attention_heads": 1, + "num_channels": 3, + "num_hidden_layers": 2, + "patch_size": 14, + "pixel_shuffle_ratio": 0.5, + "projector_dropout": 0.0, + "projector_input_dim": 32, + "projector_output_dim": 32, + "rope_theta": 10000, + "vision_feature_layer": -1, + "vision_feature_select_strategy": "default", + "vision_output_dim": 32 + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/mixtral/dacorvo/Mixtral-tiny/03a64a22d1b885eece61.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/mixtral/dacorvo/Mixtral-tiny/03a64a22d1b885eece61.json new file mode 100644 index 0000000000000000000000000000000000000000..491849bf23560601bb4029a0deb3226d58c1eeaf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/mixtral/dacorvo/Mixtral-tiny/03a64a22d1b885eece61.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/phi3/microsoft/Phi-3.5-mini-instruct/d00ca5f7300e7c2696dd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/phi3/microsoft/Phi-3.5-mini-instruct/d00ca5f7300e7c2696dd.json new file mode 100644 index 0000000000000000000000000000000000000000..4c78a232ee13ec5dd9cf1975158f8beea11ed37f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/phi3/microsoft/Phi-3.5-mini-instruct/d00ca5f7300e7c2696dd.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/phi3/yujiepan/phi-4-tiny-random/e5ddd3afb102c02baf93.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/phi3/yujiepan/phi-4-tiny-random/e5ddd3afb102c02baf93.json new file mode 100644 index 0000000000000000000000000000000000000000..7c84929d07a94f5748d3db71a471ad556ca5d869 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/phi3/yujiepan/phi-4-tiny-random/e5ddd3afb102c02baf93.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen2/Qwen/Qwen2.5-0.5B/1c8b4a21eb41ff235945.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen2/Qwen/Qwen2.5-0.5B/1c8b4a21eb41ff235945.json new file mode 100644 index 0000000000000000000000000000000000000000..e3a4ed53c55cf5b6a3e26694d52b562dee6ee224 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen2/Qwen/Qwen2.5-0.5B/1c8b4a21eb41ff235945.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/29c61ad2f54baaec4c1d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/29c61ad2f54baaec4c1d.json new file mode 100644 index 0000000000000000000000000000000000000000..d5f28f2553ecf66f80d7f9b2fa557e044ea1592b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/29c61ad2f54baaec4c1d.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-0.6B/f65f144a780153ddd757.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-0.6B/f65f144a780153ddd757.json new file mode 100644 index 0000000000000000000000000000000000000000..96c6a86f10b38f589536f35e19d64662a63b8a99 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-0.6B/f65f144a780153ddd757.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/b5678f2b1f926f36a4fd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/b5678f2b1f926f36a4fd.json new file mode 100644 index 0000000000000000000000000000000000000000..6c639fd15663ed4abeadf091c0a6da223fc3576c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/b5678f2b1f926f36a4fd.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/smollm3/HuggingFaceTB/SmolLM3-3B/2dc771f3c35b9af34ce4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/smollm3/HuggingFaceTB/SmolLM3-3B/2dc771f3c35b9af34ce4.json new file mode 100644 index 0000000000000000000000000000000000000000..bb0fec7381ac4e4a22ecfa1152f72d171dc05a71 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/smollm3/HuggingFaceTB/SmolLM3-3B/2dc771f3c35b9af34ce4.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/5203c54260c86081b779.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/5203c54260c86081b779.json new file mode 100644 index 0000000000000000000000000000000000000000..66811fdb65e3dbdc1abf225d03988c5744a0e1bc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/03b0c107d1cede36875199a5d51decfe04c473de2af9999f8577a028d74d0ab4/5203c54260c86081b779.json @@ -0,0 +1,189 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "_task": "image-text-to-text", + "architectures": [ + "Idefics3ForConditionalGeneration" + ], + "dtype": "bfloat16", + "image_token_id": 49190, + "model_type": "idefics3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "checkpoint_revision": "7e3e67edbbed1bf9888184d9df282b700a323964", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 64, + "image_size": 512, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_num_images": 17, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "scale_factor": 4, + "text_config": { + "_attn_implementation_autoset": false, + "_flash_attn_2_enabled": true, + "_name_or_path": "None", + "architectures": [ + "VLlama3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 576, + "initializer_range": 0.041666666666666664, + "intermediate_size": 1536, + "is_llama_config": true, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "neftune_noise_alpha": 0.0, + "num_attention_heads": 9, + "num_hidden_layers": 30, + "num_key_value_heads": 3, + "pad_token_id": 2, + "perceiver_config": { + "_attn_implementation_autoset": false, + "_name_or_path": "", + "add_cross_attention": false, + "architectures": null, + "attention_dropout": 0.0, + "bad_words_ids": null, + "begin_suppress_tokens": null, + "bos_token_id": null, + "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": null, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "silu", + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "length_penalty": 1.0, + "max_length": 20, + "min_length": 0, + "model_type": "vllama3", + "no_repeat_ngram_size": 0, + "num_beam_groups": 1, + "num_beams": 1, + "num_key_value_heads": 1, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": null, + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "qk_layer_norms_perceiver": false, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "resampler_depth": 6, + "resampler_head_dim": 96, + "resampler_n_heads": 16, + "resampler_n_latents": 64, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "suppress_tokens": null, + "task_specific_params": null, + "temperature": 1.0, + "tf_legacy_loss": false, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": null, + "torchscript": false, + "transformers_version": "4.46.0", + "typical_p": 1.0, + "use_bfloat16": false + }, + "pixel_shuffle_factor": 4, + "pretraining_tp": 1, + "qk_layer_norms": false, + "rms_norm_eps": 1e-05, + "rope_interleaved": false, + "rope_scaling": null, + "rope_theta": 100000, + "transformers.js_config": { + "kv_cache_dtype": { + "fp16": "float16", + "q4f16": "float16" + } + }, + "use_cache": true, + "use_resampler": false, + "vocab_size": 49280 + }, + "tie_word_embeddings": false, + "transformers.js_config": { + "kv_cache_dtype": { + "fp16": "float16", + "q4f16": "float16" + } + }, + "use_cache": true, + "vision_config": { + "_attn_implementation_autoset": false, + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 768, + "image_size": 512, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-06, + "max_image_size": { + "longest_edge": 512 + }, + "model_type": "idefics3_vision", + "num_attention_heads": 12, + "num_channels": 3, + "num_hidden_layers": 12, + "patch_size": 16, + "size": { + "longest_edge": 2048 + }, + "tie_word_embeddings": false, + "use_base_siglip": true + }, + "vocab_size": 49280 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/179f57339300218ee5f1e16199abfeaede9aa891026ff77f3e8e13d9e193a48f/b4b9b98e1b50f733a7a8.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/179f57339300218ee5f1e16199abfeaede9aa891026ff77f3e8e13d9e193a48f/b4b9b98e1b50f733a7a8.json new file mode 100644 index 0000000000000000000000000000000000000000..c28a2a7a64c2f6322012947d60af826d5c037f65 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/179f57339300218ee5f1e16199abfeaede9aa891026ff77f3e8e13d9e193a48f/b4b9b98e1b50f733a7a8.json @@ -0,0 +1,97 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tengomucho/tiny-random-gemma-3-vlm", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tengomucho/tiny-random-gemma-3-vlm", + "checkpoint_revision": "fbd846052676e5f7b417d7df6a736f5e980106b2", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 1, + "max_batch_size": 1, + "max_context_length": 2048, + "max_num_images": 5, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 1 + }, + "num_key_value_heads": 2, + "text_config": { + "_sliding_window_pattern": 2, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 128, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 128, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 168, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 2, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "vision_config": { + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 32, + "image_size": 896, + "intermediate_size": 128, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 2, + "num_channels": 3, + "num_hidden_layers": 2, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/179f57339300218ee5f1e16199abfeaede9aa891026ff77f3e8e13d9e193a48f/f6c4e510f62a9f40e4d4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/179f57339300218ee5f1e16199abfeaede9aa891026ff77f3e8e13d9e193a48f/f6c4e510f62a9f40e4d4.json new file mode 100644 index 0000000000000000000000000000000000000000..3df4a161900675e3d2a8008d46a9b5369484a5c2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/179f57339300218ee5f1e16199abfeaede9aa891026ff77f3e8e13d9e193a48f/f6c4e510f62a9f40e4d4.json @@ -0,0 +1,97 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tengomucho/tiny-random-gemma-3-vlm", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "tengomucho/tiny-random-gemma-3-vlm", + "checkpoint_revision": "fbd846052676e5f7b417d7df6a736f5e980106b2", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_num_images": 5, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_key_value_heads": 2, + "text_config": { + "_sliding_window_pattern": 2, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 128, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 128, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 168, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 2, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "vision_config": { + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 32, + "image_size": 896, + "intermediate_size": 128, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 2, + "num_channels": 3, + "num_hidden_layers": 2, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/182a37bfe893fd31970187d70d32e6d0bd7b48dbcc75e18f38fe640919803c7f/17e814f23198609bb428.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/182a37bfe893fd31970187d70d32e6d0bd7b48dbcc75e18f38fe640919803c7f/17e814f23198609bb428.json new file mode 100644 index 0000000000000000000000000000000000000000..34ea958e9a9202d4fe9180a0a0b2a1064dc4d7c3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/182a37bfe893fd31970187d70d32e6d0bd7b48dbcc75e18f38fe640919803c7f/17e814f23198609bb428.json @@ -0,0 +1,96 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/gemma-3", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/gemma-3", + "checkpoint_revision": "69a78d1ad0ad66620c43579acd1327553713e22a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 2048, + "max_num_images": 1, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "text_config": { + "_sliding_window_pattern": 2, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 32, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 128, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 1, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 168, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 2, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "vision_config": { + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 32, + "image_size": 896, + "intermediate_size": 128, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 1, + "num_channels": 3, + "num_hidden_layers": 2, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/182a37bfe893fd31970187d70d32e6d0bd7b48dbcc75e18f38fe640919803c7f/e99fafc5c2c9d411da82.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/182a37bfe893fd31970187d70d32e6d0bd7b48dbcc75e18f38fe640919803c7f/e99fafc5c2c9d411da82.json new file mode 100644 index 0000000000000000000000000000000000000000..f5658ef702d2412ade56ca17b3d9beed7c2dc72a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/182a37bfe893fd31970187d70d32e6d0bd7b48dbcc75e18f38fe640919803c7f/e99fafc5c2c9d411da82.json @@ -0,0 +1,96 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/gemma-3", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "tiny-random/gemma-3", + "checkpoint_revision": "69a78d1ad0ad66620c43579acd1327553713e22a", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_num_images": 5, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "text_config": { + "_sliding_window_pattern": 2, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 32, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 128, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 1, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 168, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 2, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "vision_config": { + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 32, + "image_size": 896, + "intermediate_size": 128, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 1, + "num_channels": 3, + "num_hidden_layers": 2, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/7c802278b11187486c40.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/7c802278b11187486c40.json new file mode 100644 index 0000000000000000000000000000000000000000..3b2a2936d7054587c893f2a6930171c1dcd40668 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/7c802278b11187486c40.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/c1450d3a39bd7e0c7df9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/c1450d3a39bd7e0c7df9.json new file mode 100644 index 0000000000000000000000000000000000000000..16e7200a78c0e8b5d2f59d6737555bdf868482bd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/c1450d3a39bd7e0c7df9.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/fc27c32a0c4ec3f887e9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/fc27c32a0c4ec3f887e9.json new file mode 100644 index 0000000000000000000000000000000000000000..379ebf479df161b3a393d8bfb623a9005228b744 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/fc27c32a0c4ec3f887e9.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/46b5a08db4c9e1e310e2129802471a3e7b09cdb98e2879268b07c333fb3357b3/ef9a5b9f403c21576da2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/46b5a08db4c9e1e310e2129802471a3e7b09cdb98e2879268b07c333fb3357b3/ef9a5b9f403c21576da2.json new file mode 100644 index 0000000000000000000000000000000000000000..d00083be0500e0873891a28add78e24ba64a7c05 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/46b5a08db4c9e1e310e2129802471a3e7b09cdb98e2879268b07c333fb3357b3/ef9a5b9f403c21576da2.json @@ -0,0 +1,126 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-4b-it", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-4b-it", + "checkpoint_revision": "093f9f388b31de276ce2de164bdc2081324b9767", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_num_images": 1, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "text_config": { + "_sliding_window_pattern": 6, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 10240, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 8, + "num_hidden_layers": 34, + "num_key_value_heads": 4, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "vision_config": { + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 1152, + "image_size": 896, + "intermediate_size": 4304, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 27, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/0538515e2eab8cf1a81f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/0538515e2eab8cf1a81f.json new file mode 100644 index 0000000000000000000000000000000000000000..8742379d6487566d9936ab114df8cbcd8af5fb03 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/0538515e2eab8cf1a81f.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 16, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/18421eedc4d9b7b7fb51.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/18421eedc4d9b7b7fb51.json new file mode 100644 index 0000000000000000000000000000000000000000..b074f1a7f0c45f977a77fabf517c1d16b10f864a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/18421eedc4d9b7b7fb51.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/5756ac9be9333f9c8d82.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/5756ac9be9333f9c8d82.json new file mode 100644 index 0000000000000000000000000000000000000000..2134526f2a42b1697204c65fda95c4675a9d4a12 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/5756ac9be9333f9c8d82.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/69ca5316cb953eb61fa2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/69ca5316cb953eb61fa2.json new file mode 100644 index 0000000000000000000000000000000000000000..7f2694b630dd0e7668f0c5d2aa2cba5c6f53bb84 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/69ca5316cb953eb61fa2.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/d3503138cf92a3bb6ba7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/d3503138cf92a3bb6ba7.json new file mode 100644 index 0000000000000000000000000000000000000000..0a0a7171224a6954e6fb75c67cdc3a9712c4d411 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/d3503138cf92a3bb6ba7.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 8, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/f7dfefb13d09e6187d80.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/f7dfefb13d09e6187d80.json new file mode 100644 index 0000000000000000000000000000000000000000..6abc7a37a5f514a83116dea634cc0c2887da444a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/f7dfefb13d09e6187d80.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/1a184dc437617b13d697.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/1a184dc437617b13d697.json new file mode 100644 index 0000000000000000000000000000000000000000..682e9c6cdcea450151d055de5a15b60705a3bc54 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/1a184dc437617b13d697.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/ba14ae2ea4511b47132d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/ba14ae2ea4511b47132d.json new file mode 100644 index 0000000000000000000000000000000000000000..d5d828a53f9e5853f63a1846df7ffda809593a98 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/ba14ae2ea4511b47132d.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/00688423de1428d98e68.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/00688423de1428d98e68.json new file mode 100644 index 0000000000000000000000000000000000000000..784bde98726778605bbd930c15caf801b35223e2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/00688423de1428d98e68.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/87e6309b5c6fe024caaa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/87e6309b5c6fe024caaa.json new file mode 100644 index 0000000000000000000000000000000000000000..c536a3a345cf752b3921f05dd5ff96c4b183259a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/87e6309b5c6fe024caaa.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 16, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/8eff71913f842fa8f404.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/8eff71913f842fa8f404.json new file mode 100644 index 0000000000000000000000000000000000000000..18825581fc8909b28bdfe582660101cfafc1875a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/8eff71913f842fa8f404.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 8, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/b8db5a05f548780ebfae.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/b8db5a05f548780ebfae.json new file mode 100644 index 0000000000000000000000000000000000000000..d810d715897b894c4a59d497c48df5f2faf3e17f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/b8db5a05f548780ebfae.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/dfcd91ed17670ed71f9d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/dfcd91ed17670ed71f9d.json new file mode 100644 index 0000000000000000000000000000000000000000..a22a96adc3a50b5b29b60113f44aa92da4ec9443 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/dfcd91ed17670ed71f9d.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 16, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/ed4c5924c8e305088a85.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/ed4c5924c8e305088a85.json new file mode 100644 index 0000000000000000000000000000000000000000..ec26c0415d77e94862e05a00dce62a443f1168b9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/ed4c5924c8e305088a85.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d2eb4ac153ba8d26f615a30665abec77e91fe75a5262767a548a93ec5fbcccbc/24596f911d11ff4166b8.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d2eb4ac153ba8d26f615a30665abec77e91fe75a5262767a548a93ec5fbcccbc/24596f911d11ff4166b8.json new file mode 100644 index 0000000000000000000000000000000000000000..9e312416a877a439833648304e50ffd2e3887c77 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d2eb4ac153ba8d26f615a30665abec77e91fe75a5262767a548a93ec5fbcccbc/24596f911d11ff4166b8.json @@ -0,0 +1,97 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tengomucho/tiny-random-gemma-3-vlm", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "tengomucho/tiny-random-gemma-3-vlm", + "checkpoint_revision": "880db0f4a0e1000cec5193ea9a93c30ce425c0e5", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_num_images": 5, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_key_value_heads": 2, + "text_config": { + "_sliding_window_pattern": 2, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 64, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 128, + "initializer_range": 0.02, + "intermediate_size": 256, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 168, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 2, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "vision_config": { + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 32, + "image_size": 896, + "intermediate_size": 128, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 2, + "num_channels": 3, + "num_hidden_layers": 2, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d2eb4ac153ba8d26f615a30665abec77e91fe75a5262767a548a93ec5fbcccbc/daa804ac0ea3a3db3a93.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d2eb4ac153ba8d26f615a30665abec77e91fe75a5262767a548a93ec5fbcccbc/daa804ac0ea3a3db3a93.json new file mode 100644 index 0000000000000000000000000000000000000000..cf8aa85983b2a1fdf2d21f6e1a7b2d64e59b3536 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d2eb4ac153ba8d26f615a30665abec77e91fe75a5262767a548a93ec5fbcccbc/daa804ac0ea3a3db3a93.json @@ -0,0 +1,97 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tengomucho/tiny-random-gemma-3-vlm", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tengomucho/tiny-random-gemma-3-vlm", + "checkpoint_revision": "880db0f4a0e1000cec5193ea9a93c30ce425c0e5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 2048, + "max_num_images": 5, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_key_value_heads": 2, + "text_config": { + "_sliding_window_pattern": 2, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 64, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 128, + "initializer_range": 0.02, + "intermediate_size": 256, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 168, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 2, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "vision_config": { + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 32, + "image_size": 896, + "intermediate_size": 128, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 2, + "num_channels": 3, + "num_hidden_layers": 2, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/e0a07fcbaaaca6e450fabdb638553525f4bfb07bcac85320ca02556f31cd98cf/4e3ac574a30b9827ea5e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/e0a07fcbaaaca6e450fabdb638553525f4bfb07bcac85320ca02556f31cd98cf/4e3ac574a30b9827ea5e.json new file mode 100644 index 0000000000000000000000000000000000000000..93cd53b01a6d7b350c28fa83efd72107cd0131a5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/e0a07fcbaaaca6e450fabdb638553525f4bfb07bcac85320ca02556f31cd98cf/4e3ac574a30b9827ea5e.json @@ -0,0 +1,130 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-4b-it", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-4b-it", + "checkpoint_revision": "bf46152c47f5dd20b896357cb51abc4c03b8ee8c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_num_images": 1, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "text_config": { + "_sliding_window_pattern": 6, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 10240, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 8, + "num_hidden_layers": 34, + "num_key_value_heads": 4, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "unsloth_fixed": true, + "vision_config": { + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 1152, + "image_size": 896, + "intermediate_size": 4304, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 27, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/e0a07fcbaaaca6e450fabdb638553525f4bfb07bcac85320ca02556f31cd98cf/7ef65b5757e03512715d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/e0a07fcbaaaca6e450fabdb638553525f4bfb07bcac85320ca02556f31cd98cf/7ef65b5757e03512715d.json new file mode 100644 index 0000000000000000000000000000000000000000..f016436e7cac1788229726e9f5edb0df3f8d5a01 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/e0a07fcbaaaca6e450fabdb638553525f4bfb07bcac85320ca02556f31cd98cf/7ef65b5757e03512715d.json @@ -0,0 +1,130 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-4b-it", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-4b-it", + "checkpoint_revision": "bf46152c47f5dd20b896357cb51abc4c03b8ee8c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_num_images": 3, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "text_config": { + "_sliding_window_pattern": 6, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 10240, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 8, + "num_hidden_layers": 34, + "num_key_value_heads": 4, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "unsloth_fixed": true, + "vision_config": { + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 1152, + "image_size": 896, + "intermediate_size": 4304, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 27, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/e0a07fcbaaaca6e450fabdb638553525f4bfb07bcac85320ca02556f31cd98cf/bea7134934abbc294477.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/e0a07fcbaaaca6e450fabdb638553525f4bfb07bcac85320ca02556f31cd98cf/bea7134934abbc294477.json new file mode 100644 index 0000000000000000000000000000000000000000..feb26db043f4c8179c4939afd3aebc9e20434d1b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/e0a07fcbaaaca6e450fabdb638553525f4bfb07bcac85320ca02556f31cd98cf/bea7134934abbc294477.json @@ -0,0 +1,130 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-4b-it", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-4b-it", + "checkpoint_revision": "bf46152c47f5dd20b896357cb51abc4c03b8ee8c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_num_images": 5, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "text_config": { + "_sliding_window_pattern": 6, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 10240, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 8, + "num_hidden_layers": 34, + "num_key_value_heads": 4, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "unsloth_fixed": true, + "vision_config": { + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 1152, + "image_size": 896, + "intermediate_size": 4304, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 27, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/f2b3698075778373c8db941c7e90213b3c7a6dc311947938cb2ff93518c6a4cc/1f8dd49c1101a3bcbfaf.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/f2b3698075778373c8db941c7e90213b3c7a6dc311947938cb2ff93518c6a4cc/1f8dd49c1101a3bcbfaf.json new file mode 100644 index 0000000000000000000000000000000000000000..051499b3e08b8d5b27babc828a9658670999a4a9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/f2b3698075778373c8db941c7e90213b3c7a6dc311947938cb2ff93518c6a4cc/1f8dd49c1101a3bcbfaf.json @@ -0,0 +1,97 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tengomucho/tiny-random-gemma-3-vlm", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "tengomucho/tiny-random-gemma-3-vlm", + "checkpoint_revision": "8e5127a746b5297bc637a347f6279cb34a0d1d9d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_num_images": 5, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_key_value_heads": 2, + "text_config": { + "_sliding_window_pattern": 2, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 32, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 128, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 168, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 2, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "vision_config": { + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 32, + "image_size": 896, + "intermediate_size": 128, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 2, + "num_channels": 3, + "num_hidden_layers": 2, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3/google/gemma-3-4b-it/ef9a5b9f403c21576da2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3/google/gemma-3-4b-it/ef9a5b9f403c21576da2.json new file mode 100644 index 0000000000000000000000000000000000000000..d00083be0500e0873891a28add78e24ba64a7c05 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3/google/gemma-3-4b-it/ef9a5b9f403c21576da2.json @@ -0,0 +1,126 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-4b-it", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-4b-it", + "checkpoint_revision": "093f9f388b31de276ce2de164bdc2081324b9767", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_num_images": 1, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "text_config": { + "_sliding_window_pattern": 6, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 10240, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 8, + "num_hidden_layers": 34, + "num_key_value_heads": 4, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "vision_config": { + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 1152, + "image_size": 896, + "intermediate_size": 4304, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 27, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3/tengomucho/tiny-random-gemma-3-vlm/1f8dd49c1101a3bcbfaf.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3/tengomucho/tiny-random-gemma-3-vlm/1f8dd49c1101a3bcbfaf.json new file mode 100644 index 0000000000000000000000000000000000000000..051499b3e08b8d5b27babc828a9658670999a4a9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3/tengomucho/tiny-random-gemma-3-vlm/1f8dd49c1101a3bcbfaf.json @@ -0,0 +1,97 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tengomucho/tiny-random-gemma-3-vlm", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "tengomucho/tiny-random-gemma-3-vlm", + "checkpoint_revision": "8e5127a746b5297bc637a347f6279cb34a0d1d9d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_num_images": 5, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_key_value_heads": 2, + "text_config": { + "_sliding_window_pattern": 2, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 32, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 128, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 168, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 2, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "vision_config": { + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 32, + "image_size": 896, + "intermediate_size": 128, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 2, + "num_channels": 3, + "num_hidden_layers": 2, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3/tiny-random/gemma-3/17e814f23198609bb428.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3/tiny-random/gemma-3/17e814f23198609bb428.json new file mode 100644 index 0000000000000000000000000000000000000000..34ea958e9a9202d4fe9180a0a0b2a1064dc4d7c3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3/tiny-random/gemma-3/17e814f23198609bb428.json @@ -0,0 +1,96 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/gemma-3", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/gemma-3", + "checkpoint_revision": "69a78d1ad0ad66620c43579acd1327553713e22a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 2048, + "max_num_images": 1, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "text_config": { + "_sliding_window_pattern": 2, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 32, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 128, + "layer_types": [ + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 1, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 168, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 2, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "vision_config": { + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 32, + "image_size": 896, + "intermediate_size": 128, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 1, + "num_channels": 3, + "num_hidden_layers": 2, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3/unsloth/gemma-3-4b-it/4e3ac574a30b9827ea5e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3/unsloth/gemma-3-4b-it/4e3ac574a30b9827ea5e.json new file mode 100644 index 0000000000000000000000000000000000000000..93cd53b01a6d7b350c28fa83efd72107cd0131a5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3/unsloth/gemma-3-4b-it/4e3ac574a30b9827ea5e.json @@ -0,0 +1,130 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-4b-it", + "_task": "image-text-to-text", + "architectures": [ + "Gemma3ForConditionalGeneration" + ], + "boi_token_index": 255999, + "dtype": "bfloat16", + "eoi_token_index": 256000, + "image_token_index": 262144, + "initializer_range": 0.02, + "mm_tokens_per_image": 256, + "model_type": "gemma3", + "neuron": { + "_serialized_key": "NxDVLMNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-4b-it", + "checkpoint_revision": "bf46152c47f5dd20b896357cb51abc4c03b8ee8c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "image_seq_len": 256, + "image_size": 896, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_num_images": 1, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "text_config": { + "_sliding_window_pattern": 6, + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 10240, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma3_text", + "num_attention_heads": 8, + "num_hidden_layers": 34, + "num_key_value_heads": 4, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": { + "factor": 8.0, + "rope_type": "linear" + }, + "rope_theta": 1000000.0, + "sliding_window": 1024, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262208 + }, + "unsloth_fixed": true, + "vision_config": { + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 1152, + "image_size": 896, + "intermediate_size": 4304, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 27, + "patch_size": 14, + "vision_use_head": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/google/gemma-3-1b-it/b8db5a05f548780ebfae.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/google/gemma-3-1b-it/b8db5a05f548780ebfae.json new file mode 100644 index 0000000000000000000000000000000000000000..d810d715897b894c4a59d497c48df5f2faf3e17f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/google/gemma-3-1b-it/b8db5a05f548780ebfae.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/google/gemma-3-270m-it/18421eedc4d9b7b7fb51.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/google/gemma-3-270m-it/18421eedc4d9b7b7fb51.json new file mode 100644 index 0000000000000000000000000000000000000000..b074f1a7f0c45f977a77fabf517c1d16b10f864a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/google/gemma-3-270m-it/18421eedc4d9b7b7fb51.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/unsloth/gemma-3-270m-it/7c802278b11187486c40.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/unsloth/gemma-3-270m-it/7c802278b11187486c40.json new file mode 100644 index 0000000000000000000000000000000000000000..3b2a2936d7054587c893f2a6930171c1dcd40668 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/unsloth/gemma-3-270m-it/7c802278b11187486c40.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/llama/unsloth/Llama-3.2-1B-Instruct/ba14ae2ea4511b47132d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/llama/unsloth/Llama-3.2-1B-Instruct/ba14ae2ea4511b47132d.json new file mode 100644 index 0000000000000000000000000000000000000000..d5d828a53f9e5853f63a1846df7ffda809593a98 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/llama/unsloth/Llama-3.2-1B-Instruct/ba14ae2ea4511b47132d.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04c64527506049d877d9+b02446f6/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_04c64527506049d877d9+b02446f6/model.neff index 0f2838c88350affb455a209eeb3705d8bdf6fe6a..8eb38fe9fcbc5fb707bf05b6925fac39469e6fd3 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_04c64527506049d877d9+b02446f6/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04c64527506049d877d9+b02446f6/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae1f3f7cb2cc80137f4849385dad6bb0743f033cf7ede6d04c9dc4081d8e1d82 +oid sha256:85283fffc5dd91d50382e89d49467bb31e57616c3327f0f2a149c2f2e9b31c23 size 30598144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04c64527506049d877d9+b02446f6/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_04c64527506049d877d9+b02446f6/wrapped_neff.hlo index cc81cf60d9dc7efce8ffdb1cd9ab48cb76dd58fa..0bd498a9bab58944e5bcf6bf0586a2f5cfd94f23 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_04c64527506049d877d9+b02446f6/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04c64527506049d877d9+b02446f6/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e4fc9bb782609dfaeb2b74d4fdc0108e917006c4f1622b3a05d2f0ebefc20b16 +oid sha256:b24c80f0983c8ad3f70ca38bbd54c6ac68250c4206c94eed336a7ab2dec68156 size 30708309 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..63679a09567c314bb9c60413c1d3d88945055284 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03adffb1ac29009d5dc73c5a1cb3238a85f92058a525ad350f72cac0708fce18 +size 1779146 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4853a7076094b2ca10c6f2d98033b2172d196259 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:597f4f3cdcd094d9b61867edb5be9b2772bccfdc5109041406729cae7091bb0f +size 32287744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b322a637f64f8cfeaaa+8e0289b7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0b322a637f64f8cfeaaa+8e0289b7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c6da4c847265a8aa877d5a7878eaaffa9507371 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0b322a637f64f8cfeaaa+8e0289b7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_0cf9bbe5-cda4-4ba3-9500-b99a12d0191e/compiler_workdir/NewAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b322a637f64f8cfeaaa+8e0289b7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0b322a637f64f8cfeaaa+8e0289b7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b322a637f64f8cfeaaa+8e0289b7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0b322a637f64f8cfeaaa+8e0289b7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65cdd33ee539447553160cece8704c1e2e8745cd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0b322a637f64f8cfeaaa+8e0289b7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f409d8cc6bc5037c585be1e7ac4f332ca2d5938432853ee512e99ced1ae7345f +size 8611 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b322a637f64f8cfeaaa+8e0289b7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0b322a637f64f8cfeaaa+8e0289b7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c02e16b4e9858c81ace6383e4bcfdf33fefa4702 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_0b322a637f64f8cfeaaa+8e0289b7/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0cb2d9cedfdb8f619286+2e929b57/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0cb2d9cedfdb8f619286+2e929b57/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..f0ca8b1970ca47245f090982c5f1ed42899a3f6c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0cb2d9cedfdb8f619286+2e929b57/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_8e875688-1752-4989-a92b-c82bf8ad309c/compiler_workdir/ScaledQKComparisonModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0cb2d9cedfdb8f619286+2e929b57/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0cb2d9cedfdb8f619286+2e929b57/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0cb2d9cedfdb8f619286+2e929b57/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0cb2d9cedfdb8f619286+2e929b57/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..25766bae20d721f9d5c2f3ea835c3631e3010787 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0cb2d9cedfdb8f619286+2e929b57/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138b427bf7769ac62d72b0c008b58065b93c16f2f09976be109e29ef90de7854 +size 4940 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0cb2d9cedfdb8f619286+2e929b57/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0cb2d9cedfdb8f619286+2e929b57/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..be5033cc8dd8f2020e6de505cee497405c2fbb44 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0cb2d9cedfdb8f619286+2e929b57/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c995144267e804e459b4807a5382270f1adce21f317afd8a703ef2ca12e9287c +size 390144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_105c0ff965441eae0a1c+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_105c0ff965441eae0a1c+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_105c0ff965441eae0a1c+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_105c0ff965441eae0a1c+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_105c0ff965441eae0a1c+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_105c0ff965441eae0a1c+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_105c0ff965441eae0a1c+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..46eccc27469371764dbbfceabc96dbec97a23b95 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_105c0ff965441eae0a1c+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d03a41dc87295d12b16c622fd1094b1579271c83fb5db911125b3dbfe5063095 +size 102657 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_105c0ff965441eae0a1c+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_105c0ff965441eae0a1c+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..890a31a2bb46b946c8ad0b46d032d3a964870b56 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_105c0ff965441eae0a1c+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ecd05e592693d080982bb3c10303a511a15867bc9590fb12a9f450e01e8e55f +size 533504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12e7cddd1a2b1bbf480f+6170d8e1/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_12e7cddd1a2b1bbf480f+6170d8e1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9c277888420f00defd99fc3c102007a98b09199d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_12e7cddd1a2b1bbf480f+6170d8e1/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12e7cddd1a2b1bbf480f+6170d8e1/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_12e7cddd1a2b1bbf480f+6170d8e1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12e7cddd1a2b1bbf480f+6170d8e1/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_12e7cddd1a2b1bbf480f+6170d8e1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..412fd235dd733bdd9282f1bd5c9498152ec3548c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_12e7cddd1a2b1bbf480f+6170d8e1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cc6f8d95a871997518915d5a2afe24c7416df74799ccc2f42e213075c4e40b9 +size 959740 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12e7cddd1a2b1bbf480f+6170d8e1/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_12e7cddd1a2b1bbf480f+6170d8e1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5826793d163b0cfca3981210ab77d25fe67b2439 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_12e7cddd1a2b1bbf480f+6170d8e1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1925084d6ae5b01572b11fb30cb3766a41c175626f4a08e58c137a427acd8e1b +size 12872704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3ccf8abc32ecf51e263054ec6c5f7124504c4fd3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:570017663a6900b5493f75c47921e938e0e1744a44c1f04d5f4536bfa44d644c +size 1779218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..44bccb0ecee120cf84f9f3ed0cf1d0d1464a22c6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9544338723eb399adde799a747e2c438ff69fdfd1deeec0e1ce187b38edab180 +size 83313664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1477804b8d4dee16534e+5047b4cf/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1477804b8d4dee16534e+5047b4cf/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7bee26a8d7ecc1b891c4a0c8b53d46a17e62ee34 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1477804b8d4dee16534e+5047b4cf/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_b4137ebb-97b3-4dfb-b478-6ff75b21ebe0/compiler_workdir/OldSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1477804b8d4dee16534e+5047b4cf/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1477804b8d4dee16534e+5047b4cf/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1477804b8d4dee16534e+5047b4cf/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1477804b8d4dee16534e+5047b4cf/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fe2330973b305a8e95d46b9a4803dc4fb5111046 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1477804b8d4dee16534e+5047b4cf/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:931bab94e0ad026baae4dc1e3e73ade90e2954dbeb86887622692d40ecb80fe9 +size 4706 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1477804b8d4dee16534e+5047b4cf/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1477804b8d4dee16534e+5047b4cf/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7d4b872eb974da53b834672643607f302f4f5eb0 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_1477804b8d4dee16534e+5047b4cf/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15503648c89ebbfa6dc4+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_15503648c89ebbfa6dc4+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_15503648c89ebbfa6dc4+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15503648c89ebbfa6dc4+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_15503648c89ebbfa6dc4+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15503648c89ebbfa6dc4+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_15503648c89ebbfa6dc4+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2d8e6ebe2b9afa3f2e58f4906cfd006b5e6b4e20 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_15503648c89ebbfa6dc4+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbfd2d97c8bd9f6ae6e1eac3dc6590c9c46e4524a9233efe50ced80fdc586880 +size 1048014 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15503648c89ebbfa6dc4+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15503648c89ebbfa6dc4+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2c567e7c31226886f26369e959cd8bd53259c723 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_15503648c89ebbfa6dc4+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5197489ec8a4607c85543665a90d5492077cdd6b0c4c3948b510e947ab322668 +size 32861184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a5099ac1e9ba31245410f681471d7305b94b69f5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b75e291602f7ae0685f835d4e8c2f0f41e4690a00ef27e66b74ec9f26a57cf9 +size 597952 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a25de6e37e4585078386ec736fc822132df72968 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5994a6c5d0a50da5d31049009e3972755d3b5d3fb8d86d210b1f289827dd95d7 +size 7394304 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff index 746e9dbb8c0595eb34d6d692b5649b35c0e857aa..16454301701bcde1e8cc715e4bf84698b69012ea 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3a40a794d892d5d9cfb8c194ada140e91d4be151 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_vision_encoder/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0a588192b07e15f7d3fd3b15b3359b211667db0c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0072b120d9879fb627367933dbb0852e46475e73544d8b55dbce5f3b65019786 +size 169609 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..08524debf982fd35d0d33bc56d340068d2c5296a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36ddd0a7101ccab94307a8f46d474e82830909d3749dfbe0b38c9d3d32ea9018 +size 25345024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2d88bf1f86261f946a658c5e9588d682a1b7f6b0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17bab0cf99484f23e361+b02446f6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1216c94caabf068b88390f3a5003cc7ccce125d78d567025ada2ee9a2bfe61 +size 25455330 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c00b27ef51756ae833530d177d946644dd203570 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d06e13fb7c050bb1840d5521e950d28037d97bc6b245f8ac9dd44e23e1cbfeb8 +size 597952 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..30d41b269794cf102c88f29b6b383de23d1b085e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4da2c1a0a0177b946afd719d4b1361c4458c97c02a2dd404f8e7cff8aa5abc9 +size 7394304 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1980033719e8e5ff938d+9462c5c2/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1980033719e8e5ff938d+9462c5c2/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..33ff9c762e801ba28bd83254ec39542561b1c127 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1980033719e8e5ff938d+9462c5c2/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_ae3de8dc-521b-489e-b0f7-8de4a42742e5/compiler_workdir/NewAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1980033719e8e5ff938d+9462c5c2/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1980033719e8e5ff938d+9462c5c2/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1980033719e8e5ff938d+9462c5c2/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1980033719e8e5ff938d+9462c5c2/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65cdd33ee539447553160cece8704c1e2e8745cd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1980033719e8e5ff938d+9462c5c2/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f409d8cc6bc5037c585be1e7ac4f332ca2d5938432853ee512e99ced1ae7345f +size 8611 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1980033719e8e5ff938d+9462c5c2/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1980033719e8e5ff938d+9462c5c2/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aa4c1eeaee50c1a717654547f7c60d8f3d9a59ec Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_1980033719e8e5ff938d+9462c5c2/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a003e2d7f6e57c2ced6+adae9597/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1a003e2d7f6e57c2ced6+adae9597/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..24d8cb291ca094f6322c42185dfb92dd055f23bd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a003e2d7f6e57c2ced6+adae9597/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_4f5a149a-03be-4cf9-a17d-e14642250d11/compiler_workdir/OldAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a003e2d7f6e57c2ced6+adae9597/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1a003e2d7f6e57c2ced6+adae9597/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a003e2d7f6e57c2ced6+adae9597/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1a003e2d7f6e57c2ced6+adae9597/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc0376eb458882e480c9f2988be7a20d2a83a85f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a003e2d7f6e57c2ced6+adae9597/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5257f6d788833b506fbb149a4f7b96779ca2ed998a12765781b35e1b7f4ec017 +size 7691 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a003e2d7f6e57c2ced6+adae9597/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1a003e2d7f6e57c2ced6+adae9597/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9de08d5f638a4e4274a1a3ac42b4b8783c200b84 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_1a003e2d7f6e57c2ced6+adae9597/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b43d7c01692b5e7842e+9e8e849c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1b43d7c01692b5e7842e+9e8e849c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..469ed1aa6baed1e387e8320dc57d000b74f40772 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b43d7c01692b5e7842e+9e8e849c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_1cc78844-e8b4-43e0-ba44-b79bbf438b36/compiler_workdir/ScaledQKComparisonModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b43d7c01692b5e7842e+9e8e849c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1b43d7c01692b5e7842e+9e8e849c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b43d7c01692b5e7842e+9e8e849c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1b43d7c01692b5e7842e+9e8e849c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..25766bae20d721f9d5c2f3ea835c3631e3010787 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b43d7c01692b5e7842e+9e8e849c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138b427bf7769ac62d72b0c008b58065b93c16f2f09976be109e29ef90de7854 +size 4940 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b43d7c01692b5e7842e+9e8e849c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1b43d7c01692b5e7842e+9e8e849c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..58653588d5f02f9b35cf4245e830b2a149d0afd8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b43d7c01692b5e7842e+9e8e849c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11b43771b36045ed2db972b64bde026addb1f81471b9c67074c847f79c215377 +size 390144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..312254947785a14deb5ca279ee14723b0288a275 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cffabd2edca1fa4e310a911a7f1ee3949022a4d512d81ab71073566d4e75b82 +size 842788 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..072da2af0574c0d18af1ac6ca8b7e20e805d3870 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b397f20205d28dac763ae3e1b0b2d6cc82391a85343d2e59a61b09c83f0b41a2 +size 15289344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cd65345a6a903baf951afaa5f3a5048cc8f4ff22 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7789b9e434dece65641d8f5f98d2b54d005c9a140d377677724c8cef055f366 +size 15466469 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1bd0b808e14d551efa66+98b5e993/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1bd0b808e14d551efa66+98b5e993/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7a1c0dd809db95e8204e911cad257c0b393f8217 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1bd0b808e14d551efa66+98b5e993/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_eaee0352-c67e-47c8-a351-639892e8cf13/compiler_workdir/NewAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1bd0b808e14d551efa66+98b5e993/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1bd0b808e14d551efa66+98b5e993/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1bd0b808e14d551efa66+98b5e993/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1bd0b808e14d551efa66+98b5e993/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65cdd33ee539447553160cece8704c1e2e8745cd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1bd0b808e14d551efa66+98b5e993/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f409d8cc6bc5037c585be1e7ac4f332ca2d5938432853ee512e99ced1ae7345f +size 8611 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1bd0b808e14d551efa66+98b5e993/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1bd0b808e14d551efa66+98b5e993/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a012da644f54aaa3ac64352fc0e9ff4fcda471b4 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_1bd0b808e14d551efa66+98b5e993/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1f102454055b7cfd0c9d+585a7fb2/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1f102454055b7cfd0c9d+585a7fb2/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e80cb62d53db2fbce7734e1dd365ac6d62e72b61 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1f102454055b7cfd0c9d+585a7fb2/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_06b8f181-fb50-4b6f-9311-a12a72ecf2d9/compiler_workdir/OldAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1f102454055b7cfd0c9d+585a7fb2/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1f102454055b7cfd0c9d+585a7fb2/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1f102454055b7cfd0c9d+585a7fb2/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1f102454055b7cfd0c9d+585a7fb2/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc0376eb458882e480c9f2988be7a20d2a83a85f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1f102454055b7cfd0c9d+585a7fb2/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5257f6d788833b506fbb149a4f7b96779ca2ed998a12765781b35e1b7f4ec017 +size 7691 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1f102454055b7cfd0c9d+585a7fb2/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1f102454055b7cfd0c9d+585a7fb2/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..42e36dec9fc65ec3bab4ddbbcd887080608ce2f8 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_1f102454055b7cfd0c9d+585a7fb2/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3a40a794d892d5d9cfb8c194ada140e91d4be151 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_vision_encoder/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2aafc38d5c6825e402b57074049575099ae4efaf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28e1319bebe52030d6832cf7c1077261076ffc38d9ae3bda83961c8b0ab28479 +size 160725 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6f7a0612f524647cfac3d46cb4b75017cc688578 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e50001bbdc5fd906b1eb0ace8a1aadad4ab5851caeab7dfc6f5aee099dd4dc6 +size 58317824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1da40f8aa36335956b76eb293f39c08494bb5058 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1f6ea746117003b733f2+b02446f6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f75be03f1ef3fe62e0a78e283ce9ad3f4886f2b6c41aed904b4ff23fb11574b +size 58427989 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_213ad9728f93ce707ff0+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_213ad9728f93ce707ff0+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_213ad9728f93ce707ff0+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_213ad9728f93ce707ff0+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_213ad9728f93ce707ff0+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_213ad9728f93ce707ff0+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_213ad9728f93ce707ff0+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..040272c9057f3f3f3191a41cf64a2124a750e867 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_213ad9728f93ce707ff0+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c5883954151d2373de4a87a2f5b2a08a670d791c6f5878fc43922d2793441a8 +size 107163 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_213ad9728f93ce707ff0+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_213ad9728f93ce707ff0+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..70c1d2abe7c1fea5c76edc5a63f6fb3b3c6d0f65 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_213ad9728f93ce707ff0+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6a007596edd2bf3db665fb3c2069b7b17c8be6f389d909cf1265b788f36f242 +size 656384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_27b2c4720a6a1662f8b5+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_27b2c4720a6a1662f8b5+24129607/model.hlo_module.pb index 90a9be2fe1a681994bf75de85c03215f147cd2b6..d105f364d288ca1e759de41c8ae5eeb8408d54f8 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_27b2c4720a6a1662f8b5+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_27b2c4720a6a1662f8b5+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1dddf91c6a86546b55890eda4dd0026330a7ea59d853f39f8ac2758b366729a6 +oid sha256:cc270384625da985be79e1806b1b95e3c06eaccdcbfc110c85006b28c1f920f9 size 961456 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_27b2c4720a6a1662f8b5+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_27b2c4720a6a1662f8b5+24129607/model.neff index 6f664ff148e06931f35dea1e5aa23319736c9efe..96a98e57b821a55a452bce88750e25d3b942b4ff 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_27b2c4720a6a1662f8b5+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_27b2c4720a6a1662f8b5+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c9b303d5ffb9dbe60b8a13f4198948947628044e3564750dd748a1831dcfceb +oid sha256:d6dfd1091bff77d12e9c39cd483a4199ea945392b8e9616733c3f3e7375395b0 size 14654464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2873803374188476844+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2873803374188476844+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2873803374188476844+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2873803374188476844+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2873803374188476844+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2873803374188476844+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2873803374188476844+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3f3f63cfc811d7c29767d385ff3656cb30b13037 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2873803374188476844+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c3ce05844c456292525d86ad5934b3e4a66af2af353bf28193385d439a3b87 +size 1308 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2873803374188476844+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2873803374188476844+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a71dc2d1ffe5ee97228619fa972fce187f4aae2a Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_2873803374188476844+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1839053aac72c5c3447072c9d4c35eb4990533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..10a23461f4b715bad574abd30c13f0dff5bef81c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3aeeb1d9e3ac163b82c3cb7891c3a8df2dc83db68134f2793e5ceb80eaa62e +size 383063 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6f7c1166e3dd405dfda676943bdc2b882750cc4b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:292fc6c5085157358dde40c3145ca531288845ab29ee619901b619dd8b4d211a +size 18699264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e1b45c825f8e3e25f5ce5944f555a1cc95aed17b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2a8f4b110978c8fcb167+c4f887dc/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfe2c4f10e5f8641aa0b19e4cef09bd15e6cac8b8514e1605688869c76ca7906 +size 18932389 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3024185903572207584+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3024185903572207584+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3024185903572207584+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3024185903572207584+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3024185903572207584+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3024185903572207584+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3024185903572207584+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d4d0458fb903bd3be3fb4891810eb0cd1f148ca5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3024185903572207584+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7efff9218985039996f9ca40d6d24c05cf7ad45492b08b8fd3cf6d53f106b7 +size 436 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3024185903572207584+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3024185903572207584+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..97fe15fda9210b1b5455ea17e29ff84a6a14e354 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_3024185903572207584+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1839053aac72c5c3447072c9d4c35eb4990533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..22a77ec328e6559293f560d0a151c2f45236ef52 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3b63f8b6fdac5ccd30b6614e531e69ea5fed79e8b2fd5db7111542a9be0ef9 +size 40868 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..59d7c041c4cf15a3b18ff172bf76cc62c9e14cc7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f6840ed372f10521792f61043353b4281726b0e9ccdff957c348b5cb8c86e3 +size 2130944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b1bba54a2a736b802efe8ad9b84cc941c512834b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_303a765b958c392446f2+c4f887dc/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26cd1c96ba1428ba3c7a52db4a68d8653032fba8fcc7370c3fe1f8dc6d627bd5 +size 2141801 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3071b4f6d522c100a154+06cf3f28/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3071b4f6d522c100a154+06cf3f28/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2dd86326f71d32bbe0aa4bb4bff06eec15912684 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3071b4f6d522c100a154+06cf3f28/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_d4f1b64b-e89a-49c8-977a-7456f49a66b3/compiler_workdir/NewAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3071b4f6d522c100a154+06cf3f28/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3071b4f6d522c100a154+06cf3f28/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3071b4f6d522c100a154+06cf3f28/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3071b4f6d522c100a154+06cf3f28/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0c6f808a467213d313f6744cd54bc402547a1247 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3071b4f6d522c100a154+06cf3f28/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88d9d739b71e46465dd28e647344f8c400735a41054bdfa33c54d54aefae12e +size 7691 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3071b4f6d522c100a154+06cf3f28/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3071b4f6d522c100a154+06cf3f28/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0668ea3576e57d8547ffbb48909ff4bb77196356 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_3071b4f6d522c100a154+06cf3f28/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3164b243bf918aa0b477+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3164b243bf918aa0b477+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3164b243bf918aa0b477+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3164b243bf918aa0b477+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3164b243bf918aa0b477+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3164b243bf918aa0b477+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3164b243bf918aa0b477+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..80431f9fe0583eeba9908ccf735c246344b6637b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3164b243bf918aa0b477+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:845b3ea51d1ffbcaf1dae071c83c4fdf7afebee0fc572f41b4ab461bb56eb283 +size 96227 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3164b243bf918aa0b477+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3164b243bf918aa0b477+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a2d94773a31efbce8999f51de4a8ba7daecd383d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3164b243bf918aa0b477+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa1e4393e72c40199b02ef0700e6826a80f99aeb8d55a54f4ebb67e62c9fc0c0 +size 1301504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_31af602603367560091b+4070fd2c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_31af602603367560091b+4070fd2c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8b7cbe17498eb158f7e5c41abd56686ab71265d2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_31af602603367560091b+4070fd2c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_bce0a345-26be-4993-9a0c-96eebea9323e/compiler_workdir/ScaledQKComparisonModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_31af602603367560091b+4070fd2c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_31af602603367560091b+4070fd2c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_31af602603367560091b+4070fd2c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_31af602603367560091b+4070fd2c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..25766bae20d721f9d5c2f3ea835c3631e3010787 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_31af602603367560091b+4070fd2c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138b427bf7769ac62d72b0c008b58065b93c16f2f09976be109e29ef90de7854 +size 4940 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_31af602603367560091b+4070fd2c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_31af602603367560091b+4070fd2c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..828ca25842b968ae0729c772161121c0e2ab1bf4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_31af602603367560091b+4070fd2c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36875483e8499ddfa3a1d55590c38b35acc743d4308dbc4e0579ec3e3661af6c +size 390144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..73dd38c73accb898fec4b812a3e3b86ba2437846 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cb39e4ee43fef9e4ab34ba0c65d0e5d40a495386b6e2b33b16ade366f630493 +size 2549448 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..54d08f338e838089bfd618ca6a54199310616c8d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a41555756fc779961639134f6e2566d8faf2f2e441379455fa9d097f0232525 +size 7711744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_35b57d4de639967e833c+ff30f942/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_35b57d4de639967e833c+ff30f942/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..bb5f709ad1dc74ba1943cce42122cf8732f3c4c0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_35b57d4de639967e833c+ff30f942/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_f8d34745-a064-4324-b6af-3d9d40ba60c5/compiler_workdir/NewAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_35b57d4de639967e833c+ff30f942/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_35b57d4de639967e833c+ff30f942/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_35b57d4de639967e833c+ff30f942/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_35b57d4de639967e833c+ff30f942/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65cdd33ee539447553160cece8704c1e2e8745cd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_35b57d4de639967e833c+ff30f942/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f409d8cc6bc5037c585be1e7ac4f332ca2d5938432853ee512e99ced1ae7345f +size 8611 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_35b57d4de639967e833c+ff30f942/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_35b57d4de639967e833c+ff30f942/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c669019a9115389e279af8757ba9f6779742f62b Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_35b57d4de639967e833c+ff30f942/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_38faed37a17c3c662c3d+dfef5999/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_38faed37a17c3c662c3d+dfef5999/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..658d1218983254421c46c4a5c82d6d86e6c26722 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_38faed37a17c3c662c3d+dfef5999/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_ad9cd81c-fb65-43b7-8079-dbf6faa5ee81/compiler_workdir/NewAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_38faed37a17c3c662c3d+dfef5999/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_38faed37a17c3c662c3d+dfef5999/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_38faed37a17c3c662c3d+dfef5999/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_38faed37a17c3c662c3d+dfef5999/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65cdd33ee539447553160cece8704c1e2e8745cd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_38faed37a17c3c662c3d+dfef5999/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f409d8cc6bc5037c585be1e7ac4f332ca2d5938432853ee512e99ced1ae7345f +size 8611 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_38faed37a17c3c662c3d+dfef5999/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_38faed37a17c3c662c3d+dfef5999/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..95ddb0560628982daa30591ad0aff49c2cd050d5 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_38faed37a17c3c662c3d+dfef5999/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ae38b2f10558b767b68+45415967/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae38b2f10558b767b68+45415967/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cc8c24dd57b4dff767a29931843c072a97805a21 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae38b2f10558b767b68+45415967/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_55003c3e-af6c-4439-9d6e-9c570991aeb3/compiler_workdir/OldSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ae38b2f10558b767b68+45415967/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae38b2f10558b767b68+45415967/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ae38b2f10558b767b68+45415967/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae38b2f10558b767b68+45415967/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cc8595724b949c0c1c3c933ab980970a6b43e6a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae38b2f10558b767b68+45415967/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5114d4a4b6883064dcb776ed5bd26ee295ba206bf9cfbe32191a6df351c35e1e +size 4581 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ae38b2f10558b767b68+45415967/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae38b2f10558b767b68+45415967/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e7c46c6296c15d33b183e2f1da6fc5e479784cb2 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae38b2f10558b767b68+45415967/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8037f77ce7977f4763508da963e696cfe9c2703a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d30f46cbe4536d9735050e6a47ceca3a8f3d8d7c19ee38e83eeea76115ba3bc6 +size 577259 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d9011b6f0567025ba42c29ac263c13f1398e937e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858f62bd4b13fdc35c3fe832ca9d929e8c7cdff924bc6be3982fd768bf2a5373 +size 1281024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3719d779c23e8a8e4bb16720a54604c4caa9debc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3fa54d08f9bf0d057baa+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37b109e123b8e4c713ad4657e0372a23365c6387c98f61b6f71f87c9fdf68b7 +size 1403413 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4216092561315976987+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4216092561315976987+e30acd3a/model.neff index 9be8d7b9dc293ad71fba90a890a4ed02af103885..3418431dd8cafa499ae4ff049af2013019f0d645 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4216092561315976987+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4216092561315976987+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..543242b7252fa27dcfa53877b0551ba6c497704e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038debe25c42a4bcecc0a4dd651fd50660fab3ebfcc515cf8129a4703d280486 +size 100432 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..00eee417b1cd25881b98cd5195d8eab388691f4a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3f89b011e1162150e88a1bafd4c24ea5b1bf5107613d5d8404488e092448d0 +size 420864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bfced9ce9a497de7646e9c6b4de4c10fe49fff0c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_43dbb7c2a39f5c4fb1c8+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06618b7357f7ff8ef3593798990374862b6d4223c035cc65f44961d0ba04c1a4 +size 429557 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c49bb8a2645fe583af0597c9812d28558d09a331 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86f1aa49dbddfc33c2f59d85ddec5a2e3d26274e2bd24dc9b44645a626a9acf0 +size 2549552 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a1e2dc87d73d4c5f79beccda767518b29bd88a24 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8427cba9f0cc79b11f2a4905885b74a5ee87deac370e3845d8b65da6f6250a03 +size 17900544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bc1ff09316246ba2ab9d8405e209686aa49e66cb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d0ec3e537d1edb6bd63452260cc0127e02213437bfd0e40bd7c37a039b2993 +size 1739896 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..17aacccb7cceeb4717d7db6140ef5c6a4b88443f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6cf965cb2fdea318aa68997cd8cca53e7fb87c41a68cf7e0c7acca4bdcefa06 +size 82566144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e68975fa752b951c6f0+c4c89da0/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4e68975fa752b951c6f0+c4c89da0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5648e873e84aba8e05eea74cd0b2a0fa9a76b21a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e68975fa752b951c6f0+c4c89da0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_6cad8bdf-58af-41ad-8549-f9f413408e7f/compiler_workdir/NewSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e68975fa752b951c6f0+c4c89da0/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4e68975fa752b951c6f0+c4c89da0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e68975fa752b951c6f0+c4c89da0/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4e68975fa752b951c6f0+c4c89da0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..41be8cc3984f4e6a0de845cea098fd53922b2fbd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e68975fa752b951c6f0+c4c89da0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2e93fef39637b2e7d1ceb514fa3024d9836275bca2d538f49802342c231f0e +size 5471 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e68975fa752b951c6f0+c4c89da0/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4e68975fa752b951c6f0+c4c89da0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6587c30d5618032573013dca9e2a9e573bed6de5 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_4e68975fa752b951c6f0+c4c89da0/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..043b8b1a9e84f12580e5e2da7649ab5ef5a98dcc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48f798909e3233225caab6080b72e5f1e64b3699001301f91f04d24e0a39017e +size 110053 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7bba53c15086c074efd502728271024fe612d1d7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b76bfbbf72f5e91caf122c5bb483b6f25393b4548acba385b2941f77ab8c3c +size 492544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6c071e19a7efeb45b1b19d1198caa14b14d20964 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4f8ed259d620f5f869d0+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a16f4f4ad9fe09e56abfea938eb27ae58676dc5feab143d720e3553c7bc29e +size 501517 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4fec46278d7846173304e6193c6d6af4604927fa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdaf565435be8facd9149bb0db421ca3ab498e5ca8e7ce76c4ac70d52b842ace +size 654916 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..91ae047eb65cd6da7beebe33e3399ced4c97779f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980900bb3a186094e23dfcba022299fb8eceee3b1cf7eaeddb7a3ae04acd96a2 +size 1414144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..092459c5549f5b6d2876b9202815a7aba8127cb5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af0f4b0f2efcf9c3db829e6fd12518adb9bbe53762e248778e139165b7b89f79 +size 1525019 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_526995fbc83a4a64429c+5a3ee265/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_526995fbc83a4a64429c+5a3ee265/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fbf087d672c18b3db1abf1e6eb0972c3c20d67ff --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_526995fbc83a4a64429c+5a3ee265/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_fb164dc3-f066-411a-a8fd-746da0c9bf29/compiler_workdir/OldAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_526995fbc83a4a64429c+5a3ee265/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_526995fbc83a4a64429c+5a3ee265/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_526995fbc83a4a64429c+5a3ee265/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_526995fbc83a4a64429c+5a3ee265/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc0376eb458882e480c9f2988be7a20d2a83a85f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_526995fbc83a4a64429c+5a3ee265/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5257f6d788833b506fbb149a4f7b96779ca2ed998a12765781b35e1b7f4ec017 +size 7691 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_526995fbc83a4a64429c+5a3ee265/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_526995fbc83a4a64429c+5a3ee265/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..26da537cf94a738a12dc7056a6aa9b6bccc7978f Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_526995fbc83a4a64429c+5a3ee265/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_531086d2d1e6a2d23ebd+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_531086d2d1e6a2d23ebd+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_531086d2d1e6a2d23ebd+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_531086d2d1e6a2d23ebd+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_531086d2d1e6a2d23ebd+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_531086d2d1e6a2d23ebd+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_531086d2d1e6a2d23ebd+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d8e7decca1006d4d89899d018259c11f902b3980 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_531086d2d1e6a2d23ebd+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8baa176ec99ed6ce0bb68762bbecc359e01625a863f906c412f6b8d841027ab +size 107907 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_531086d2d1e6a2d23ebd+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_531086d2d1e6a2d23ebd+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fe0bf043873272ef8f167a27dd44d0feb9c542d3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_531086d2d1e6a2d23ebd+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99b8d43032515b5f9fe0f8db8a937a2ec88788bb9a1121e92ac2e8afe86958e3 +size 635904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1839053aac72c5c3447072c9d4c35eb4990533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8e98e695077b7dd6f223ceb9732864b3f52f08c6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4441104e24b3256f42866cf72036a80a04d5baca332d866224104f80413023fc +size 364203 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6080d430f6e9bc9444b22f521d93502258200e7b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f8d05f29bfdb78428ff9109f82ed7f7a658c6c104a2d98a9dc74bad53da88a +size 107664384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bd3f3b26a03bd040f5436c8139218b0dc560d326 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_560b92ed487f36488c4c+c4f887dc/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5db5659a1ddb96e92956d0891e2a13e1205e070b7f8bcd5858f3bafa64b8a009 +size 107897725 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2fcce37a137951232e705a849522ec35acb5b7e4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9697d1b98623f3e1d06109c239ef4b4149d4b61bc5ec37840f43a83a768adf2f +size 569536 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5a5a2ee2ee3c0bd218ff714544132492835ca879 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f48ca19c604cfe3c0144196d25f761eaf1ce7e3266a430d5dc39a9961b778ca3 +size 1373184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4e8eca136304e8e7ae1a9aaf27ebc8c860c6517f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5b12e9643c49ec4b4ebd+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c1eb5a3ccf47e2a374fa060578bb291a2f113b47c9b0b116b4c7dc9af8ed89a +size 1495573 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1839053aac72c5c3447072c9d4c35eb4990533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f0e4ddcbf81b5138780048a29abd7dd5637dde82 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a064c6f2fc0df263d71479ccdeed7f50465e9f14030ec5334b9ac7bedd5e4420 +size 42200 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..82769d65b207a145160c3e580a381ff2d09a7eb8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1565e9e1b0e1581535ad1df52c665d284f95291541b1e88ca49711b348fee7e +size 4404224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0e1270ebb13f79f5b72939bba88dc029c79f3af6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5f5b72ab3e0b8d826f02+c4f887dc/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5cf6e5611bb5d009acfb6cb6a6467d2f73bb51ffa65c7d2aa44bf863930098f +size 4415081 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5edc6da2a4b3ce3eef03d4ac1a77537133a7409b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:036a69e60f5202c14cac8f7672b76b678cb49d42362c6d7a31ce83373c654415 +size 687614 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dbf295261bd779da3d094116050ab19f46141eaf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11f5346c8ce5b2957e26ea631dc82651f714b2497584863f228176591142f7ce +size 1977344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..278b3504f91c01ab6ae714a2e3d84ccd5a4e074b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6040d38a7245c853f482+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980c6da9fe68f925a5da4b0a262f6e597a51f408f5a69d1542c5f7402ab7550d +size 2088222 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_60f37ef2cb181ebeb434+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_60f37ef2cb181ebeb434+a02c3a36/model.neff index 44ccec38914b4b2e4f03e4c81dc3c89b056edefd..c39e39432c699614f6138e50580cc59883154aad 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_60f37ef2cb181ebeb434+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_60f37ef2cb181ebeb434+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1fe4ebd73ffc01c6a9f0e1b592025b6316c1795432ee900302da206df867cc79 +oid sha256:663e05b29d30cbd492c1f74adc46dca314605eb2658b0f4dcdb5f97f715116f8 size 1772544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_60f37ef2cb181ebeb434+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_60f37ef2cb181ebeb434+a02c3a36/wrapped_neff.hlo index 13973c534ac0df98a8fedb4f0d9457bc40ad1970..c07924bd5cb4238136db938ef0f4f2949cd9dc96 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_60f37ef2cb181ebeb434+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_60f37ef2cb181ebeb434+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ebfa95057eb5713aa05862ae992aaa77ce4045ea5f806dcc3726efca85a605a +oid sha256:2b1d4da75119b89a223ada9b8876044c4c911d18b2fdbaa6f412a4293bbf5449 size 1894933 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8e7da1759f18644fd58b12e947edc76849946cc0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a014b74ceda95eade982c79b8f8c1b429e66ab6a36b6a6a7fed94bb0e453bf6 +size 2549448 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e21cdeae29e400ae88dbc76631b584b1195d9765 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e406af060c688d0d78087538230afc828390f3b763072b3a181d212d56a4ce59 +size 7711744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4705cf3cb38f3fcb84c7aba562013aad8b022dbd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:defeeef8d88c2fb6b97ecc83961f887616af71deb8db89bb2ad48de31a58bcea +size 569572 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f91b33fdc9f5a42eaefd0457ed7ce863f89482c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25a35f54c5ea741195a233850761c1dd370a4bbe2309be605e54a11a5279eec1 +size 1373184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..797f65f6dfcf127895727e7c08b14fa6b8925a3b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_633cc41552f3f933642a+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47f9e6b0e424700345aeb2b40c2b2f322538ad059f454d12fc457a91385fd8b1 +size 1495573 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68600a704763bfc8b034+ec6d3105/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_68600a704763bfc8b034+ec6d3105/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..a7f7fb38ea0311e9edcf01e08c2e2b48c33555fe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68600a704763bfc8b034+ec6d3105/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_2dd839dc-b43c-4e0c-b9a9-636730af5470/compiler_workdir/OldSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68600a704763bfc8b034+ec6d3105/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_68600a704763bfc8b034+ec6d3105/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68600a704763bfc8b034+ec6d3105/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_68600a704763bfc8b034+ec6d3105/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cc8595724b949c0c1c3c933ab980970a6b43e6a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68600a704763bfc8b034+ec6d3105/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5114d4a4b6883064dcb776ed5bd26ee295ba206bf9cfbe32191a6df351c35e1e +size 4581 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68600a704763bfc8b034+ec6d3105/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_68600a704763bfc8b034+ec6d3105/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..314b2e94ac5ad1d4d582f663df7c46dab7c0ab38 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_68600a704763bfc8b034+ec6d3105/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1839053aac72c5c3447072c9d4c35eb4990533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..387862d01758b0ec5120a2b642598278fca13e88 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a4f6dd8ec3923c15e7bb2499f8c18b3dd9e171e3e8c357c393238f75439ed3 +size 42200 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2418fc15f401f7b298661e4667d2970e66c0171d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870613766de6c72d0bc8961b3b38442554cb482fdce633fe0074e22b6381f366 +size 2130944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1b260c5bddf5507a82f8a15365ffde306c5e8073 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6abd2a6ee6cb217440b5+c4f887dc/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f08080baec438dfb6634371e89bb91d8460b37ddabf5944d7b155bcb097b4fe +size 2141801 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6e5f16275b9ce75f3593+27d5548e/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6e5f16275b9ce75f3593+27d5548e/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..63be28a4dbbefaba10071856c35cc1641986306b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6e5f16275b9ce75f3593+27d5548e/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_b07b8b12-e364-4386-91f7-af60e9838452/compiler_workdir/OldAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6e5f16275b9ce75f3593+27d5548e/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_6e5f16275b9ce75f3593+27d5548e/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6e5f16275b9ce75f3593+27d5548e/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6e5f16275b9ce75f3593+27d5548e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..799e545b39ab1382b7d4fe3923096af6caf6ead2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6e5f16275b9ce75f3593+27d5548e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a16e85e684bbec7a5bb9e9df72afa8ab2fd5510493638ea9f1eea4fc86e8e50e +size 7822 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6e5f16275b9ce75f3593+27d5548e/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6e5f16275b9ce75f3593+27d5548e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dc633591cf96402678525d7810a5f682d3db7447 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_6e5f16275b9ce75f3593+27d5548e/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_769b4b23dbcb08117f8d+7175ec2c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_769b4b23dbcb08117f8d+7175ec2c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1719f759e11756414af4a41153be48d1266945e9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_769b4b23dbcb08117f8d+7175ec2c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_90b10281-1374-4500-950a-d12b5ccaf9fa/compiler_workdir/NewSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_769b4b23dbcb08117f8d+7175ec2c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_769b4b23dbcb08117f8d+7175ec2c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_769b4b23dbcb08117f8d+7175ec2c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_769b4b23dbcb08117f8d+7175ec2c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..41be8cc3984f4e6a0de845cea098fd53922b2fbd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_769b4b23dbcb08117f8d+7175ec2c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2e93fef39637b2e7d1ceb514fa3024d9836275bca2d538f49802342c231f0e +size 5471 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_769b4b23dbcb08117f8d+7175ec2c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_769b4b23dbcb08117f8d+7175ec2c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c0a33d1ba193b721840c4c3ba4f68e2461629838 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_769b4b23dbcb08117f8d+7175ec2c/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..48c4a0dd31696a0866142ce73baeacd9170436f1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:123441c0c01d741960d9edf16df88dad93f3426a0a1a940c2348c8e6d3add05a +size 891117 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9f2f305786968566f79aadaa303be565f99ccf70 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936df18dc8957bed20db8622f6d7b72bc72e61832b07efa4c8d925146b024023 +size 5530624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5f1de785a707d5dcc07d1ddaad3e63354e68534e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa2ade0e387063ba57d08563e9714f731604a9747d8a4a39ef642f0dbed3402 +size 5707749 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/model.neff index 39057715a862657fe1ac954d72e7bf1beb93c1b0..7a6773ee9130956a02ae929277909d8d648f64c2 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:422a22826c569c92260c9e11e4c7845725eb13de7f0943b0e6f012f3908c996b +oid sha256:780f27ab64a77a2d7673cae26c346b6da19aa58f30ed002ce5077f87a8710d69 size 1485824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/wrapped_neff.hlo index 83926baeb44d4e3bfc15c8a02cc05d87b934be0d..141f54a74a05f509b1b8635c4f12005777516eca 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f412c2af4dc5c77c58211597836cecb60ff04a8741197af646a6c6b39fc137ab +oid sha256:51431efdc5ced1e6781f9fb5b83b2831d7daf2fa5d21ee9a225e1de641722e92 size 1596702 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e286bd2104ffd044be467266f6de65fcd4261d02 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e899fd2bfa8bc130c686d925d195ccd3ec66d32116f3f4b9611b211c6d94444b +size 102577 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..61255c3e55904a4dab860ab0fe172fad2810d8c6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651e66023989ecce31f41745714e2fe351dcc074d55b67171c4a8665ac9b5f66 +size 881664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f3e33e223625ca05644dc5444a896b82442a17f8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78aec5e2f32f6fc831aa+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76c6193ec8564aa7a37dea47e9a05b582e35db1e0f68ba59d34cf3fd0c9f08c2 +size 890539 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_790e5dde0889aab1bfc0+f15a2550/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_790e5dde0889aab1bfc0+f15a2550/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..6f3b9cbefd9e2245e479f6fdaf3eb161c87694bb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_790e5dde0889aab1bfc0+f15a2550/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_ce1ada91-ad07-49f1-a768-42fa1abaae7b/compiler_workdir/OldSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_790e5dde0889aab1bfc0+f15a2550/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_790e5dde0889aab1bfc0+f15a2550/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_790e5dde0889aab1bfc0+f15a2550/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_790e5dde0889aab1bfc0+f15a2550/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cc8595724b949c0c1c3c933ab980970a6b43e6a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_790e5dde0889aab1bfc0+f15a2550/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5114d4a4b6883064dcb776ed5bd26ee295ba206bf9cfbe32191a6df351c35e1e +size 4581 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_790e5dde0889aab1bfc0+f15a2550/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_790e5dde0889aab1bfc0+f15a2550/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3e1385848e412bd6ffff55060c071511b9f92595 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_790e5dde0889aab1bfc0+f15a2550/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7cc268c882c393abfe7c+677eeb9d/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7cc268c882c393abfe7c+677eeb9d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ae7e0c6790082c43cd145aae8cc7e3cc89e15ca8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7cc268c882c393abfe7c+677eeb9d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/speculation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7cc268c882c393abfe7c+677eeb9d/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7cc268c882c393abfe7c+677eeb9d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7cc268c882c393abfe7c+677eeb9d/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7cc268c882c393abfe7c+677eeb9d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3551f5f286f0d26cd1f58c4f03722d60510d2578 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7cc268c882c393abfe7c+677eeb9d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4869c0c3c7d38fa9e5d446a51ad52862a768b38ca5ccb79f6907634ba8c2b13 +size 423848 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7cc268c882c393abfe7c+677eeb9d/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7cc268c882c393abfe7c+677eeb9d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..80161713a7617a57ca6e4d3f9eac8aac60f108c0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7cc268c882c393abfe7c+677eeb9d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21abbab1accee8ba1eb0e37f8c45dee7eaed39d6f4c022033f183c9064fa3976 +size 3687424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_82382ce0b34b3e272481+6170d8e1/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_82382ce0b34b3e272481+6170d8e1/model.neff index 36427377622480e69b7e59a96590be22e8007024..7092479e26b1efcd7d8f8e2395992d57e208ca34 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_82382ce0b34b3e272481+6170d8e1/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_82382ce0b34b3e272481+6170d8e1/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68e1150370b99c7ff1054699ec25421e64872fd7e32e9777e95cefc3c4e8e4a2 +oid sha256:c7ca411cad894e0005429beef808a8b7660c4e8c7d2e4149e12e1075dd681fb6 size 11674624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_839f7ce0ce17e2a97166+39f22460/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_839f7ce0ce17e2a97166+39f22460/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc69077c988a9490906b4cbe354ef5f36a9cf9e5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_839f7ce0ce17e2a97166+39f22460/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_3fbbc5fd-f234-4f22-8d0c-aed266c1dc85/compiler_workdir/NewAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_839f7ce0ce17e2a97166+39f22460/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_839f7ce0ce17e2a97166+39f22460/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_839f7ce0ce17e2a97166+39f22460/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_839f7ce0ce17e2a97166+39f22460/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9be41b3694d830129f28337b6166f40093f2c1b5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_839f7ce0ce17e2a97166+39f22460/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a83e3ab17ecb6a05e820607c191bf82d420bac993c8802f5850cf1a9da80866 +size 8740 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_839f7ce0ce17e2a97166+39f22460/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_839f7ce0ce17e2a97166+39f22460/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4196cba4213e9ae3b5e611ad2742c4464463babe Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_839f7ce0ce17e2a97166+39f22460/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a499d9aa5cbf95780ca870f227fb23585d07ef5c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf4efeb25a6e5d4498a29635f2085805945ac8ece5c20c9ca11cbf069e5bdab +size 109574 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9859f76ba98a932f50d7e7f9240371a39485de83 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbb6767aa8d9fd31cb4b37d6d5537d4f0b6479db57c2c0ac3948bb333de0ad8b +size 492544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4cde4fa4f4b916b93bc06ea84d3c546a41dafd4a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_866615121c80b108f18d+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:555d930b5be5aea79dc8de0cf09ce8313735cc090e10767cd011044dbf009dbf +size 501283 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86a4971e4e8a2bcf0eec+7008b40f/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_86a4971e4e8a2bcf0eec+7008b40f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7984084abc395afd34e75c578a71fe8e211d4056 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86a4971e4e8a2bcf0eec+7008b40f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_5c3ddf9f-1382-47c5-8389-181d4856e49d/compiler_workdir/OldAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86a4971e4e8a2bcf0eec+7008b40f/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_86a4971e4e8a2bcf0eec+7008b40f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86a4971e4e8a2bcf0eec+7008b40f/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_86a4971e4e8a2bcf0eec+7008b40f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc0376eb458882e480c9f2988be7a20d2a83a85f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86a4971e4e8a2bcf0eec+7008b40f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5257f6d788833b506fbb149a4f7b96779ca2ed998a12765781b35e1b7f4ec017 +size 7691 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86a4971e4e8a2bcf0eec+7008b40f/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_86a4971e4e8a2bcf0eec+7008b40f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a0cb918db1b09ff009ac2403ab76a89d4b2196db Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_86a4971e4e8a2bcf0eec+7008b40f/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_893363720092557162+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_893363720092557162+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_893363720092557162+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_893363720092557162+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_893363720092557162+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_893363720092557162+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_893363720092557162+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cdc03b6fa47aa2083a63d09b3dd61eec3646e000 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_893363720092557162+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2204afab47789e9f4194a0be9c175a6e1e58449e7398fbbf46dacde6a43caaa1 +size 390 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_893363720092557162+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_893363720092557162+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f256d5cb7460e7a9c71f2ffad8a0de90a8c57de8 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_893363720092557162+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_89a1398b308284d73780+b102fbbc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_89a1398b308284d73780+b102fbbc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..c2c201f05058c1123c7e6fff69c4add922cb4a97 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_89a1398b308284d73780+b102fbbc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_3489806a-e3f5-455b-b9d6-cd3ec3047da4/compiler_workdir/NewSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_89a1398b308284d73780+b102fbbc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_89a1398b308284d73780+b102fbbc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_89a1398b308284d73780+b102fbbc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_89a1398b308284d73780+b102fbbc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..41be8cc3984f4e6a0de845cea098fd53922b2fbd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_89a1398b308284d73780+b102fbbc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2e93fef39637b2e7d1ceb514fa3024d9836275bca2d538f49802342c231f0e +size 5471 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_89a1398b308284d73780+b102fbbc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_89a1398b308284d73780+b102fbbc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8ae5726c01c31e5f0a0eb6031920606397c9ff9c Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_89a1398b308284d73780+b102fbbc/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b48097eaa2ae85e1c16+48fd059c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8b48097eaa2ae85e1c16+48fd059c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..af58b805e9715e161328c42ef2191dbdb9eb15c4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b48097eaa2ae85e1c16+48fd059c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_b8301d09-8942-4560-aa05-f878496ccc01/compiler_workdir/NewAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b48097eaa2ae85e1c16+48fd059c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8b48097eaa2ae85e1c16+48fd059c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b48097eaa2ae85e1c16+48fd059c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8b48097eaa2ae85e1c16+48fd059c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65cdd33ee539447553160cece8704c1e2e8745cd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b48097eaa2ae85e1c16+48fd059c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f409d8cc6bc5037c585be1e7ac4f332ca2d5938432853ee512e99ced1ae7345f +size 8611 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b48097eaa2ae85e1c16+48fd059c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8b48097eaa2ae85e1c16+48fd059c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f2c3742911c2581fe3d07063933bc51446d2cdc1 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_8b48097eaa2ae85e1c16+48fd059c/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..450d3f4e6af5daaf406fdb889011045fe0bf58e7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff76f4ff7318c5dba578d830c0143cfd69e2a85526d97cb316f603e37c991ed +size 2549552 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..610a3fb9b64190db9ad66d696aba5b1ec64d0d86 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:238000d2883e3847412559a856aa0e25b08ade4240632c2d276500f25b75cc06 +size 17900544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c209ee0b08ffaa149913bfa7a7137902cddff2c1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57514c806d17e25ee09d2fd865d04f97f1073a6f1c2184bd4d3648e815c384f1 +size 599865 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f95b0813a05233678236bf3fa8dc05d4c86599db --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d388724dd763c73545642219f0ffeb0f44f25be87750be37b6772835accc21ff +size 1803264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9c830f194b93d5e350d21d3393b8ed82f1674325 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8e252cfdf6f4e90ebf4a+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:affc84170362c92086992a84d41a40e0e32acd5a60952eba2d30a50981fa08c6 +size 1925653 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1839053aac72c5c3447072c9d4c35eb4990533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3d1d73a34490181a7d79c9dbdf35b6dfad05e5df --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b868910b39583cee3aeeab326d9fb6bfc1e12ce4e937d244abc110c48b64465e +size 160725 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..607eb141c64018328fd47205630261f8aade6245 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936f7c19a4ff86454a5b83a26a3fb7b07cdcc62518970b6c78f0bf293cc21420 +size 30598144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e424f96aac1c54f0bed80e6ae76e81ef50f4d7a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5009b951afa0dace7b72350a1ffd6ed43bfb3919fd73d44c0de35f45a53cc56 +size 30708309 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..56883efb446a30f65122354deaecbd9e07bd2127 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4041b29f7b9b399a0ce45b3d6cf353206c50e80704a931977b6150b712ca5584 +size 656602 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..92046cab8fb4f5f642704e6d15ecb7b241715c56 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d740f7f2ecea942ea86820849b04c793d85b1a27e3f2ca68378548f54f6ef0ef +size 19252224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7e10876eb40f372b751d4ccd8ad45186eda0e284 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc16916f41a2b29d4666512abcf5574e6e0b0084884874e61d77e39b3bd9d415 +size 19374868 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9055199234366240053+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9055199234366240053+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9055199234366240053+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9055199234366240053+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9055199234366240053+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9055199234366240053+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9055199234366240053+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..20edcbe8579014e194d19ba072040be266fe52af --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9055199234366240053+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60ab56ddcb99f9347dc39c6feec4d13c469a8231193baaaf8c481d3d7abd7bb1 +size 390 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9055199234366240053+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9055199234366240053+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c3be4742783f798d629c83bb82efc36bcb1dbbc2 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_9055199234366240053+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1839053aac72c5c3447072c9d4c35eb4990533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0c93e217bacda7d3c6d0d27d7566e0d2ad68c1c6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d723ae5b805a67e37735be339b51dada0275c59c1dff6d87e29681cc17b85d53 +size 40011 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..519a78a6b771b2c5b79d901e046e7b9b8f86c611 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76f8fc4039bfd0fd8c8821351b07a9aa0f99988c3f0f260e75ce68bd22e06700 +size 502784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..20120d0fd9bf1987cc6ecd6c629ac0a8f67a1bea --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_933998fc47a290de4562+c4f887dc/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48d640a39ba027f5078bffcdb24ab05005b2aa101cb7cd6479221f9cee11a22b +size 513687 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93c65358592304bcc644+6d93be2f/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_93c65358592304bcc644+6d93be2f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..64964590faa733ae582d6caabd61bd3aae92f755 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93c65358592304bcc644+6d93be2f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_ab949340-f91e-4d52-bd8d-9179c7533c63/compiler_workdir/OldAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93c65358592304bcc644+6d93be2f/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_93c65358592304bcc644+6d93be2f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93c65358592304bcc644+6d93be2f/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_93c65358592304bcc644+6d93be2f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc0376eb458882e480c9f2988be7a20d2a83a85f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93c65358592304bcc644+6d93be2f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5257f6d788833b506fbb149a4f7b96779ca2ed998a12765781b35e1b7f4ec017 +size 7691 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93c65358592304bcc644+6d93be2f/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_93c65358592304bcc644+6d93be2f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..516125468a06690adbd1a47c2c30053228ab7141 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_93c65358592304bcc644+6d93be2f/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_952fafb4c315904dcb0e+f2c40fef/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_952fafb4c315904dcb0e+f2c40fef/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..889c34b1a018a2449be87e1bc9c6b5b5c510a309 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_952fafb4c315904dcb0e+f2c40fef/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_a8de0c8f-5baf-485b-afeb-d86316569c80/compiler_workdir/ScaledQKComparisonModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_952fafb4c315904dcb0e+f2c40fef/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_952fafb4c315904dcb0e+f2c40fef/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_952fafb4c315904dcb0e+f2c40fef/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_952fafb4c315904dcb0e+f2c40fef/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..25766bae20d721f9d5c2f3ea835c3631e3010787 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_952fafb4c315904dcb0e+f2c40fef/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138b427bf7769ac62d72b0c008b58065b93c16f2f09976be109e29ef90de7854 +size 4940 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_952fafb4c315904dcb0e+f2c40fef/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_952fafb4c315904dcb0e+f2c40fef/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a2d10eee14e0f84622b8de39bce5c7f6a781aa41 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_952fafb4c315904dcb0e+f2c40fef/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:318f983d6956631fa0b8d49cd12366f749070eebd7e9a7b6d7be6545b7ff5997 +size 390144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff index edd73552ff8f4f9e8d8d13dbc1545c5f9edf4590..a2fab63b246f77f2b81d64bd3aee76b01e3c995e 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb68e29ac9c2790c0764956146429d2afa0a88f59a7e87b54141271d789ccb53 +oid sha256:bdf04a70eb8bda2f7f4e49bedb2c346136c7e9b0dec32fa7421ea72474563fbe size 2509824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo index d7c716136edc57ba7471385f4b1306b5182a8df5..23e1a4ca305899ccce17e373419202f11f100017 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94dab80edb15bb2de7835eb2ba89df093fd885bdc38b96f0a1852f767c384202 +oid sha256:c84f3a91584fb71a18f3ba16dd9c1cde9917ff28ab72e432ab03a61ad722f8dc size 2583911 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1839053aac72c5c3447072c9d4c35eb4990533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3d88b87a0433d8ab04a71af0aa1acd5b483572d7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea0899a2fcff4fb689f10963f98273e4c484457d3dfd50a44cca242354cfd3f0 +size 384416 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9c9a194b9630718475c71ed77be80ebacba3ae3f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d839c923038c0af58ae5b8fd91f9ec112a4a36f170f2edc3e176f320fb0e01 +size 11049984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d66571b4f5f28d3eb8e758193c69f994a0982c9b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95f18043665401be9ab1+c4f887dc/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc9eb54e448b0d4d13c16a1bf281c5676959eff6459b39cb148d5ead61fa87f0 +size 11283102 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6812597064c1fd58cbff3fc59ab8e405b9651eb5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b0d926269b1784c847b6071026c160956fff6d7568422257c4a8983ffe1dd4a +size 388429 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..974c3be8cd70d31f6a8e46a2d379cbac883f8acb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68c20a3d3d33b361cd01dc97f53a411af52faca4ea4bf3e8affe209562cd133b +size 1936384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..590dbc0dfe5e1d1253d2011e1750a0a4329009db --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9748d4fa623593f9a070+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5835318ddc86522e674c4ebd4c6916f5678c8e707c7465bf67b99f01acaab64 +size 2029297 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1fa0a63256ea9a8447b25fc8c6d1a470ea4a8f53 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a6a6e18483253c9d2f07fdf231818889894232c4ed56c55360f58394d7b13ea +size 1062927 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..437c2085f19b3070384c20a60c48616800cb0751 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93693cfc44c1704bb08f92dd01eb408aa1f8eb4888603a61da5ab9431c4fc292 +size 6933504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..052a53a17f291b1506970e02037ae09f348aedae --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_976a4227c74e1e5d858d+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdbbad0e649a2e58665016e3b6e5daaf73cdcbdcef29dabc386d4ab08aec3b56 +size 7164278 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9867fe51759b27101cfc+58603c09/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9867fe51759b27101cfc+58603c09/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..a09b1ef66fb49621d4f3c88e0fe84f50083de9dc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9867fe51759b27101cfc+58603c09/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_58b5bb62-17bc-43d8-b489-7045415e79b7/compiler_workdir/NewSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9867fe51759b27101cfc+58603c09/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9867fe51759b27101cfc+58603c09/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9867fe51759b27101cfc+58603c09/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9867fe51759b27101cfc+58603c09/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f4f0f72f12cee5aeb71997d3478cd67d6cc875bd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9867fe51759b27101cfc+58603c09/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ef4eb7da53f1d7688fa76cd7128a4ab77825c53d03571f98179e0d8c3b7575c +size 4581 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9867fe51759b27101cfc+58603c09/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9867fe51759b27101cfc+58603c09/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4de96646520064db4589927a337b7214facd1e4f Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_9867fe51759b27101cfc+58603c09/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1839053aac72c5c3447072c9d4c35eb4990533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..15c87adeacdc10214d8e058f019556bcc295c112 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df2108c8bdd21eebffc5de6069d4e84357115456e3ee3973c9d35fade13918c6 +size 42192 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bdb745101c00e5f61a4c385e0a302604510b51e1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faad9069616a57d672efb6f47a004ba83bf4a0d7117841138f6ed411f5e4e35c +size 4393984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0401cb8f0abdd3398894f2a3e41069f64991725e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b7eb59becdba58feaf4+c4f887dc/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd88017bbd2dc4353559e502f3afb74d769e830411c2662fbb0aec5ca5326d31 +size 4404834 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9dc27b1a79c8fd4b3abd+e8026fae/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9dc27b1a79c8fd4b3abd+e8026fae/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0d95364afe5f2db74bb7cc2f689037e36240e79c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9dc27b1a79c8fd4b3abd+e8026fae/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_37cf7660-7689-4315-93bc-96ec297e3977/compiler_workdir/NewAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9dc27b1a79c8fd4b3abd+e8026fae/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9dc27b1a79c8fd4b3abd+e8026fae/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9dc27b1a79c8fd4b3abd+e8026fae/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9dc27b1a79c8fd4b3abd+e8026fae/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65cdd33ee539447553160cece8704c1e2e8745cd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9dc27b1a79c8fd4b3abd+e8026fae/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f409d8cc6bc5037c585be1e7ac4f332ca2d5938432853ee512e99ced1ae7345f +size 8611 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9dc27b1a79c8fd4b3abd+e8026fae/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9dc27b1a79c8fd4b3abd+e8026fae/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8322eaee5b65ef6d7314db183aba109587721cca Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_9dc27b1a79c8fd4b3abd+e8026fae/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a02e663f9d5e1913e9e3+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a02e663f9d5e1913e9e3+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a02e663f9d5e1913e9e3+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a02e663f9d5e1913e9e3+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a02e663f9d5e1913e9e3+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a02e663f9d5e1913e9e3+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a02e663f9d5e1913e9e3+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b5161df8420fd02f4637e8a28c4a016dad5e5cca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a02e663f9d5e1913e9e3+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ae29baeb064ec697a8a958f908b31b98f8e2f91d002827bd6809acd9b1c4fb1 +size 107706 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a02e663f9d5e1913e9e3+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a02e663f9d5e1913e9e3+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aa2f3942d51e37a0979fbe1ab8329efe7db6977d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a02e663f9d5e1913e9e3+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6202fa09b0c67ee52dc345cd604e3d64666a98037f0026c541a84f2d0b49e6 +size 656384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a1cd38756c74f5816b8b+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a1cd38756c74f5816b8b+24129607/model.hlo_module.pb index 8b3b83651c6fadd4d8f7355f90c20f0dea795ac0..4c41b01bb597ea40a544419f0c701e45eedabe18 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_a1cd38756c74f5816b8b+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a1cd38756c74f5816b8b+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5afb2944ba0711a4441c137bd39995accbbe79dc510f590407ae77b49a33df0d +oid sha256:db4d29e73bd32d78207490919cc67097223016b8f282251e22eed37a4209717a size 1050679 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a1cd38756c74f5816b8b+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a1cd38756c74f5816b8b+24129607/model.neff index a5edbee3f81b7fc208c0f966cbdc13c9c02e8262..c72718076dfbd4297e32da14fffa282eaadbb197 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_a1cd38756c74f5816b8b+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a1cd38756c74f5816b8b+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:652897628aae608aa5ee58fc2ab10b8599eba028f43996e8519fe056c5c402f8 +oid sha256:4807eec7d26a7d456764b6ba7e0004e40c7abd34ae300771c1cfd5cf427aa1d0 size 5827584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36debd95d53c8bebd53+f7cce17f/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a36debd95d53c8bebd53+f7cce17f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..d99f718320ff7861bed29960e6249af24a2b20ea --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a36debd95d53c8bebd53+f7cce17f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_19cbcff6-b961-46fa-8a66-6d66b77bd948/compiler_workdir/ScaledQKComparisonModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36debd95d53c8bebd53+f7cce17f/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a36debd95d53c8bebd53+f7cce17f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36debd95d53c8bebd53+f7cce17f/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a36debd95d53c8bebd53+f7cce17f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..25766bae20d721f9d5c2f3ea835c3631e3010787 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a36debd95d53c8bebd53+f7cce17f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138b427bf7769ac62d72b0c008b58065b93c16f2f09976be109e29ef90de7854 +size 4940 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36debd95d53c8bebd53+f7cce17f/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a36debd95d53c8bebd53+f7cce17f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5f037337793ebff7ef6085fc9c49f1bafebabf9e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a36debd95d53c8bebd53+f7cce17f/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc8c94992d4eadc9d8ac33d19c980836d29f3f525d84ce30d73a2bd0bcaaf563 +size 390144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3abb42aa0bc31f6048446e47c689445b14ddf0a6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40de54a293a9d07390bdfc8efc7a66171274e2d19ea27d5af0f36db00e6970d9 +size 891924 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fe3dc43772d8e4c1d6adcebfb46118712c5a6922 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ffce009447cbf266712d8b182140cf4b8ad02b5c1a807eca71e782ca1362162 +size 9903104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..df947164d82e268eada82e36fd7693b8afeb41c1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:401c29f76367655299c8980aaed43243cd7a7d429084f4c8ed38cb08b6754323 +size 10080229 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1839053aac72c5c3447072c9d4c35eb4990533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2aafc38d5c6825e402b57074049575099ae4efaf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28e1319bebe52030d6832cf7c1077261076ffc38d9ae3bda83961c8b0ab28479 +size 160725 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fae7a82949ca1ca016fedd7bb3783b53559a1061 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc63cf6846e7cc1c4803ce5cbe54a9570f509d93caad9a5d9681cbcccdd5bdc +size 58317824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..27f0abceaf5b932c204300e7700b6eb58266f8af --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1796a5d188a593377924c7003d5fef63e3c9a56f10bfa5eceff8821e8a2097 +size 58427989 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dd954b2160bd03668249483609a559e29f34a673 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a9fa8891b6fc2820862875ba19ed8057a120fc148a5debae12b01299b5c920 +size 109171 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..90319011eeef8c89a9776f9fc9a9c83e14889d35 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47be45eb0635e62dda5be98b712f81b1087743dbe2d2d5ea0e184577b70e7e7d +size 441344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5373dfd24520637772bb7f0cbdb1719dc4a18953 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ab7617cb5e9186411e52+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d8e8e6f5bc867227ec248ffac8db15463b2dc95d71b1d6d8b88849f2e1de0d +size 450317 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e9d61234b043e54a08b3e6eac0497b5da706a38e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c002da7fb053a98cb443d5933c4adc1e8e771f8669d9090a199bc95ddfcfad42 +size 842788 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..64b9f761eac62127b7bb0d16e6ca48ee9642f54b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16d549a2b91d6589e765e56ed6551515979b03ee91709cb6ca6803071b5d126f +size 27761664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..071777164176481217bdd5a32e8ebce9a0454406 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6561a091b91f028c18dab3ec2b98fe5ee1cc8fc6162a42334d88f430462456e8 +size 27938789 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.neff index 84efe9938d42fd7f5ee025dcb86b61fe9214b9ac..f3975f4a81fdbe1be47d692c7b2ab87709cb3172 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f7ccb3c563a4c0fdae234892df68dcd6c60cd41674bf983c7a5693709d2af9c +oid sha256:2b8be74d754d3877ff4c9586c81611124a2989d55ff021b4915372155a27fe1a size 3073024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/wrapped_neff.hlo index b61449e8786a2d6f5af41ac47d0f24a21d57ed1a..04dfc20be200de89da5ba6c2aa0dd1da7aad7cc4 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a6ac7c0556c0e218f9e95cabc999fa5d73cb79aabcc96ccbc57e3c5325e36e9 +oid sha256:a1a2dfe758abc52214d97cbae6fd8bfe738893d16164557e687784ac1c1a80a9 size 3147125 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..457b4bbf4f6972e3a1299e24c61e2f6090fb4c22 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26bdb3a565bea60ea9289a3a632756326fd2d156224839610ee4b2f30fb1d2f +size 1779218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..96c5faab2852e9ec23fa33107e3940c1839aacc3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa3d20f8b1395de354a91b601ba1d610518bc100bdb757601f6808bd98f69dd2 +size 83313664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0c3ccadd2394039a6891fc9e2c4fec493bf375cc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4a7180c79df0d067e960030b46425c217430be95efbd931ad1b3310733f6f1 +size 656586 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c6f07a8af43f1bc891eb1ab4e496ec561bbedb88 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0353221fc2c2895c0efa4c883c10183dbb0286a7cb7aa5e582f72ec274eeb16 +size 10957824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7b19dd1944796d1ba629bbfc691e9f2a93d92a8a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf27d3d6de096aba6558fd96146eb143ddbb3df576e33083ac3f71690e5699d +size 11080468 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b4dc83095892de986d11+c0fd8930/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b4dc83095892de986d11+c0fd8930/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3d2f9f3b63e8b0eae65a1f889d831b6837516b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b4dc83095892de986d11+c0fd8930/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_ba8ce1e7-d102-4f58-bf0f-3590244fa7ea/compiler_workdir/NewAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b4dc83095892de986d11+c0fd8930/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b4dc83095892de986d11+c0fd8930/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b4dc83095892de986d11+c0fd8930/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b4dc83095892de986d11+c0fd8930/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65cdd33ee539447553160cece8704c1e2e8745cd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b4dc83095892de986d11+c0fd8930/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f409d8cc6bc5037c585be1e7ac4f332ca2d5938432853ee512e99ced1ae7345f +size 8611 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b4dc83095892de986d11+c0fd8930/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b4dc83095892de986d11+c0fd8930/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..acb26c276f2c85a2f7e38e03e400d02a96e65da8 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_b4dc83095892de986d11+c0fd8930/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b681faf194284309cdeb+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b681faf194284309cdeb+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b681faf194284309cdeb+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b681faf194284309cdeb+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b681faf194284309cdeb+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b681faf194284309cdeb+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b681faf194284309cdeb+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..67c346ff4fa382e99df517c74e9984729045d98d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b681faf194284309cdeb+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dca0ef8134630f9023ca270e14a15f29ee728ccba0a89c6b5d4e1d9877cbb9e3 +size 98209 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b681faf194284309cdeb+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b681faf194284309cdeb+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f7200bf3a45d433c2c04558a479f4697fd6d29d7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b681faf194284309cdeb+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c9c0e93bdfbf849b40ef174c75cc4972b7b482fce42fe51ea394ce0e9b5e89d +size 523264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b8a5758f1a9b4ba1eab2+cfc273b7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b8a5758f1a9b4ba1eab2+cfc273b7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0d3a33583fb2bf2b082502da9d1bcdeb4e201cf2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b8a5758f1a9b4ba1eab2+cfc273b7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_2a5d0f03-49b1-4a46-a3aa-d3e7bb48e215/compiler_workdir/OldAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b8a5758f1a9b4ba1eab2+cfc273b7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b8a5758f1a9b4ba1eab2+cfc273b7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b8a5758f1a9b4ba1eab2+cfc273b7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b8a5758f1a9b4ba1eab2+cfc273b7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc0376eb458882e480c9f2988be7a20d2a83a85f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b8a5758f1a9b4ba1eab2+cfc273b7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5257f6d788833b506fbb149a4f7b96779ca2ed998a12765781b35e1b7f4ec017 +size 7691 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b8a5758f1a9b4ba1eab2+cfc273b7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b8a5758f1a9b4ba1eab2+cfc273b7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9eca7295f8b03b28f036fa793fad9c2249a92487 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_b8a5758f1a9b4ba1eab2+cfc273b7/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b431b81c2179de62c0d0c98a41b34538a0859747 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5231dfd98b741f8a7925588041cdcd45f2f927acef9e412cb34e6f8c4e947b3e +size 2549552 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f52d47b30234721947c3693cfe72478bda088bd3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d0bce227347cd77ac4286ffd97388546988de59cf17e03dad12ffcba42640ea +size 17900544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf77597e48a3f7c2e5b1+5919f42b/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bf77597e48a3f7c2e5b1+5919f42b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..f47d7f2aa3f64fd6467d1ee220522b612699877f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf77597e48a3f7c2e5b1+5919f42b/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_f92dffcd-4f2f-4331-beb8-fc81ed89f00f/compiler_workdir/NewAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf77597e48a3f7c2e5b1+5919f42b/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_bf77597e48a3f7c2e5b1+5919f42b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf77597e48a3f7c2e5b1+5919f42b/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bf77597e48a3f7c2e5b1+5919f42b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0c6f808a467213d313f6744cd54bc402547a1247 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf77597e48a3f7c2e5b1+5919f42b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88d9d739b71e46465dd28e647344f8c400735a41054bdfa33c54d54aefae12e +size 7691 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf77597e48a3f7c2e5b1+5919f42b/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_bf77597e48a3f7c2e5b1+5919f42b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f8a8e2aea1dcd18cd17647bf698513f5624b9d1c Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_bf77597e48a3f7c2e5b1+5919f42b/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c05382aac6cf9c66958b+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c05382aac6cf9c66958b+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c05382aac6cf9c66958b+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c05382aac6cf9c66958b+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c05382aac6cf9c66958b+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c05382aac6cf9c66958b+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c05382aac6cf9c66958b+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..97ab66109ca47cec263453e6af3cd0d08487edf8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c05382aac6cf9c66958b+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:183b671885b2ebd77781e23be35c645fdb4551da9ba2d0aa386b4a4fccc0cd9c +size 571822 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c05382aac6cf9c66958b+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c05382aac6cf9c66958b+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b08482d878f1073d298ecc64cd904f1163a1ca55 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c05382aac6cf9c66958b+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb173647cd0c1d15302f2f88d8be708cde202aed161022733b0e427660931c2b +size 3861504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c19b2d1bea0aa209874e+752f1888/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c19b2d1bea0aa209874e+752f1888/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..f0813aad2fa6997867e405988c209126b120362c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c19b2d1bea0aa209874e+752f1888/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_e362e7d1-53aa-4955-9303-6aa292db6f5f/compiler_workdir/OldAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c19b2d1bea0aa209874e+752f1888/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c19b2d1bea0aa209874e+752f1888/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c19b2d1bea0aa209874e+752f1888/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c19b2d1bea0aa209874e+752f1888/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc0376eb458882e480c9f2988be7a20d2a83a85f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c19b2d1bea0aa209874e+752f1888/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5257f6d788833b506fbb149a4f7b96779ca2ed998a12765781b35e1b7f4ec017 +size 7691 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c19b2d1bea0aa209874e+752f1888/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c19b2d1bea0aa209874e+752f1888/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..10f2ca762045bfe17bc03aa06af471ca2efb90a7 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_c19b2d1bea0aa209874e+752f1888/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1839053aac72c5c3447072c9d4c35eb4990533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2dc7fefd8416df9fd6e4932be7ba5fe2e7056450 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1136862d93c97e0882587ce7501a718e7e94c9abbab0cedcd11b8d7034b6f10 +size 39717 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8b05db30777c8bee9009763098e3961fe12e63d8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7701eb6ab62bfb715f6463985d5f4ee136cd9e335f285bdabef67aa131d5143f +size 2980864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..71e359037f442e6825e4683130818a2356fc1258 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c480f8583bce4a388b93+c4f887dc/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40265d84a046407b34b8f0513c9657bbe89f92cf11e8b34dd7aac05a53507e89 +size 2993386 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9ac80839a44d9dcd768fa7797df381b4537e6aae --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04da69c73d9f7a86fa19933dffa67a34ddc65ea57f6f8a296f151df8b2e2478c +size 656586 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..640cb86569bd9c81338457fae00ab05f23aca7bb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc7e40c383be5f91a93de631c564de343d1d3c2bab14879acf444739e67fd46c +size 6472704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7b1f48fac41b3dcd6a93d67c4b85d868571c83e5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c9aa30c711919642e19e8959bfc40cb2300195d12a625f6a9d1823628ed7c0 +size 6595348 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..264871f853522e6c4b1dc6fbeaafebf3b0bdfc5c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:906e54fd0845481cd8e557aa253fbb18b9f71a3032ab259e9212591290afcf44 +size 586366 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5eb7b89a9dd011d402663a22bfc15b0c29f958a3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6110180fe3ea8a5b7f48097e5e386a55599b94768fd6d3c7c14f710dfd78f858 +size 1680384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6ed7402735507638dbf2fa36626c34c57b8bb84c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c5bb11161997e6aa48a1+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e4dd3dc7a69530934ceeb5d9aa72380371611786ac96c9f55a167e7ded19372 +size 1802773 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff index 9f21fb9c1b37177ab43fe11f01202aa3f2086d77..0c16d5ebde5b7936f39a7e05d141a8d45373657b 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f353a7f7b170b85e60abe2956848930d93fbb17b91c48a5fb5f89605979c662 +oid sha256:a7c258ba5a15e6c435b0719ac65ac7aed62119f5d3d3dc497bc8557452b5afc7 size 1659904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo index 4ef8b262adc22a340cdb2a97c9e9131f4766b9c1..fe11e2ff3fdd06413ccf6d281331049dc701a936 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f61cb51f77618dd82308e979bbd1a94946a7e0d90a0a1d37e561c52ac3c36a64 +oid sha256:fd30f302fc03f3e2cbf90294931da3ae0f5faa5a5f0cf021340988ef20175088 size 1782293 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb89e83a0ee1de5cd569+9390da5f/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_cb89e83a0ee1de5cd569+9390da5f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..51f991c0223d83d387e2a09104e17165ac30f027 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb89e83a0ee1de5cd569+9390da5f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_ded0ff9e-5aea-499c-8629-45101ca260ae/compiler_workdir/NewSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb89e83a0ee1de5cd569+9390da5f/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_cb89e83a0ee1de5cd569+9390da5f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb89e83a0ee1de5cd569+9390da5f/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_cb89e83a0ee1de5cd569+9390da5f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..92cbb15878eb2bdc3eeabe777af9cce7e94fcd9b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb89e83a0ee1de5cd569+9390da5f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87458fd13251d76e9632c5810e69ddd78e456d1f89d0994dba8f85036bef13e1 +size 5596 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb89e83a0ee1de5cd569+9390da5f/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_cb89e83a0ee1de5cd569+9390da5f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c34618b69d9a9612f2cdde39f9f6d2b76947b216 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_cb89e83a0ee1de5cd569+9390da5f/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccf424b84989b6787ad9+4423a048/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf424b84989b6787ad9+4423a048/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..03ac906643db935d3aac4480923f571237a3b0bf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf424b84989b6787ad9+4423a048/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_3c6d4882-61ba-4053-b3a1-8c4fdcf3e96e/compiler_workdir/OldSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccf424b84989b6787ad9+4423a048/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf424b84989b6787ad9+4423a048/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccf424b84989b6787ad9+4423a048/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf424b84989b6787ad9+4423a048/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cc8595724b949c0c1c3c933ab980970a6b43e6a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf424b84989b6787ad9+4423a048/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5114d4a4b6883064dcb776ed5bd26ee295ba206bf9cfbe32191a6df351c35e1e +size 4581 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccf424b84989b6787ad9+4423a048/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf424b84989b6787ad9+4423a048/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ab099046edf2b5a53f9b7f8003f88c78c23bde94 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf424b84989b6787ad9+4423a048/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d13b2023e03adb230124+67d388cf/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d13b2023e03adb230124+67d388cf/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..946c451116ac1fa2da469655f37a3e7338c2c713 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d13b2023e03adb230124+67d388cf/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_62959f21-6073-4fd5-b612-d423dccc7c42/compiler_workdir/NewAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d13b2023e03adb230124+67d388cf/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d13b2023e03adb230124+67d388cf/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d13b2023e03adb230124+67d388cf/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d13b2023e03adb230124+67d388cf/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9be41b3694d830129f28337b6166f40093f2c1b5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d13b2023e03adb230124+67d388cf/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a83e3ab17ecb6a05e820607c191bf82d420bac993c8802f5850cf1a9da80866 +size 8740 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d13b2023e03adb230124+67d388cf/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d13b2023e03adb230124+67d388cf/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3e2762aa2bf2ab44a9b1a068a65e0d2e8e669716 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_d13b2023e03adb230124+67d388cf/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e8bbaba31a19ef3f01fd86502eba80f63e998343 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed28d3d41bf7a8276a0fbcb2923a794ef184c4b1a98373af3734a5bf23fc6b84 +size 573364 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b667eb2efb0ae8c04764a8eaa25d086126783627 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a91bc4dfdb93e2a06a86eb2b9616d2212d074a45d1ad0ae270ecb461319680 +size 1045504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0a926c05e8366b817480573b5b03b215f0aaedd3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d5b145fe6e14064993e3+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a9e3c6e7925e8d7ae159996a7c1a8421e583d97bfcf9f4a5a7d175faba1ec13 +size 1167893 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d617d5a80725dd447538+37d32d7c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d617d5a80725dd447538+37d32d7c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8117b3acb1b884ae74b52a4ad0b844adb282f66f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d617d5a80725dd447538+37d32d7c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_5cb94fcd-6a90-4e47-9256-fb0d0362f564/compiler_workdir/OldAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d617d5a80725dd447538+37d32d7c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d617d5a80725dd447538+37d32d7c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d617d5a80725dd447538+37d32d7c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d617d5a80725dd447538+37d32d7c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc0376eb458882e480c9f2988be7a20d2a83a85f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d617d5a80725dd447538+37d32d7c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5257f6d788833b506fbb149a4f7b96779ca2ed998a12765781b35e1b7f4ec017 +size 7691 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d617d5a80725dd447538+37d32d7c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d617d5a80725dd447538+37d32d7c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a94121905df02e29034803cd624ca3ac2f3c955a Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_d617d5a80725dd447538+37d32d7c/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8581b7be56aaf8b02af5ecba9f6b018998265e6b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_dc6ddbd7-72bf-4255-be61-5f86626e3a98/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..adcc8aee2ca2db0fb4f9292d55b61f7aca4c547a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80e7d6d10b9e57d7975b715f6b5048485e54ef4cdb92d5d73309fb5dadb7673 +size 8979 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dfcc20ea790db9b52eab115382045f23054834fe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d51d6a35b878c9e95f88a3fc7a68a0c90247902017e6041aa06e74646a50287 +size 246784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..35b9af6c74199d29199a1c16ff4f0aec585097ea --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d64e5c89cbc237bea34f+b4e83f56/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb48bb50500db554783d149acb51ca91531603e5efa2d7ca11b8f7e5959daf3d +size 249608 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e0b743157f2c13621331b222cd01f495b5cd45a2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ed6cb1b0d25616ecd91f133d82018b0b522f55e7077b2b9e7170d2c97efe3d6 +size 101315 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1bf86f07cd334df3f09d058962de8999a2912195 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3da971841a571fa3712570d39cedee1896dbff45199fe8cfb768d9931cbdad +size 482304 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..21b0d4852a0d5fc4c703ef32fa5f764ea9b6f6f5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6713fbd83cd891615f3+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e464e59c6f9a68351267cd23f5b238b6d5d5fe56a052a6f7c90fe24a563ba2a6 +size 490997 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3a40a794d892d5d9cfb8c194ada140e91d4be151 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_vision_encoder/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..534f246b053d076a8383ec2e2b7759eb6a169a2c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f58a90778d6846a42965bdddbcb665f0582723b12a37d596940cfbda1237e23b +size 166119 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d28fe70f55b28cd233297a97f927e3a37af0c1a9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c395181dfa11029799f65902d822b8d8e537596c7c60d290bd0ad653923ec03a +size 1639424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9313c6fb0aec152011c061396a908858aff04826 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6f75527452b7a1e9637+b02446f6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aee43604169a77a4dfcca25e0500f7e750dc4f348c4512457473ae0e3d56da86 +size 1749734 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..de709e226cccee99c6bd0d9c9dce841ab7a400d9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4a76380f0d5727cafffe24033789a75885fe19f2cc182fe426098b202b12512 +size 110370 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e4e0d9e1a7158b65baf6a51809e39c388b57f0a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d84284794f25240dc9295cd82502bcdf43304e60a30774caf13173318c3a03f +size 482304 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6e8f9029f456f956c23899d0a6843ed3edad4730 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d962f9089341da526ccb+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c54a31372e905a4325cdb4be32a5b2915d795a60fa91e421906598d0ea9059dd +size 491105 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..71275d7139e76687b0013549c76f07a477136fc5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0002025273f0b98f953ba4142218f8f29a9bb772981ba47f8b130058d0018d6f +size 2492798 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b455f7e8f4dae4bd26d62874f3e95e9b7af79576 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee5f7363549a34d43bc58ead521933d456144c25b9999b64c48fd26b24a0b066 +size 18248704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d9f6b5c7521984f5fb4c+401b7c22/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d9f6b5c7521984f5fb4c+401b7c22/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e42f36d99226e9e5a5ab1b0ec3d1d2769bcf582b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d9f6b5c7521984f5fb4c+401b7c22/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_f2357c5e-a78b-407d-ada8-106c876de78c/compiler_workdir/OldAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d9f6b5c7521984f5fb4c+401b7c22/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d9f6b5c7521984f5fb4c+401b7c22/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d9f6b5c7521984f5fb4c+401b7c22/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d9f6b5c7521984f5fb4c+401b7c22/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc0376eb458882e480c9f2988be7a20d2a83a85f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d9f6b5c7521984f5fb4c+401b7c22/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5257f6d788833b506fbb149a4f7b96779ca2ed998a12765781b35e1b7f4ec017 +size 7691 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d9f6b5c7521984f5fb4c+401b7c22/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d9f6b5c7521984f5fb4c+401b7c22/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e748b36317f36367b89a4c1a92531498c132224b Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_d9f6b5c7521984f5fb4c+401b7c22/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..78a30ad51fc5ba7ff93979ff9016d0c8b4f090c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f15dda2e1fc2136c3ec65404054c304b554108de5dd8f26baa2a502e140a8e7 +size 1779218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..33fb0f70363ab105c91473f2038dc6e79664dd54 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30543d8500b1061bd0abc83042b4034fe4f1d855463277f314671cee55134689 +size 80927744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c299a648ccd24eba7ae90bf6fa829ba720be7dc8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45cc1db247cb9b9ce9701e74f8b2d0b1cf76d082d5464d606cdfa72dd327502f +size 655761 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8190798906486985a6519eeb67a640a6efc92af2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e39c5570c9bfe45da79fb0f3d9c0fce1d6d557de0de99ae87d0c5142d8b46e +size 3564544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c4e4bdad508a28a624fb922ba87d2c01472031d3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:192fead1b3e39db169c9b34b5a513dd5d409c71030ba6625a14caa90ce1dcb16 +size 3687188 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..f4d43101d1020039c322dc82c841e8ead335c966 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..68248bddba6956bfd6a21d32a926ec7b10ad2885 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f9cbf5a07dc41323b9bdb13df1c25954111476d9c955ed30173596aa61ae190 +size 384578 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b0f58829c868e5bcfa1b223428f65285e1edddd1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:813c7c5fd6f511b9cf7f90b07914f3f344629c28386293d81889473d493c49c6 +size 42159104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6e490f4578941fc8c839713cd9dcc193a0e9cd72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dbda60963b73c3571662+186ca4ef/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a435bca0c4372036202c1df7d58d42ebf80240f74b880b492077a9466cddea5 +size 42392222 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc39fc4b113d895bab57+2353e5b8/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dc39fc4b113d895bab57+2353e5b8/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3cf6b35f2d09557f65ba2b1952f2caee06fc2f2d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dc39fc4b113d895bab57+2353e5b8/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_219f8b58-9358-4959-b8f2-8c2e7bdb7da4/compiler_workdir/NewSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc39fc4b113d895bab57+2353e5b8/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_dc39fc4b113d895bab57+2353e5b8/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc39fc4b113d895bab57+2353e5b8/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dc39fc4b113d895bab57+2353e5b8/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..41be8cc3984f4e6a0de845cea098fd53922b2fbd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dc39fc4b113d895bab57+2353e5b8/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2e93fef39637b2e7d1ceb514fa3024d9836275bca2d538f49802342c231f0e +size 5471 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc39fc4b113d895bab57+2353e5b8/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_dc39fc4b113d895bab57+2353e5b8/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dc2097d179680eead19bf3bddb3de3df3adfb198 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_dc39fc4b113d895bab57+2353e5b8/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd2c495035075b198594+70955acf/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dd2c495035075b198594+70955acf/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..aeca0143a834dab10002c33290211b68e476d01c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dd2c495035075b198594+70955acf/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_14874449-8afe-426d-9e3f-ce36339af28c/compiler_workdir/OldSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd2c495035075b198594+70955acf/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_dd2c495035075b198594+70955acf/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd2c495035075b198594+70955acf/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dd2c495035075b198594+70955acf/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cc8595724b949c0c1c3c933ab980970a6b43e6a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dd2c495035075b198594+70955acf/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5114d4a4b6883064dcb776ed5bd26ee295ba206bf9cfbe32191a6df351c35e1e +size 4581 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd2c495035075b198594+70955acf/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_dd2c495035075b198594+70955acf/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..baba1b74a03a2ed79f3c9258a625cc60bdb1a50c Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_dd2c495035075b198594+70955acf/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_de01404cf46fd8e2f601+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_de01404cf46fd8e2f601+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_de01404cf46fd8e2f601+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_de01404cf46fd8e2f601+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_de01404cf46fd8e2f601+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a40bd2828c187293a24e2e3596941e589e7837a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_de01404cf46fd8e2f601+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f53252f239e5fe07901edc6150c25c17f3b31138be24368f153e4dd3ce016c5 +size 586812 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65936e92e7fd446d616313450a91428e7fc8fc56 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce1604ae39ec335f4db3a45de7e302e01abe9ac192afecad610be83e95f09df6 +size 607450 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ca3104c6676bbaf0a5c7718eaecd2c190d65c6d1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd7524f0b25e28fa72601d2d422011f24dfa7381382a9c39239216f087ae6c8 +size 18791424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a84091a4b87298007c01b2d634966246ee04422d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c347aeb5f1d2da3a6aef823616e0579e07c386f4816340b3214e83c1603626a2 +size 18914068 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e32e6af50f0251d3e9e8+9c556093/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e32e6af50f0251d3e9e8+9c556093/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8cdaa336df2bdf16199c3c1b4dc3a6cd2d24d41c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e32e6af50f0251d3e9e8+9c556093/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_b756b52a-0ddc-4f1f-b58c-ac9222676e72/compiler_workdir/OldAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e32e6af50f0251d3e9e8+9c556093/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e32e6af50f0251d3e9e8+9c556093/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e32e6af50f0251d3e9e8+9c556093/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e32e6af50f0251d3e9e8+9c556093/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..799e545b39ab1382b7d4fe3923096af6caf6ead2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e32e6af50f0251d3e9e8+9c556093/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a16e85e684bbec7a5bb9e9df72afa8ab2fd5510493638ea9f1eea4fc86e8e50e +size 7822 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e32e6af50f0251d3e9e8+9c556093/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e32e6af50f0251d3e9e8+9c556093/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..62520b6a47edd9e23bb0fe2c4f13d1fb90f9a312 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_e32e6af50f0251d3e9e8+9c556093/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ea76254177cf576ffcb2+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ea76254177cf576ffcb2+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ea76254177cf576ffcb2+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ea76254177cf576ffcb2+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ea76254177cf576ffcb2+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ea76254177cf576ffcb2+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ea76254177cf576ffcb2+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7009c59275da902d425a5c1401686464dce15da9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ea76254177cf576ffcb2+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65aa0ad4670313d1a6ab024b6c7eda14c234bb19897032be0348642707d0ab2b +size 532500 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ea76254177cf576ffcb2+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ea76254177cf576ffcb2+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..508e82ad6e65da3b4df9ca6880e925aa11f40cd7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ea76254177cf576ffcb2+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85f854b50a4ed0c2aee3a7c82abfd2aaac425cea367d2f264360d5e3b6644c39 +size 1158144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..968f1f51c233ccb55252a3ed35eef202606829d3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07046888a067e6ae8cf67d0469290a9cab91b6ac9b6ffcd5151e7bfe573f08fb +size 891924 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5af573f47557ff01e7eeb8440759e7269e8ab458 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09714af0311f568e6b478aeb0785154a6e23c1e618565d57a35a743cb63f9f14 +size 16446464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b40d804b197e19993d54c2e25d041f4b8ae5cda8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:723169d7e8104355b405afe258abd592b82261b7124151036304dd4791cc5c0c +size 16623589 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ede677b769dff6a4314c+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ede677b769dff6a4314c+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ede677b769dff6a4314c+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ede677b769dff6a4314c+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ede677b769dff6a4314c+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ede677b769dff6a4314c+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ede677b769dff6a4314c+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bb240f2f0327b01bfd7c3953feaee6b40523d2cc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ede677b769dff6a4314c+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a01f77d5c8b229adaea2cad071e5ece57a9ec137edb90d1aa07cb2b65295cb1d +size 103256 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ede677b769dff6a4314c+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ede677b769dff6a4314c+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..45098da2d549d5cabc9d057c4eef89eca874991a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ede677b769dff6a4314c+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fdbbe06b6dc1ee58f18c021d93b639ff043dfb60acee40c549a08fa0fd4e2a4 +size 646144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..47a758d0d94c616e0ebf4445ab33d0d95c9d361b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5a74b43399dd59cd6aaed48f2fe004c4941eb0914caa26aa6d689db7231ad50 +size 891940 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3b7347ccd114dae6dea1cb9d026a4e64eadab7e0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84cfa3a95da30bb1001806e374e8528fa1bcf68656ad7496f085f6aba3fdd336 +size 28283904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1e2139293a2f4d2a36e2ec9371e022a259c38222 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:712a1f408624afbea41b4a676e4400917b5d8b20131898a1b86683c3b16676cc +size 28461029 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f589e3da9d96b02dd9d5+668e5376/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f589e3da9d96b02dd9d5+668e5376/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..177da96eec22ffd7be79ce43104a42b3026aab30 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f589e3da9d96b02dd9d5+668e5376/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_1d31f9eb-7e97-4a89-a499-bfbf3b399fe3/compiler_workdir/OldAttentionBlock/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f589e3da9d96b02dd9d5+668e5376/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f589e3da9d96b02dd9d5+668e5376/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f589e3da9d96b02dd9d5+668e5376/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f589e3da9d96b02dd9d5+668e5376/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc0376eb458882e480c9f2988be7a20d2a83a85f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f589e3da9d96b02dd9d5+668e5376/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5257f6d788833b506fbb149a4f7b96779ca2ed998a12765781b35e1b7f4ec017 +size 7691 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f589e3da9d96b02dd9d5+668e5376/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f589e3da9d96b02dd9d5+668e5376/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3e736072fbc6f23e33239b53f53491f8dcfd5ad3 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_f589e3da9d96b02dd9d5+668e5376/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1839053aac72c5c3447072c9d4c35eb4990533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/vision_encoder/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..419eb954fb968f352472ed1ea1fe63a5bcdcb278 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:551572816f96a147c6a041d71c24c3467a307790d28dae643853e0f47002f21c +size 40839 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e386a61aa6a16a21f3a57dacd2f74668800f9f3d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d2eb443bf5e478e815157e37195009decd7c89909cabc2d7281e0c2d00148c +size 4209664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..37c06f3b49f1a93054511fa82fed9fc07ded2da4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f90d09e1438492736d3c+c4f887dc/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89d12b35fb81a8de8c054653066f0677dbf30d503e703ab87c5b310fc0a5b5ae +size 4220570 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f9154701c36b9388a53d+00ac9e50/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f9154701c36b9388a53d+00ac9e50/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..41ac87f491bda9ccb8493d2cd69c8936bc3b630e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f9154701c36b9388a53d+00ac9e50/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_9cbdb745-7f3c-4555-bbc8-66104075d5df/compiler_workdir/ScaledQKComparisonModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f9154701c36b9388a53d+00ac9e50/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f9154701c36b9388a53d+00ac9e50/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f9154701c36b9388a53d+00ac9e50/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f9154701c36b9388a53d+00ac9e50/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..25766bae20d721f9d5c2f3ea835c3631e3010787 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f9154701c36b9388a53d+00ac9e50/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138b427bf7769ac62d72b0c008b58065b93c16f2f09976be109e29ef90de7854 +size 4940 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f9154701c36b9388a53d+00ac9e50/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f9154701c36b9388a53d+00ac9e50/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..02c68de6dce4f92c8beff3a53459073a5ceb6ce5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f9154701c36b9388a53d+00ac9e50/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1255cff07aa1052087c202289d6112c1f4d0a34e9c65366e27b8dd4eff475bd +size 390144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9c277888420f00defd99fc3c102007a98b09199d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..720da5aab4c7b6a1c92619e04dc2a73877c0de33 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2f8d7ebdd5297c164e376c303231f84d63647b6e4e4f87c1720b62e5a47d205 +size 909436 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8916a45485fffe3fa0255cf9031289bf949ceebe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7eeb11a6645acc08b9f6489d4dd51b7a2e61eb1d20675565014900fc2b307bf +size 11664384