| model_name,coding,creative writing,current news,general culture,grammar,history,logics,math,science,technology,Average (All Topics) | |
| Claude-haiku-4.5,0.02782324,0.0193475,0.01189335,0.00953859,0.01749148,0.01262032,0.03149576,0.027155,0.01324839,0.0167359,0.0188 | |
| Claude-opus-4-1,0.22495095,0.10686,0.0989385,0.10063364,0.11554266,0.10658129,0.28635984,0.3074815,0.104945,0.09116274,0.1544 | |
| Claude-sonnet-4.5,0.06396089,0.03733868,0.03408552,0.02401341,0.03588736,0.03018813,0.06222236,0.0570202,0.02872903,0.03085006,0.0406 | |
| DeepSeek-R1-0528,0.01278123,0.00216561,0.00260693,0.00230966,0.00263934,0.00277899,0.01225792,0.01500126,0.00232862,0.00294141,0.0058 | |
| Deepseek-v3.2-exp,0.00182209,0.00070114,0.00070576,0.00052683,0.00097487,0.00050592,0.00287864,0.00287508,0.0004787,0.00058898,0.0012 | |
| Gemini-2.5-flash,0.02397599,0.00951452,0.00860898,0.00661878,0.01186397,0.00708986,0.02935418,0.02453523,0.00984654,0.00911329,0.0142 | |
| Gemini-2.5-flash-lite,0.00223114,0.00092407,0.00096084,0.00080245,0.00132023,0.00092236,0.00230427,0.0023624,0.00120676,0.00093211,0.0014 | |
| Gemini-2.5-pro,0.07182939,0.03296976,0.03360315,0.03220809,0.04184051,0.03482206,0.07787136,0.08005488,0.03648847,0.03748657,0.0484 | |
| Gemini-3-pro-preview,0.06339117,0.04200606,0.02907568,0.02870759,0.04727903,0.02902923,0.08468593,0.07668755,0.03539894,0.02877477,0.0462 | |
| Gemma-3-27b-it,0.00049348,0.00041525,0.00026073,0.00035877,0.00027512,0.00031754,0.00040566,0.00036665,0.00025037,0.00031837,0.0003 | |
| GLM-4.6,0.01122536,0.00466124,0.0054479,0.00483747,0.00837963,0.00522661,0.01611536,0.01419832,0.0068397,0.00565101,0.0083 | |
| Gpt-5,0.08995687,0.06581961,0.07143667,0.0606533,0.07537652,0.0710823,0.12402037,0.07928714,0.07037777,0.05913202,0.0771 | |
| Gpt-5.1,0.08578476,0.06460375,0.06781714,0.04438633,0.08095288,0.06698198,0.13158449,0.0932006,0.06264889,0.05239137,0.0753 | |
| Gpt-5-nano,0.00294854,0.00279771,0.00199923,0.0019683,0.00355923,0.00214949,0.00368366,0.00313596,0.00207279,0.00184626,0.0026 | |
| Gpt-oss-120b,0.00083633,0.00050696,0.00060142,0.00045741,0.00051434,0.00054701,0.00081747,0.00062792,0.00053388,0.00049203,0.0006 | |
| Grok-3-mini,0.0016217,0.00086379,0.00081357,0.00072475,0.00102859,0.00073543,0.00144468,0.00155123,0.00077988,0.00075479,0.0010 | |
| Grok-4.1-fast,0.00097423,0.00049472,0.0006853,0.00058176,0.00064959,0.00067308,0.00107506,0.00055874,0.00064664,0.00061459,0.0007 | |
| Grok-4.1-fast-thinking,0.00274141,0.00121613,0.00092416,0.00092805,0.00243251,0.00087374,0.00437985,0.00346987,0.00140277,0.00091148,0.0019 | |
| Kimi-k2-0905,0.00263697,0.00158462,0.00197556,0.00161077,0.00132902,0.00191064,0.00207122,0.00116034,0.00150342,0.00158279,0.0017 | |
| Kimi-k2-thinking,0.01249258,0.01247993,0.00483629,0.00473048,0.00806634,0.00479222,0.02349011,0.01635415,0.00596194,0.00476654,0.0098 | |
| Llama-3.3-70b-instruct,0.00055486,0.00027292,0.00032242,0.0003072,0.00032997,0.0003389,0.00042672,0.0004164,0.00033429,0.00030021,0.0004 | |
| Llama-3.3-nemotron-super-49b-v1.5,0.00225777,0.00086849,0.00071712,0.00070383,0.00127625,0.00071879,0.00292434,0.00219246,0.00079359,0.00063317,0.0013 | |
| Llama-4-maverick,0.00071101,0.00039311,0.00041975,0.00041546,0.00048458,0.00044022,0.00061473,0.00067953,0.00039694,0.00039935,0.0005 | |
| Magistral-medium-2506,0.01149008,0.00473003,0.00432403,0.00324703,0.00472455,0.00393006,0.02409512,0.02440416,0.00408835,0.00369019,0.0089 | |
| Mistral-small-3.2-24b-instruct,0.00025285,0.00013076,0.00014449,0.00010989,0.00015809,0.00011975,0.00021938,0.00020709,0.00013364,0.00011866,0.0002 | |
| Nemotron-nano-9b-v2,0.00087371,0.00020862,0.00015202,0.00017596,0.00029949,0.00016478,0.00083356,0.00073602,0.00023461,0.00013783,0.0004 | |
| Nova-premier-v1,0.01354635,0.00751391,0.00769306,0.00737309,0.0085828,0.00804317,0.0101078,0.01209453,0.00823439,0.00725153,0.0091 | |
| Nova-pro-v1,0.0028077,0.0019437,0.00151352,0.00135518,0.00169447,0.00142795,0.00158109,0.00166325,0.00139602,0.00136266,0.0017 | |
| Phi-4,0.00015383,0.00014198,0.00009868,0.00008972,0.00010519,0.00009403,0.00011515,0.0001382,0.00009863,0.00008935,0.0001 | |
| Qwen3-235b-a22b-2507,0.00395615,0.00158927,0.00170797,0.00148831,0.0030372,0.00162377,0.00628657,0.00544896,0.00189379,0.00147577,0.0029 | |
| Qwen3-235B-A22B-Thinking-2507,0.00342316,0.00087996,0.00108686,0.00094438,0.00125686,0.00107364,0.00426387,0.00382737,0.0011338,0.00101468,0.0018 | |
| Qwen3-30b-a3b-instruct-2507,0.00067842,0.00028585,0.00027735,0.00025813,0.00042092,0.00027243,0.00116349,0.00101686,0.00027423,0.00026961,0.0005 | |