{"benchmark_id":"jeebench_chat_cot","size":515,"models":49,"total_pairs":2208,"close_pairs":174,"no_solve":72,"tau-":27,"SE(A)":{"count":16.0,"mean":0.0163602691,"std":0.0022608455,"min":0.0135000849,"25%":0.0145244485,"50%":0.0157748008,"75%":0.0187878731,"max":0.0196879999},"SE_x(A)":{"count":16.0,"mean":0.0113678197,"std":0.0020518002,"min":0.0075375561,"25%":0.0097828462,"50%":0.0113691808,"75%":0.0131852992,"max":0.014170814},"SE_pred(A)":{"count":16.0,"mean":0.0113384819,"std":0.0033807456,"min":0.0,"25%":0.0108100749,"50%":0.0119662841,"75%":0.0132036453,"max":0.0140648307},"SE(A-B)":{"count":174.0,"mean":0.0204307235,"std":0.0016478423,"min":0.0168289324,"25%":0.0189945451,"50%":0.0202576356,"75%":0.0217433713,"max":0.0239541833},"SE_x(A-B)":{"count":174.0,"mean":0.0115912005,"std":0.0027580224,"min":0.0049723976,"25%":0.0095682847,"50%":0.0118385084,"75%":0.0135580456,"max":0.0169417394},"SE_pred(A-B)":{"count":174.0,"mean":0.0165058545,"std":0.0024052771,"min":0.0084564537,"25%":0.0155462154,"50%":0.0167990328,"75%":0.018404436,"max":0.0197787322},"SE_signtest":{"count":174.0,"mean":0.0205838705,"std":0.001743849,"min":0.0170313881,"25%":0.0191527119,"50%":0.0204434966,"75%":0.0219601396,"max":0.0241961478},"corr(A,B)":{"count":174.0,"mean":0.4002929903,"std":0.210368709,"min":-0.087886543,"25%":0.232949434,"50%":0.3963001926,"75%":0.5550011154,"max":0.8171701414},"sum(A!=B)":{"count":174.0,"mean":113.1766145219,"std":19.2031017573,"min":76.933333332,"25%":97.2942307678,"50%":110.84722222,"75%":127.9041666659,"max":155.2769230758}}
