{"benchmark_id":"math500_cot","size":500,"models":51,"total_pairs":968,"close_pairs":453,"no_solve":3,"tau-":5,"SE(A)":{"count":43.0,"mean":0.0197047419,"std":0.0023779376,"min":0.013465955,"25%":0.0181564703,"50%":0.020411564,"75%":0.021806655,"max":0.0223587902},"SE_x(A)":{"count":42.0,"mean":0.015062456,"std":0.0027788974,"min":0.008181565,"25%":0.0132718044,"50%":0.0149983372,"75%":0.0166559764,"max":0.0211407123},"SE_pred(A)":{"count":42.0,"mean":0.0122632768,"std":0.0028645511,"min":0.0021461735,"25%":0.0110131405,"50%":0.0125968753,"75%":0.0144539187,"max":0.0161785492},"SE(A-B)":{"count":453.0,"mean":0.020939814,"std":0.0021853916,"min":0.0161889803,"25%":0.0189739168,"50%":0.0214734423,"75%":0.0227194924,"max":0.0245795332},"SE_x(A-B)":{"count":429.0,"mean":0.010783944,"std":0.0029111219,"min":0.005116947,"25%":0.0088470915,"50%":0.0103158745,"75%":0.0120620883,"max":0.0206096169},"SE_pred(A-B)":{"count":429.0,"mean":0.0176905182,"std":0.0022971242,"min":0.0118089163,"25%":0.0158104939,"50%":0.0177166246,"75%":0.0193186465,"max":0.0221075882},"SE_signtest":{"count":453.0,"mean":0.0210960851,"std":0.0022177037,"min":0.0161945246,"25%":0.019237747,"50%":0.0217109105,"75%":0.0228997817,"max":0.024915927},"corr(A,B)":{"count":453.0,"mean":0.6906635211,"std":0.1195582562,"min":0.1177249335,"25%":0.6488289492,"50%":0.7055569117,"75%":0.7664696724,"max":0.8687833914},"sum(A!=B)":{"count":453.0,"mean":112.4880400794,"std":22.9474295879,"min":65.5656565634,"25%":92.5227272716,"50%":117.840909089,"75%":131.0999999992,"max":155.2008546993}}
