{"benchmark_id":"math_cot","size":5000,"models":49,"total_pairs":944,"close_pairs":142,"no_solve":54,"tau-":32,"SE(A)":{"count":43.0,"mean":0.0063244301,"std":0.0007665687,"min":0.0043053361,"25%":0.0058217288,"50%":0.006664664,"75%":0.0069035418,"max":0.0070705811},"SE_x(A)":{"count":43.0,"mean":0.00465799,"std":0.0009255306,"min":0.0027833124,"25%":0.0041070915,"50%":0.0045721208,"75%":0.0051808153,"max":0.0068261969},"SE_pred(A)":{"count":43.0,"mean":0.0040383616,"std":0.0013310363,"min":0.0005089672,"25%":0.0035832845,"50%":0.004319928,"75%":0.0049034635,"max":0.0056394713},"SE(A-B)":{"count":142.0,"mean":0.0072843598,"std":0.0008087546,"min":0.0052204358,"25%":0.0069684422,"50%":0.0073765083,"75%":0.007748882,"max":0.0089287208},"SE_x(A-B)":{"count":142.0,"mean":0.0036826444,"std":0.0013664963,"min":0.0015221634,"25%":0.0026388688,"50%":0.003337269,"75%":0.0050701106,"max":0.0069170959},"SE_pred(A-B)":{"count":142.0,"mean":0.0060788514,"std":0.0011629418,"min":0.0009078024,"25%":0.0052031912,"50%":0.0064011011,"75%":0.006942928,"max":0.0078998274},"SE_signtest":{"count":142.0,"mean":0.0072899876,"std":0.0008092045,"min":0.0052249982,"25%":0.0069694388,"50%":0.0073862469,"75%":0.0077585995,"max":0.0089305966},"corr(A,B)":{"count":142.0,"mean":0.6560573234,"std":0.1427843836,"min":0.2534136767,"25%":0.5784874745,"50%":0.7037523158,"75%":0.7636696931,"max":0.8700544482},"sum(A!=B)":{"count":142.0,"mean":1344.8529977596,"std":288.5632444067,"min":682.5151514933,"25%":1214.4009868386,"50%":1363.9160838753,"75%":1504.896694175,"max":1993.8888888766}}
