{"benchmark_id":"math500_cot","size":500,"models":48,"total_pairs":836,"close_pairs":372,"no_solve":2,"tau-":6,"SE(A)":{"count":40.0,"mean":0.0196210963,"std":0.0024811128,"min":0.013675347,"25%":0.0177317985,"50%":0.0205224742,"75%":0.0219146818,"max":0.0223585606},"SE_x(A)":{"count":40.0,"mean":0.0145927717,"std":0.0023959499,"min":0.0078844596,"25%":0.0134943403,"50%":0.0147061025,"75%":0.016330829,"max":0.0187964595},"SE_pred(A)":{"count":40.0,"mean":0.012995983,"std":0.0019075608,"min":0.0090198194,"25%":0.0115474492,"50%":0.0129351302,"75%":0.0145759019,"max":0.0165996489},"SE(A-B)":{"count":372.0,"mean":0.020660138,"std":0.0023702065,"min":0.0159381898,"25%":0.0183955131,"50%":0.021425197,"75%":0.022613589,"max":0.0242857333},"SE_x(A-B)":{"count":372.0,"mean":0.0093632775,"std":0.0018488838,"min":0.0044681202,"25%":0.008196434,"50%":0.0094683099,"75%":0.0102839008,"max":0.0156101331},"SE_pred(A-B)":{"count":372.0,"mean":0.0183076275,"std":0.002492269,"min":0.0133039136,"25%":0.015906271,"50%":0.018739751,"75%":0.0206151241,"max":0.0225674254},"SE_signtest":{"count":372.0,"mean":0.0208050609,"std":0.0023945231,"min":0.0159382038,"25%":0.0186675421,"50%":0.0216041943,"75%":0.0227383854,"max":0.0245362562},"corr(A,B)":{"count":372.0,"mean":0.7865948525,"std":0.117551706,"min":0.1528774236,"25%":0.7462906351,"50%":0.8052932628,"75%":0.8532733969,"max":0.9364760641},"sum(A!=B)":{"count":372.0,"mean":109.6422212824,"std":24.3486618403,"min":63.5065854625,"25%":87.1192819733,"50%":116.6853850351,"75%":129.2585421488,"max":150.5069669423}}
