{"benchmark_id":"aime2024_cot","size":30,"models":50,"total_pairs":1884,"close_pairs":196,"no_solve":1,"tau-":0,"SE(A)":{"count":15.0,"mean":0.0757661695,"std":0.0081888594,"min":0.06203941,"25%":0.0700547379,"50%":0.075390136,"75%":0.0815995044,"max":0.0883683125},"SE_x(A)":{"count":15.0,"mean":0.056987735,"std":0.0072390594,"min":0.0435321007,"25%":0.0513103512,"50%":0.0579599652,"75%":0.0613942273,"max":0.0676468475},"SE_pred(A)":{"count":15.0,"mean":0.0498117326,"std":0.0052145058,"min":0.0402665725,"25%":0.0463781853,"50%":0.0482111505,"75%":0.0541537784,"max":0.0570987953},"SE(A-B)":{"count":196.0,"mean":0.0804852379,"std":0.007200148,"min":0.0619025714,"25%":0.0756942298,"50%":0.0811409623,"75%":0.086854883,"max":0.0941627958},"SE_x(A-B)":{"count":196.0,"mean":0.0374020693,"std":0.0112788116,"min":0.0115324809,"25%":0.030235192,"50%":0.0377680728,"75%":0.0469246844,"max":0.0566508654},"SE_pred(A-B)":{"count":196.0,"mean":0.0705627494,"std":0.0049999756,"min":0.0590972845,"25%":0.0668009477,"50%":0.0712176493,"75%":0.0740455827,"max":0.0806477964},"SE_signtest":{"count":196.0,"mean":0.0821462439,"std":0.0081028734,"min":0.0621688952,"25%":0.076847729,"50%":0.0824477556,"75%":0.0883415506,"max":0.0998666144},"corr(A,B)":{"count":196.0,"mean":0.7743442459,"std":0.120073408,"min":0.5142527435,"25%":0.6900164855,"50%":0.7716210296,"75%":0.8638364166,"max":0.9847991594},"sum(A!=B)":{"count":196.0,"mean":6.1319942636,"std":1.1896738061,"min":3.4784743799,"25%":5.3150161059,"50%":6.117874673,"75%":7.0238066117,"max":8.9760066118}}
