{"benchmark_id":"human_eval_plus","size":164,"models":51,"total_pairs":1150,"close_pairs":1051,"no_solve":10,"tau-":2,"SE(A)":{"count":45.0,"mean":0.0361889095,"std":0.003021703,"min":0.0270387826,"25%":0.0345980155,"50%":0.0371984656,"75%":0.0386231041,"max":0.0390434405},"SE_x(A)":{"count":45.0,"mean":0.0275782922,"std":0.0054423762,"min":0.011792055,"25%":0.02311344,"50%":0.0278440092,"75%":0.0314648615,"max":0.0386231041},"SE_pred(A)":{"count":45.0,"mean":0.0214209336,"std":0.0084723212,"min":0.0,"25%":0.0164267611,"50%":0.0227110428,"75%":0.0285527841,"max":0.0325520678},"SE(A-B)":{"count":1051.0,"mean":0.0427830008,"std":0.0045542279,"min":0.0274702879,"25%":0.0400668771,"50%":0.0435306421,"75%":0.0461826836,"max":0.0509806409},"SE_x(A-B)":{"count":1051.0,"mean":0.026076579,"std":0.0057614603,"min":0.0094480451,"25%":0.0221031647,"50%":0.0257053837,"75%":0.0303035992,"max":0.0401447702},"SE_pred(A-B)":{"count":1051.0,"mean":0.0328718269,"std":0.0075797932,"min":0.0,"25%":0.027832832,"50%":0.0339414851,"75%":0.0385921064,"max":0.0459216308},"SE_signtest":{"count":1051.0,"mean":0.0436500317,"std":0.0046239447,"min":0.0274749293,"25%":0.0412513186,"50%":0.0443874528,"75%":0.046935379,"max":0.052100287},"corr(A,B)":{"count":1051.0,"mean":0.5311724569,"std":0.1142430611,"min":0.196580208,"25%":0.4540434004,"50%":0.5312440109,"75%":0.6136824923,"max":0.8597540551},"sum(A!=B)":{"count":1051.0,"mean":51.8201409708,"std":10.4258287872,"min":20.3030303021,"25%":45.7681623925,"50%":52.9917355362,"75%":59.2499999997,"max":73.0075757576}}
