{"benchmark_id":"human_eval_plus","size":164,"models":49,"total_pairs":1060,"close_pairs":986,"no_solve":10,"tau-":2,"SE(A)":{"count":44.0,"mean":0.0362431338,"std":0.0030590507,"min":0.0268500585,"25%":0.0345885236,"50%":0.0374104529,"75%":0.0387807191,"max":0.0390434155},"SE_x(A)":{"count":44.0,"mean":0.0271466058,"std":0.0048307503,"min":0.0120281586,"25%":0.0232856424,"50%":0.027533273,"75%":0.0304260428,"max":0.0360012079},"SE_pred(A)":{"count":44.0,"mean":0.0229110041,"std":0.006239927,"min":0.0093141782,"25%":0.0196778027,"50%":0.0230907479,"75%":0.0289344302,"max":0.0318547526},"SE(A-B)":{"count":986.0,"mean":0.0425193805,"std":0.0045292495,"min":0.0285767372,"25%":0.0397893523,"50%":0.0432867452,"75%":0.0459804204,"max":0.0504666992},"SE_x(A-B)":{"count":986.0,"mean":0.02439166,"std":0.0050545357,"min":0.0119442551,"25%":0.0207665751,"50%":0.0242015924,"75%":0.0278683187,"max":0.0391332502},"SE_pred(A-B)":{"count":986.0,"mean":0.0341694409,"std":0.0063565565,"min":0.0140982858,"25%":0.0300357101,"50%":0.0349005592,"75%":0.038806308,"max":0.0448805109},"SE_signtest":{"count":986.0,"mean":0.0433728429,"std":0.0046267917,"min":0.0285774113,"25%":0.0407722686,"50%":0.0442937803,"75%":0.0466523148,"max":0.0515468806},"corr(A,B)":{"count":986.0,"mean":0.6148890464,"std":0.1123099163,"min":0.2788394194,"25%":0.5375911844,"50%":0.6187559627,"75%":0.7000980764,"max":0.8953021571},"sum(A!=B)":{"count":986.0,"mean":51.172033558,"std":10.3992432449,"min":21.9651142542,"25%":44.7113157025,"50%":52.7683107437,"75%":58.5374892563,"max":71.4648479337}}
