{"benchmark_id":"human_eval","size":164,"models":51,"total_pairs":1056,"close_pairs":991,"no_solve":1,"tau-":0,"SE(A)":{"count":46.0,"mean":0.035367147,"std":0.0042969928,"min":0.0237545146,"25%":0.0337454035,"50%":0.0372998698,"75%":0.0386661228,"max":0.0390434405},"SE_x(A)":{"count":46.0,"mean":0.0257875581,"std":0.0059794621,"min":0.0120816415,"25%":0.0219912951,"50%":0.024488763,"75%":0.0303147885,"max":0.0389009192},"SE_pred(A)":{"count":46.0,"mean":0.022186305,"std":0.0088541002,"min":0.0,"25%":0.0171664889,"50%":0.0231923652,"75%":0.0301455336,"max":0.0331335386},"SE(A-B)":{"count":991.0,"mean":0.0438701896,"std":0.005121002,"min":0.0261990816,"25%":0.0407427714,"50%":0.0449295531,"75%":0.0475042454,"max":0.0525058098},"SE_x(A-B)":{"count":991.0,"mean":0.0261031178,"std":0.0059939311,"min":0.011928427,"25%":0.0218550194,"50%":0.0257316395,"75%":0.0302721592,"max":0.0420081867},"SE_pred(A-B)":{"count":991.0,"mean":0.0342553661,"std":0.0077561988,"min":0.0,"25%":0.02931898,"50%":0.0353448213,"75%":0.0400181795,"max":0.0467712593},"SE_signtest":{"count":991.0,"mean":0.0447112091,"std":0.0051487884,"min":0.026355164,"25%":0.0418227221,"50%":0.0456978194,"75%":0.0484502371,"max":0.0538705037},"corr(A,B)":{"count":991.0,"mean":0.4905175809,"std":0.1227061174,"min":0.1703289635,"25%":0.4110425257,"50%":0.4882031944,"75%":0.5764461087,"max":0.7998100855},"sum(A!=B)":{"count":991.0,"mean":54.4798783876,"std":11.8579670145,"min":18.6818181818,"25%":47.0448717946,"50%":56.1666666662,"75%":63.1363636356,"max":78.0530303024}}
