{"benchmark_id":"cruxeval_input_cot","size":800,"models":52,"total_pairs":1422,"close_pairs":669,"no_solve":5,"tau-":12,"SE(A)":{"count":45.0,"mean":0.0164803875,"std":0.0013315066,"min":0.0126531932,"25%":0.0159109355,"50%":0.0169748525,"75%":0.0175183401,"max":0.0176626233},"SE_x(A)":{"count":45.0,"mean":0.0118474057,"std":0.0022594281,"min":0.0058016523,"25%":0.0105885851,"50%":0.0122979799,"75%":0.0128735129,"max":0.0174364817},"SE_pred(A)":{"count":45.0,"mean":0.0109025937,"std":0.0030536468,"min":0.0,"25%":0.0108985508,"50%":0.0118069221,"75%":0.0122787233,"max":0.0134642308},"SE(A-B)":{"count":669.0,"mean":0.0192033416,"std":0.0009915732,"min":0.0143082713,"25%":0.0186493351,"50%":0.019314693,"75%":0.0198309065,"max":0.0218420248},"SE_x(A-B)":{"count":669.0,"mean":0.009992362,"std":0.0028644343,"min":0.0053401194,"25%":0.0080467164,"50%":0.0092510705,"75%":0.0112916955,"max":0.0206149415},"SE_pred(A-B)":{"count":669.0,"mean":0.0160344543,"std":0.0021478434,"min":0.0005175492,"25%":0.0155449464,"50%":0.0166327306,"75%":0.0173786376,"max":0.0190207721},"SE_signtest":{"count":669.0,"mean":0.0193049655,"std":0.0010030304,"min":0.0143089722,"25%":0.0187227812,"50%":0.0194409719,"75%":0.0199301466,"max":0.0221081893},"corr(A,B)":{"count":669.0,"mean":0.5978480811,"std":0.1378508569,"min":0.2159734539,"25%":0.4966452701,"50%":0.6193263555,"75%":0.7020495748,"max":0.8865539976},"sum(A!=B)":{"count":669.0,"mean":239.1592055502,"std":24.3357099282,"min":131.0378787845,"25%":224.3472222196,"50%":241.8888888864,"75%":254.2148760284,"max":312.8141025609}}
