import pandas as pd
import numpy as np
from scipy import stats
import scikit_posthocs as sp
# Load the per-target screening metrics, one CSV per method.
# ROCS scored with TanimotoCombo.
rocs_tc = pd.read_csv(
    '/public/gkxiao/work/dude_comparison/rocs_TanimotoCombo.csv'
)
# ROCS scored with RefTverskyCombo.
rocs_tvc = pd.read_csv(
    '/public/gkxiao/work/dude_comparison/rocs_RefTverskyCombo.csv'
)
# GLIDE docking scores.
glide = pd.read_csv(
    '/public/gkxiao/work/dude_comparison/glide_score.csv'
)
# GOLD docking scores.
gold = pd.read_csv(
    '/public/gkxiao/work/dude_comparison/gold_score.csv'
)
# DOCK results (file named after Mysinger 2012).
dock = pd.read_csv(
    '/public/gkxiao/work/dude_comparison/Mysinger2012.csv'
)

# Preview the first rows of the ROCS TanimotoCombo table.
rocs_tc.head()
id | auc | logauc | adjusted_logauc | bedroc_3219 | bedroc_805 | bedroc_20 | ef_05 | ef_1 | ef_5 | ef_10 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | aa2ar | 0.686073 | 0.361098 | 0.216478 | 0.571057 | 0.324910 | 0.320903 | 36.077023 | 20.733921 | 5.809129 | 3.755187 |
1 | abl1 | 0.600522 | 0.309849 | 0.165229 | 0.494143 | 0.293588 | 0.292362 | 26.200999 | 18.013187 | 5.817787 | 3.241165 |
2 | ace | 0.760096 | 0.409436 | 0.264816 | 0.492874 | 0.324567 | 0.393120 | 26.152647 | 18.377536 | 7.580692 | 5.174890 |
3 | aces | 0.459566 | 0.136778 | -0.007842 | 0.094878 | 0.042937 | 0.059188 | 3.956608 | 1.985713 | 1.103174 | 0.728368 |
4 | ada | 0.935433 | 0.754320 | 0.609700 | 0.962906 | 0.790928 | 0.760584 | 59.591398 | 55.334869 | 14.576313 | 7.838148 |
# Preview the first rows of the ROCS RefTverskyCombo table.
rocs_tvc.head()
id | auc | logauc | adjusted_logauc | bedroc_3219 | bedroc_805 | bedroc_20 | ef_05 | ef_1 | ef_5 | ef_10 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | aa2ar | 0.771078 | 0.487842 | 0.343222 | 0.731100 | 0.485173 | 0.477556 | 51.420124 | 32.137578 | 9.460581 | 5.165975 |
1 | abl1 | 0.695311 | 0.348398 | 0.203778 | 0.551487 | 0.293764 | 0.309255 | 33.842957 | 18.013187 | 5.708018 | 4.065190 |
2 | ace | 0.761047 | 0.423822 | 0.279202 | 0.479790 | 0.339068 | 0.419762 | 26.152647 | 19.437778 | 8.076626 | 5.635668 |
3 | aces | 0.625177 | 0.263480 | 0.118860 | 0.151101 | 0.146089 | 0.229089 | 7.913215 | 7.060314 | 4.765712 | 3.222475 |
4 | ada | 0.928716 | 0.757569 | 0.612949 | 0.966054 | 0.798656 | 0.755293 | 59.591398 | 55.334869 | 14.147598 | 7.730776 |
# Preview the first rows of the GLIDE table.
glide.head()
id | auc | logauc | adjusted_logauc | bedroc_3219 | bedroc_805 | bedroc_20 | ef_05 | ef_1 | ef_5 | ef_10 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | aa2ar | 0.880935 | 0.492185 | 0.347565 | 0.363559 | 0.371942 | 0.503108 | 23.221992 | 22.185296 | 10.373444 | 6.618257 |
1 | abl1 | 0.810886 | 0.411228 | 0.266608 | 0.526228 | 0.330054 | 0.360812 | 31.659540 | 19.650749 | 6.366635 | 4.559605 |
2 | ace | 0.896240 | 0.605319 | 0.460699 | 0.789608 | 0.583127 | 0.618545 | 50.184810 | 36.401658 | 12.327481 | 7.088890 |
3 | aces | 0.650288 | 0.285071 | 0.140451 | 0.173581 | 0.193277 | 0.258135 | 10.550954 | 11.252375 | 5.251108 | 3.288690 |
4 | ada | 0.707181 | 0.297711 | 0.153091 | 0.097487 | 0.130502 | 0.246279 | 4.256528 | 6.384793 | 4.715866 | 3.650644 |
# Preview the first rows of the GOLD table.
gold.head()
id | auc | logauc | adjusted_logauc | bedroc_3219 | bedroc_805 | bedroc_20 | ef_05 | ef_1 | ef_5 | ef_10 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | aa2ar | 0.702684 | 0.276809 | 0.132189 | 0.111483 | 0.117611 | 0.211602 | 6.220176 | 6.427516 | 4.522822 | 3.153527 |
1 | abl1 | 0.756282 | 0.323613 | 0.178993 | 0.301622 | 0.185755 | 0.252636 | 14.192208 | 10.371229 | 4.610322 | 3.790515 |
2 | ace | 0.724553 | 0.419914 | 0.275294 | 0.588642 | 0.400193 | 0.415882 | 33.927759 | 23.678748 | 8.005778 | 4.714112 |
3 | aces | 0.702930 | 0.413426 | 0.268806 | 0.687749 | 0.430125 | 0.391598 | 43.962308 | 27.799985 | 7.325075 | 4.259847 |
4 | ada | 0.712189 | 0.330099 | 0.185479 | 0.421469 | 0.282841 | 0.263216 | 27.667435 | 19.154378 | 4.715866 | 2.899041 |
# Preview the first rows of the DOCK table (note: its key column is
# `target`, and it carries both percent-scale and fraction-scale columns).
dock.head()
target | AUC | logAUC | EF_1 | auc | logauc | |
---|---|---|---|---|---|---|
0 | aa2ar | 83.39 | 28.37 | 21.8 | 0.8339 | 0.2837 |
1 | abl1 | 81.03 | 26.11 | 19.2 | 0.8103 | 0.2611 |
2 | ace | 71.59 | 22.31 | 19.5 | 0.7159 | 0.2231 |
3 | aces | 80.67 | 24.73 | 20.1 | 0.8067 | 0.2473 |
4 | ada | 76.39 | 23.85 | 21.5 | 0.7639 | 0.2385 |
# Column-wise mean of the ROCS TanimotoCombo metrics.
# numeric_only=True skips the string `id` column; pandas >= 2.0 raises
# TypeError on it instead of silently dropping it.
rocs_tc.mean(numeric_only=True)
auc 0.690637 logauc 0.401877 adjusted_logauc 0.257257 bedroc_3219 0.520945 bedroc_805 0.365542 bedroc_20 0.375949 ef_05 32.346422 ef_1 23.130959 ef_5 7.226497 ef_10 4.281168 dtype: float64
# Column-wise mean of the ROCS RefTverskyCombo metrics.
# numeric_only=True skips the string `id` column; pandas >= 2.0 raises
# TypeError on it instead of silently dropping it.
rocs_tvc.mean(numeric_only=True)
auc 0.755343 logauc 0.453545 adjusted_logauc 0.308925 bedroc_3219 0.543264 bedroc_805 0.405486 bedroc_20 0.432354 ef_05 33.882995 ef_1 25.341441 ef_5 8.416269 ef_10 5.000467 dtype: float64
# Column-wise mean of the GLIDE metrics.
# numeric_only=True skips the string `id` column; pandas >= 2.0 raises
# TypeError on it instead of silently dropping it.
glide.mean(numeric_only=True)
auc 0.798228 logauc 0.461166 adjusted_logauc 0.316546 bedroc_3219 0.481001 bedroc_805 0.384545 bedroc_20 0.440276 ef_05 28.791657 ef_1 22.934048 ef_5 8.652891 ef_10 5.341581 dtype: float64
# Column-wise mean of the GOLD metrics.
# numeric_only=True skips the string `id` column; pandas >= 2.0 raises
# TypeError on it instead of silently dropping it.
gold.mean(numeric_only=True)
auc 0.717076 logauc 0.339548 adjusted_logauc 0.194928 bedroc_3219 0.291070 bedroc_805 0.232957 bedroc_20 0.294533 ef_05 17.183565 ef_1 13.414270 ef_5 5.722850 ef_10 3.824434 dtype: float64
# Column-wise mean of the DOCK metrics.
# numeric_only=True skips the string `target` column; pandas >= 2.0 raises
# TypeError on it instead of silently dropping it.
dock.mean(numeric_only=True)
AUC 76.483137 logAUC 24.430686 EF_1 19.838235 auc 0.764831 logauc 0.244307 dtype: float64
# Column-wise median of the ROCS TanimotoCombo metrics.
# numeric_only=True skips the string `id` column; pandas >= 2.0 raises
# TypeError on it instead of silently dropping it.
rocs_tc.median(numeric_only=True)
auc 0.720996 logauc 0.372585 adjusted_logauc 0.227965 bedroc_3219 0.519991 bedroc_805 0.325774 bedroc_20 0.343566 ef_05 29.296569 ef_1 18.359102 ef_5 6.335236 ef_10 4.125275 dtype: float64
# Column-wise median of the ROCS RefTverskyCombo metrics.
# numeric_only=True skips the string `id` column; pandas >= 2.0 raises
# TypeError on it instead of silently dropping it.
rocs_tvc.median(numeric_only=True)
auc 0.758210 logauc 0.433758 adjusted_logauc 0.289138 bedroc_3219 0.565297 bedroc_805 0.368938 bedroc_20 0.420751 ef_05 34.895504 ef_1 22.963946 ef_5 8.109239 ef_10 5.094502 dtype: float64
# Column-wise median of the GLIDE metrics.
# numeric_only=True skips the string `id` column; pandas >= 2.0 raises
# TypeError on it instead of silently dropping it.
glide.median(numeric_only=True)
auc 0.820697 logauc 0.451755 adjusted_logauc 0.307135 bedroc_3219 0.517200 bedroc_805 0.377760 bedroc_20 0.429872 ef_05 30.449211 ef_1 22.477159 ef_5 8.521955 ef_10 5.258265 dtype: float64
# Column-wise median of the GOLD metrics.
# numeric_only=True skips the string `id` column; pandas >= 2.0 raises
# TypeError on it instead of silently dropping it.
gold.median(numeric_only=True)
auc 0.732792 logauc 0.329154 adjusted_logauc 0.184534 bedroc_3219 0.237596 bedroc_805 0.197072 bedroc_20 0.280156 ef_05 13.579315 ef_1 10.407030 ef_5 5.422138 ef_10 3.818062 dtype: float64
# Column-wise median of the DOCK metrics.
# numeric_only=True skips the string `target` column; pandas >= 2.0 raises
# TypeError on it instead of silently dropping it.
dock.median(numeric_only=True)
AUC 78.37500 logAUC 22.98500 EF_1 17.45000 auc 0.78375 logauc 0.22985 dtype: float64
# Pull one logAUC sample per method as a plain Python list.
# NOTE(review): samples are paired by row position, not by target name;
# this assumes every CSV lists the same targets in the same order -- confirm.
# NOTE(review): DOCK uses `logauc` while the others use `adjusted_logauc`;
# presumably the DOCK values are already adjusted -- confirm.
logauc_rocs_tc = rocs_tc['adjusted_logauc'].tolist()
logauc_rocs_tvc = rocs_tvc['adjusted_logauc'].tolist()
logauc_glide = glide['adjusted_logauc'].tolist()
logauc_gold = gold['adjusted_logauc'].tolist()
logauc_dock = dock['logauc'].tolist()

# Stack the samples: one row per method, one column per target.
data_logauc = np.array(
    [
        logauc_rocs_tc,
        logauc_rocs_tvc,
        logauc_glide,
        logauc_gold,
        logauc_dock,
    ]
)

# Friedman test across the five methods; each row of the stacked array is
# passed as one sample.
stats.friedmanchisquare(*data_logauc)
FriedmanchisquareResult(statistic=87.47450980392159, pvalue=4.527108201001697e-18)
# Nemenyi post-hoc test following the Friedman test.
# The stacked array is transposed so that rows are targets (blocks) and
# columns are methods (groups).
sp.posthoc_nemenyi_friedman(np.transpose(data_logauc))
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
0 | 1.000000 | 0.001000 | 0.001000 | 0.130021 | 0.601174 |
1 | 0.001000 | 1.000000 | 0.525782 | 0.001000 | 0.028642 |
2 | 0.001000 | 0.525782 | 1.000000 | 0.001000 | 0.001000 |
3 | 0.130021 | 0.001000 | 0.001000 | 1.000000 | 0.001573 |
4 | 0.601174 | 0.028642 | 0.001000 | 0.001573 | 1.000000 |
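Legend (row/column index: method) — 0: ROCS TanimotoCombo; 1: ROCS RefTverskyCombo; 2: GLIDE; 3: GOLD; 4: DOCK. Each entry is the pairwise Nemenyi p-value for the two methods (values shown as 0.001 are likely the lower reporting limit, i.e. p ≤ 0.001).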