import pandas as pd
import numpy as np
from rdkit.ML.Scoring import metrics
# NOTE: the `metrics` module imported above was copied from ODDT (oddt.metrics):
# https://github.com/oddt/oddt
读入打分结果
# Load the scoring results; later cells read the 'score' and 'label' columns.
df = pd.read_csv(
    filepath_or_buffer='/public/gkxiao/work/demo/pde5_score.csv',
)
预览
# Show the first five rows of the raw table.
df.head(n=5)
| | score | label |
---|---|---|
0 | -9.971069 | active |
1 | -9.694971 | active |
2 | -11.613330 | active |
3 | -9.017618 | active |
4 | -12.542497 | active |
按打分值从低到高排列,打分最好的那个排第一
# Order the rows by ascending 'score' so the lowest (most negative) score
# comes first.
df_sorted = df.sort_values(
    by='score',
    ascending=True,
)
预览
# Show the first five rows of the sorted table.
df_sorted.head(n=5)
| | score | label |
---|---|---|
32 | -16.976294 | active |
25 | -16.287430 | active |
10 | -16.262144 | active |
11 | -15.785425 | active |
24 | -15.641940 | active |
将打分值乘以-1,转化为正值以便进行性能计算
# Flip the sign of every score: x holds the 'score' column multiplied by -1,
# in the same row order as df_sorted.
score = df_sorted['score']
x = np.multiply(score.to_numpy(), -1)
将实验值active与decoy转化为1与0
# Encode the 'label' column as integers: 'active' -> 1, 'decoy' -> 0.
# Any other label value raises KeyError during the lookup.
label_map = {'active': 1, 'decoy': 0}
labels = list(map(label_map.__getitem__, df_sorted['label']))
y = np.asarray(labels)
计算AUC
# ROC AUC of the sign-flipped scores x against the 0/1 labels y, with 1 as
# the positive class and ascending_score=False.
auc = metrics.roc_auc(
    y,
    x,
    ascending_score=False,
    pos_label=1,
)
print("ROC AUC = ", auc)
ROC AUC = 0.7996552281420028
计算alpha=20.0的BEDROC
# BEDROC with alpha=20.0, treating label 1 as the positive class.
bedroc = metrics.bedroc(
    y,
    x,
    pos_label=1,
    alpha=20.0,
)
print("alpha=20.0 BEDROC = ", bedroc)
alpha=20.0 BEDROC = 0.4518017076489426
计算top 1%的富集因子
# Enrichment factor at percentage=1 (top 1%), reported as kind='fold',
# with label 1 as the positive class.
ef = metrics.enrichment_factor(
    y,
    x,
    pos_label=1,
    percentage=1,
    kind='fold',
)
print("EF at 1% = ", ef)
EF at 1% = 13.991228070175438
计算[0.001,1](即0.1%–100%)范围内的半对数AUC
# Semi-log ROC AUC with log_min=0.001 and log_max=1.0, i.e. the
# 0.1%-100% range named in the printed label.
logauc = metrics.roc_log_auc(
    y,
    x,
    log_min=0.001,
    log_max=1.0,
    ascending_score=False,
    pos_label=1,
)
print("logAUC [0.1%,100%] = ", logauc)
logAUC [0.1%,100%] = 0.4205614548770312