B
    3Rc-              t   @   s  d Z ddlmZ ddlZddlmZ ddlmZmZ ddlm	Z
 ddlmZ edd	Zed
dZeddddddddZeddddddddZeddddddddZedZdddddddd d!d"d#gZd$d% eD Zd&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~dddddddddddddddddddddddddddgtZdd% eD ZeddddddddeddddddddeddddddddeddddddddeddddddddeddddddddeddddddddedddddddddќZddӄ ZddՄ Zedd׍edfddلZddۄ Zdd݄ Zdd߄ Zdd ZdddZ e!dkre   dS )ad  

QED stands for quantitative estimation of drug-likeness and the concept was for the first time
introduced by Richard Bickerton and coworkers [1]. The empirical rationale of the QED measure
reflects the underlying distribution of molecular properties including molecular weight, logP,
topological polar surface area, number of hydrogen bond donors and acceptors, the number of
aromatic rings and rotatable bonds, and the presence of unwanted chemical functionalities.

The QED results as generated by the RDKit-based implementation of Biscu-it(tm) are not completely
identical to those from the original publication [1]. These differences are a consequence of
differences within the underlying calculated property calculators used in both methods. For
example, discrepancies can be noted in the results from the logP calculations, nevertheless
despite the fact that both approaches (Pipeline Pilot in the original publication and RDKit
in our Biscu-it(tm) implementation) mention to use the Wildman and Crippen methodology for the
calculation of their logP-values [2]. However, the differences in the resulting QED-values
are very small and are not compromising the usefulness of using Qed in your daily research.

[1] Bickerton, G.R.; Paolini, G.V.; Besnard, J.; Muresan, S.; Hopkins, A.L. (2012)
    'Quantifying the chemical beauty of drugs',
    Nature Chemistry, 4, 90-98 [https://doi.org/10.1038/nchem.1243]

[2] Wildman, S.A.; Crippen, G.M. (1999)
    'Prediction of Physicochemical Parameters by Atomic Contributions',
    Journal of Chemical Information and Computer Sciences, 39, 868-873 [https://doi.org/10.1021/ci990307l]

    )
namedtupleN)Chem)MolSurfCrippen)rdMolDescriptors)setDescriptorVersionQEDpropertiesz%MW,ALOGP,HBA,HBD,PSA,ROTB,AROM,ALERTSADSparameterzA,B,C,D,E,F,DMAXg      ?g      ?g        g      ?gQ?gq=
ףp?g?gQ?gQ?g?gQ?gffffff?z[$([A;R][!a])]z[oH0;X2]z[OH1;X2;v2]z[OH0;X2;v2]z[OH0;X1;v2]z[O-;X1]z[SH0;X2;v2]z[SH0;X1;v2]z[S-;X1]z[nH0;X2]z[NH0;X1;v3]z[$([N;+0;X3;v3]);!$(N[C,S]=O)]c             C   s   g | ]}t |qS  )r   MolFromSmarts).0Zhbar
   r
   -lib/python3.7/site-packages/rdkit/Chem/QED.py
<listcomp>Y   s    r   z*1[O,S,N]*1z[S,C](=[O,S])[F,Br,Cl,I]z[CX4][Cl,Br,I]z[#6]S(=O)(=O)O[#6]z[$([CH]),$(CC)]#CC(=O)[#6]z[$([CH]),$(CC)]#CC(=O)O[#6]zn[OH]z[$([CH]),$(CC)]#CS(=O)(=O)[#6]zC=C(C=O)C=Ozn1c([F,Cl,Br,I])cccc1z	[CH1](=O)z[#8][#8]z[C;!R]=[N;!R]z[N!R]=[N!R]z[#6](=O)[#6](=O)z
[#16][#16]z	[#7][NH2]zC(=O)N[NH2]z[#6]=SzQ[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]zC1(=[O,N])C=CC(=[O,N])C=C1zC1(=[O,N])C(=[O,N])C=CC=C1za21aa3a(aa1aaaa2)aaaa3za31a(a2a(aa1)aaaa2)aaaa3za1aa2a3a(a1)A=AA=A3=AA=A2zc1cc([NH2])ccc1zt[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]IzOS(=O)(=O)[O-]z[N+](=O)[O-]z
C(=O)N[OH]zC1NC(=O)NC(=O)1z[SH]z[S-]z-c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]z-c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]z%[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1z[CR1]1[CR1][CR1]cc[CR1][CR1]1z*[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1z"[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1z-[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1z4[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1zC#Cz5[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]z![$([N+R]),$([n+R]),$([N+]=C)][O-]z
[#6]=N[OH]z
[#6]=NOC=Oz[#6](=O)[CX4,CR0X3,O][#6](=O)zc1ccc2c(c1)ccc(=O)o2z[O+,o+,S+,s+]zN=C=Oz[NX3,NX4][F,Cl,Br,I]zc1ccccc1OC(=O)[#6]z[CR0]=[CR0][CR0]=[CR0]z[C+,c+,C-,c-]zN=[N+]=[N-]zC12C(NC(N1)=O)CSC2zc1c([OH])c([OH,NH2,NH])ccc1Pz
[N,O,S]C#NzC=C=Oz[Si][F,Cl,Br,I]z[SX2]Oz([SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)ZO1CCCCC1OC2CCC3CCCCC3C2zN=[CR0][N,n,O,S]z^[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2z
C=[C!r]C#Nz:[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1z:[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1z:[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])z[OH]c1ccc([OH,NH2,NH])cc1zc1ccccc1OC(=O)Oz
[SX2H0][N]zc12ccccc1(SC(S)=N2)zc12ccccc1(SC(=S)N2)z
c1nnnn1C=Ozs1c(S)nnc1NC=Oz
S1C=CSC1=Sz	C(=O)OnnnzOS(=O)(=O)C(F)(F)FzN#CC[OH]zN#CC(=O)zS(=O)(=O)C#Nz	N[CH2]C#Nz
C1(=O)NCC1zS(=O)(=O)[O-,OH]zNC[F,Cl,Br,I]zC=[C!r]Oz[NX2+0]=[O+0]z[OR0,NR0][OR0,NR0]z&C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]z[CX2R0][NX3R0]zc1ccccc1[C;!R]=[C;!R]c2ccccc2z3[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]zS[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]z;[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]z[*]=[N+]=[*]z[SX3](=O)[O-,OH]zN#NzF.F.F.Fz[R0;D2][R0;D2][R0;D2][R0;D2]z[cR,CR]~C(=O)NC(=O)~[cR,CR]zC=!@CC=[O,S]z[#6,#8,#16][#6](=O)O[#6]zc[C;R0](=[O,S])[#6]zc[SX2][C;!R]zC=C=Czc1nc([F,Cl,Br,I,S])ncc1zc1ncnc([F,Cl,Br,I,S])c1zc1nc(c2c(n1)nc(n2)[F,Cl,Br,I])z[#6]S(=O)(=O)c1ccc(cc1)Fz[15N]z[13C]z[18O]z[34S]c             C   s   g | ]}t |qS r
   )r   r   )r   Zsmartsr
   r
   r   r      s    g䬅Y@gH:5x@gw+r@gJi[@g[ᆭH@g:WP@g,Y_n>Z@)ABCDEFDMAXg7a	@gae;a@ghFG@g*;I/tS@g2S?gXsq?gKw2j`@g;6IƖ@g"V6d@g@gc*s@gHՍ?gGM?g|טb@g]
?gXh@g>}?g&.>gkΞ?g2x?g2"p@gdp?gc7IN_@gۜ0tO@gI9ZU@g5=
(@g(&-d<@ghL7d$Z@g{Gz?g"q@g!w@gq{?g2
VX?gn@g fcJ\Z@gޢ$	@g-7@gD"p2@gB?g]w?gvSkes@gH`@g !g؎rȷ?gHc?gLUz@)MWALOGPHBAHBDPSAROTBAROMALERTSc             C   sz   |}dt d| |j |jd   |j  }dt d| |j |jd   |j  }|j|j| dd|    }||j S )z ADS function       )	mathexpr   r   r   r   r   r   r   )xZadsParameterpZexp1Zexp2Zdxr
   r
   r   ads   s
    ((r'   c                s    dkrt dt  tt t t fddt	D t
 t t tjjtttt tt fddtD d}|S )zR
  Calculates the properties that are required to calculate the QED descriptor.
  Nz#You need to provide a mol argument.c             3   s&   | ]}  |rt |V  qd S )N)HasSubstructMatchlenZGetSubstructMatches)r   pattern)molr
   r   	<genexpr>   s    zproperties.<locals>.<genexpr>c             3   s   | ]}  |rd V  qdS )r    N)r(   )r   Zalert)r+   r
   r   r,     s    )r   r   r   r   r   r   r   r   )
ValueErrorr   ZRemoveHsr   rdmdZ
_CalcMolWtr   ZMolLogPsum	AcceptorsZ
CalcNumHBDr   ZTPSAZCalcNumRotatableBondsZNumRotatableBondsOptionsZStrictr)   ZGetSSSRZDeleteSubstructsZMolAliphaticRingsStructuralAlerts)r+   qedPropertiesr
   )r+   r   
properties   s    
r4   z1.1.0)versionc             C   sP   |dkrt | }dd |  D }tdd t||D }t|t| S )a   Calculate the weighted sum of ADS mapped properties

  some examples from the QED paper, reference values from Peter G's original implementation
  >>> m = Chem.MolFromSmiles('N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O')
  >>> qed(m)
  0.253...
  >>> m = Chem.MolFromSmiles('CNC(=NCCSCc1nc[nH]c1C)NC#N')
  >>> qed(m)
  0.234...
  >>> m = Chem.MolFromSmiles('CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12')
  >>> qed(m)
  0.234...
  Nc             S   s   g | ]\}}t |t| qS r
   )r'   adsParameters)r   nameZpir
   r
   r   r     s    zqed.<locals>.<listcomp>c             s   s    | ]\}}|t | V  qd S )N)r#   log)r   ZwiZdir
   r
   r   r,      s    zqed.<locals>.<genexpr>)r4   _asdictitemsr/   zipr#   r$   )r+   wr3   dtr
   r
   r   qed  s
    r?   c             C   s   t | tdS )zE
  Calculates the QED descriptor using maximal descriptor weights.
  )r<   )r?   
WEIGHT_MAX)r+   r
   r
   r   weights_max$  s    rA   c             C   s   t | tdS )zE
  Calculates the QED descriptor using average descriptor weights.
  )r<   )r?   WEIGHT_MEAN)r+   r
   r
   r   weights_mean+  s    rC   c             C   s   t | tdS )z7
  Calculates the QED descriptor using unit weights.
  )r<   )r?   WEIGHT_NONE)r+   r
   r
   r   weights_none2  s    rE   c             C   s   t | S )zE
  Calculates the QED descriptor using average descriptor weights.
  )rC   )r+   r
   r
   r   default9  s    rF   c             C   s2   dd l }dd l}|j|j| d\}}|| d S )Nr   )Zoptionflagsverbose)sysdoctestZtestmodELLIPSISexit)rG   rH   rI   Zfailed_r
   r
   r   _runDoctestsD  s    rM   __main__)N)"__doc__collectionsr   r#   Zrdkitr   Z
rdkit.Chemr   r   r   r.   Z(rdkit.Chem.ChemUtils.DescriptorUtilitiesr   r   r	   r@   rB   rD   r   r1   ZAcceptorSmartsr0   ZStructuralAlertSmartsr2   r6   r'   r4   r?   rA   rC   rE   rF   rM   __name__r
   r
   r
   r   <module>8   sP  


	

