B
    3RcvP              M   @   sD  d dl mZ d dlmZ d dlmZ d dlmZmZ d dlZd dl	Z
ddlmZ G dd	 d	eZdeddZedddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXgLZdfdYdZZdgd[d\Zd]d^ Zdhd_d`Zdadb Zdcdd ZdS )i    )Chem)AllChem)	rdqueries)defaultdictCounterN   )utilsc               @   sb   e Zd ZdddddddgZdd	 Zd
d ZdddZdd Zdd Zdd Z	dd Z
dddZdS )MoleculeDetailsdetailFP
scaffoldFPbitInfoDetailFPbitInfoScaffoldFP
reactivityZbitReactivitymoleculec             C   s   | d | }t| | }dg| }xt| D ]\}}g }||  ||	  ||
  ||| d ||| d ||  ||  tt|d@ ||< q<W |S )NFr         l    )ZUpdatePropertyCacheGetNumAtomsr   ZGetSSSRZGetRingInfo	enumerateGetAtomsappendGetAtomicNumZGetTotalDegreeGetTotalNumHsZIsAtomInRingOfSizeZGetIdxIsInRingGetIsAromatichashtuple)selfmol	num_atomsZrinfo
invariantsiadescriptors r#   :share/RDKit/Contrib/RxnRoleAssignment/identifyReactants.py_atomDetailInvariant3   s     


z$MoleculeDetails._atomDetailInvariantc             C   sV   |  }dg| }x>t| D ].\}}g }||  tt|d@ ||< q W |S )Nr   l    )r   r   r   r   r   r   r   )r   r   r   r   r    r!   r"   r#   r#   r$   _atomScaffoldInvariantE   s    
z&MoleculeDetails._atomScaffoldInvariantTr   c             C   s   t j|||||dS )N)r   radiusr   useBondTypesZbitInfo)r   ZGetMorganFingerprint)r   r   Z	invariantZbitinfor(   r'   r#   r#   r$   	_createFPN   s    zMoleculeDetails._createFPc             C   s   |  dkS )N)r   r   )r   )r   r!   r#   r#   r$   _isHeteroAtomQ   s    zMoleculeDetails._isHeteroAtomc             C   sB   |  dkrdS | rdS x | D ]}| dkr&dS q&W dS )Nr   FTg      ?)r   r   GetBondsGetBondTypeAsDouble)r   r!   br#   r#   r$   _isSp3OrAromaticCarbonT   s    z&MoleculeDetails._isSp3OrAromaticCarbonc             C   s  |  |s&t| dkr*| dkr*dS d}| }| |sL| dkrT|d7 }| rn| rv|d7 }n|d7 }| r|d7 }x|D ]~}|	|}|
 dkr|d7 }| dkr|d7 }| |r|d7 }| dkr| dkr|d7 }q| dkr|d7 }qW |S )Nr   r   g      ?   g      ?)      )         .   2   )r.   lenZGetNeighborsZGetFormalCharger+   r*   r   r   r   ZGetOtherAtomr,   r   )r   r!   r   r-   ZboZnir#   r#   r$   _calcReactivityAtom^   s2    &




z#MoleculeDetails._calcReactivityAtomc                s    fdd|  D }|S )Nc                s   g | ]}  |qS r#   )r8   ).0r!   )r   r#   r$   
<listcomp>   s    z;MoleculeDetails._calcReactivityMolecule.<locals>.<listcomp>)r   )r   r   reactivityAtomsr#   )r   r$   _calcReactivityMolecule   s    z'MoleculeDetails._calcReactivityMoleculer   c             C   sx   || _ i | _| || || j| _i | _| j|| || jdd| _| |}t	|}t
|tkrn|d9 }|| _d S )NF)r(   g?)r   r   r)   r%   r
   r   r&   r   r<   sumr   ZMolToSmilesfrequentReagentsr   )r   r   verboser;   r   r#   r#   r$   __init__   s    
zMoleculeDetails.__init__N)Tr   )r   )__name__
__module____qualname__	__slots__r%   r&   r)   r*   r.   r8   r<   r@   r#   r#   r#   r$   r	   /   s   	

(r	   Fc          
   C   s"  |rt d d}||  }tt| }|dkr8t d| tt|}|dkrXt d| tt|}|dkrxt d| tt|}	|dkrt d|	 d}
d}|d k	rt||\}
}|dkrt d	|
 d
||  }|	| }t|||  d}|d
krt d|d|d|| d|d	 |||
|gS )Nz--- _calcScore ---r   r/   znum RBits: znum PBits: znum UnmappedPBits: znum UnmappedRBits: znum UnmappedPAtoms: r   zscore: (,))printfloatr   ZgetNumPositiveCountsZgetNumNegativeCounts getNumPositiveBitCountsOfRadius0max)Z
reactantFPZ	productFPbitInfoProdoutputscoreZdFPZnumRBitsZnumPBitsZnumUnmappedPBitsZnumUnmappedRBitsnumUnmappedPAtomsbitsUnmappedPAtomsZratioMappedPBitsZratioUnmappedRBitsr#   r#   r$   
_calcScore   s8    





rR   z	CCN(CC)CCz[Li+]z[Na+]zO=C(O)CC(O)(CC(=O)O)C(=O)OzO=S(=O)(O)Oz
CN1CCCC1=OzCCN(C(C)C)C(C)CZc1ccncc1z[K]z	CC(C)(C)OZCCOzCc1ccc(S(=O)(=O)O)cc1zClC(Cl)(Cl)Clz[Na]zCC(C)(C)[O-]z
O=C([O-])OZCOCCOCz[NH4+]zCC(C)OC(C)CzO=C([O-])[O-]zCC(=O)OC(C)=OzO=C=Oz[Cl-]zc1ccc(P(c2ccccc2)c2ccccc2)cc1z[H-]zN#NZ	CN1CCOCC1ZC1COCCO1Zc1ccccc1z[Cs+]z[K+]z[OH-]ZCCCCCCZCCCCCzCN(C)C=OzC[O-]Z	Cc1ccccc1zC1CCC2=NCCCN2CC1ZCOZCCCCOzO=C(O)C(F)(F)FzO=P([O-])([O-])[O-]z	CCOC(C)=Oz[Mg+2]ZC1CCCCC1ONZIIzO=COzCC(=O)N(C)CzCC(=O)OZCCOCCzCC(C)OzC[Si](C)(C)ClZ
Cc1ccccc1CzCC(C)=OzCS(=O)(=O)OzCN(C)c1ccncc1ZClZClCCClz	O=S(Cl)Clz	ClC(Cl)Clz[Li]CCCCz[Pd]z[H][H]z[Br-]zCS(C)=Oz
COC(C)(C)CzO=S(=O)([O-])[O-]zCC(Cl)Clz
CC(=O)[O-]zCCCC[N+](CCCC)(CCCC)CCCCZClCClzCC#NZC1CCOC1ZCCCCCCCc                s  |rt d g }t }xtd|d D ]}xtt||D ]~}g }xb|D ]Z} | d s | d j }	|	dkr| | d j |f qL|dkrLt d| qLW ||kr>|| q>W q(W d}
d}g g}t	dd |D }t	dd |D }d}x|D ]}||j 7 }qW i }i }x&|D ]}|
|j |
|j q*W t||\}}t| t| ||gg}xr|D ]h}t|dk rqt|d d df  }|d	| ks||d
 k rԐq|dkrt d| t	 fdd|D }t	 fdd|D }t||||d}t||||d}|d |d  }|dkrt d| t d|d |d  t d|d |d |d |d  ||
kr||}
|d }|d d = |d d = |dd |D  ||d |d |d |d g |dkrt d|
 t d| |d dkr2|d d
kr2||fS t|dkrt|dkr|d d dkr|d d dkr||fS nnt||
 dk r|dkr|dd |D  ||d |d |d |d g |dkrt d t d| qW ||fS )Nz--- _getBestCombination ---r   r      zFrequent reagent found: c             S   s   g | ]
}|j qS r#   )r
   )r9   r    r#   r#   r$   r:      s    z'_getBestCombination.<locals>.<listcomp>c             S   s   g | ]
}|j qS r#   )r   )r9   r    r#   r#   r$   r:      s    r   g?zCombination: c                s   g | ]} |d   d j qS )r   r   )r
   )r9   r    )rfpsr#   r$   r:      s    c                s   g | ]} |d   d j qS )r   r   )r   )r9   r    )rV   r#   r$   r:      s    )rM   rN   z>>>> score: z >>>> scores (detail, scaffold): z">>>> num unmapped productFP bits: r/   c             S   s   g | ]}|d  qS )r   r#   )r9   r    r#   r#   r$   r:     s    rE   z >> maxScore: z >> Final reactants: gH.?gHz>g        c             S   s   g | ]}|d  qS )r   r#   )r9   r    r#   r#   r$   r:   "  s    z >> Added alternative result)rI   r7   range	itertoolscombinationsr   r   r   r   Z	getSumFpsupdater   r   rK   ZGetNonzeroElementsnparrayr=   rR   abs)rV   pfpsrN   ZtestsnumReactantsr    xtempjZnumAtmsZmaxScoreZmaxDetailScoreZfinalReactsZproductsDetailFPZproductsScaffoldFPZnumProductAtomsZproductsDetailFPBitInfoZproductsScaffoldFPBitInforP   rQ   ZfinalNumUnmappedProdAtomsZtestZnumReactantAtomsZreactantsDetailFPZreactantsScaffoldFPZdetailFPScoreZscaffoldFPScorerO   r#   )rV   r$   _getBestCombination   s    







 


"


$
"
rc   c          	      st  |rt d t|s|S g }t }x4t||D ]$\}}|| |d dkr2tt|t|}	t|	 fdddd}	g }
xn|d D ]b\}}xXtt|D ]H}||| jkr|| j| d }|
	|| j
|d  g|  qW qW t|
}
|dkrt |
 d	d
 |
D }d}d}g }x |	D ]}|dkrbt d| | jt | j
   d}x8|D ]0\}}t | j
||
| krl|d7 }qlW |dkrt d| t d| ||kr|} | jt | j
  }|g}n@|r0||kr0 | jt | j
  |kr0|}|| |dkr.t d| q.W |d 	| q2W |dkrpt d| |S )Nz%--- _findMissingReactiveReactants ---r   r   c                s    |  j t |  j  S )N)r   rJ   r   r   )r`   )rV   r#   r$   <lambda>:      z/_findMissingReactiveReactants.<locals>.<lambda>T)keyreverserE   c             S   s   g | ]}t ||fqS r#   )r   ZAtomNumEqualsQueryAtom)r9   r!   r#   r#   r$   r:   G  s    z1_findMissingReactiveReactants.<locals>.<listcomp>z >> Reactantz Max reactivityz Max fulfilled queriesz Added reactantsz >> Final reactants)rI   r7   zipr   setrW   
differencesortedr   extendr   ZGetAtomWithIdxr   r   r   rJ   r   ZGetAtomsMatchingQuery)rV   r^   ZcurrentReactantsZunmappedPAtomsrN   ZfinalReactantsr_   reactsZumPAZremainingReactantsZmissingPAtomsbitcZpbir!   ZqueriesZmaxFullfilledQueriesZmaxReactivityZaddReactantsrZcountFullfilledQueriesqr#   )rV   r$   _findMissingReactiveReactants*  sb    
,

$



"



rr   c             C   s\   t  }t  }xFt| D ]:\}}x0t|D ]$\}}||kr(|| || q(W qW ||fS )N)ri   r   add)	reactantsproductsZunchangedReactsZunchangedProdsr    rp   rb   pr#   r#   r$   _detectObviousReagentse  s    
rw   c                s  t | }t | |r td | |  t\}t \}}t|\}|rrtd| td t	 t	krt
 dd tt
| D fddtt
| D }fddD } fdd|D }	fd	dD }
t|
|	|d
\}}t|jdkr@fddD }
t|
|	|d
\}}t||	|||d
}g }x.|D ]&}fdd|D }|t
| q\W ||fS )Nz--- identifyReactants ---z"  >>> Found reagents in reactants:z!  >>> Found reagents in products:c             S   s   g | ]}|qS r#   r#   )r9   rp   r#   r#   r$   r:     s    z%identifyReactants.<locals>.<listcomp>c                s   g | ]}| kr|qS r#   r#   )r9   rv   )unmodifiedProductsr#   r$   r:     s    c                s   g | ]}t  | qS r#   )r	   )r9   rp   )rt   r#   r$   r:     s    c                s   g | ]}t  | qS r#   )r	   )r9   rv   )ru   r#   r$   r:     s    c                s$   g | ]}t |  | tkfqS r#   )r	   r>   )r9   rp   )reactantSmilesrt   r#   r$   r:     s    )rN   )r   r   c                s   g | ]}t  | d fqS )r   )r	   )r9   rp   )rt   r#   r$   r:     s    c                s   g | ]} | qS r#   r#   )r9   rb   )uniquePotentialReactantsr#   r$   r:     s    )r   ChemicalReactionZ!RemoveMappingNumbersFromReactionsrI   ZGetReactantsZGetProductsr   ZuniqueMoleculesrw   r7   ri   rk   valuesrc   r[   r\   shaperr   r   )ZreactionrN   rxnZuniqueReactantsZuniqueProductsZproductSmilesZunmodifiedReactantsZuniquePotentialProductsrV   r^   ZrfpsPreprm   ZunmappedProdAtomsZfinalreactsr    ra   r#   )ru   ry   rt   rz   rx   r$   identifyReactantso  s:    




r   c             C   s   t |  t| \}}}t|dk r(d S t }x$t|  D ]}|| 	| q>W xBt| 
 D ]2}||d kr|| | qd|| | qdW |S )Nr   r   )r   ZtransferAgentsToReactantsr   r7   r   r{   rW   ZGetNumProductTemplatesZAddProductTemplateZGetProductTemplateZGetNumReactantTemplatesZAddReactantTemplateZGetReactantTemplateZAddAgentTemplate)r~   rm   ZrAgentsZpAgentsnew_rxnr    r#   r#   r$   reassignRXNRoles  s    
r   c             C   s0   t j| dd}t|}|d kr"dS t |}|S )NT)Z	useSmiles )r   ZReactionFromSmartsr   ZReactionToSmiles)Zsmir~   r   Zsmi_newr#   r#   r$   reassignReactionRoles  s    
r   )NF)F)F)F)Zrdkitr   Z
rdkit.Chemr   r   collectionsr   r   rX   Znumpyr[   r   r   objectr	   rR   ri   r>   rc   rr   rw   r   r   r   r#   r#   r#   r$   <module>$   s,   h
"
g
;

,