Member-only story

Molecule Dataframe in Python

Patrick Chirdon
2 min read · Oct 31, 2020

--

The code below produces a pandas dataframe with the following columns:

# Empty result frame: one column per molecular property reported for each
# molecule. Plain ASCII quotes are required; typographic quotes do not parse.
newdf = pd.DataFrame(
    columns=[
        "smiles",
        "qed",
        "MW",
        "LogP",
        "NumHDonors",
        "NumHAcceptors",
        "avgenergy",
    ]
)

The values will be sorted by QED.

import numpy as np
import json
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from model import MoleculeVAE
from utils import encode_smiles, decode_latent_molecule, interpolate, get_unique_mols
from rdkit import Chem
from rdkit import RDLogger

import numpy as np
from rdkit import Chem
from rdkit.Chem import Descriptors, Lipinski
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import Descriptors, Lipinski

def lipinski(moldata, verbose=False):
    """Accumulate four Lipinski-style descriptors for each molecule.

    For every item in ``moldata`` the following values are computed, in this
    order: ``Descriptors.MolWt``, ``Descriptors.MolLogP``,
    ``Lipinski.NumHDonors`` and ``Lipinski.NumHAcceptors``. Each molecule
    yields one 4-element row; rows are stacked into ``baseData``.

    Args:
        moldata: Iterable of molecule objects accepted by the RDKit
            descriptor functions (presumably ``rdkit.Chem.Mol`` instances;
            confirm against the caller).
        verbose: Not used by the visible part of the function.

    NOTE(review): the excerpt this function was recovered from ends right
    after the accumulation loop, so no ``return`` is visible. The remainder
    presumably converts ``baseData`` into a DataFrame and returns it; confirm
    against the complete source before relying on the return value.
    """
    # Empty placeholder; replaced by the first row as soon as one exists.
    baseData = np.arange(1, 1)

    for i, mol in enumerate(moldata):
        desc_MolWt = Descriptors.MolWt(mol)
        desc_MolLogP = Descriptors.MolLogP(mol)
        desc_NumHDonors = Lipinski.NumHDonors(mol)
        desc_NumHAcceptors = Lipinski.NumHAcceptors(mol)

        row = np.array([desc_MolWt,
                        desc_MolLogP,
                        desc_NumHDonors,
                        desc_NumHAcceptors])

        if i == 0:
            # First molecule: the row itself becomes the data (1-D array).
            baseData = row
        else:
            # Later molecules: append as a new row (2-D array from here on).
            baseData = np.vstack([baseData, row])

--

--

No responses yet