Saturday 31 July 2010

This may seem obvious

For some, this may be obvious or straightforward to implement. However, I have been asked an astounding number of times for a script that transforms the output of integer-based fingerprints, such as MACCS keys or pharmacophore fingerprints coming out of MOE (the Molecular Operating Environment), into binary representations (e.g. 000110101), so that molecules can be compared based on their Tanimoto similarity or some other measure, or used as input to a machine learning algorithm.

So here is a useful script that will convert integer-based fingerprints into binary fingerprints.

import os, sys

class ConvertIntegerFPToBinary():
    """Convert integer-based fingerprints into fixed-width binary fingerprints.

    Each data line of the input file lists the bit positions that are set,
    as whitespace-separated integers (double quotes are stripped). Every
    line is turned into a list of 0/1 values, one per bit position from 1
    to the requested maximum, followed by the label given at construction.
    """

    def __init__(self, descriptorFile, outputFile, label):
        """Store the file paths and the label; no I/O happens here.

        descriptorFile -- path of the integer fingerprint file to read
        outputFile     -- path of the binary fingerprint file to write
        label          -- value appended as the last element of every
                          fingerprint
        """
        self.iFile = descriptorFile
        self.oFile = outputFile
        self.binaryFingerprints = []
        self.integerList = []
        self.label = label

    def populateList(self, maxBitSize):
        """Set self.integerList to the strings '1' .. str(maxBitSize)."""
        # Rebuild instead of appending: appending made a second call
        # duplicate every bit position and double the fingerprint width.
        self.integerList = [str(i) for i in range(1, maxBitSize + 1)]

    def convertData(self, maximumBitSize):
        """Read the input file and build one binary fingerprint per line.

        The first line of the file is skipped (presumably a header row).
        Every remaining line, including an empty one, yields a fingerprint
        of maximumBitSize 0/1 values followed by the label. Results are
        appended to self.binaryFingerprints, which is also returned.
        """
        self.populateList(maximumBitSize)
        # The context manager closes the handle even if reading fails.
        with open(self.iFile, 'r') as inputFile:
            data = inputFile.readlines()
        for line in data[1:]:
            # A set gives constant-time membership tests for each position.
            setBits = set(line.replace("\"", "").split())
            binaryFingerprint = [1 if bit in setBits else 0
                                 for bit in self.integerList]
            binaryFingerprint.append(self.label)
            self.binaryFingerprints.append(binaryFingerprint)
        return self.binaryFingerprints

    def postProcessing(self, fp):
        """Return str(fp) with '[', ']' and single quotes removed.

        For a fingerprint list this yields "0, 1, ..., label". Note that
        the same characters are also removed if they occur in the label.
        """
        binaryFingerprint = str(fp).replace("[", "").replace("]", "").replace("'", "")
        return binaryFingerprint

    def writeToFile(self):
        """Write the fingerprints to the output file, one per line.

        Lines are separated by a newline; no newline follows the last one.
        """
        lines = [self.postProcessing(fp) for fp in self.binaryFingerprints]
        # Closing the file explicitly guarantees the data is flushed.
        with open(self.oFile, 'w') as outputFile:
            outputFile.write("\n".join(lines))

if __name__ == '__main__':
    # Command line: <descriptorFile> <outputFile> <label>
    if len(sys.argv) < 4:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("Usage: %s <descriptorFile> <outputFile> <label>" % sys.argv[0])
    # Number of bit positions in each generated fingerprint.
    maximumBitSize = 166
    converter = ConvertIntegerFPToBinary(sys.argv[1], sys.argv[2], sys.argv[3])
    converter.convertData(maximumBitSize)
    converter.writeToFile()

No comments:

Post a Comment