% Professional Documents
% Culture Documents
%Output: gives two continuously-valued scores for each putative drug-target interaction. Also,
%we calculate ROC curves and Area under them (AUC).
%General remarks:
%1) The code should work on similarity matrices also, and not just positive semi-definite
% kernel matrices.
%2) You have to have pre-calculated your similarity/kernel matrices in advance.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%
%Here we load the dataset(s). They are text files converted from a format that is read by "R",
%so each one has one extra column that must be removed before proceeding. Comment or
%uncomment the block for the data set that you want to use.
% %1) ENZYMES
% %adjacency matrix:
% load e_admat_dgc2.txt
% y = e_admat_dgc2(:,2:(size(e_admat_dgc2,2)));
% %compound similarity matrix:
% load e_simmat_dc2.txt
% kCompound = e_simmat_dc2(:,2:(size(e_simmat_dc2,2)));
% %target similarity matrix:
% load e_simmat_dg2.txt
% kTarget = e_simmat_dg2(:,2:(size(e_simmat_dg2,2)));
% %3)GPCRs
% %adjacency matrix:
% load gpcr_admat_dgc2.txt
% y = gpcr_admat_dgc2(:,2:(size(gpcr_admat_dgc2,2)));
% %compound similarity matrix:
% load gpcr_simmat_dc2.txt
% kCompound = gpcr_simmat_dc2(:,2:(size(gpcr_simmat_dc2,2)));
% %target similarity matrix:
% load gpcr_simmat_dg2.txt
% kTarget = gpcr_simmat_dg2(:,2:(size(gpcr_simmat_dg2,2)));
%4)NUCLEAR RECEPTORS
% Each text file is read with the function form of load, which returns the
% numeric matrix directly and avoids the quoting ambiguities of command
% syntax. The matrices keep their original workspace names. The first column
% of every file is an extra column left over from the R export and is dropped.
%adjacency matrix:
nr_admat_dgc2 = load('nr_admat_dgc2.txt');
y = nr_admat_dgc2(:, 2:end);
%compound similarity matrix:
nr_simmat_dc2 = load('nr_simmat_dc2.txt');
kCompound = nr_simmat_dc2(:, 2:end);
%target similarity matrix:
nr_simmat_dg2 = load('nr_simmat_dg2.txt');
kTarget = nr_simmat_dg2(:, 2:end);
%We had problems with kCompound having imaginary eigenvalues. Averaging the
%matrix with its transpose makes it exactly symmetric, which guarantees real
%eigenvalues:
kCompound = (kCompound + kCompound')/2;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Checking for positive semi-definiteness. At present these are "similarity"
%matrices rather than "kernel" matrices, so we add increments of a small
%epsilon to the diagonal until every eigenvalue is real and non-negative.
epsilon = .1;

%Adding epsilon*I only shifts eigenvalues along the real axis; it can never
%remove an imaginary part. A non-symmetric kTarget with complex eigenvalues
%would therefore loop forever, so kTarget is symmetrized first (this leaves an
%already-symmetric matrix unchanged).
kTarget = (kTarget + kTarget')/2;

%eig is evaluated once per iteration and reused for both tests.
compoundEig = eig(kCompound);
while ~all(compoundEig >= 0) || ~isreal(compoundEig)
    kCompound = kCompound + epsilon*eye(size(kCompound,1));
    compoundEig = eig(kCompound);
end

targetEig = eig(kTarget);
while ~all(targetEig >= 0) || ~isreal(targetEig)
    kTarget = kTarget + epsilon*eye(size(kTarget,1));
    targetEig = eig(kTarget);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Cross-validation fold boundaries. Row k of each table holds [first last],
%the inclusive index range of fold k. Every fold has "thebasic" members except
%the last one, which also absorbs the remainder.
% NOTE(review): numOfSplits is not defined in the visible part of this script;
% it must already exist in the workspace -- confirm.

%folds over the rows of y:
thebasic = floor(size(y,1)/numOfSplits);
theseIndices = [thebasic*(0:numOfSplits-1)' + 1, ...
                [thebasic*(1:numOfSplits-1)'; size(y,1)]];

%folds over the columns of y:
thebasic2 = floor(size(y,2)/numOfSplits);
theseIndices2 = [thebasic2*(0:numOfSplits-1)' + 1, ...
                 [thebasic2*(1:numOfSplits-1)'; size(y,2)]];
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Fixing each target in turn, then doing cross validation (folds given by
%theseIndices2) on the set of compounds, to predict which ones are/are not
%targeting the target in question.
%
%Outputs (both lengthKCompound x size(y,1)):
%  myPredictions(j,i) - signed libsvm decision value for compound j, target i,
%                       or -1 when the training folds contain no positive.
%  numbEdges1(j,i)    - number of positive training labels used for that cell.
%
% NOTE(review): lengthKCompound is not defined in the visible part of this
% script; presumably size(kCompound,1) -- confirm.

%these are the weights for positive and negative examples. They alter the
%behavior of libsvm.
% NOTE(review): the labels are compared against 1 and shifted by 1/2 below,
% which suggests they are 0/1; '-w-1' would then match no class. Harmless
% while both weights are 1 -- confirm before changing them.
m2a = 1;
m2b = 1;
%'-t 4' selects libsvm's precomputed-kernel mode.
svmOptions = ['-t 4 -c 1 -w1 ',num2str(m2a),' -w-1 ',num2str(m2b)];

myPredictions = zeros(lengthKCompound, size(y,1));
numbEdges1 = zeros(lengthKCompound, size(y,1));

for i = 1:size(y,1)
    fprintf('target %d of %d\n', i, size(y,1));
    currentY = y(i,:)';
    lastSplit = -1;   %forces (re)training at the first compound of each target
    for j = 1:lengthKCompound
        %fold that compound j belongs to:
        whatSplit = sum(j >= theseIndices2(:,1));

        %The training set depends only on the fold, so the model is trained
        %once per fold instead of once per compound.
        if whatSplit ~= lastSplit
            trainIdx = setdiff(1:lengthKCompound, ...
                theseIndices2(whatSplit,1):theseIndices2(whatSplit,2));
            numPositives = sum(currentY(trainIdx) == 1);
            if numPositives > 0
                %precomputed-kernel format: first column is a serial number.
                K1 = [(1:numel(trainIdx))', kCompound(trainIdx,trainIdx)];
                %training:
                model = svmtrain(currentY(trainIdx), K1, svmOptions);
            end
            lastSplit = whatSplit;
        end

        if numPositives > 0
            K2 = [j, kCompound(j,trainIdx)];
            %testing:
            [predict_label,accuracy,dec_values] = svmpredict(currentY(j), K2, model);
            %libsvm treats the first training label as the positive class,
            %even if it is the negative one. The first training example is
            %compound 1, unless j lies in fold 1, in which case it is the
            %first compound of fold 2.
            firstLabel = currentY(1)*(j > theseIndices2(1,2)) + ...
                currentY(theseIndices2(2,1))*(j <= theseIndices2(1,2));
            myPredictions(j,i) = dec_values*sign(firstLabel - 1/2);
            numbEdges1(j,i) = numPositives;
        else
            %no positive training example: skip the SVM, use a fixed score.
            myPredictions(j,i) = -1;
            numbEdges1(j,i) = 0;
        end
    end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Fixing each compound in turn, then doing cross validation (folds given by
%theseIndices) on the set of targets, to predict which ones are/are not
%targeted by the compound in question.
%
%Outputs (both lengthKTarget x size(y,2)):
%  myPredictions2(j,i) - signed libsvm decision value for target j, compound i,
%                        or -1 when the training folds contain no positive.
%  numbEdges2(j,i)     - number of positive training labels used for that cell.
%
% NOTE(review): lengthKTarget is not defined in the visible part of this
% script; presumably size(kTarget,1) -- confirm.

%these are the weights for positive and negative examples. They alter the
%behavior of libsvm.
% NOTE(review): the labels are compared against 1 and shifted by 1/2 below,
% which suggests they are 0/1; '-w-1' would then match no class. Harmless
% while both weights are 1 -- confirm before changing them.
m2a = 1;
m2b = 1;
%'-t 4' selects libsvm's precomputed-kernel mode.
svmOptions = ['-t 4 -c 1 -w1 ',num2str(m2a),' -w-1 ',num2str(m2b)];

myPredictions2 = zeros(lengthKTarget, size(y,2));
numbEdges2 = zeros(lengthKTarget, size(y,2));

for i = 1:size(y,2)
    currentY = y(:,i);
    lastSplit = -1;   %forces (re)training at the first target of each compound
    for j = 1:lengthKTarget
        %fold that target j belongs to:
        whatSplit = sum(j >= theseIndices(:,1));

        %The training set depends only on the fold, so the model is trained
        %once per fold instead of once per target.
        if whatSplit ~= lastSplit
            trainIdx = setdiff(1:lengthKTarget, ...
                theseIndices(whatSplit,1):theseIndices(whatSplit,2));
            %Counted over the target folds (theseIndices); the compound fold
            %table theseIndices2 does not describe this training set.
            numPositives = sum(currentY(trainIdx) == 1);
            if numPositives > 0
                %matrices to input to libsvm (first column is a serial number):
                K1 = [(1:numel(trainIdx))', kTarget(trainIdx,trainIdx)];
                %training:
                model = svmtrain(currentY(trainIdx), K1, svmOptions);
            end
            lastSplit = whatSplit;
        end

        if numPositives > 0
            K2 = [j, kTarget(j,trainIdx)];
            %testing:
            [predict_label,accuracy,dec_values] = svmpredict(currentY(j), K2, model);
            %fixing libsvm's desire to treat the first item in the training
            %set as the positive class. The first training example is target
            %1, unless j lies in fold 1, in which case it is the first target
            %of fold 2.
            firstLabel = currentY(1)*(j > theseIndices(1,2)) + ...
                currentY(theseIndices(2,1))*(j <= theseIndices(1,2));
            myPredictions2(j,i) = dec_values*sign(firstLabel - 1/2);
            numbEdges2(j,i) = numPositives;
        else
            %no positive training example: skip the SVM, use a fixed score.
            myPredictions2(j,i) = -1;
            numbEdges2(j,i) = 0;
        end
    end
end
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%AREA UNDER ROC CURVE for fixing targets in turn.
% NOTE(review): myScores and myTrueLabels are not defined in the visible part
% of this script; presumably the flattened myPredictions and the matching
% entries of y -- confirm.
%
%The decision threshold is swept by adding an offset "moveit" to every score;
%a score counts as a predicted positive when score + moveit > 0.
myNumb = length((-2.5):.001:(2.5));
TruPos = zeros(1,myNumb);
FalPos = zeros(1,myNumb);
countMan = 0;
for moveit = (-2.5):.001:(2.5)
    countMan = countMan + 1;
    predictedPositive = (myScores + moveit) > 0;
    TruPos(countMan) = sum(predictedPositive & myTrueLabels==1);
    FalPos(countMan) = sum(predictedPositive & myTrueLabels==0);
end

%normalise the counts to rates (these become NaN if a class is never predicted):
xx = FalPos/max(FalPos);
yy = TruPos/max(TruPos);
plot(xx,yy,'r');
%"hold on" rather than the bare toggle "hold", so the later curves are overlaid
%even when the figure was already held.
hold on;

%calculating the area under the ROC curve:
%The original accumulation loop was incomplete (it never updated the area and
%ended with an unbalanced "end"). The curve is integrated with the trapezoidal
%rule, starting from the origin.
areaUnderROC = trapz([0 xx],[0 yy]);
areaUnderROC
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%AREA UNDER ROC CURVE for fixing compounds in turn.
% NOTE(review): myScores2 and myTrueLabels2 are not defined in the visible
% part of this script; presumably the flattened myPredictions2 and the
% matching entries of y -- confirm.
%
%The decision threshold is swept by adding an offset "moveit" to every score;
%a score counts as a predicted positive when score + moveit > 0.
myNumb = length((-2.5):.001:(2.5));
TruPos2 = zeros(1,myNumb);
FalPos2 = zeros(1,myNumb);
countMan = 0;
for moveit = (-2.5):.001:(2.5)
    countMan = countMan + 1;
    predictedPositive = (myScores2 + moveit) > 0;
    TruPos2(countMan) = sum(predictedPositive & myTrueLabels2==1);
    FalPos2(countMan) = sum(predictedPositive & myTrueLabels2==0);
end

%normalise the counts to rates (these become NaN if a class is never predicted):
xx = FalPos2/max(FalPos2);
yy = TruPos2/max(TruPos2);
plot(xx,yy,'b');

%calculating the area under the ROC curve:
%The original accumulation loop was incomplete (it never updated the area and
%ended with an unbalanced "end"). The curve is integrated with the trapezoidal
%rule, starting from the origin.
areaUnderROC = trapz([0 xx],[0 yy]);
areaUnderROC
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%AREA UNDER ROC CURVE for both situations combined.
%various ways of combining the two predictions made for each edge:
%1) adding them:
%myPredictions3 = myPredictions + myPredictions2';
%2) taking the largest:
myPredictions3 = max(myPredictions,myPredictions2');

%Flatten scores and labels in the same order: for each compound k in turn,
%row k of myPredictions3 is paired with column k of y.
myScores3 = reshape(myPredictions3(1:lengthKCompound,:)', 1, []);
myTrueLabels3 = reshape(y(:,1:lengthKCompound), 1, []);

%The decision threshold is swept by adding an offset "moveit" to every score;
%a score counts as a predicted positive when score + moveit > 0.
myNumb = length((-5):.001:(5));
TruPos3 = zeros(1,myNumb);
FalPos3 = zeros(1,myNumb);
countMan = 0;
for moveit = (-5):.001:(5)
    countMan = countMan + 1;
    predictedPositive = (myScores3 + moveit) > 0;
    TruPos3(countMan) = sum(predictedPositive & myTrueLabels3==1);
    FalPos3(countMan) = sum(predictedPositive & myTrueLabels3==0);
end

%normalise the counts to rates (these become NaN if a class is never predicted):
xx3 = FalPos3/max(FalPos3);
yy3 = TruPos3/max(TruPos3);
plot(xx3,yy3,'g');
hold off;

%calculating the area under the ROC curve:
%The original accumulation loop was incomplete (it never updated the area and
%ended with an unbalanced "end"). The curve is integrated with the trapezoidal
%rule, starting from the origin.
areaUnderROC3 = trapz([0 xx3],[0 yy3]);
areaUnderROC3
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Area under the precision-recall curve, computed only for the 3rd ("max")
%combination. To obtain it for the other two cases, copy this section and
%replace every '3' with '1' or '2' (TruPos/FalPos and TruPos2/FalPos2).
%
%The code integrates the false-discovery-rate curve FP/(FP+TP) over recall
%and reports 1 minus that area.
xxPR = TruPos3/max(TruPos3);
%the tiny term only keeps the denominator non-zero where both counts are zero:
yyPR = FalPos3./(FalPos3 + TruPos3 + 1e-9*(FalPos3==0 & TruPos3==0));

addToSum = 0;
myXa = 0;
xRun = 0;
for myCounta = 1:myNumb
    %only points where recall strictly increases contribute:
    if ~(xxPR(myCounta) > xRun)
        continue
    end
    if myCounta > 1
        %trapezoid between the previous recall level and this one:
        myXb = xxPR(myCounta);
        addToSum = addToSum + (myXb - myXa)*(yyPR(myCounta - 1) + yyPR(myCounta))/2;
        myXa = myXb;
    else
        %the very first point only sets the starting recall:
        myXa = xxPR(1);
    end
    xRun = xxPR(myCounta);
end
AUPR = 1 - addToSum
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%