function area = calc_roc(fileName,varargin)
%
% RECEIVER-OPERATING CHARACTERISTICS (ROC) CURVE
%
% Plots the ROC curve of one or more experiments on the same dataset into
% a single figure, saves the figure, and returns the area under each curve.
%
% area = calc_roc(fileName,exp)
%
%   fileName: name of the output file. The figure is written with
%             print('-depsc','-r300',...), i.e. as a colour EPS image.
%   exp:      a structure representing an experiment with three fields
%               - exp.name: a string specifying the experiment name
%               - exp.pos:  vector of predictions for the positive class
%               - exp.neg:  vector of predictions for the negative class
%             (row or column vectors are both accepted)
%   area:     a 1x1 cell array holding the area under the ROC curve.
%
% The function accepts any number of experiments:
%
% area = calc_roc(fileName,exp1,exp2,...,expN)
%   area: a 1xN cell array, area{i} being the AUC of experiment i.
%
% Notes:
%   - Higher prediction values are treated as "more positive".
%   - Tied prediction values are collapsed into a single ROC point.
%   - The curve always starts at the origin (0,0), so the trapezoidal area
%     is correct even when the highest-scoring tie group mixes positives
%     and negatives.
%   - Side effects: clears the command window, closes all open figures,
%     opens a new figure and writes fileName.
%
% Anshul Kundaje
% Jan 12, 2009

clc;
close all;

% Only valid LineSpec colour codes; cycled when there are more experiments
% than colours.
colors = 'rbmkgcy';
nb_colors = numel(colors);

exp_n = numel(varargin);   % number of experiments

handle = figure;
hold on;
title('ROC CURVE (Boosting with alternating decision tree)');
xlabel('FALSE POSITIVE RATIO');
ylabel('TRUE POSITIVE RATIO');
axis([-0.01 1.00001 -0.0001 1.00001]);

leg_names = cell(1,exp_n);
area = cell(1,exp_n);

for i = 1:exp_n   % for all experiments
    % 1-based cyclic index: mod(i,n) alone would yield the invalid index 0
    % whenever i is a multiple of n.
    scolor = colors(mod(i-1,nb_colors)+1);

    % Force column vectors so row-vector inputs concatenate correctly.
    pos = varargin{i}.pos(:);
    neg = varargin{i}.neg(:);

    nb_pos   = numel(pos);          % number of positives
    nb_neg   = numel(neg);          % number of negatives
    nb_total = nb_pos + nb_neg;     % total number of values

    preds  = [pos; neg];                         % all predicted values
    labels = [ones(nb_pos,1); zeros(nb_neg,1)];  % 1 = positive, 0 = negative

    [preds,idx] = sort(preds,1,'descend');  % highest score first
    labels = labels(idx);                   % reorder labels accordingly

    truepos  = cumsum(labels);              % true-positive COUNT at each cut
    falsepos = (1:nb_total)' - truepos;     % false-positive COUNT at each cut

    % Keep only the last element of each run of tied scores, so that every
    % distinct threshold contributes exactly one ROC point.
    flags = ([diff(preds);1] ~= 0);

    % Convert counts to rates and prepend the origin (threshold above the
    % maximum score: nothing is predicted positive).
    truepos  = [0; truepos(flags)/nb_pos];
    falsepos = [0; falsepos(flags)/nb_neg];

    form = sprintf('-%c.', scolor);
    plot(falsepos, truepos, form);

    % Trapezoidal integration of TPR over FPR.
    area{1,i} = sum((falsepos(2:end)-falsepos(1:end-1)).* ...
                    (truepos(2:end)+truepos(1:end-1))/2);

    leg_names{i} = [varargin{i}.name, sprintf(' AUC=%.4f',area{1,i})];
end

legend(leg_names,'Location','SouthEast');

fprintf('SAVING FILE %s\n',fileName);
print(handle,'-depsc','-r300',fileName);   % save the figure created above
hold off;