# Source code for defcom.performance_stats

#---------------------------------------------------------------------
# DeFCoM: A supervised learning genomic footprinter
# Copyright (C) 2016  Bryan Quach
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#----------------------------------------------------------------------

import uuid
import subprocess
from numpy import concatenate
from sklearn.metrics import roc_curve, auc

"""Methods for computing performance statistics."""

def _max_tpr_at(fpr, tpr, fpr_value):
    """Return the highest TPR among ROC points whose FPR equals fpr_value.

    Raises ValueError (from the empty reduction) when no ROC point has that
    FPR.
    """
    return tpr[fpr == fpr_value].max()


def _tpr_on_line(x0, y0, x1, y1, fpr_cutoff):
    """Return the TPR at fpr_cutoff on the line through (x0,y0) and (x1,y1)."""
    if x1 == x0:
        # Both anchors share a single FPR, so no slope is defined. Use the
        # highest TPR instead of dividing by zero.
        return max(y0, y1)
    run = x1 - x0
    return fpr_cutoff*((y1-y0)/run) + ((x1*y0-x0*y1)/run)


def pAUC(y_true, y_score, fpr_cutoff):
    """ Calculate partial Area Under ROC (pAUC).

    Computes a pAUC value given a specified false positive rate (FPR)
    cutoff. It is important to note that the exact pAUC cannot be computed.
    The accuracy of the calculation depends on the resolution of data
    points produced by an intermediary ROC curve. The TPR at the cutoff is
    linearly interpolated between the last ROC point whose FPR is below the
    cutoff and the first ROC point whose FPR is at or above it. For these
    two FPR values, the highest associated TPR values are used. The area
    under the truncated curve is then divided by the FPR cutoff.

    Args:
        y_true: Array-like of true binary class labels in range {0, 1} or
            {-1, 1} corresponding to y_score. The larger value represents
            the positive class.

        y_score: Array-like of target scores with higher scores indicating
            more confidence in the positive class.

        fpr_cutoff: A float specifying the FPR cutoff to use in computing
            the pAUC. Must be in the interval (0,1).

    Returns:
        A float representing the pAUC value.

    Raises:
        AssertionError: The FPR cutoff is not in the interval (0,1)
    """
    # Raised explicitly (not via `assert`) so the check survives `python -O`
    # and carries its message; the exception type is unchanged for callers.
    if not (0.0 < fpr_cutoff < 1.0):
        raise AssertionError("FPR cutoff must be in (0,1)")
    fpr, tpr, _ = roc_curve(y_true, y_score, drop_intermediate=False)
    # fpr is non-decreasing, so this is also the index of the first ROC
    # point whose FPR is at or above the cutoff.
    n_below = int((fpr < fpr_cutoff).sum())
    if n_below == 0:  #No ROC data points lower than cutoff
        x0 = fpr[0]
        above = fpr[fpr > x0]
        x1 = above.min() if above.size else x0
    elif n_below == len(fpr):  #No ROC data points at or above cutoff
        # x1 must be fixed first: x0 is defined relative to it.
        x1 = fpr[-1]
        below = fpr[fpr < x1]
        x0 = below.max() if below.size else x1
    else:
        x0 = fpr[n_below-1]
        x1 = fpr[n_below]
    y0 = _max_tpr_at(fpr, tpr, x0)
    y1 = _max_tpr_at(fpr, tpr, x1)
    #Apply line derived from two closest points from FPR cutoff
    tpr_cutoff = _tpr_on_line(x0, y0, x1, y1, fpr_cutoff)
    #Segment full ROC to get partial ROC
    if n_below == 0:
        # Nothing lies below the cutoff: the partial curve is the segment
        # from the origin to the cutoff point.
        partial_fpr = [0.0, fpr_cutoff]
        partial_tpr = [0.0, tpr_cutoff]
    else:
        partial_fpr = concatenate((fpr[:n_below], [fpr_cutoff]), axis=0)
        partial_tpr = concatenate((tpr[:n_below], [tpr_cutoff]), axis=0)
    return auc(partial_fpr, partial_tpr)/fpr_cutoff