E030

ROC曲线计算

 

#include "stdafx.h"

#include "orsciJWVCL.h"
#include "orsciVM.h"
using namespace orsci;
using namespace orsci::vmt;

#include "orsciSTAT.h"
using namespace dm;


// Demo entry point: builds a fixed set of class labels and classifier scores,
// computes the ROC curve with dm::ROC, and prints the FPR/TPR columns, the
// best threshold, the AUC and the Euclidean distance of every ROC point to
// the ideal corner (0, 1).
//
// Returns 0 on success and 1 when the input sizes disagree or dm::ROC
// reports failure.
int main()
{
    cout << " orsci:Receiver Operating Characteristic curve (ROC)" << endl
         << " --- http://www.orsci.cn" << endl;

    // Labels and scores are parsed from text; entry k of tag belongs to entry k of score.
    colint tag = "1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0";
    coldouble score = "0.9 0.8 0.75 0.7 0.6 0.55 0.5 0.4 0.38 0.32 0.21 0.2 0.18 0.16 0.13 0.13 0.13 0.12 0.1 0.08";

    // Report the mismatch and exit with a failure status instead of throwing
    // an exception that nothing in this program catches.
    if (tag.size() != score.size())
    {
        cout << "[Error]tag.size() should equal score.size()!" << endl;
        return 1;
    }

    cout << "ROC data ..." << endl;
    for (int k = 0; k < tag.size(); ++k)
    {
        cout << "[ " << k << " ]\t" << tag(k) << "\t" << score(k) << endl;
    }

    // Output arguments filled in by dm::ROC.
    coldouble FPR, TPR;
    coldouble mThresholdList;
    double mBestThresholdScore = 0.0;
    double mBestDistanceToLeftUp = 0.0;
    double mAUC = 0.0; // Area under the ROC curve.

    const bool mFlag = dm::ROC(tag, score, FPR, TPR, mThresholdList,
                               mBestThresholdScore, mBestDistanceToLeftUp, mAUC);
    if (!mFlag)
    {
        // NOTE(review): assumes dm::ROC returns false on failure - confirm
        // against the orsci documentation.
        cout << "[Error]dm::ROC failed!" << endl;
        return 1;
    }

    cout << "FPR = " << endl;
    cout << FPR << endl;
    cout << "TPR = " << endl;
    cout << TPR << endl;

    cout << "BestThresholdScore = " << mBestThresholdScore << " BestDistanceToLeftUp = " << mBestDistanceToLeftUp << endl;
    cout << "AUC = " << mAUC << endl;
    cout << endl;

    // Recompute, point by point, the distance from each ROC point to (0, 1).
    cout << "距离(0,1)最近的点计算:";
    vdouble basePoint = "0, 1";
    vdouble curPoint;
    curPoint.Resize(2);
    // NOTE(review): TPR and mThresholdList are indexed with the same k as FPR;
    // presumably dm::ROC fills all three with the same length - confirm.
    for (int k = 0; k < FPR.size(); ++k)
    {
        curPoint(0) = FPR[k];
        curPoint(1) = TPR[k];
        cout << "[ " << k << " ] point(" << curPoint(0) << ", " << curPoint(1) << ")距离值为:" << vmt::dist_Euclid(curPoint, basePoint) << " 相应的阈值:" << mThresholdList[k] << endl;
    }

    cout << endl;
    cout << "press any key to stop..." << endl;
    // Wait for one character so the console window stays open.
    char pp;
    cin >> pp;

    // 0 signals success to the calling environment (the original returned 1).
    return 0;
}

输出

(一)运行结果

orsci:Receiver Operating Characteristic curve (ROC)
--- http://www.orsci.cn
ROC data ...
[ 0 ] 1 0.9
[ 1 ] 0 0.8
[ 2 ] 1 0.75
[ 3 ] 1 0.7
[ 4 ] 1 0.6
[ 5 ] 0 0.55
[ 6 ] 1 0.5
[ 7 ] 1 0.4
[ 8 ] 1 0.38
[ 9 ] 1 0.32
[ 10 ] 0 0.21
[ 11 ] 1 0.2
[ 12 ] 1 0.18
[ 13 ] 0 0.16
[ 14 ] 0 0.13
[ 15 ] 0 0.13
[ 16 ] 1 0.13
[ 17 ] 1 0.12
[ 18 ] 1 0.1
[ 19 ] 0 0.08
FPR =
0
0
0.142857
0.142857
0.142857
0.142857
0.285714
0.285714
0.285714
0.285714
0.285714
0.428571
0.428571
0.428571
0.571429
0.714286
0.857143
0.857143
1

TPR =
0
0.0769231
0.0769231
0.153846
0.230769
0.307692
0.307692
0.384615
0.461538
0.538462
0.615385
0.615385
0.692308
0.769231
0.769231
0.769231
0.923077
1
1

BestThresholdScore = 0.32 BestDistanceToLeftUp = 0.479126
AUC = 0.626374

距离(0,1)最近的点计算:[ 0 ] point(0, 0)距离值为:1 相应的阈值:1.9
[ 1 ] point(0, 0.0769231)距离值为:0.923077 相应的阈值:0.9
[ 2 ] point(0.142857, 0.0769231)距离值为:0.934066 相应的阈值:0.8
[ 3 ] point(0.142857, 0.153846)距离值为:0.858128 相应的阈值:0.75
[ 4 ] point(0.142857, 0.230769)距离值为:0.782384 相应的阈值:0.7
[ 5 ] point(0.142857, 0.307692)距离值为:0.706893 相应的阈值:0.6
[ 6 ] point(0.285714, 0.307692)距离值为:0.748948 相应的阈值:0.55
[ 7 ] point(0.285714, 0.384615)距离值为:0.678477 相应的阈值:0.5
[ 8 ] point(0.285714, 0.461538)距离值为:0.609568 相应的阈值:0.4
[ 9 ] point(0.285714, 0.538462)距离值为:0.542817 相应的阈值:0.38
[ 10 ] point(0.285714, 0.615385)距离值为:0.479126 相应的阈值:0.32
[ 11 ] point(0.428571, 0.615385)距离值为:0.575849 相应的阈值:0.21
[ 12 ] point(0.428571, 0.692308)距离值为:0.527587 相应的阈值:0.2
[ 13 ] point(0.428571, 0.769231)距离值为:0.486752 相应的阈值:0.18
[ 14 ] point(0.571429, 0.769231)距离值为:0.616267 相应的阈值:0.16
[ 15 ] point(0.714286, 0.769231)距离值为:0.750639 相应的阈值:0.13
[ 16 ] point(0.857143, 0.923077)距离值为:0.860588 相应的阈值:0.12
[ 17 ] point(0.857143, 1)距离值为:0.857143 相应的阈值:0.1
[ 18 ] point(1, 1)距离值为:1 相应的阈值:0.08

press any key to stop...

(二)ROC曲线

ROC曲线的主要用途:观察分类方法(分类模型)的参数对性能的影响;寻找最优配置参数;对多个分类方法(分类模型)进行对比。

(三)说明:

(1)ROC应用广泛,在无线电、医疗诊断、机器学习、数据分析和数据挖掘等领域都有较多应用。

(2)具体理论方法,请参看配套书籍。orsci提供ROC曲线计算支持。

(3)如需使用orsci包的ROC曲线计算功能,可下载配套软件orsci。

书籍 姜维. 《数据分析与数据挖掘》、《数据分析与数据挖掘实践》、《文本分析与文本挖掘》
软件 orsci开发包(C++语言、Delphi语言和C语言)。