我有一个电子邮件数据集,我使用SVM来判断电子邮件是否是垃圾邮件。
我将数据集划分为测试集和训练集,然后从中随机选取500条记录来对SVM进行调参。我使用的是RBF核。以下是原始数据:
make,address,all,num3d,our,over,remove,internet,order,mail,receive,will,people,report,addresses,free,business,email,you,credit,your,font,num000,money,hp,hpl,george,num650,lab,labs,telnet,num857,data,num415,num85,technology,num1999,parts,pm,direct,cs,meeting,original,project,re,edu,table,conference,charSemicolon,charRoundbracket,charSquarebracket,charExclamation,charDollar,charHash,capitalAve,capitalLong,capitalTotal,type
0,0.64,0.64,0,0.32,0,0,0,0,0,0,0.64,0,0,0,0.32,0,1.29,1.93,0,0.96,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.778,0,0,3.756,61,278,spam
0.21,0.28,0.5,0,0.14,0.28,0.21,0.07,0,0.94,0.21,0.79,0.65,0.21,0.14,0.14,0.07,0.28,3.47,0,1.59,0,0.43,0.43,0,0,0,0,0,0,0,0,0,0,0,0,0.07,0,0,0,0,0,0,0,0,0,0,0,0,0.132,0,0.372,0.18,0.048,5.114,101,1028,spam
0.06,0,0.71,0,1.23,0.19,0.19,0.12,0.64,0.25,0.38,0.45,0.12,0,1.75,0.06,0.06,1.03,1.36,0.32,0.51,0,1.16,0.06,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.06,0,0,0.12,0,0.06,0.06,0,0,0.01,0.143,0,0.276,0.184,0.01,9.821,485,2259,spam
0,0,0,0,0.63,0,0.31,0.63,0.31,0.63,0.31,0.31,0.31,0,0,0.31,0,0,3.18,0,0.31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.137,0,0.137,0,0,3.537,40,191,spam
0,0,0,0,0.63,0,0.31,0.63,0.31,0.63,0.31,0.31,0.31,0,0,0.31,0,0,3.18,0,0.31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.135,0,0.135,0,0,3.537,40,191,spam
svmFit = tune.svm(type~., data = randomTrainSample,
gamma = 2^(0.000001:0.001), cost = 2^(10:100))
这个过程持续了很长时间,似乎没有结束。
我也用 gamma = 2^(-1:1) 和 cost = 2^(2:4) 运行过,它能正常工作;但使用上面那组取值时几乎得不到任何结果。
我怀疑这是由于我提供的 gamma 和 cost 的取值造成的。有人能给些建议吗?
答案 0 :(得分:3)
根据我的一些经验,tune.svm() 也需要很长时间才能运行。它最终会结束,但要等1小时左右。
我测试了你的命令(做了一些修改),大约30分钟后就完成了。
这就是输出的样子:
parameters <- tune.svm(class~., data = train_set, gamma = 10^(-5:-1), cost = 10^(-3:1))
summary(parameters )
Parameter tuning of ‘svm’:
- sampling method: 10-fold cross validation
- best parameters:
gamma cost
0.1 1
- best performance: 0.1409453
- Detailed performance results:
gamma cost error dispersion
1 1e-05 0.1 0.2549098 0.010693238
2 1e-04 0.1 0.2548908 0.010689828
3 1e-03 0.1 0.2546062 0.010685683
4 1e-02 0.1 0.2397427 0.010388229
5 1e-01 0.1 0.1776163 0.014591070
6 1e-05 1.0 0.2549043 0.010691266
7 1e-03 1.0 0.2524830 0.010660262
8 1e-02 1.0 0.2262167 0.010391502
9 1e-01 1.0 0.1409453 0.009898745
10 1e-05 10.0 0.2548687 0.010690819
11 1e-04 10.0 0.2545997 0.010686525
12 1e-03 10.0 0.2403118 0.010394169
13 1e-02 10.0 0.1932509 0.009984875