-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.R
124 lines (101 loc) · 5.13 KB
/
model.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#**********************SVM***********************
library(gdata)
library(e1071)
library(caret)
# Train and evaluate a polynomial-kernel SVM on the labelled question data.
#
# `testCount` rows are sampled (without replacement) from the index range
# `first:last` as the test set; all remaining rows are used for training.
#
# @param first First candidate row index for the test sample.
# @param last Last candidate row index for the test sample.
# @param testCount Number of rows drawn for the test set.
# @param dataFile CSV file with the labelled data (default preserves the
#   original hard-coded file name).
# @return A caret::confusionMatrix of predictions vs. true labels.
doSvmLearning <- function(first, last, testCount, dataFile = "so-data.csv") {
  set.seed(7)  # fixed seed so the train/test split is reproducible
  allData <- read.csv(dataFile)
  allData$Label <- ifelse(allData$Label == 1, " YES ", " NO ")
  allData$Label <- factor(allData$Label)
  test <- sample(first:last, testCount)
  testData <- allData[test, ]
  trainData <- allData[-test, ]
  # gamma/cost were chosen with the tune.svm call commented out below.
  # NOTE(review): the original also passed method = "class" to svm() and
  # type = "class" to predict(); neither is an argument of those functions
  # (both were silently absorbed by `...`), so they have been dropped.
  modFit <- svm(
    Label ~ AnswerCount + SSVC + ViewCount + SumAnswersScores +
      HasAcceptedAnswer + AvgAnswerersReputation,
    data = trainData, kernel = "polynomial",
    gamma = 0.03125, cost = 262144
  )
  pre <- predict(modFit, newdata = testData)
  confMatrix <- confusionMatrix(pre, testData$Label, positive = " YES ")
  confMatrix
}
#tmodel <- tune.svm(Label~AnswerCount+SSVC+ViewCount+SumAnswersScores+HasAcceptedAnswer+AvgAnswerersReputation, data=trainData,
# gamma=2^(-15:-1), cost=2^(0:30), tunecontrol = tune.control(cross=2460))# if you want to run this, at first set trainData
#*************************Neural Network*********************
library(gdata)
library(e1071)
library(nnet)
library(caret)
# Train and evaluate a single-hidden-layer neural network (nnet) on the
# labelled question data.
#
# `testCount` rows are sampled from `first:last` as the test set; the rest
# of the data is used for training.
#
# @param first First candidate row index for the test sample.
# @param last Last candidate row index for the test sample.
# @param testCount Number of rows drawn for the test set.
# @param dataFile CSV file with the labelled data (default preserves the
#   original hard-coded file name).
# @return A caret::confusionMatrix of predictions vs. true labels.
doNNet <- function(first, last, testCount, dataFile = "so-data.csv") {
  set.seed(7)  # fixed seed so the train/test split is reproducible
  allData <- read.csv(dataFile)
  allData$Label <- ifelse(allData$Label == 1, " YES ", " NO ")
  allData$Label <- factor(allData$Label)
  test <- sample(first:last, testCount)
  testData <- allData[test, ]
  trainData <- allData[-test, ]
  # size was chosen with the tune.nnet call commented out below.
  # NOTE(review): the original built trainControl/expand.grid objects and
  # passed trControl, tuneGrid and method = "class" to nnet() — those are
  # caret::train() arguments that nnet() silently ignores via `...`, so they
  # have been removed.  Use caret::train(..., method = "nnet") if the LOOCV
  # grid search is actually wanted.
  modFit <- nnet(
    Label ~ SSVC + ViewCount + SumAnswersScores + HasAcceptedAnswer,
    data = trainData, size = 66, maxit = 4000
  )
  pre <- predict(modFit, newdata = testData, type = "class")
  # Align the prediction factor with the reference labels: factor(pre) alone
  # drops a level when only one class is predicted, which makes
  # confusionMatrix() fail on mismatched levels.
  pre <- factor(pre, levels = levels(testData$Label))
  confMatrix <- confusionMatrix(pre, testData$Label, positive = " YES ")
  confMatrix
}
#tmodel = tune.nnet(Label~SSVC+ViewCount+SumAnswersScores+HasAcceptedAnswer,
# trControl = fitControl, tuneGrid = nnetGrid, data = trainData, maxit = 4000, size = 1:100)# if you want to run this, at first set trainData
#****************************Decision Tree***************
library(gdata)
library(rpart)
library(rpart.plot)
library(caret)
library(RColorBrewer)
library(rattle)
# Fit a classification tree (rpart) on the labelled question data, plot it,
# and report a confusion matrix on a held-out sample.
#
# @param first First candidate row index for the test sample.
# @param last Last candidate row index for the test sample.
# @param testCount Number of rows drawn (without replacement) for testing.
# @return A caret::confusionMatrix of predictions vs. true labels.
doDTreeLearning <- function(first, last, testCount) {
  set.seed(7)  # reproducible train/test split
  questions <- read.csv('so-data.csv')
  questions$Label <- factor(ifelse(questions$Label == 1, " YES ", " NO "))
  heldOut <- sample(first:last, testCount)
  testData <- questions[heldOut, ]
  trainData <- questions[-heldOut, ]
  # cp chosen from the plotcp() call commented below; xval = 2460 folds.
  treeCtrl <- rpart.control(cp = 0.012, xval = 2460)
  modFit <- rpart(
    Label ~ AnswerCount + SSVC + ViewCount + SumAnswersScores + HasAcceptedAnswer,
    data = trainData, control = treeCtrl, method = "class"
  )
  rpart.plot(modFit)
  predicted <- predict(modFit, newdata = testData, type = "class")
  confusionMatrix(predicted, testData$Label, positive = " YES ")
}
#plotcp(modFit)# if you want to run this, at first set modFit
#***********************Sample Call*******************
# Example run: hold out 615 test rows sampled from rows 1..3075 of
# so-data.csv and print the decision-tree confusion matrix.
doDTreeLearning(1, 3075, 615)
#***********************Feature Selection****************
# Standalone script: ranks predictors with caret's recursive feature
# elimination (rfe), genetic-algorithm (gafs) and simulated-annealing (safs)
# wrappers, each around an rpart model, using the training split only.
set.seed(7)  # reproducible split, same seed as the learners above
allData <- read.csv('so-data.csv')
# Syntactically valid factor levels are used here (not " YES "/" NO ")
# because caret's selection routines derive column names from the levels.
allData$Label <- ifelse(allData$Label==1, "needing", "notneeding")
allData$Label <- factor(allData$Label)
# FIX(review): first/last/testCount were used below but never defined at top
# level, so this script errored immediately.  Use the same split as the
# sample call above.
first <- 1
last <- 3075
testCount <- 615
test <- sample(first:last, testCount)
testData <- allData[test, ]
trainData <- allData[-test, ]
library(Hmisc)
library(caret)
# Candidate predictor columns, listed once instead of three times.
featureCols <- c("AnswerCount", "SSVC", "Score", "SumAnswersScores",
                 "ViewCount", "AvgCommentCount", "AvgAnswerersReputation",
                 "AskerReputation", "FavoriteCount", "CommentCount",
                 "HasAcceptedAnswer")
# as.vector() on the Label factor yields a character vector, as in the
# original code.
labels <- as.vector(trainData[, c("Label")])
ctrl <- rfeControl(functions = lmFuncs,
                   method = "repeatedcv",
                   repeats = 1,
                   verbose = FALSE)
lmProfile <- rfe(trainData[, featureCols],
                 labels,
                 sizes = 5:7,
                 metric = "Accuracy",
                 rfeControl = ctrl, method = "rpart")
# FIX(review): both search results were assigned to `obj`, so the genetic-
# algorithm result was overwritten by the simulated-annealing one; they are
# now kept in separate variables.
ctrl <- gafsControl(functions = caretGA)
gaObj <- gafs(trainData[, featureCols],
              labels,
              gafsControl = ctrl, method = "rpart")
ctrl <- safsControl(functions = caretSA)
saObj <- safs(trainData[, featureCols],
              labels,
              safsControl = ctrl, method = "rpart")