forked from faridcher/ml-course
-
Notifications
You must be signed in to change notification settings - Fork 0
/
naive_bayes_old.R
59 lines (31 loc) · 924 Bytes
/
naive_bayes_old.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Libraries
library(naivebayes)
library(dplyr)
library(ggplot2)
library(psych)
# Data
# Prompt interactively for a CSV file; its first row supplies the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
data <- read.csv(file.choose(), header = TRUE)
# Study data structure
str(data)
# Cross tabulation: counts of rows for every admit x rank combination
xtabs(~ admit + rank, data = data)
# Convert the admit and rank values to factors so they are treated as
# categorical rather than numeric
data$rank <- as.factor(data$rank)
data$admit <- as.factor(data$admit)
# Visualization
# Pairs plot of every column except the first
# NOTE(review): presumably the first column is admit -- confirm against the CSV
pairs.panels(data[-1])
# Data Partition
# Set a random seed so the split below is reproducible
set.seed(1234)
# Assign each row of the available data to one of two sets (training / testing)
# 2 -> labels are drawn from 1:2
# nrow(data) -> one label is drawn per row of data
# replace = TRUE -> labels are drawn with replacement (needed, since there are
#   more rows than labels); TRUE is spelled out because T can be reassigned
# prob = c(0.8, 0.2) -> each row gets label 1 (training) with probability 0.8
#   and label 2 (testing) with probability 0.2, so the 80/20 split is
#   approximate rather than exact
ind <- sample(2, nrow(data), replace = TRUE, prob = c(0.8, 0.2))
train <- data[ind == 1, ]
test <- data[ind == 2, ]
# Naive Bayes Model
# Train model using training data set: admit is the response and every other
# column of train is used as a predictor (admit ~ .)
model <- naive_bayes(admit ~ ., data = train)
# Display the fitted model (auto-printed when evaluated at top level)
model