If you are a citizen of a European Union member nation, you may not use this service unless you are at least 16 years old.
You already know Dokkio is an AI-powered assistant to organize & manage your digital files & messages. Very soon, Dokkio will support Outlook as well as OneDrive. Check it out today!

cs229_homework_2_3

Page history last edited by Stephen O'Connell 13 years, 11 months ago

# R Solution to Homework 2, problem 3

# Author: Stephen OConnell

###############################################################################

## rm(list=ls())   (left disabled: clearing the workspace is a side effect
##                  the script does not need)

## R PACKAGE WITH NAIVE BAYES
## library() stops with an error when e1071 is not installed; require()
## would only return FALSE with a warning and let the script fail later
## at the naiveBayes() call.

library(e1071)

## SETUP THE WORKING DIRECTORY
## The data files further down are opened by bare file name, so they are
## resolved relative to this directory. Adjust the path for your machine.

setwd("/Users/oconste/Downloads/MachineLearning/materials/HW_2_Data")

getwd()

## THE naiveBayes FUNCTION NEEDS THE DATA IN A SPECIFIC FORMAT

## THE naiveBayes FUNCTION NEEDS THE DATA IN A SPECIFIC FORMAT
##
## Read a two-line export file and turn it into a data frame.
##
## Line 1 of `input` is parsed as the label vector and line 2 as the
## feature values; on both lines every '[', ']' and ' ' is stripped and the
## remainder is split on ','.
##
## Arguments:
##   input         path (or connection) handed to readLines().
##   num_features  number of feature columns per sample (default 1448).
##
## Returns a data frame with one row per label: a factor column `spam_ind`
## followed by `num_features` numeric columns named V1, V2, ...
##
## Stops with an error when the file has fewer than two lines or when the
## number of feature values is not length(labels) * num_features (matrix()
## would otherwise recycle or truncate the values and build a wrong frame).
get_frame <- function(input, num_features=1448) {

  ## Strip '[', ']' and ' ', split on ',', and convert to numeric.
  parse_numeric_line <- function(line) {
    cleaned <- gsub("\\[|\\]| ", "", line)
    as.numeric(strsplit(cleaned, ",")[[1]])
  }

  in_line <- readLines(input)
  if (length(in_line) < 2) {
    stop("expected at least 2 lines in input, found ", length(in_line),
         call. = FALSE)
  }

  ## First line: one label per sample.
  label_codes <- parse_numeric_line(in_line[1])
  num_samp <- length(label_codes)

  ## Second line: all feature values, flattened sample by sample.
  feature_values <- parse_numeric_line(in_line[2])
  expected_len <- num_samp * num_features
  if (length(feature_values) != expected_len) {
    stop("expected ", expected_len, " feature values (", num_samp,
         " samples x ", num_features, " features), found ",
         length(feature_values), call. = FALSE)
  }

  ## NOTE(review): 0 is labelled 'spam' and 1 'non_spam' -- confirm this
  ## matches the encoding written by the program that produced the file.
  ## Any other code keeps its numeric value as text.
  labels <- as.character(label_codes)
  labels[label_codes == 0] <- "spam"
  labels[label_codes == 1] <- "non_spam"

  ## byrow = TRUE: consecutive values belong to the same sample (row).
  features <- as.data.frame(
    matrix(feature_values, nrow = num_samp, ncol = num_features, byrow = TRUE)
  )

  cbind(data.frame(spam_ind = as.factor(labels)), features)
}

## GET THE TEST DATA ( OUTPUT OF MIKE B. python program)

test_spam <- get_frame("MATRIX.TEST.csv")

## GET THE TRAINING DATA SET ( OUTPUT OF MIKE B. python program)

spam_train <- get_frame("MATRIX.TRAIN.1400.csv")

## FIT NAIVE BAYES WITH spam_ind AS THE RESPONSE AND ALL OTHER COLUMNS AS
## PREDICTORS

model <- naiveBayes(spam_ind ~ ., data = spam_train)

## PREDICT ON THE TEST FEATURES (COLUMN 1 IS THE LABEL, SO IT IS DROPPED)

pred <- predict(model, test_spam[, -1])

### CHECK THE RESULTS
## Cross-tabulation of predicted label (rows) against actual label (columns).

table(pred, test_spam$spam_ind)

answer <- pred

solution <- test_spam$spam_ind

## COUNT THE MISCLASSIFIED TEST SAMPLES
## Vectorized replacement for the original element-by-element loop, whose
## `for` was never closed and so did not parse. Comparing as character
## avoids depending on both factors carrying identical level sets.

error <- sum(as.character(answer) != as.character(solution))

## PRINT THE ERROR RATE

error / length(solution)

********************************* RESULTS WITH 1400 TRAINING ****************************

> ### CHECK THE RESULTS

> table(pred, test_spam$spam_ind)

pred non_spam spam

non_spam 353 50

spam 47 350

> ## PRINT THE ERROR RATE

> error/length(test_spam$spam_ind)

[1] 0.12125

Comments (0)

You don't have permission to comment on this page.

Loading…

This is your Sidebar, which you can edit like any other page in your workspace.

This Sidebar appears everywhere on your workspace. Add to it whatever you like -- a navigation section, a link to your favorite web sites, or anything else.

New games for PC

Download or play onlineСкачать Мини Игры Нарды Играть Backgammon Online Super Mario Bros Game

Loading…

cs229_homework_2_3

cs229_homework_2_3

Page Tools

Insert links

Comments (0)

Navigator

SideBar

New games for PC

Recent Activity