# SnowballC provides wordStem(), which readBookFromFile() needs only when
# stem = TRUE. Install it if it is missing and attach it when available, but
# do not abort sourcing this file when it cannot be installed (e.g. offline):
# reading a book without stemming does not depend on it.
if (!requireNamespace("SnowballC", quietly = TRUE)) {
  tryCatch(
    install.packages("SnowballC"),
    error = function(e) {
      warning(
        "Could not install 'SnowballC': ", conditionMessage(e),
        call. = FALSE
      )
    }
  )
}
if (requireNamespace("SnowballC", quietly = TRUE)) {
  library(SnowballC)
}

#' Read a text file and return its words as a character vector
#'
#' The file is read line by line, lower-cased, and split on runs of
#' non-word characters (regex `\\W+`). Empty lines and empty tokens are
#' discarded.
#'
#' @param path Path of the text file, passed to `readLines()`.
#' @param range Optional index vector selecting which lines of the file to
#'   use (e.g. `1:100`). `NULL` (the default) uses every line. Indices past
#'   the end of the file are ignored.
#' @param stem If `TRUE`, each word is stemmed with
#'   `SnowballC::wordStem(language = "english")`.
#' @return A character vector of words in file order; `character(0)` when
#'   the selection contains no words.
readBookFromFile <- function(path, range = NULL, stem = FALSE) {
  lines <- readLines(path)

  if (!is.null(range)) {
    lines <- lines[range]
    # Indexing beyond the last line yields NA entries; drop them so they do
    # not survive the `!= ""` filters below and show up as NA "words".
    lines <- lines[!is.na(lines)]
  }

  lines <- tolower(lines)
  lines <- lines[lines != ""]

  words <- unlist(strsplit(lines, "\\W+"), use.names = FALSE)
  # unlist() of an empty list is NULL; keep the return type stable.
  if (is.null(words)) {
    words <- character(0)
  }
  # A line that starts with a non-word character produces a leading "" token.
  words <- words[words != ""]

  if (stem && length(words) > 0) {
    if (!requireNamespace("SnowballC", quietly = TRUE)) {
      stop(
        "Package 'SnowballC' is required when stem = TRUE.",
        call. = FALSE
      )
    }
    words <- SnowballC::wordStem(words, language = "english")
  }

  words
}

# Example usage:
# md <- readBookFromFile('/moby-dick.txt', stem = TRUE)
# md <- readBookFromFile('/moby-dick.txt', range = 1:100)
# ts <- readBookFromFile('/tom-sawyer.txt', range = 1000:2000, stem = TRUE)