This data set contains eight chemical measurements on different specimens of olive oil produced in various regions of Italy (northern Apulia, southern Apulia, Calabria, Sicily, inland Sardinia, coastal Sardinia, eastern Liguria, western Liguria and Umbria), which can be further grouped into three macro-areas: Centre-North, South and Sardinia.
The dataset contains 572 rows, each corresponding to a different specimen of olive oil, and 10 columns. The first and the second column correspond to the macro-area and the region of origin of the olive oils respectively; here, the term 'region' refers to a geographical area and only partially to administrative borders. Columns 3-10 represent the following eight chemical measurements on the acid components for the oil specimens: palmitic, palmitoleic, stearic, oleic, linoleic, linolenic, arachidic, eicosenoic.
library(FlexDir)
# Load the oliveoil data set (presumably shipped with the FlexDir package
# loaded just above -- confirm).
data(oliveoil)
# Put the columns of oliveoil on the search path so that later statements can
# use bare column names such as macro.area. This is undone by the
# detach(oliveoil) call at the end of the script.
attach(oliveoil)
# Print the structure of the data frame: dimensions, column names and types.
str(oliveoil)
## 'data.frame': 572 obs. of 10 variables:
## $ macro.area : Factor w/ 3 levels "South","Sardinia",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ region : Factor w/ 9 levels "Apulia.north",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ palmitic : int 1075 1088 911 966 1051 911 922 1100 1082 1037 ...
## $ palmitoleic: int 75 73 54 57 67 49 66 61 60 55 ...
## $ stearic : int 226 224 246 240 259 268 264 235 239 213 ...
## $ oleic : int 7823 7709 8113 7952 7771 7924 7990 7728 7745 7944 ...
## $ linoleic : int 672 781 549 619 672 678 618 734 709 633 ...
## $ linolenic : int 36 31 31 50 50 51 49 39 46 26 ...
## $ arachidic : int 60 61 63 78 80 70 56 64 83 52 ...
## $ eicosenoic : int 29 29 29 35 46 44 29 35 33 30 ...
# Count the specimens in each macro-area; evaluating inside with() resolves
# the column from the data frame itself and keeps the "macro.area" header.
with(oliveoil, table(macro.area))
## macro.area
## South Sardinia Centre.North
## 323 98 151
Let's apply linear discriminant analysis (LDA) to classify the specimens into the 3 macro-areas using the 8 chemical measurements.
# Linear discriminant analysis of the three macro-areas on the eight acid
# measurements, followed by a plot of the specimens on the first two linear
# discriminants with 50%-posterior contours overlaid.

# One-letter class labels used as plotting symbols. The relabelling is done by
# level name rather than by position, so it does not depend on the order in
# which the factor levels happen to be stored.
area <- oliveoil$macro.area
levels(area) <- list(s = "South", a = "Sardinia", c = "Centre.North")

# One colour per specimen: red = South, blue = Sardinia,
# darkgreen = Centre.North.
cols <- unname(c(s = "red", a = "blue", c = "darkgreen")[as.character(area)])

library(MASS) # lda() and eqscplot() are in the MASS library

# Columns 3-10 hold the eight acid measurements used as predictors.
acids <- as.matrix(oliveoil[, 3:10])
fit.lda <- lda(acids, area)

# Scores of every specimen on the first two linear discriminants.
ld.hat <- predict(fit.lda, acids, dimen = 2)$x

# Equal-scale scatter plot; each specimen is drawn as its class letter.
eqscplot(ld.hat, type = "n", xlab = "LD1", ylab = "LD2", tol = 0.2, las = 1)
text(ld.hat, labels = area, col = cols)

# Refit LDA on the two discriminant scores so that posterior probabilities can
# be evaluated on a regular grid in the plotted (LD1, LD2) plane.
fit.lda2 <- lda(ld.hat, area)
x1 <- seq(-8, 6, 0.2)
x2 <- seq(-6, 6, 0.2)

# Grid points as rows, with x1 varying fastest; this matches the column-major
# fill of the length(x1) x length(x2) matrices handed to contour() below.
Xcon <- cbind(rep(x1, times = length(x2)), rep(x2, each = length(x1)))

# Use the full component name: `$post` only works through partial matching.
post <- predict(fit.lda2, Xcon)$posterior

# Each sum below adds the posteriors of two classes, i.e. it equals one minus
# the posterior of the class left out. Its 0.5 contour is therefore the curve
# on which the left-out class has posterior probability 0.5 (that class versus
# the other two combined), not a boundary between the two classes summed.
post1 <- post[, c("s", "a")] %*% c(1, 1) # 1 - P(Centre.North)
post2 <- post[, c("s", "c")] %*% c(1, 1) # 1 - P(Sardinia)
post3 <- post[, c("a", "c")] %*% c(1, 1) # 1 - P(South)

# Add the 0.5 contour of a grid of probabilities `p` to the current plot.
draw.boundary <- function(p) {
  contour(x1, x2, matrix(p, length(x1), length(x2)),
    levels = 0.5, add = TRUE, lty = 2, method = "simple", lwd = 2,
    drawlabels = FALSE
  )
}

# Centre.North versus the other two macro-areas
draw.boundary(post1)
# Sardinia versus the other two macro-areas
draw.boundary(post2)
# South versus the other two macro-areas
draw.boundary(post3)

# Undo the attach(oliveoil) made earlier in the script.
detach(oliveoil)