Description of Olive Oil Data

This data set contains eight chemical measurements on different specimens of olive oil produced in nine regions of Italy (northern Apulia, southern Apulia, Calabria, Sicily, inland Sardinia, coastal Sardinia, eastern Liguria, western Liguria and Umbria), which are further grouped into three macro-areas: Centre-North, South and Sardinia.

The dataset contains 572 rows, each corresponding to a different specimen of olive oil, and 10 columns. The first and second columns give the macro-area and the region of origin of each olive oil, respectively; here, the term 'region' refers to a geographical area and coincides only partially with administrative borders. Columns 3-10 contain the following eight chemical measurements of the acid components of the oil specimens: palmitic, palmitoleic, stearic, oleic, linoleic, linolenic, arachidic and eicosenoic.

# Load FlexDir (presumably the package that ships the oliveoil data set) and
# bring the data set into the workspace.
library(FlexDir)
data(oliveoil)
# attach() puts the columns of oliveoil on the search path, which is why
# macro.area can be referenced by its bare name below. The script calls
# detach(oliveoil) once the analysis is finished.
attach(oliveoil)
# Inspect the structure: two factors (macro.area, region) followed by eight
# integer columns of acid measurements.
str(oliveoil)
## 'data.frame':    572 obs. of  10 variables:
##  $ macro.area : Factor w/ 3 levels "South","Sardinia",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ region     : Factor w/ 9 levels "Apulia.north",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ palmitic   : int  1075 1088 911 966 1051 911 922 1100 1082 1037 ...
##  $ palmitoleic: int  75 73 54 57 67 49 66 61 60 55 ...
##  $ stearic    : int  226 224 246 240 259 268 264 235 239 213 ...
##  $ oleic      : int  7823 7709 8113 7952 7771 7924 7990 7728 7745 7944 ...
##  $ linoleic   : int  672 781 549 619 672 678 618 734 709 633 ...
##  $ linolenic  : int  36 31 31 50 50 51 49 39 46 26 ...
##  $ arachidic  : int  60 61 63 78 80 70 56 64 83 52 ...
##  $ eicosenoic : int  29 29 29 35 46 44 29 35 33 30 ...
# Number of specimens in each macro-area.
table(macro.area)
## macro.area
##        South     Sardinia Centre.North 
##          323           98          151

Linear Discriminant Analysis

Let's apply linear discriminant analysis (LDA) to classify the olive oils into the 3 macro-areas using the 8 chemical measurements.

# Linear discriminant analysis of the three macro-areas on the eight acid
# measurements, followed by a plot of the specimens on the first two linear
# discriminants with dashed curves where a class posterior equals 0.5.

# Recode the macro-areas as one-letter plotting symbols. The mapping is done
# by level name rather than by position, so it does not depend on the order
# in which the factor levels happen to be stored.
area_codes <- c(South = "s", Sardinia = "a", Centre.North = "c")
stopifnot(all(levels(oliveoil$macro.area) %in% names(area_codes)))
area <- factor(oliveoil$macro.area,
               levels = names(area_codes), labels = unname(area_codes))

# One colour per specimen: red = South, blue = Sardinia,
# darkgreen = Centre.North.
area_cols <- c(s = "red", a = "blue", c = "darkgreen")
cols <- unname(area_cols[as.character(area)])

library(MASS) # provides lda() and eqscplot()

# Fit LDA on the eight chemical measurements (columns 3-10) and project the
# specimens onto the first two linear discriminants.
acids <- as.matrix(oliveoil[, 3:10])
fit.lda <- lda(acids, area)
ld.hat <- predict(fit.lda, acids, dimen = 2)$x

# Scatter plot with equal axis scales; each specimen is drawn as the letter
# of its macro-area in the matching colour.
eqscplot(ld.hat, type = "n", xlab = "LD1", ylab = "LD2", tol = 0.2, las = 1)
text(ld.hat, labels = area, col = cols)

# Refit LDA in the two-dimensional discriminant space and evaluate the class
# posteriors on a regular grid over the plotted area.
fit.lda2 <- lda(ld.hat, area)
x1 <- seq(-8, 6, 0.2)
x2 <- seq(-6, 6, 0.2)
# Grid points as rows, with x1 varying fastest; this matches the
# column-major fill of the length(x1) x length(x2) matrices built below.
Xcon <- cbind(rep(x1, times = length(x2)), rep(x2, each = length(x1)))
post <- predict(fit.lda2, Xcon)$posterior

# Combined posterior of each pair of classes. The three posteriors sum to
# one, so a pair's combined posterior equals 0.5 exactly where the remaining
# class has posterior 0.5. Each 0.5 contour therefore separates the remaining
# class from the other two taken together (not the two classes in the pair
# from each other).
post1 <- post[, c("s", "a")] %*% c(1, 1) # 0.5 contour: Centre.North vs rest
post2 <- post[, c("s", "c")] %*% c(1, 1) # 0.5 contour: Sardinia vs rest
post3 <- post[, c("a", "c")] %*% c(1, 1) # 0.5 contour: South vs rest

# Overlay the three 0.5 contours as thick dashed lines on the existing plot.
for (pair_post in list(post1, post2, post3)) {
  contour(x1, x2, matrix(pair_post, length(x1), length(x2)),
          levels = 0.5, add = TRUE, lty = 2, method = "simple", lwd = 2,
          drawlabels = FALSE)
}

# oliveoil was attach()ed earlier in the script; take it off the search path
# now that the analysis is complete.
detach(oliveoil)