In [ ]:
#  Module: Anomaly detection
#  Version 1.1
#  Topic : Statistical approach
#  Example source:
#####################################################################
#  For support or questions, contact Sri Krishnamurthy at
#  sri@quantuniversity.com
#  Copyright 2016 QuantUniversity LLC.
#####################################################################

### R packages
#install.packages("outliers")
#install.packages("ggplot2")
library(outliers)
library(ggplot2)
In [ ]:
### Chisq test
# Fix the RNG seed so the simulated sample below is reproducible.
set.seed(123)
# Ten draws from a normal distribution with mean 5 and standard deviation 1.
data <- rnorm(n = 10, mean = 5, sd = 1)
# Iteratively flag outliers in a numeric vector using the chi-squared
# outlier test (outliers::chisq.out.test).
#
# The test is run on the data; while it reports p < 0.05 the value it names
# is recorded as an outlier, removed, and the test is repeated on what is
# left.
#
# Args:
#   x: numeric vector of observations.
#
# Returns:
#   A data.frame with columns `data` (the values of `x`) and `Outlier`
#   (TRUE for every element equal to a flagged value). If fewer than two
#   non-missing values are available nothing is flagged.
chisq.flag <- function(x) {
  alpha <- 0.05
  outliers <- NULL
  test <- x
  # With fewer than two non-missing points the variance is undefined and the
  # test cannot produce a usable p-value.
  while (sum(!is.na(test)) >= 2) {
    chisq.result <- chisq.out.test(test)
    pv <- chisq.result$p.value
    # An NA/NaN p-value (e.g. zero variance) cannot be compared to alpha.
    if (is.na(pv) || pv >= alpha) {
      break
    }
    # The tested value is only exposed as text inside `alternative`
    # (third space-separated token), so it may have lost precision when it
    # was formatted.
    reported <- suppressWarnings(
      as.numeric(strsplit(chisq.result$alternative, " ")[[1]][3])
    )
    if (is.na(reported)) {
      break
    }
    # Snap the parsed number back to the nearest remaining observation so the
    # value we record is exactly an element of `x`. Comparing the parsed text
    # directly can match nothing, in which case nothing is removed and the
    # loop would never terminate.
    nearest <- which.min(abs(test - reported))
    if (length(nearest) == 0) {
      break
    }
    outliers <- c(outliers, test[nearest])
    test <- x[!x %in% outliers]
  }
  data.frame(data = x, Outlier = (x %in% outliers))
}
# Apply the chi-squared flagging to the simulated sample and display the result.
chisq.flag(data)
In [ ]:
### Grubbs' test
# Example sample of 17 observations used for the Grubbs' test demonstration.
data <- c(
  151, 131, 100, 95, 87, 84, 82, 75, 73,
  72, 76, 72, 67, 64, 62, 61, 56
)
# Iteratively flag outliers in a numeric vector using Grubbs' test for a
# single outlier (outliers::grubbs.test with type = 10).
#
# The test is run on the data; while it reports p < 0.05 the value it names
# is recorded as an outlier, removed, and the test is repeated on what is
# left.
#
# Args:
#   x: numeric vector of observations.
#
# Returns:
#   A data.frame with columns `X` (the values of `x`) and `Outlier`
#   (TRUE for every element equal to a flagged value). If fewer than three
#   non-missing values are available nothing is flagged.
grubbs.flag <- function(x) {
  alpha <- 0.05
  outliers <- NULL
  test <- x
  # Stop once fewer than three non-missing points remain; the statistic is
  # not informative on smaller samples.
  while (sum(!is.na(test)) >= 3) {
    # type = 10 (single-outlier test) is spelled out on every call so the
    # first and the repeated tests are visibly the same test.
    grubbs.result <- grubbs.test(test, type = 10)
    pv <- grubbs.result$p.value
    # An NA/NaN p-value (e.g. zero variance) cannot be compared to alpha.
    if (is.na(pv) || pv >= alpha) {
      break
    }
    # The tested value is only exposed as text inside `alternative`
    # (third space-separated token), so it may have lost precision when it
    # was formatted.
    reported <- suppressWarnings(
      as.numeric(strsplit(grubbs.result$alternative, " ")[[1]][3])
    )
    if (is.na(reported)) {
      break
    }
    # Snap the parsed number back to the nearest remaining observation so the
    # value we record is exactly an element of `x`. Comparing the parsed text
    # directly can match nothing, in which case nothing is removed and the
    # loop would never terminate.
    nearest <- which.min(abs(test - reported))
    if (length(nearest) == 0) {
      break
    }
    outliers <- c(outliers, test[nearest])
    test <- x[!x %in% outliers]
  }
  data.frame(X = x, Outlier = (x %in% outliers))
}

# Display the flagged observations for the example sample.
grubbs.flag(data)
# Histogram of the sample, coloured by outlier status. The x aesthetic must
# name the `X` column of the data frame returned by grubbs.flag(); mapping it
# to `data` would silently pick up the global vector instead of a column of
# the plotted data frame.
ggplot(grubbs.flag(data), aes(x = X, color = Outlier, fill = Outlier)) +
  geom_histogram()
In [ ]:
### Scores
# Wrap the sample in a one-column data frame (the column is named `data`)
# and display it.
x <- data.frame(data)
x
In [ ]:
# z-score based screen with prob = 0.95 (beyond the 95th percentile).
# The result is stored under the same name as the scores() function; later
# calls of the form scores(...) still resolve to the function because R
# skips non-function objects when looking up a name in call position.
scores <- scores(x, type = "z", prob = 0.95)
scores
In [ ]:
# Same z-score based screen, this time with prob = 0.5.
scores2 <- scores(x, type = "z", prob = 0.5)
scores2
In [ ]:
# IQR-based scores; the same result is kept under both `score.iqr` and
# `scores3`.
score.iqr <- scores(x, type = "iqr")
scores3 <- score.iqr
scores3
In [ ]: