hsmeth1 <- function(X, y, initial="hat", alpha = 0.05) { # This S-Plus function computes estimates based on the procedure Method # 1 of Hadi and Simonoff (1993) and finds the corresponding set of # outliers. This code was written by Merrill Heddy. # # This code requires the Matrix library of S-Plus; before calling it, # enter the command # library(Matrix) # If you do not have access to the Matrix library, you will need to make a few # changes. First, create the following determinant function: # det <- function(M) Re(prod(eigen(M, only.values = T)$values)) # You will need to change one line in the hsmeth1 function: # ddet <- det(t(Xs) %*% Xs, logarithm = F)$modulus # to # ddet <- det(t(Xs) %*% Xs) # # Input parameters # y is an nx1 response vector # X is an nxp matrix of explantatory variables (no column of 1's assumed in input) # initial determines the diagnostic used to create the initial subset; choices # are "hat", "cook", or "resid" # alpha is the significance level for the test in step 3.a. of the Main Algorithm # # Output parameters # outliers: the set of outlier observations # regline: lm object of rejection-plus-least squares regression estimate # # Reference: A.S. Hadi and J.S. Simonoff, "Procedures for the Identification of # Multiple Outliers in Linear Models" (1993), Journal of the American # Statistical Association, 88, 1264-1272. # h <- hat(X) X <- cbind(1, X) n <- nrow(X) p <- ncol(X) beta <- solve(t(X) %*% X) %*% t(X) %*% y t1 <- rep(0, length = n) tt <- rep(0, length = n) if (initial=="hat") Ind <- order(h) else { e <- y - X %*% beta if (initial=="resid") Ind <- order(abs(e)) else { d <- (e * e * h)/((1 - h)^2) Ind <- order(d)}} medio <- floor((n + p - 1)/2) j <- p + 1 bdet<-0 while(bdet==0){ Jnd <- Ind[1:j] Xs <- X[Jnd, ] ddet<-det(t(Xs) %*% Xs,logarithm=F)$modulus if (abs(ddet<1.e-13)) j<-(j+1) else bdet<-1} # Beginning of while loop to find initial subset M while(j <= medio) { # beginning of while loop for finding subset M Jnd <- Ind[1:j] Xs <- X[Jnd, ] ys <- y[Jnd] beta <- solve(t(Xs) %*% Xs) %*% t(Xs) %*% ys e <- y - X %*% beta sind <- sqrt(t(e[Jnd]) %*% e[Jnd]/(length(Jnd) - p)) iXs <- solve(t(Xs) %*% Xs) h <- diag(X %*% iXs %*% t(X)) t1[Ind[1:j]] <- e[Ind[1:j]]/sqrt(1 - h[Ind[1:j]]) t1[Ind[(j+1):n]]<- e[Ind[(j + 1):n]]/sqrt(1 + h[Ind[(j + 1):n]]) Ind <- order(abs(t1)) j <- (j + 1) } # end of while loop for finding subset M rr <- c(0) while((rr == 0) & (j <= (n - 1))) { # The loop for The Main Algorithm Jnd <- Ind[1:j] Xs <- X[Jnd, ] ys <- y[Jnd] beta <- solve(t(Xs) %*% Xs) %*% t(Xs) %*% ys e <- y - X %*% beta sind <- sqrt(t(e[Jnd]) %*% e[Jnd]/(length(Jnd) - p)) iXs <- solve(t(Xs) %*% Xs) for(i in (j + 1):n) { h[Ind[i]] <- X[Ind[i], ] %*% iXs %*% X[Ind[i], ] t1[Ind[i]] <- e[Ind[i]]/sqrt(1 + h[Ind[i]]) } for(i in 1:j) { h[Ind[i]] <- X[Ind[i], ] %*% iXs %*% X[Ind[i], ] t1[Ind[i]] <- e[Ind[i]]/sqrt(1 - h[Ind[i]]) } tt[Ind] <- t1[Ind]/sind Ind <- order(abs(tt)) u <- sort(abs(tt)) if(u[j + 1] > qt(1 - alpha/(2 * (j + 1)), j - p)) { rr <- 1 } else { j <- j + 1 } } # End of the loop for The Main Algorithm step K <- (1:n)[abs(tt) > qt(1 - alpha/(2 * (j + 1)), j - p)] y[K] <- NA X <- X[, 2:ncol(X)] out <- lm(y ~ X, na.action = na.omit) return(list(outliers = K, regline = out)) } # Auxiliary function; omit these lines if you do not have access to the Matrix # library, and create your own det() function if(exists("det", mode="function", where=1)) rm(det)# just in case library(Matrix) #-- defines 'det' det.default <- function(x,...) det(as.Matrix(x),...)