hs94 <- function(X, y, alpha = 0.05, method = "lms") { # This S-Plus function computes estimates based on the procedure Method # 2 of Hadi and Simonoff (1994) and finds the corresponding set of # outliers. This code is based on code written by Merrill Heddy. # # Input parameters # y is an nx1 response vector # X is an nxp matrix of explantatory variables (no column of 1's assumed in input) # alpha is the significance level for the test in step 3.a. of the Main Algorithm # method: "lms" if the initial clean data set is based on least median of # squares regression, "lts" if it is based on least trimmed squares # # Output parameters # outliers: the set of outlier observations # regline: lm object of rejection-plus-least squares regression estimate # # Reference: A.S. Hadi and J.S. Simonoff, "Improving the Estimation and Outlier # Identification Properties of the Least Median of Squares and Minimum # Volume Ellipsoid Estimators" (1994), Parisankhyan Samikkha, 1, 61-70. # n <- nrow(X) p <- ncol(X) + 1 hlms <- floor(n/2) + floor((p + 1)/2) if(method == "lms") { robres <- lmsreg(X, y)$resid } else { robres <- ltsreg(X, y)$residuals } Ind <- order(abs(robres)) t1 <- rep(0, n) tt <- rep(0, n) d <- rep(0, n) h <- rep(0, n) X <- cbind(1, X) j <- hlms rr <- c(0) while((rr == 0) & (j <= (n - 1))) { # The loop for The Main Algorithm Jnd <- Ind[1:j] Xs <- X[Jnd, ] ys <- y[Jnd] beta <- solve(t(Xs) %*% Xs) %*% t(Xs) %*% ys e <- y - X %*% beta sind <- sqrt(t(e[Jnd]) %*% e[Jnd]/(length(Jnd) - p)) iXs <- solve(t(Xs) %*% Xs) for(i in (j + 1):n) { h[Ind[i]] <- X[Ind[i], ] %*% iXs %*% X[Ind[i], ] t1[Ind[i]] <- e[Ind[i]]/sqrt(1 + h[Ind[i]]) } for(i in 1:j) { h[Ind[i]] <- X[Ind[i], ] %*% iXs %*% X[Ind[i], ] t1[Ind[i]] <- e[Ind[i]]/sqrt(1 - h[Ind[i]]) } tt[Ind] <- t1[Ind]/sind Ind <- order(abs(tt)) u <- sort(abs(tt)) if(u[j + 1] > qt(1 - alpha/(2 * (j + 1)), j - p)) { rr <- 1 } else { j <- j + 1 } } # End of the loop for The Main Algorithm step K <- (1:n)[abs(tt) > qt(1 - alpha/(2 * (j + 1)), j - p)] y[K] <- NA X <- X[, 2:ncol(X)] out <- lm(y ~ X, na.action = na.omit) return(list(outliers = K, regline = out)) }