# This is SPLUS code for fitting a nonlinear
# model to the mixture data from Myers (1994)
# Technometrics, 343-356. Maximum likelihood
# estimation is used.
# This file stored as myers.ssc
# First access the MASS library
# Attach the MASS library (needed below for deriv3()).
# first= is the S-PLUS argument placing MASS at the front of the search
# path; R users would write plain library(MASS) instead.
library(MASS, first=TRUE)
# Enter the data stored in the file
# myers.dat
# There are five numbers on each line in the
# following order:
# mixture: Identification code
# x1: percent nitroglycerine (NG)
# x2: percent triacetin (TA)
# x3: percent 2-nitrodiphenylamine (2N)
# y: specific gravity of the mixture
# Read the five whitespace-separated columns described above into a
# data frame, attaching the column names by hand.
# NOTE(review): hard-coded Windows path; adjust to the local location of
# myers.dat before running.
myers <- read.table("c:/courses/st511/snew/myers.dat",
col.names=c("mixture","x1","x2","x3","y"))
# Create a scatterplot matrix with smooth
# curves. Unix users should first use motif( )
# to open a graphics window
# Panel function for pairs(): plot the points of one panel and overlay a
# loess smooth.
# x, y:  numeric vectors for the panel.
# span:  loess smoothing span (fraction of points used). Defaults to the
#        original hard-coded 0.90; now a parameter so other smoothness
#        levels can be requested, and passed by name rather than
#        positionally for clarity.
points.lines <- function(x, y, span = 0.90) {
  points(x, y)
  lines(loess.smooth(x, y, span = span))
}
# Set plotting parameters: device size in inches, plotting symbol, symbol
# height, character expansion, and line width.
# NOTE(review): din= and mkh= are settable in S-PLUS; in R, par("din") is
# read-only and mkh is unimplemented -- confirm before porting to R.
par(din=c(7,7), pch=18, mkh=.15, cex=1.2, lwd=3)
# Scatterplot matrix of x1, x2, x3, y (column 1 is the mixture ID, so it
# is dropped), with the smooth-curve panel function defined above.
pairs(myers[ ,-1], panel=points.lines)
# Use maximum likelihood estimation to model
# specific gravity of the mixture as the
# inverse of a linear combination of the
# percentages of NG, TA, and 2N plus a
# random error with a normal distribution.
# Initial values for coefficients are obtained
# from a regression of the inverse of the
# specific gravity of the mixture on a
# linear combination of the percentages of
# NG, TA, and 2N.
# Linearize the model: 1/y = b1*x1 + b2*x2 + b3*x3, so regress d = 1/y on
# the mixture percentages (no intercept) to get starting coefficients.
myers$d <- 1/myers$y
myers.lm <- lm(d ~ x1 + x2 + x3 - 1, data = myers)
# as.matrix() on a named coefficient vector already returns a one-column
# matrix; the original ncol=1 argument was silently swallowed by the
# "..." of as.matrix() and is dropped here.
b <- as.matrix(coef(myers.lm))
b
# Starting value for the error variance: mean squared deviation of y from
# the back-transformed fitted values, rescaled from the (n - 1) divisor
# of var() to the usual (n - p) denominator.
n <- length(myers$y)
ss <- var(myers$y - 1/fitted(myers.lm)) * (n - 1)/(n - length(b))
ss
# Compute maximum likelihood estimates for
# a model where the conditional responses
# are independent and normally distributed
# with homogeneous variances. Use deriv3( )
# to create a function to specify the
# log-likelihood and compute first and
# second partial derivatives. You must
# access the MASS library to use deriv3( ).
# Note that S-PLUS stores the value of
# pi in an object called pi. Furthermore,
# ms( ) is a minimization function so we
# must give it the negative of the log-
# likelihood.
# Negative log-likelihood contribution of a single observation under
#   y ~ Normal(mu, ss)  with  mu = 1/(b1*x1 + b2*x2 + b3*x3):
#   0.5 * (log(2*pi) + log(ss) + (y - mu)^2 / ss).
# deriv3() symbolically differentiates this expression and returns a
# function that evaluates the value together with its gradient and
# Hessian with respect to b1, b2, b3, and ss.
lmyers <- deriv3( ~ 0.50*(log(2*pi)+log(ss)+
((y-1/(b1*x1+b2*x2+b3*x3))^2)/ss),
c("b1", "b2", "b3", "ss"),
function(y, x1, x2, x3, b1, b2, b3, ss) NULL)
# List the contents of this function
lmyers
# We will trace the iterative process that minimizes
# the negative of the log-likelihood with the
# following function that provides the value
# of the negative of the log-likelihood, parameter
# estimates and the gradient.
# Trace callback for ms(): on each iteration print the current objective
# value (info[3]) and parameter estimates on one line, and the matching
# leading entries of the gradient on the next. Returns an empty list
# invisibly, as ms() expects from a trace function.
tr.ms <- function(info, theta, grad, scale, flags, fit.pars) {
  n.par <- length(theta)
  cat(signif(info[3]), ":", signif(theta), "\n",
      ":", signif(grad[1:n.par]), "\n")
  invisible(list())
}
# Now maximize the likelihood
# Minimize the summed negative log-likelihood with the S-PLUS ms()
# optimizer, starting from the linearizing-regression coefficients and
# variance computed above.
# NOTE(review): ms() is S-PLUS-only; nlm()/optim() are the closest R
# analogues -- confirm before porting.
myers.ms <- ms( ~ lmyers(y, x1, x2, x3, b1, b2, b3, ss),
data = myers,
start = c(b1=b[1], b2=b[2], b3=b[3], ss=ss),
trace = tr.ms,
control=list(maxiter=50,tolerance=10^(-10),
rel.tolerance=10^(-10),miniscale=1000))
# Check the gradient
# At a local optimum the gradient components should all be near zero.
myers.ms$gradient
# Check residual plots. The scatter.smooth
# passes a smooth curve through the points
# on the residual plot
# Fitted values: the model is y = 1/(b1*x1 + b2*x2 + b3*x3), so invert
# the linear predictor X %*% b elementwise.
myers.ms$fitted <- 1/(as.matrix(myers[, c("x1", "x2", "x3")]) %*%
    as.vector(myers.ms$parameters[c("b1", "b2", "b3")]))
myers.ms$residuals <- myers$y - myers.ms$fitted
# Residuals against fitted values, with a loess smooth to reveal any
# remaining curvature or changing spread.
scatter.smooth(myers.ms$fitted, myers.ms$residuals,
    span = 1, degree = 1,
    xlab = "Fitted values", ylab = "Residuals", main = "Residuals")
# Normal Q-Q plot of the residuals with a reference line.
qqnorm(myers.ms$residuals)
qqline(myers.ms$residuals)
# Compute confidence intervals for parameters
# using standard large sample normal theory.
# We will have to create our own function.
# The intervals( ) function cannot be
# applied to objects created by ms( ).
# Large-sample normal-theory confidence intervals for an ms() fit.
# obj:   object returned by ms(); its $hessian is symmetrized below,
#        which suggests ms() stores only one triangle of the Hessian of
#        the negative log-likelihood.
# level: confidence level (default 0.95).
# Returns a matrix with one row per parameter and columns b (estimate),
# stderr, low, and high.
conf.ms <- function(obj, level = 0.95) {
  est <- coef(obj)
  # Rebuild the full symmetric Hessian from the stored triangle.
  hess <- obj$hessian + t(obj$hessian) - diag(diag(obj$hessian))
  se <- sqrt(diag(solve(hess)))
  # Two-sided standard-normal critical value for the requested level.
  z <- qnorm(1 - (1 - level)/2)
  cbind(b = est, stderr = se, low = est - z * se, high = est + z * se)
}
# Print estimates, standard errors, and 95% confidence limits.
conf.ms(myers.ms)
# In this case, least squares estimation
# gives essentially the same estimates
# Least squares comparison: fit the same inverse-linear model with nls().
# Starting values come from the same linearizing regression as before.
myers$d <- 1/myers$y
myers.lm <- lm(d ~ x1 + x2 + x3 - 1, data = myers)
# coef() yields a named vector; as.matrix() makes it a one-column matrix
# (the original ncol=1 argument was silently ignored and is dropped).
b <- as.matrix(coef(myers.lm))
myers.nls <- nls(
  formula = y ~ 1/(b1*x1 + b2*x2 + b3*x3),
  data = myers,
  start = c(b1 = b[1], b2 = b[2], b3 = b[3]),
  trace = TRUE)  # TRUE, not T: T is a reassignable alias
summary(myers.nls)
# The first case may be an outlier. Delete
# the first case and refit the model.
# Case 1 may be an outlier: drop it and recompute starting values for
# the coefficients and the variance on the reduced data set.
myers.1 <- myers[-1, ]
myers.1$d <- 1/myers.1$y
myers.lm <- lm(d ~ x1 + x2 + x3 - 1, data = myers.1)
# One-column matrix of starting coefficients (the original ncol=1 was
# silently swallowed by as.matrix()'s "..." and is dropped).
b <- as.matrix(coef(myers.lm))
b
n <- length(myers.1$y)
# Rescale var()'s (n - 1) divisor to the usual (n - p) denominator.
ss <- var(myers.1$y - 1/fitted(myers.lm)) * (n - 1)/(n - length(b))
ss
# Refit by maximum likelihood on the data without case 1 -- same ms()
# call as before, but with a smaller control list.
# NOTE(review): ms() is S-PLUS-only; confirm before porting to R.
myers.ms <- ms( ~ lmyers(y, x1, x2, x3, b1, b2, b3, ss),
data = myers.1,
start = c(b1=b[1], b2=b[2], b3=b[3], ss=ss),
trace = tr.ms,
control=list(maxiter=50,tolerance=10^(-10)))
# Check the gradient
# Components near zero indicate convergence to a local optimum.
myers.ms$gradient
# Check residual plots. The scatter.smooth
# passes a smooth curve through the points
# on the residual plot
# Fitted values for the refit: invert the linear predictor elementwise,
# using the outlier-deleted data set.
myers.ms$fitted <- 1/(as.matrix(myers.1[, c("x1", "x2", "x3")]) %*%
    as.vector(myers.ms$parameters[c("b1", "b2", "b3")]))
myers.ms$residuals <- myers.1$y - myers.ms$fitted
# Residuals against fitted values with a loess smooth.
scatter.smooth(myers.ms$fitted, myers.ms$residuals,
    span = 1, degree = 1,
    xlab = "Fitted values", ylab = "Residuals", main = "Residuals")
# Normal Q-Q plot of the residuals with a reference line.
qqnorm(myers.ms$residuals)
qqline(myers.ms$residuals)
# Compute confidence intervals for parameters
# using standard large sample normal theory.
# We will have to create our own function.
# The intervals( ) function cannot be
# applied to objects created by ms( ).
# NOTE(review): this is an exact duplicate of the conf.ms() defined
# earlier in the file; redefining it is harmless but redundant.
# Large-sample normal-theory confidence intervals for an ms() fit:
# returns a matrix of estimates, standard errors, and interval limits.
conf.ms <- function(obj, level=0.95) {
b <- coef(obj)
# Rebuild the full symmetric Hessian (the formula implies ms() stores
# only one triangle of it).
thessian<-obj$hessian+t(obj$hessian)-diag(diag(obj$hessian))
vcov <- solve(thessian)
stderr <- sqrt(diag(vcov))
# Two-sided standard-normal critical value for the requested level.
low <- b - qnorm(1 - (1 - level)/2)*stderr
high <- b + qnorm(1 - (1 - level)/2)*stderr
a <- cbind(b, stderr, low, high)
a
}
# Print the confidence-interval table for the refit without case 1.
conf.ms(myers.ms)