### Nonlinear Least Squares Fitting of Selection Curves, including the Peak-Logistic
### J.A.Smith & M.D. Taylor 2014 (The University of NSW)
### Version 1.0, 08.01.2014; made using R console v3.0.2
### Visit www.famer.unsw.edu.au/software.html for updates

##Housekeeping
rm(list=ls(all=TRUE))
graphics.off()
setwd("C:/R_scripts/PeakLogistic") #for example
#install.packages("MASS")
library(MASS)

##LOAD data
mydata <- read.table("Example_Data.csv",header=T,sep=",") #2 columns with headers; example data is mulloway (see Smith & Taylor 2014)
x <- mydata[,1] #Size classes
y_raw <- mydata[,2] #Relative number (proportion, etc.) of fish caught in each size class
y <- (y_raw/sum(y_raw))/max((y_raw/sum(y_raw))) #Standardised selection (range from 0-1)

##ENTER data for peak-logistic (these are parameter starting values for the algorithm; these must be close to the final values; change these if error during nls())
plot(x,y) #evaluate plot to generate approximate staring values for peak-logistic parameters
dL_1 = 200  #delta length 1 (length range between 1% and 99% increasing catchability 1)
dL_2 = 300   #delta length 2 (length range between 1% and 99% decreasing catchability 2)
L50_1 = 500   #length at 50% selection 1
L50_2 = 700   #length at 50% selection 2
a_1 = 1  #maximum relative selection 1
a_2 = 0.5  #maximum relative selection 2
upperlim <- c(500, 600, 600, 900, 2, 2) #Upper bounds for parameters - use these to avoid unrealistic curve shapes
lowerlim <- c(100, 100, 300, 600, 0.3, 0.3) #Lower bounds for parameters - use these to avoid unrealistic curve shapes

##PLOT best guess for peak-logistic; use to generate better parameter starting values
yy <- a_1/(1+exp(-(log(99^2)/dL_1)*(x-L50_1))) - a_2/(1+exp(-(log(99^2)/dL_2)*(x-L50_2)))
lines(x,yy)

##FIT 5-parameter peak-logistic (a_1 = 1)
nlfit1 <- nls(y ~ 1/(1+exp(-(log(99^2)/dL_1)*(x-L50_1))) - a_2/(1+exp(-(log(99^2)/dL_2)*(x-L50_2))),
            data = mydata, start = list (dL_1=dL_1,dL_2=dL_2,L50_1=L50_1,L50_2=L50_2,a_2=1),
              algorithm = "port", lower = lowerlim, upper = upperlim)
summary(nlfit1) #Prints the parameter estimates
#summary(nlfit1,correlation=TRUE) #use to evaluate correlations between parameters
n <- nrow(mydata)
P1 <- coef(nlfit1)
SSR1 = sum(residuals(nlfit1)^2); SSR1 #Sum of the squared residuals
MSE1 = SSR1/(n-5); MSE1 #The mean square error - use this to compare different curves
Rsq1 = 1-(sum(residuals(nlfit1)^2)/sum((y-mean(y))^2)); Rsq1 #R-squared value; be aware of the limits of Rsq for non-linear models (Kvalseth 1985, "Cautionary note about R2". Amer. Statist. 39)
if ((0.5*P1[1]+P1[3]) > (0.5*P1[2]+P1[4])) {
  print("WARNING - logistic curves overlapping unrealistically: change upper and lower limits to constrain algorithm")
} #a rule of thumb to avoid unrealistic solutions: L50_1+0.5*dL_1 < L50_2+0.5*dL_2

##FIT 6-parameter peak-logistic (a_1 unfixed)
nlfit2 <- nls(y ~ a_1/(1+exp(-(log(99^2)/dL_1)*(x-L50_1))) - a_2/(1+exp(-(log(99^2)/dL_2)*(x-L50_2))),
             data = mydata,
             start = list (dL_1=dL_1,dL_2=dL_2,L50_1=L50_1,L50_2=L50_2,a_1=a_1,a_2=a_2),
              algorithm = "port", lower = lowerlim, upper = upperlim)
summary(nlfit2) #Prints the parameter estimates
P2 <- coef(nlfit2)
SSR2 = sum(residuals(nlfit2)^2); SSR2 #Sum of the squared residuals
MSE2 = SSR2/(n-6); MSE2 #The mean square error - use this to compare different curves
Rsq2 = 1-(sum(residuals(nlfit2)^2)/sum((y-mean(y))^2)); Rsq2
if ((0.5*P2[1]+P2[3]) > (0.5*P2[2]+P2[4])) {
  print("WARNING - logistic curves overlapping unrealistically: change upper and lower limits to constrain algorithm")
} #a rule of thumb to avoid unrealistic solutions: L50_1+0.5*dL_1 < L50_2+0.5*dL_2

##PLOT the two peak-logistic functions
plot(x,y)
names <- c("peaklogistic 5par","peaklogistic 6par","lognormal","assymetric normal","binormal")
legend(min(x),max(y),names,lty=c(1,1,1,1,1),lwd=c(2,2,2,2,2),col=c("black","blue","green","red","purple"),cex=0.8)
xx <- seq(min(x),max(x),by=((max(x)-min(x))/100))
y1 <- 1/(1+exp(-(log(99^2)/P1[1])*(xx-P1[3]))) - P1[5]/(1+exp(-(log(99^2)/P1[2])*(xx-P1[4])))
lines(xx,y1) #5 parameter peak logistic
y2 <- P2[5]/(1+exp(-(log(99^2)/P2[1])*(xx-P2[3]))) - P2[6]/(1+exp(-(log(99^2)/P2[2])*(xx-P2[4])))
lines(xx,y2,col="blue") #6 parameter peak logistic

##FIT log-normal for comparison
xl <- fitdistr(x, "lognormal") #estimate starting values for nls
mu1 <- as.numeric(xl$estimate[1])
sigma1 <- as.numeric(xl$estimate[2])
sf <- 100; #a scaling factor used to normalise the lognormal y axis
nlfit3 <- nls(y ~ (sf/(x*(sqrt(2*pi*(sig1^2)))))*(exp(-1*((log(x)-mu1)^2)/(2*sig1^2))), 
              data = mydata, start = list (mu1=mu1,sig1=sigma1,sf=sf), algorithm = "port")
summary(nlfit3)
P3 <- coef(nlfit3)
SSR3 = sum(residuals(nlfit3)^2); SSR3 #Sum of the squared residuals
MSE3 = SSR3/(n-3); MSE3 #The mean square error - use this to compare different curves
Rsq3 = 1-(sum(residuals(nlfit3)^2)/sum((y-mean(y))^2)); Rsq3 
y3 <- (P3[3]/(xx*(sqrt(2*pi*(P3[2]^2)))))*(exp(-1*((log(xx)-P3[1])^2)/(2*P3[2]^2)))
lines(xx,y3,col="green")

##FIT two-sided normal for comparison
xn <- fitdistr(x, "normal") #estimate starting values for nls
mu2 <- as.numeric(xn$estimate[1])
sigma2 <- as.numeric(xn$estimate[2])
a_n <- 0 #the non-zero asymptote (between 0-1) for the RHS of the assymetric normal curve
lowerlim2 <- c(mu2/10, sigma2/10, sigma2/10, 0) #lower limits; specify that a_n must be > 0
upperlim2 <- c(mu2*10, sigma2*10, sigma2*10, 1)
nlfit4 <- nls(formula =  y ~ ifelse(test = x <= mu2, ye = exp(-0.5*((x-mu2)/sig2)^2), no = exp(-0.5*((x-mu2)/sig3)^2)*(1-a_n)+a_n),
              data = mydata, start = list (mu2=mu2,sig2=sigma2,sig3=sigma2, a_n=a_n),
              algorithm = "port", lower = lowerlim2, upper = upperlim2)
summary(nlfit4)
P4 <- coef(nlfit4)
SSR4 = sum(residuals(nlfit4)^2); SSR4 #Sum of the squared residuals
MSE4 = SSR4/(n-4); MSE4 #The mean square error - use this to compare different curves
Rsq4 = 1-(sum(residuals(nlfit4)^2)/sum((y-mean(y))^2)); Rsq4
y4 <- ifelse(test = xx <= P4[1], ye = exp(-0.5*((xx-P4[1])/P4[2])^2), no = exp(-0.5*((xx-P4[1])/P4[3])^2)*(1-P4[4])+P4[4])
lines(xx,y4,col="red")
##IF FITTING ERROR, plot best guess for two-sided normal to generate better starting estimates
yyy <- ifelse(test = x <= mu2, ye = exp(-0.5*((x-mu2)/sigma2)^2), no = exp(-0.5*((x-mu2)/sigma2)^2)*(1-a_n)+a_n)
plot(x,y)
lines(x,yyy)

##ENTER data for binormal
plot(x,y) #evaluate plot to generate approximate staring values for binormal parameters
mu3 = 600  #length at maximum catchability in first normal curve
mu4 = 900   #length at maximum catchability in second normal curve
sigma4 = 70   #width of first normal curve (standard deviation)
sigma5 = 70   #width of second normal curve (standard deviation)
delta = 1  #maximum height of first normal curve (if 1, scales to maximum of 1)
w = 0.5  #maximum relative height of second normal curve
lowerlim3 <- c(mu3/10, mu4/10, sigma4/10, sigma5/10, 0, 0) #lower limits; all parameters must be > 0
upperlim3 <- c(mu3*10, mu4*10, sigma4*10, sigma5*10, 2.5, 1)
##Plot best guess for binormal to generate better starting estimates
yyyy <- (1/delta)*(exp(-1*((x-mu3)^2)/(2*sigma4^2))+w*exp(-1*((x-mu4)^2)/(2*sigma5^2)))
plot(x,y)
lines(x,yyyy)
##FIT binormal for comparison
nlfit5 <- nls(formula = y ~ (1/del)*(exp(-1*((x-mu3)^2)/(2*sig4^2))+w*exp(-1*((x-mu4)^2)/(2*sig5^2))),
              data = mydata, start = list (mu3=mu3,mu4=mu4,sig4=sigma4,sig5=sigma5,del=delta,w=w),
              algorithm = "port", lower = lowerlim3, upper = upperlim3)
summary(nlfit5)
P5 <- coef(nlfit5)
SSR5 = sum(residuals(nlfit5)^2); SSR5 #Sum of the squared residuals
MSE5 = SSR5/(n-6); MSE5 #The mean square error - use this to compare different curves
Rsq5 = 1-(sum(residuals(nlfit5)^2)/sum((y-mean(y))^2)); Rsq5
y5 <- (1/P5[5])*(exp(-1*((xx-P5[1])^2)/(2*P5[3]^2))+P5[6]*exp(-1*((xx-P5[2])^2)/(2*P5[4]^2)))
lines(xx,y5,col="purple") 

##KEY RESULTS; lowest MSE is usually best
SSR <- c(SSR1,SSR2,SSR3,SSR4,SSR5)
MSE <- c(MSE1,MSE2,MSE3,MSE4,MSE5)
Results <- cbind(names,SSR,MSE); Results