######################## Exercise sheet 2 ########### Exercise 1: Moran's I and Geary's C library(fields) library(spdep) #read in the data nycov <- read.table("nycovariates.txt",header=TRUE) names(nycov) # average age is the response variable y<-nycov$averageage # locations coords<-cbind(nycov$xcoord,nycov$ycoord) # neighbourhood definition using voronoi tesselation tessel<-tri2nb(coords) n1<-nb2listw(tessel,style="B") # neighbourhood definition using k nearest neighbours # k=10; nearneigh10<-knearneigh(coords,k=10) n2k10<-nb2listw(knn2nb(nearneigh10)) # k=20; nearneigh20<-knearneigh(coords,k=20) n2k20<-nb2listw(knn2nb(nearneigh20)) # moran's I - exact test morantest1<-moran.test(y,n1,randomisation=FALSE) morantest2k10<-moran.test(y,n2k10,randomisation=FALSE) morantest2k20<-moran.test(y,n2k20,randomisation=FALSE) morantest1$p.value morantest2k10$p.value morantest2k20$p.value # moran's I - monte carlo test moranmc1<-moran.mc(y,n1,nsim=999) moranmc2k10<-moran.mc(y,n2k10,nsim=999) moranmc2k20<-moran.mc(y,n2k20,nsim=999) moranmc1$p.value moranmc2k10$p.value moranmc2k20$p.value # geary's C - monte carlo test gearymc1<-geary.mc(y,n1,nsim=999) gearymc2k10<-geary.mc(y,n2k10,nsim=999) gearymc2k20<-geary.mc(y,n2k20,nsim=999) gearymc1$p.value gearymc2k10$p.value gearymc2k20$p.value ########### Exercise 2: Test CSR of a point pattern library(spatstat) library(splancs) # load dataset southlancs data(southlancs) # extract lung cancer cases and plot them lung<-southlancs[southlancs[,3]==0,] loclung<-cbind(lung$x,lung$y) plot(loclung,xlab="eastings",ylab="northings",main="locations of lung cancer cases",pch="+") # define a ppp object which can be used by package spatstat lppp<-ppp(loclung[,1],loclung[,2],xrange=c(min(southlancs.bdy[,1]),max(southlancs.bdy[,1])),yrange=c(min(southlancs.bdy[,2]),max(southlancs.bdy[,2]))) # quadrat counting test for CSR quadrat<-quadrat.test(lppp,nx=5) quadrat ########### Exercise 3: Stone's Test for IBM endicott waste site library(DCluster) newyork<-read.table("newyork2.txt",header=TRUE) # internal standardization to obtain the expected number of cases ratio<-sum(newyork$ncases)/sum(newyork$npopulation) expected<-round(ratio*newyork$npopulation,digits=1) # create the required dataframe for the R package DCluster Observed<-newyork$ncases Expected<-expected Population<-newyork$npopulation x<-newyork$xcoords y<-newyork$ycoords nydata<-data.frame(Observed,Expected,Population,x,y) # define closest centroid to endicott waste site endicott<-c(-4.47,-67.65) all_locations<-rbind(cbind(newyork$xcoords,newyork$ycoords),endicott) z<-knearneigh(all_locations,k=1) indexknn<-z$nn[nrow(all_locations)] # stone's test for endicott waste site stendicotttest<-stone.test(Observed~offset(log(Expected)),nydata,model="poisson",R=999,region=indexknn) stendicotttest ########### Exercise 4: Detection of clusters using the scan statistic # use function opgam with option iscluster=kn.iscluster knny<-opgam(data=nydata,thegrid=cbind(nydata$x,nydata$y),alpha=0.05,iscluster=kn.iscluster,fractpop=0.20,R=49,model="poisson",mle=calculate.mle(nydata)) head(knny) # define the order of the clusters by the size of the scan statistic o<-order(knny$statistic,decreasing=TRUE) ## plot the 10 cluster centres with the highest scan statistic # plot all population centroids plot(newyork$xcoords,newyork$ycoords,pch=".",main="Clusters centres found by the Scan statistic",xlab="xcoord",ylab="ycoord") # plot the locations of the cluster centres points(knny$x[o[1]],knny$y[o[1]],pch=20,col="red") points(knny$x[o[2:10]],knny$y[o[2:10]],pch=20,col="blue") # add locations of waste sites waste<-read.table("wastesites.txt",header=TRUE) points(waste[,1],waste[,2],pch="+",col="red") ############ Exercise 5: Implementing a Monte Carlo Test library(splancs) library(fields) # attach dataset southlancs data(southlancs) larynx<-southlancs[southlancs[,3]==1,] # simulate a random point pattern within the given polygon and plot it r<-csr(southlancs.bdy,nrow(larynx)) plot(r,xlab="xcoord",ylab="ycoord",main="random point pattern") # initialize vector of minimum distances min<-rep(0,1000) # simulate 999 random point pattern and compute the minimum distances for (i in 1:999){ randompattern<-csr(southlancs.bdy,nrow(larynx)) alldist<-rdist(randompattern) diag(alldist)<-100000000 min[i]<-min(as.vector(alldist))} # compute the minimum distance for the larynx cancer pattern loclarynx<-cbind(larynx[,1],larynx[,2]) obsdist<-rdist(loclarynx) diag(obsdist)<-100000000 min[1000]<-min(as.vector(obsdist)) # order the 1000 minima o<-order(min) # compute rank of observed point pattern rank<-which(o==1000) # compute Monte Carlo p-value pvalue<-rank/1000 pvalue