Spatial11

References

Lecture9 Point Processes Lab 2018, Prof. Meredith Franklin
Tutorial Analysing spatial point patterns in R, Adrian Baddeley
Point pattern analysis tutorial, Prof. Emily Burchfield

Setup

Packages

Install any packages you might be missing:

#library(sp)
library(spatstat)
library(splancs)
library(dplyr)
library(dbscan)
library(leaflet)
library(proj4)

Data

-Set working directory where data are found -Read in data

data.dir = "/Users/abigailhorn/Dropbox/0 2019/Classes/PM569 Spatial Statistics/Spatial10_my_lecture/Data/"

food_outlets_la = read.csv(paste0(data.dir, "df_food_places_edit.csv")) %>% 
  filter(city == "los angeles") 

summary(food_outlets_la)

##  checkinsCount              city          latitude       longitude     
##  Min.   :    1.0   los angeles:10241   Min.   :33.71   Min.   :-118.7  
##  1st Qu.:   93.0   akron      :    0   1st Qu.:34.02   1st Qu.:-118.4  
##  Median :  257.0   albany     :    0   Median :34.06   Median :-118.3  
##  Mean   :  642.5   albuquerque:    0   Mean   :34.07   Mean   :-118.4  
##  3rd Qu.:  679.0   alexandria :    0   3rd Qu.:34.15   3rd Qu.:-118.3  
##  Max.   :22044.0   allentown  :    0   Max.   :34.32   Max.   :-118.1  
##                    (Other)    :    0                                   
##            shop_name             type     
##  Subway         : 281   Mexican    : 981  
##  Starbucks      : 265   Fast Food  : 646  
##  McDonald's     : 162   Coffee Shop: 624  
##  Jack in the Box: 105   American   : 592  
##  Burger King    :  81   Pizza      : 591  
##  Taco Bell      :  79   Sandwiches : 558  
##  (Other)        :9268   (Other)    :6249

Mapping the food outlets

# Map the points, coloring according to use (# checkinsCount)  
checkins.pal = colorNumeric(c('blue','purple','pink','yellow'),
                       domain=food_outlets_la$checkinsCount)
food_outlets_la %>% 
  arrange(checkinsCount) %>% # to show higher checkins more prominently
  leaflet() %>% 
  addProviderTiles('CartoDB.Positron') %>% 
  setView(-118.3260,33.99938, zoom = 10) %>% 
  addCircles(~longitude,  ~latitude,  label=food_outlets_la$shop_name,
             weight = 1, radius=20,
             color=~checkins.pal(food_outlets_la$checkinsCount)) %>%  
  addLegend("bottomleft", pal=checkins.pal, values=food_outlets_la$checkinsCount,
            title='Checkins', opacity=1)

# Log plot is better to see checkins
checkins.pal = colorNumeric(c('blue','purple','pink','yellow'),
                       domain=log(food_outlets_la$checkinsCount))
food_outlets_la %>% 
  arrange(checkinsCount) %>% # to show higher checkins more prominently
  leaflet() %>% 
  addProviderTiles('CartoDB.DarkMatter') %>% 
  setView(-118.3260,33.99938, zoom = 10) %>% 
  addCircles(~longitude,  ~latitude,  label=food_outlets_la$shop_name,
             weight = 1, radius=20,
             color=~checkins.pal(log(food_outlets_la$checkinsCount))) %>%  
  addLegend("bottomleft", pal=checkins.pal, values=log(food_outlets_la$checkinsCount),
            title='Log checkins', opacity=1)

# Filter to look only at specific types of places
food_outlets_la %>% 
  filter(food_outlets_la$type == "Korean") %>% 
  arrange(checkinsCount) %>% # to show higher checkins more prominently
  leaflet() %>% 
  addProviderTiles('CartoDB.Positron') %>% 
  setView(-118.3260,33.99938, zoom = 10) %>% 
  addCircles(~longitude,  ~latitude, weight = 1, radius=20,
             color=~checkins.pal(log(food_outlets_la$checkinsCount))) %>%  
  addLegend("bottomleft", pal=checkins.pal, values=log(food_outlets_la$checkinsCount),
            title='Log checkins', opacity=1)

Project into meters

use WGS84 projection
plot projected coordinates

proj4string <- "+proj=utm +zone=11 +north +ellps=WGS84 +datum=WGS84 +units=m +no_defs "

newcoords<-project(as.matrix(cbind(food_outlets_la$longitude, food_outlets_la$latitude)), proj=proj4string)
food_outlets_la$x<-newcoords[,1]
food_outlets_la$y<-newcoords[,2]

plot(x = food_outlets_la$x, y = food_outlets_la$y, ylab = "N-S", 
    xlab = "E-W", main = "Food outlets proj", cex = 0.25)

# Focus on the area of greatest density:
food.dens <- food_outlets_la[(food_outlets_la$y < 3775000 & food_outlets_la$y > 
    3750000) & (food_outlets_la$x > 360000 & food_outlets_la$x < 385000),]
plot(x = food.dens$x, y = food.dens$y, ylab = "N-S", 
    xlab = "E-W", main = "Food outlets highest density", cex = 0.25)

Use SpatStat to make the point pattern

# 1) Define the Window: SS: Use convexhull.xy() which traces the outermost
## points of a scatter plot. Give the function the x and y coordinates of
## the points, and it returns an owin object – which is necessary for
## making a point pattern object.
food.dens.chull <- convexhull.xy(x = food.dens$x, y = food.dens$y)

# 2) Create a ppp object using x and y points from above, window just defined
food.ppp <- ppp(x = food.dens$x, y = food.dens$y, window = food.dens.chull)

## Warning: data contain duplicated points

##Assign names to units
unitname(food.ppp) <- c("meter", "meters")  

# To change the window of an existing ppp object use:
## food.ppp[newWINDOWobject] 
## food.ppp <- food.ppp[food.win]

# Visualize ppp object:
p1 <- plot.ppp(food.ppp, main = "Food Outlets as ppp object")

# Create points
food.ppp.pts<-as.points(food.ppp$x,food.ppp$y)

Testing for CSR

Tests for CSR use this basic approach: compare our empirical (sampled) data to a theoretical (modeled) ideal and see if the two are significantly different. In our case, the theoretical model is a CSR processes.

K-S Test for CSR

A test for CSR is the Kolmogorov-Smirnov test, a widely used test to assess goodness-of-fit and/or how well sample data from some unknown population fits some hypothetical model of a specific population (i.e. a CSR). We can plot both the empirical and theoretical CSR models together in cumulative form, scaled so their cumulative sums are zero (i.e. express both as cumulative probability distributions). We then look for the greatest difference between the two. The maximum distance is the Kolmogorov-Smirnov statistic, i.e. D=max|CDF−EDF| where CDF is the theoretical cumulative distribution function and EDF is the empirical cumulative distribution function.
With point data, we specify a real function T(x,y) defined at all locations x,y in our sampling window. We evaluate this function at each of the data points and compare the empirical values of T with the predicted distribution of T under the CSR assumptions. spatstat makes this painless:

# Evaluate the sample data based on the "x" coordinate
ks <- cdf.test(food.ppp, "x")

## Warning in ks.test(U, "punif", ...): ties should not be present for the
## Kolmogorov-Smirnov test

plot(ks)

pval <- ks$p.value
pval

## [1] 2.198242e-14

# To evaluate the overall spatial randomness, input a density function (see below) as a covariate:
ds <- density(food.ppp)
ks <- cdf.test(food.ppp, ds)
plot(ks)

pval <- ks$p.value
pval

## [1] 0

## In both cases, the low (or 0) p-values suggest that the food outlet data is not drawn from a population distributed according to the assumptions of CSR. We can also see this in our plots - the observed and expected (if our data were CSR) cumulative distributions are pretty different.

Distance tests for detecting clusteredness vs. regularity (review)

To test how our data is non-random, i.e. are points more clustered or structured than a random process, we use distance based tests that compare theoretical CSR and empirical models of distance between points in a point pattern process.
G-function: measures the distribution of distances from an arbitrary event to its nearest event (i.e. uses nearest neighbor distances).
K-function: measures the number of events found up to a given distance of any particular event (i.e. uses pairwise distances). Think of this function as the expected number of points of the process within a given distance of a typical point in the process.
Interpretation: Empirical values less than our theoretical ones suggest a regular pattern. Empirical values greater than theoretical values suggest clustering.

# G test: Nearest neighbor distribution test

Gtest<-Gest(food.ppp)
plot(envelope(food.ppp,Gest))

## Generating 99 simulations of CSR  ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,  99.
## 
## Done.

## For all values of r (distances between points) our empirical function is greater than the theoretical one. This suggests that nearest neighbor distances in the point pattern are shorter than for a Poisson process, suggesting a clustered pattern.

# Ripley's K test
K<-Kest(food.ppp)

## number of data points exceeds 3000 - computing border correction estimate only

plot(K)

plot(K, cbind(r, sqrt(border/pi)) ~ r)

plot(K, border - theo ~ r)

## takes too long
#plot(envelope(food.ppp,Kest))

Intensity

We can think of the intensity of a point process as similar to the sample mean. The intensity is the average density of points (i.e. number of points per unit area). Intensity can be constant (uniform or homogenous) or may vary across space.

Quadrat counting

We can visualize how the intensity of food outlets varies across space by creating a grid (often called quadrats) and counting the number of outlets in each grid cell:

Q <- quadratcount(food.ppp, nx = 20, ny = 20)

plot(food.ppp.pts, cex = 0.5, pch="+", main="Food Outlets in Quadrats", xlab="X",ylab="Y")
plot(Q, add = TRUE, cex = .5)

hist(Q, 100, main = "Distribution of food outlets in quadrats")

mean(Q)

## [1] 15.57101

Kernel density

If we want to estimate a continuous density surface, use Kernel density estimation (KDE) density()
KDE is an approach to estimate the probability density function of a random variable. Think of KDE this way: for each event (food outlet), there’s a mini-function that spreads the effect of this crime across space. A KDE essentially sums all of these mini-functions to create a surface indicative of the density of events across space.
KDE estimation in density.ppp uses an isotropic Gaussian kernel

# Use density to plot kernel density
## The density function creates a kernel density surface which is stored in R as spatstat class im or image. This is how spatstat handles rasters.
ds <- density(food.ppp)
class(ds)

## [1] "im"

# Plot the density surface:
plot(ds, main = "Food outlet density")

# Plot a color image of the kernel estimate ds with the point data superimposed.
plot(ds, main = "Food outlet density")
plot(food.ppp, add = TRUE, cols = "white", cex = 0.1, pch = 16)

# Get the maximum and minimum values of the intensity estimate over the study region
range(ds)

## [1] 1.204507e-06 2.878417e-05

summary(ds)

## real-valued pixel image
## 128 x 128 pixel array (ny, nx)
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## dimensions of each pixel: 195 x 193.6689 meters
## Image is defined on a subset of the rectangular grid
## Subset area = 480298384.956008 square meters
## Subset area fraction = 0.777
## Pixel values (inside window):
##  range = [1.204507e-06, 2.878417e-05]
##  integral = 5266.945
##  mean = 1.096599e-05

Dirichlet Tesselation

An adaptive estimator of intensity
A “tessellation” is a division of space into non-overlapping regions.
Dirichlet(X) computes the Dirichlet tessellation or Voronoi tessellation of the point pattern X, which estimates a constant intensity in each tile of the tessellation

plot(dirichlet(food.ppp), main="Dirichlet Tesselation of Food Outlets")

## Warning: 4 duplicated points were removed

## 
##      PLEASE NOTE:  The components "delsgs" and "summary" of the
##  object returned by deldir() are now DATA FRAMES rather than
##  matrices (as they were prior to release 0.0-18).
##  See help("deldir").
##  
##      PLEASE NOTE: The process that deldir() uses for determining
##  duplicated points has changed from that used in version
##  0.0-9 of this package (and previously). See help("deldir").

Fitting Poisson process (fitting intensity function)

Fit inhomogeneous poisson process with ppm() function

# Fit a model with dependence on x
food.fit1<-ppm(food.ppp,~log(x)+log(y))
summary(food.fit1)

## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = food.ppp, trend = ~log(x) + log(y))
## Edge correction: "border"
##  [border correction distance r = 0 ]
## ---------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
## 
## Data pattern:
## Planar point pattern:  5263 points
## Average intensity 1.1e-05 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## Window area = 480124000 square meters
## Unit of length: 1 meter
## Fraction of frame area: 0.777
## 
## Dummy quadrature points:
##      150 x 150 grid of dummy points, plus 4 corner points
##      dummy spacing: 166.1640 x 165.2641 meters
## 
## Original dummy parameters: =
## Planar point pattern:  17491 points
## Average intensity 3.64e-05 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## Window area = 480124000 square meters
## Unit of length: 1 meter
## Fraction of frame area: 0.777
## Quadrature weights:
##      (counting weights based on 150 x 150 array of rectangular tiles)
## All weights:
##  range: [947, 27500] total: 4.79e+08
## Weights on data points:
##  range: [947, 13700] total: 36900000
## Weights on dummy points:
##  range: [947, 27500] total: 4.42e+08
## ---------------------------------------------------------------------------
## FITTED MODEL:
## 
## Nonstationary Poisson process
## 
## ---- Intensity: ----
## 
## Log intensity: ~log(x) + log(y)
## 
## Fitted trend coefficients:
##  (Intercept)       log(x)       log(y) 
## -6819.213321    -3.648892   452.709025 
## 
##                 Estimate        S.E.      CI95.lo      CI95.hi Ztest
## (Intercept) -6819.213321 147.6230117 -7108.549107 -6529.877535   ***
## log(x)         -3.648892   0.8111888    -5.238793    -2.058992   ***
## log(y)        452.709025   9.7836776   433.533370   471.884681   ***
##                   Zval
## (Intercept) -46.193430
## log(x)       -4.498204
## log(y)       46.271867
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta):
##  (Intercept)       log(x)       log(y) 
## -6819.213321    -3.648892   452.709025 
## 
## Fitted exp(theta):
##   (Intercept)        log(x)        log(y) 
##  0.000000e+00  2.601993e-02 4.064729e+196

coef(food.fit1)

##  (Intercept)       log(x)       log(y) 
## -6819.213321    -3.648892   452.709025

plot(food.fit1) # this plots the density

AIC(food.fit1)

## [1] 128162.2

# Plot the fitted model intensity (using plot(predict(fit)))
#par(mar=c(0,3)) 
plot(predict(food.fit1), main = "density of prediction")

plot(density(food.ppp), main = "density of data")

plot(predict(food.fit1, se=TRUE)$se, main = "SE")

# Generate 10 simulated realisations of the fitted model, and plot them, using plot(simulate(fit, nsim=10)) where fit is the fitted model.
par(mar=rep(0.5,4))
plot(simulate(food.fit1, nsim=10), main = "")

## Generating 10 simulated patterns ...1, 2, 3, 4, 5, 6, 7, 8, 9,  10.

# Generate simulation envelopes of the G function from this fitted model. Do they suggest the model is plausible?
plot(envelope(food.fit1, Gest, nsim = 39, global = TRUE))

## Generating 78 simulated realisations of fitted Poisson model (39 to 
## estimate the mean and 39 to calculate envelopes) ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77,  78.
## 
## Done.

Fitting other inhomogeneous intensity models using kppm()

kppm(): Fit Poisson cluster process or Cox point process model

# Fit Poisson Cluster Process by minimum contrast
pcp.mod1<-kppm(food.ppp,method="mincon")
summary(pcp.mod1)

## Stationary cluster point process model
## Fitted to point pattern dataset 'food.ppp'
## Fitted by minimum contrast
##  Summary statistic: K-function
## Minimum contrast fit (object of class "minconfit")
## Model: Thomas process
## Fitted by matching theoretical K function to food.ppp
## 
## Internal parameters fitted by minimum contrast ($par):
##        kappa       sigma2 
## 3.175541e-08 1.511772e+06 
## 
## Fitted cluster parameters:
##        kappa        scale 
## 3.175541e-08 1.229541e+03 
## Mean cluster size:  346.1318 points
## 
## Converged successfully after 185 function evaluations
## 
## Starting values of parameters:
##        kappa       sigma2 
## 1.096176e-05 1.460337e+04 
## Domain of integration: [ 0 , 5389 ]
## Exponents: p= 2, q= 0.25
## 
## ----------- TREND MODEL -----
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = X, trend = trend, rename.intercept = FALSE, covariates = covariates, 
##     covfunargs = covfunargs, use.gam = use.gam, forcefit = TRUE, 
##     nd = nd, eps = eps)
## Edge correction: "border"
##  [border correction distance r = 0 ]
## ---------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
## 
## Data pattern:
## Planar point pattern:  5263 points
## Average intensity 1.1e-05 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## Window area = 480124000 square meters
## Unit of length: 1 meter
## Fraction of frame area: 0.777
## 
## Dummy quadrature points:
##      150 x 150 grid of dummy points, plus 4 corner points
##      dummy spacing: 166.1640 x 165.2641 meters
## 
## Original dummy parameters: =
## Planar point pattern:  17491 points
## Average intensity 3.64e-05 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## Window area = 480124000 square meters
## Unit of length: 1 meter
## Fraction of frame area: 0.777
## Quadrature weights:
##      (counting weights based on 150 x 150 array of rectangular tiles)
## All weights:
##  range: [947, 27500] total: 4.79e+08
## Weights on data points:
##  range: [947, 13700] total: 36900000
## Weights on dummy points:
##  range: [947, 27500] total: 4.42e+08
## ---------------------------------------------------------------------------
## FITTED MODEL:
## 
## Stationary Poisson process
## 
## ---- Intensity: ----
## 
## 
## Uniform intensity:
## [1] 1.099156e-05
## 
##              Estimate       S.E.  CI95.lo   CI95.hi Ztest      Zval
## (Intercept) -11.41838 0.01378426 -11.4454 -11.39137   *** -828.3641
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta):
## (Intercept) 
##   -11.41838 
## 
## Fitted exp(theta):
##  (Intercept) 
## 1.099156e-05 
## 
## ----------- CLUSTER MODEL -----------
## Model: Thomas process
## 
## Fitted cluster parameters:
##        kappa        scale 
## 3.175541e-08 1.229541e+03 
## Mean cluster size:  346.1318 points
## 
## Final standard error and CI
## (allowing for correlation of cluster process):
##              Estimate      S.E.  CI95.lo   CI95.hi Ztest      Zval
## (Intercept) -11.41838 0.2400639 -11.8889 -10.94787   *** -47.56394

parameters(pcp.mod1)

## $trend
## [1] 1.099156e-05
## 
## $kappa
## [1] 3.175541e-08
## 
## $scale
## [1] 1229.541
## 
## $mu
## [1] 346.1318

plot(pcp.mod1)

plot(simulate(pcp.mod1, drop = TRUE), main = "")

plot(envelope(pcp.mod1, Gest, nsim = 39, global = TRUE))

## Generating 78 simulated realisations of fitted cluster model (39 to 
## estimate the mean and 39 to calculate envelopes) ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77,  78.
## 
## Done.

# Fit Log Gaussian Cox process to simulated data
lgcp.mod1<-kppm(food.ppp,clusters="LGCP",method="mincon")
summary(lgcp.mod1)

## Stationary Cox point process model
## Fitted to point pattern dataset 'food.ppp'
## Fitted by minimum contrast
##  Summary statistic: K-function
## Minimum contrast fit (object of class "minconfit")
## Model: Log-Gaussian Cox process
##   Covariance model: exponential
## Fitted by matching theoretical K function to food.ppp
## 
## Internal parameters fitted by minimum contrast ($par):
##      sigma2       alpha 
##    1.443587 1953.699010 
## 
## Fitted covariance parameters:
##         var       scale 
##    1.443587 1953.699010 
## Fitted mean of log of random intensity:  -12.14018
## 
## Converged successfully after 117 function evaluations
## 
## Starting values of parameters:
##   sigma2    alpha 
##   1.0000 120.8444 
## Domain of integration: [ 0 , 5389 ]
## Exponents: p= 2, q= 0.25
## 
## ----------- TREND MODEL -----
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = X, trend = trend, rename.intercept = FALSE, covariates = covariates, 
##     covfunargs = covfunargs, use.gam = use.gam, forcefit = TRUE, 
##     nd = nd, eps = eps)
## Edge correction: "border"
##  [border correction distance r = 0 ]
## ---------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
## 
## Data pattern:
## Planar point pattern:  5263 points
## Average intensity 1.1e-05 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## Window area = 480124000 square meters
## Unit of length: 1 meter
## Fraction of frame area: 0.777
## 
## Dummy quadrature points:
##      150 x 150 grid of dummy points, plus 4 corner points
##      dummy spacing: 166.1640 x 165.2641 meters
## 
## Original dummy parameters: =
## Planar point pattern:  17491 points
## Average intensity 3.64e-05 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## Window area = 480124000 square meters
## Unit of length: 1 meter
## Fraction of frame area: 0.777
## Quadrature weights:
##      (counting weights based on 150 x 150 array of rectangular tiles)
## All weights:
##  range: [947, 27500] total: 4.79e+08
## Weights on data points:
##  range: [947, 13700] total: 36900000
## Weights on dummy points:
##  range: [947, 27500] total: 4.42e+08
## ---------------------------------------------------------------------------
## FITTED MODEL:
## 
## Stationary Poisson process
## 
## ---- Intensity: ----
## 
## 
## Uniform intensity:
## [1] 1.099156e-05
## 
##              Estimate       S.E.  CI95.lo   CI95.hi Ztest      Zval
## (Intercept) -11.41838 0.01378426 -11.4454 -11.39137   *** -828.3641
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta):
## (Intercept) 
##   -11.41838 
## 
## Fitted exp(theta):
##  (Intercept) 
## 1.099156e-05 
## 
## ----------- COX MODEL -----------
## Model: log-Gaussian Cox process
## 
##  Covariance model: exponential
## Fitted covariance parameters:
##         var       scale 
##    1.443587 1953.699010 
## Fitted mean of log of random intensity: -12.14018
## 
## Final standard error and CI
## (allowing for correlation of Cox process):
##              Estimate      S.E.  CI95.lo   CI95.hi Ztest      Zval
## (Intercept) -11.41838 0.2615944 -11.9311 -10.90567   *** -43.64919

plot(lgcp.mod1) # is this on residuals?

plot(envelope(lgcp.mod1, Gest, nsim = 39, global = TRUE))

## Generating 78 simulated realisations of fitted cluster model (39 to 
## estimate the mean and 39 to calculate envelopes) ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77,  78.
## 
## Done.

# Plot 11 simulations of the fit process
XX <- simulate(lgcp.mod1, nsim = 11)

## Generating 11 simulations... 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  11.
## Done.

XX[[12]] <- food.ppp
plot(XX, main = "", main.panel = "")

Marked data

Marked data is data marked with specific attributes. We have categorial marked data, marked by outlet type and shop brand. We will focus on outlet type.
Note that for Poisson process data, each mark can be considered to be generated from an independent Poisson process.

Reading in marked data

Filter to following types of outlet: – Fast Food – Burgers – American – Mexican – Coffee Shop – Korean

# Filter to the 6 categories above
food_outlets_la_filt <-
  food_outlets_la %>% 
  filter(food_outlets_la$type == "Fast Food" | type == "Burgers" | type == "American" | type == "Coffee Shop" | type == "Korean" | type =="Mexican") 

# Print out and read back in filtered data --> hack to force only these 6 categories
write.csv(food_outlets_la_filt, file = paste0(data.dir, "food_outlets_la_filt.csv"))
food.outlets.filt = read.csv(paste0(data.dir, "food_outlets_la_filt.csv"))

# Focus on the area of greatest density again
food.dens.filt <- food.outlets.filt[(food.outlets.filt$y < 3775000 & food.outlets.filt$y > 
    3750000) & (food.outlets.filt$x > 360000 & food.outlets.filt$x < 385000),]
plot(x = food.dens.filt$x, y = food.dens.filt$y, ylab = "N-S", 
    xlab = "E-W", main = "Food outlets highest density (type filtered)", cex = 0.25)

food.dens.chull <- convexhull.xy(x = food.dens.filt$x, y = food.dens.filt$y)

# Make the point pattern
food.ppp.marks <- ppp(x = food.dens.filt$x, y = food.dens.filt$y, window = food.dens.chull, marks = food.dens.filt$type)

Visualize density of the outlet types

# Plot the point pattern with all marks
p1 <- plot.ppp(food.ppp.marks, which.marks = "type", main = "Food Outlets as ppp object")   #which.marks isn't necessary here

# Plot each outlet type
plot(split(food.ppp.marks), main = "")

# Plot density of each outlet type
plot(density(split(food.ppp.marks)), main = "")

#To extract one of the patterns:
spp <- split(food.ppp.marks)
Mexican <- spp$Mexican
Korean <- spp$Korean
fastfood <- spp$`Fast Food`
plot(fastfood)

plot(density(fastfood), main = "Fast food outlet density")
plot(fastfood, add = TRUE, cols = "white", cex = 0.25, pch = 16)

plot(dirichlet(fastfood), main="Dirichlet Tesselation of Fast Food Outlets")

# To study the relationship between two or more density plots use pairs() to show how two density surfaces interact. Think of this as a 3D covariance matrix. 
pairs(density(spp[c(2,3,5)]))

Studying difference in point patterns using “differenced Ripley’s k”

# calculating the Lest difference in the lung and larynx point patterns
Kdiff<-function(Xpp,r,cr="border"){
  k1<-Kest(fastfood,r=r,correction=cr)
  k2<-Kest(Mexican,r=r,correction= cr)
  res<-data.frame(r=r, D=k1[[cr]]-k2[[cr]])
  fv(res, valu="D",fname="D")
}

r<-seq(0,0.15,by=0.001)
envKdif <- envelope(fastfood, Kdiff, r=r, nsim=99, cr="border", nrank=2, savefuns=TRUE)

## Generating 99 simulations of CSR  ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,  99.
## 
## Done.

plot(envKdif, main="Difference between Fast-Food and Mexican Food Outlet Point Processes")

## Error in yp[whole] <- yp[whole] + epsilon : 
##   NAs are not allowed in subscripted assignments

Fitting intensity for a mark (fast food)

# Fit a model with dependence on x
fastfood.fit1<-ppm(fastfood,~log(x)+log(y))
summary(fastfood.fit1)

## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = fastfood, trend = ~log(x) + log(y))
## Edge correction: "border"
##  [border correction distance r = 0 ]
## ---------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
## 
## Data pattern:
## Planar point pattern:  264 points
## Average intensity 5.58e-07 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774988] units
## Window area = 473074000 square units
## Fraction of frame area: 0.766
## 
## Dummy quadrature points:
##      64 x 64 grid of dummy points, plus 4 corner points
##      dummy spacing: 389.4469 x 387.3027 units
## 
## Original dummy parameters: =
## Planar point pattern:  3202 points
## Average intensity 6.77e-06 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774988] units
## Window area = 473074000 square units
## Fraction of frame area: 0.766
## Quadrature weights:
##      (counting weights based on 64 x 64 array of rectangular tiles)
## All weights:
##  range: [9670, 151000]   total: 4.73e+08
## Weights on data points:
##  range: [9670, 75400]    total: 17200000
## Weights on dummy points:
##  range: [21200, 151000]  total: 4.55e+08
## ---------------------------------------------------------------------------
## FITTED MODEL:
## 
## Nonstationary Poisson process
## 
## ---- Intensity: ----
## 
## Log intensity: ~log(x) + log(y)
## 
## Fitted trend coefficients:
## (Intercept)      log(x)      log(y) 
## -2506.88918    18.39645   149.02383 
## 
##                Estimate       S.E.     CI95.lo     CI95.hi Ztest      Zval
## (Intercept) -2506.88918 552.623842 -3590.01201 -1423.76635   *** -4.536339
## log(x)         18.39645   3.883858    10.78423    26.00867   ***  4.736643
## log(y)        149.02383  36.335004    77.80853   220.23913   ***  4.101385
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta):
## (Intercept)      log(x)      log(y) 
## -2506.88918    18.39645   149.02383 
## 
## Fitted exp(theta):
##  (Intercept)       log(x)       log(y) 
## 0.000000e+00 9.760591e+07 5.250807e+64

coef(fastfood.fit1)

## (Intercept)      log(x)      log(y) 
## -2506.88918    18.39645   149.02383

plot(fastfood.fit1) # this plots the density

AIC(fastfood.fit1)

## [1] 8096.897

# Plot the fitted model intensity (using plot(predict(fit)))
#par(mar=c(0,3)) 
plot(predict(fastfood.fit1), main = "density of prediction")

plot(density(fastfood), main = "density of data")

plot(predict(fastfood.fit1, se=TRUE)$se, main = "SE")

par(mar=rep(0.5,4))
plot(simulate(fastfood.fit1, nsim=10), main = "")

## Generating 10 simulated patterns ...1, 2, 3, 4, 5, 6, 7, 8, 9,  10.

# Gest on fitted data
plot(envelope(fastfood.fit1, Gest, nsim = 39, global = TRUE))

## Generating 78 simulated realisations of fitted Poisson model (39 to 
## estimate the mean and 39 to calculate envelopes) ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77,  78.
## 
## Done.

# Fit Poisson Cluster Process by minimum contrast
pcp.mod1<-kppm(fastfood,method="mincon")
summary(pcp.mod1)

## Stationary cluster point process model
## Fitted to point pattern dataset 'fastfood'
## Fitted by minimum contrast
##  Summary statistic: K-function
## Minimum contrast fit (object of class "minconfit")
## Model: Thomas process
## Fitted by matching theoretical K function to fastfood
## 
## Internal parameters fitted by minimum contrast ($par):
##        kappa       sigma2 
## 1.025697e-07 7.403431e+05 
## 
## Fitted cluster parameters:
##        kappa        scale 
## 1.025697e-07 8.604319e+02 
## Mean cluster size:  5.446121 points
## 
## Converged successfully after 129 function evaluations
## 
## Starting values of parameters:
##        kappa       sigma2 
## 5.580523e-07 7.863436e+05 
## Domain of integration: [ 0 , 6197 ]
## Exponents: p= 2, q= 0.25
## 
## ----------- TREND MODEL -----
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = X, trend = trend, rename.intercept = FALSE, covariates = covariates, 
##     covfunargs = covfunargs, use.gam = use.gam, forcefit = TRUE, 
##     nd = nd, eps = eps)
## Edge correction: "border"
##  [border correction distance r = 0 ]
## ---------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
## 
## Data pattern:
## Planar point pattern:  264 points
## Average intensity 5.58e-07 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774988] units
## Window area = 473074000 square units
## Fraction of frame area: 0.766
## 
## Dummy quadrature points:
##      64 x 64 grid of dummy points, plus 4 corner points
##      dummy spacing: 389.4469 x 387.3027 units
## 
## Original dummy parameters: =
## Planar point pattern:  3202 points
## Average intensity 6.77e-06 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774988] units
## Window area = 473074000 square units
## Fraction of frame area: 0.766
## Quadrature weights:
##      (counting weights based on 64 x 64 array of rectangular tiles)
## All weights:
##  range: [9670, 151000]   total: 4.73e+08
## Weights on data points:
##  range: [9670, 75400]    total: 17200000
## Weights on dummy points:
##  range: [21200, 151000]  total: 4.55e+08
## ---------------------------------------------------------------------------
## FITTED MODEL:
## 
## Stationary Poisson process
## 
## ---- Intensity: ----
## 
## 
## Uniform intensity:
## [1] 5.586069e-07
## 
##              Estimate       S.E.   CI95.lo   CI95.hi Ztest      Zval
## (Intercept) -14.39782 0.06154575 -14.51845 -14.27719   *** -233.9369
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta):
## (Intercept) 
##   -14.39782 
## 
## Fitted exp(theta):
##  (Intercept) 
## 5.586069e-07 
## 
## ----------- CLUSTER MODEL -----------
## Model: Thomas process
## 
## Fitted cluster parameters:
##        kappa        scale 
## 1.025697e-07 8.604319e+02 
## Mean cluster size:  5.446121 points
## 
## Final standard error and CI
## (allowing for correlation of cluster process):
##              Estimate      S.E.   CI95.lo   CI95.hi Ztest      Zval
## (Intercept) -14.39782 0.1505921 -14.69297 -14.10266   *** -95.60809

parameters(pcp.mod1)

## $trend
## [1] 5.586069e-07
## 
## $kappa
## [1] 1.025697e-07
## 
## $scale
## [1] 860.4319
## 
## $mu
## [1] 5.446121

plot(pcp.mod1)

plot(simulate(pcp.mod1, drop = TRUE), main = "")

plot(envelope(pcp.mod1, Gest, nsim = 39, global = TRUE))

## Generating 78 simulated realisations of fitted cluster model (39 to 
## estimate the mean and 39 to calculate envelopes) ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77,  78.
## 
## Done.

# Fit Log Gaussian Poisson cluster process to simulated data
lgcp.mod1<-kppm(fastfood,clusters="LGCP",method="mincon")
summary(lgcp.mod1)

## Stationary Cox point process model
## Fitted to point pattern dataset 'fastfood'
## Fitted by minimum contrast
##  Summary statistic: K-function
## Minimum contrast fit (object of class "minconfit")
## Model: Log-Gaussian Cox process
##   Covariance model: exponential
## Fitted by matching theoretical K function to fastfood
## 
## Internal parameters fitted by minimum contrast ($par):
##     sigma2      alpha 
##    1.24723 1075.38408 
## 
## Fitted covariance parameters:
##        var      scale 
##    1.24723 1075.38408 
## Fitted mean of log of random intensity:  -15.02143
## 
## Converged successfully after 77 function evaluations
## 
## Starting values of parameters:
##   sigma2    alpha 
##   1.0000 886.7602 
## Domain of integration: [ 0 , 6197 ]
## Exponents: p= 2, q= 0.25
## 
## ----------- TREND MODEL -----
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = X, trend = trend, rename.intercept = FALSE, covariates = covariates, 
##     covfunargs = covfunargs, use.gam = use.gam, forcefit = TRUE, 
##     nd = nd, eps = eps)
## Edge correction: "border"
##  [border correction distance r = 0 ]
## ---------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
## 
## Data pattern:
## Planar point pattern:  264 points
## Average intensity 5.58e-07 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774988] units
## Window area = 473074000 square units
## Fraction of frame area: 0.766
## 
## Dummy quadrature points:
##      64 x 64 grid of dummy points, plus 4 corner points
##      dummy spacing: 389.4469 x 387.3027 units
## 
## Original dummy parameters: =
## Planar point pattern:  3202 points
## Average intensity 6.77e-06 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774988] units
## Window area = 473074000 square units
## Fraction of frame area: 0.766
## Quadrature weights:
##      (counting weights based on 64 x 64 array of rectangular tiles)
## All weights:
##  range: [9670, 151000]   total: 4.73e+08
## Weights on data points:
##  range: [9670, 75400]    total: 17200000
## Weights on dummy points:
##  range: [21200, 151000]  total: 4.55e+08
## ---------------------------------------------------------------------------
## FITTED MODEL:
## 
## Stationary Poisson process
## 
## ---- Intensity: ----
## 
## 
## Uniform intensity:
## [1] 5.586069e-07
## 
##              Estimate       S.E.   CI95.lo   CI95.hi Ztest      Zval
## (Intercept) -14.39782 0.06154575 -14.51845 -14.27719   *** -233.9369
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta):
## (Intercept) 
##   -14.39782 
## 
## Fitted exp(theta):
##  (Intercept) 
## 5.586069e-07 
## 
## ----------- COX MODEL -----------
## Model: log-Gaussian Cox process
## 
##  Covariance model: exponential
## Fitted covariance parameters:
##        var      scale 
##    1.24723 1075.38408 
## Fitted mean of log of random intensity: -15.02143
## 
## Final standard error and CI
## (allowing for correlation of Cox process):
##              Estimate      S.E.   CI95.lo   CI95.hi Ztest      Zval
## (Intercept) -14.39782 0.1557738 -14.70313 -14.09251   *** -92.42773

plot(lgcp.mod1) # is this on residuals?

plot(envelope(lgcp.mod1, Gest, nsim = 39, global = TRUE))

## Generating 78 simulated realisations of fitted cluster model (39 to 
## estimate the mean and 39 to calculate envelopes) ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77,  78.
## 
## Done.

# Plot 11 simulations of the fit process
XX <- simulate(lgcp.mod1, nsim = 11)

## Generating 11 simulations... 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  11.
## Done.

XX[[12]] <- fastfood
plot(XX, main = "", main.panel = "")

Intensity dependence on covariate

Our dataset may also include covariates — any data that we treat as explanatory, rather than as part of the ‘response’. Any kind of data may be recruited as an explanatory variable: – Covariate data may be a spatial function Z(u) defined at all spatial locations u, e.g. altitude, soil pH, displayed as a pixel image or a contour plot: – Covariate data may be another spatial pattern such as another point pattern, or a line segment pattern – Typically the covariate pattern would be used to define a surrogate spatial function Z, for example, Z(u) may be the distance from u to the nearest point.
In analysis of a point pattern dataset with covariate data, we typically: – investigate whether the intensity depends on the covariates – allow for covariate effects on intensity before studying interaction between points

We can start answering very important and interesting questions once we bring in this additional data.

Read in covariate data: locations of freeway exits

freeway.df = read.csv(paste0(data.dir, "Freeway_Exits.csv"))
#schools.df = read.csv(paste0(data.dir, "Public_High_Schools.csv"))
#hospitals.df = read.csv(paste0(data.dir, "Hospitals_and_Medical_Centers.csv"))

freeway.df %>% 
  leaflet() %>% 
  addProviderTiles('CartoDB.Positron') %>% 
  setView(-118.3260,33.99938, zoom = 10) %>% 
  addCircles(~longitude,  ~latitude,  label=food_outlets_la$Name,
             weight = 1, radius=20)

proj4string <- "+proj=utm +zone=11 +north +ellps=WGS84 +datum=WGS84 +units=m +no_defs "

newcoords<-project(as.matrix(cbind(freeway.df$longitude, freeway.df$latitude)), proj=proj4string)
freeway.df$x<-newcoords[,1]
freeway.df$y<-newcoords[,2]

# Map to same area as food outlet data:
freeway.df.dens <- freeway.df[(freeway.df$y < 3775000 & freeway.df$y > 
    3750000) & (freeway.df$x > 360000 & freeway.df$x < 385000),]
plot(x = freeway.df.dens$x, y = freeway.df.dens$y, ylab = "N-S", 
    xlab = "E-W", main = "Freeway exits", cex = 0.25)

freeway.dens.chull <- convexhull.xy(x = freeway.df.dens$x, y = freeway.df.dens$y)

freeway.ppp <- ppp(x = freeway.df.dens$x, y = freeway.df.dens$y, window = freeway.dens.chull)

##Assign names to units
unitname(food.ppp) <- c("meter", "meters")  

# Visualize ppp object:
p1 <- plot.ppp(freeway.ppp, main = "Freeway exits as ppp object")

# Create points object
freeway.ppp.pts<-as.points(freeway.ppp$x,freeway.ppp$y)

Calculate covariate function: cross nearest neighbor distance

We can compare the distance between two point pattern objects X and Y.
crossdist(X,Y) computes a matrix of distances from each point in X to each point in Y
ncross(X,Y) finds for each point in X the nearest point of Y and the distance to this point.

# Compute the distance to the nearest freeway exit for each fast food outlet  
cross.food.freeway <- nncross(fastfood,freeway.ppp)
head(cross.food.freeway)

##        dist which
## 1  189.1351    57
## 2  666.7974    17
## 3  436.4762    11
## 4 2447.1386    44
## 5 1900.4874    50
## 6  943.1941    20

# Define a covariate for fast food as the distance to the nearest freeway exit
dist <- cross.food.freeway$dist
plot(dist)

# Add the covariate into the fastfood  ppp
fastfood$dist<-dist

Estimate intensity as a function of covariate

# Isn't working
#   f <- nncross(fastfood,freeway.ppp)
#   fit <- ppm(fastfood, ~D, covariates=list(D=f))

Estimate intensity as a function of covariate – internal data

Get a rough understanding of how a covariates relates to a point process
Compute a nonparametric estimate of the function ρ
Compute the predicted intensity based on this estimate of ρ.
Use K-S test to formally test whether the point-pattern intensity depends on a specific covariate.

# Get a rough understanding of how a covariates relates to a point process:
data(bei) #in spatstat package
slope <- bei.extra$grad
b <- quantile(slope, probs = (0:4)/4)
slope_break <- cut(slope, breaks = b, labels = 1:4)
v <- tess(image = slope_break)
plot(v)
plot(bei, add=T, pch = "+")

#We create an object b that stores sample quantiles corresponding to the given probabilities (probs), in our case, 0%, 25%, 50%, 75%, and 100%, of slope having a certain value. We then use cut() from the raster package to reclassify the raster slope object with values 1 to 4 based on which quantile of data they belong to. The tess() or tessellation function creates a collection of disjoint spatial regions, in this case, each region is one of the four categories in the raster image we have created. We can then count the number of points in each tessellation. If the point pattern process was uniform, we’d expect a near-equal number of points in each slope category (1, 2, 3, 4). 

#We can use the quadratcount() function to count the number of points in each category of slope:
qb <- quadratcount(bei, tess=v)
qb

## tile
##    1    2    3    4 
##  271  984 1028 1321

#Looks like there are big differences in tree locations. It appears that trees in this dataset prefer steeper slopes (3 and 4).

# Compute a nonparametric estimate of the function ρ 
# In other words, estimate, across a gradient of covariate values, the prevalence of points 
#Here’s some quick math to explain this. Imagine the intensity of a point process is, in fact, a function of a covariate (i.e. tree density is a function of our slope). Cool! How do we test this empirically? One quick way is to compute a function that tells us how the intensity of points depends on the value of the covariates. At any spatial location u, let λ(u) describe the intensity of the point process and Z(u) be the value of the covariate (slope, in our case). Then what we are interested in is this relationship: 
# λ(u)=ρ(Z(u))
#where ρ is the function of interest. The spatstat function estimates the intensity as a function of a covariate using the rhohat() function:
plot(rhohat(bei, slope))

# Compute the predicted intensity based on this estimate of ρ.
prh <- predict(rhohat(bei, slope))
plot(prh, main = "")
plot(bei, add = TRUE, cols = "white", cex = .2, pch = 16)

# Use K-S test to formally test whether the point-pattern intensity depends on a specific covariate. 
#In the example above, we’ve collected evidence above that suggests that tropical tree location depends on slope. We can formally test this by applying the K-S test using slope as our covariate (instead of coordinates or density as we did earlier):
ks <- cdf.test(bei, slope)
plot(ks)

Identifying clusters

Will do this for the Korean food outlets since there is a very visible cluster

# Remembering the plot (and the large cluster)
checkins.pal = colorNumeric(c('blue','purple','pink','yellow'),
                       domain=log(food_outlets_la$checkinsCount))
# Filter to look only at specific types of places
food_outlets_la %>% 
  filter(food_outlets_la$type == "Korean") %>% 
  arrange(checkinsCount) %>% # to show higher checkins more prominently
  leaflet() %>% 
  addProviderTiles('CartoDB.Positron') %>% 
  setView(-118.3260,33.99938, zoom = 10) %>% 
  addCircles(~longitude,  ~latitude, weight = 1, radius=20,
             color=~checkins.pal(log(food_outlets_la$checkinsCount))) %>%  
  addLegend("bottomleft", pal=checkins.pal, values=log(food_outlets_la$checkinsCount),
            title='Log checkins', opacity=1)

korean.pts<-as.points(Korean$x, Korean$y)

#dbscan
db.clust = dbscan(korean.pts/1000, minPts = 5, eps=4) # what is epsilon (units?)
hullplot(korean.pts/1000,db.clust)

hdb.clust = hdbscan(korean.pts/1000, minPts = 5)
#par(mfrow=c(1,2))
plot(hdb.clust)

hullplot(korean.pts/1000,hdb.clust)

## Warning in hullplot(korean.pts/1000, hdb.clust): Not enough colors. Some
## colors will be reused.

# cluster membership assignment for each
hdb.clust$cluster

##   [1] 11  0 10  7  3  5 11  5  7  1  7  0  5  2  2  8  0  3  0  5 11 10  0
##  [24]  4  8  0 11  5  5 11  0  6  4  0  4  7 10  7  6  4 11 11  0  0 11  0
##  [47]  7 10  3 11  7  0  0  7  0  9  1  4  9  5 10  0  5  4  5  7  0  0  4
##  [70] 10  5  0 11  3  6 11 10  1 10  6  6  4  0  7  6  4  8 11  8 10  7  7
##  [93] 10  9  0  0  9 11  9  0  9 11  0  0  7  0  0  6  9  9  0 11  0  0 11
## [116]  0  5 11  1  8  4  7  0  0  0  2  0  9  3 11  0  5  9  0  9  7  9 10
## [139]  7  0  3  0  0  4  5  4  0  6  0  0  7  0  6  2  0  0  9  4  0  0  1
## [162]  1  5  7  6 10  3  0  8  6  0  0 11  8  0  0  0  0  2  3  0  0  7  0
## [185]  6 10 11  0  7  5  6  0  0  3 11  6 11  4  0  3  0 10  0  6  7  4  6
## [208]  0 11  7  3 10  2  7  0  0 11  0  7

# cluster membership probability of each
hdb.clust$membership_prob

##   [1] 0.77762650 0.00000000 0.30301284 0.40874662 0.03624360 0.40639025
##   [7] 0.43761574 0.47539925 0.62650684 0.00000000 0.00000000 0.00000000
##  [13] 0.10223180 0.08715812 0.30997178 0.12183873 0.00000000 0.64294444
##  [19] 0.00000000 0.45891791 0.51404479 0.44620324 0.00000000 0.48470401
##  [25] 0.19462002 0.00000000 0.71652865 0.00000000 0.36889827 0.63301869
##  [31] 0.00000000 0.33557244 0.18821475 0.00000000 0.32154573 0.79153912
##  [37] 0.76013943 0.12692369 0.58456615 0.18821475 0.75097927 0.16667865
##  [43] 0.00000000 0.00000000 0.35855668 0.00000000 0.31270755 0.75099619
##  [49] 0.51220795 0.71959145 0.40403988 0.00000000 0.00000000 0.34431033
##  [55] 0.00000000 0.56255251 0.26341467 0.85924638 0.77726211 0.44675980
##  [61] 0.78083454 0.00000000 0.11335110 0.88995330 0.44449831 0.67699088
##  [67] 0.00000000 0.00000000 0.04345776 0.07295445 0.30853172 0.00000000
##  [73] 0.64905286 0.76851832 0.63113284 0.72871816 0.30713488 0.38259474
##  [79] 0.68893861 0.30617395 0.30448185 0.89158004 0.00000000 0.50854829
##  [85] 0.42467196 0.00000000 0.19462002 0.73275948 0.29870597 0.00000000
##  [91] 0.75135031 0.79153912 0.30301284 0.70855637 0.00000000 0.00000000
##  [97] 0.76684260 0.55091697 0.00000000 0.00000000 0.76684260 0.72871816
## [103] 0.00000000 0.00000000 0.58985169 0.00000000 0.00000000 0.43194717
## [109] 0.77416918 0.19701337 0.00000000 0.73169785 0.00000000 0.00000000
## [115] 0.62338205 0.00000000 0.25090014 0.00000000 0.38526305 0.00000000
## [121] 0.85924638 0.54583530 0.00000000 0.00000000 0.00000000 0.00000000
## [127] 0.00000000 0.77726211 0.80470558 0.37352965 0.00000000 0.33880974
## [133] 0.66212635 0.00000000 0.63789577 0.62133852 0.12401551 0.17593821
## [139] 0.57991033 0.00000000 0.70826506 0.00000000 0.00000000 0.84482217
## [145] 0.44675980 0.84946708 0.00000000 0.16569791 0.00000000 0.00000000
## [151] 0.50078138 0.00000000 0.58456615 0.08715812 0.00000000 0.00000000
## [157] 0.55742486 0.84924262 0.00000000 0.00000000 0.33069365 0.35514116
## [163] 0.33743474 0.79716095 0.00000000 0.83801008 0.00000000 0.00000000
## [169] 0.30282867 0.43194717 0.00000000 0.00000000 0.29923108 0.16222972
## [175] 0.00000000 0.00000000 0.00000000 0.00000000 0.08361805 0.50459973
## [181] 0.00000000 0.00000000 0.06637030 0.00000000 0.48554157 0.81535311
## [187] 0.74878650 0.00000000 0.81690519 0.36889827 0.61853249 0.00000000
## [193] 0.00000000 0.70826506 0.75097927 0.61787241 0.81603615 0.39783121
## [199] 0.00000000 0.06903857 0.00000000 0.51588977 0.00000000 0.48554157
## [205] 0.59486590 0.58921518 0.12547817 0.00000000 0.67709633 0.72814059
## [211] 0.32742238 0.32671453 0.06553281 0.76670898 0.00000000 0.00000000
## [217] 0.45374324 0.00000000 0.56406909

# outlier score for each point, *not* complement of membership scor
hdb.clust$outlier_scores

##   [1] 0.000000000 0.912984186 0.735078486 0.656933792 0.759813078
##   [6] 0.088488539 0.557205365 0.000000000 0.456913606 0.382594739
##  [11] 0.797160946 0.830137077 0.397303128 0.000000000 0.000000000
##  [16] 0.201406328 0.993000712 0.351692827 0.993239516 0.000000000
##  [21] 0.487564435 0.666580038 0.969882511 0.786439834 0.129238307
##  [26] 0.841696990 0.121531202 0.458917912 0.142638872 0.321434824
##  [31] 0.990542573 0.425870426 0.864438658 0.989701556 0.837797914
##  [36] 0.026968235 0.230190723 0.767673167 0.081761115 0.864438658
##  [41] 0.000000000 0.701170820 0.927756239 0.989815936 0.611780609
##  [46] 0.995090482 0.704872282 0.258457546 0.525450067 0.046960146
##  [51] 0.659643240 0.952023159 0.994086628 0.690647784 0.909263634
##  [56] 0.490823697 0.161800766 0.218160812 0.000000000 0.021976187
##  [61] 0.157500039 0.966541126 0.389744816 0.000000000 0.142638872
##  [66] 0.372033041 0.901487927 0.994190120 0.884953645 0.800822199
##  [71] 0.217488202 0.828397499 0.290432361 0.000000000 0.000000000
##  [76] 0.014897127 0.733502397 0.000000000 0.406397256 0.450197192
##  [81] 0.451534782 0.000000000 0.994541494 0.587265544 0.105799827
##  [86] 0.889953303 0.129238307 0.000000000 0.000000000 0.815353107
##  [91] 0.184237670 0.026968235 0.735078486 0.235742802 0.920001078
##  [96] 0.853291190 0.044688712 0.445490657 0.777262106 0.865460974
## [101] 0.044688712 0.014897127 0.865063822 0.907775601 0.505449495
## [106] 0.941457287 0.994350091 0.094347560 0.013695779 0.722613198
## [111] 0.936452661 0.071864545 0.994350091 0.907261007 0.338797484
## [116] 0.993499717 0.277690313 0.750979265 0.000000000 0.298705966
## [121] 0.218160812 0.553379967 0.910652029 0.951439128 0.988449235
## [126] 0.087158122 0.901532818 0.000000000 0.000000000 0.602501964
## [131] 0.949790568 0.181654477 0.340765717 0.840556320 0.384879056
## [136] 0.464326148 0.745728497 0.775930768 0.517152961 0.841701257
## [141] 0.206534259 0.880647236 0.990827198 0.290834929 0.021976187
## [146] 0.268952633 0.956876375 0.542770518 0.915742419 0.864988165
## [151] 0.593686926 0.933061840 0.081761115 0.000000000 0.959496460
## [156] 0.981897969 0.496722991 0.270041061 0.820754495 0.942220080
## [161] 0.077544589 0.042573011 0.183352765 0.000000000 0.618532488
## [166] 0.000000000 0.768518317 0.921924116 0.000000000 0.094347560
## [171] 0.986968030 0.922755467 0.644646434 0.162904137 0.927006052
## [176] 0.836246599 0.825958098 0.903920245 0.003863096 0.532738075
## [181] 0.985946000 0.994190120 0.782741430 0.957335927 0.000000000
## [186] 0.000000000 0.008728703 0.902740390 0.000000000 0.142638872
## [191] 0.000000000 0.928565694 0.838748185 0.206534259 0.000000000
## [196] 0.001727372 0.000000000 0.817249416 0.963424548 0.751352016
## [201] 0.862611633 0.666580038 0.990710895 0.000000000 0.499328608
## [206] 0.732106225 0.563798756 0.941898385 0.172383131 0.253882530
## [211] 0.655829044 0.725752448 0.023141868 0.130532088 0.973598920
## [216] 0.899676857 0.602501964 0.924759000 0.534699075

# cluster score for each *cluster*
hdb.clust$cluster_scores

##           1           2           3           4           5           6 
##   0.7039292   4.5059414 109.4420685 196.9262994  45.0862737  47.1334557 
##           7           8           9          10          11 
## 264.7428749  18.3059513 164.6276994 131.8197700 107.8656648

# plot hierarchical tree
plot(hdb.clust)

plot(hdb.clust, show_flat = TRUE)