Install any packages you might be missing:
#library(sp)
library(spatstat)
library(splancs)
library(dplyr)
library(dbscan)
library(leaflet)
library(proj4)
-Set working directory where data are found -Read in data
data.dir = "/Users/abigailhorn/Dropbox/0 2019/Classes/PM569 Spatial Statistics/Spatial10_my_lecture/Data/"
food_outlets_la = read.csv(paste0(data.dir, "df_food_places_edit.csv")) %>%
filter(city == "los angeles")
summary(food_outlets_la)
## checkinsCount city latitude longitude
## Min. : 1.0 los angeles:10241 Min. :33.71 Min. :-118.7
## 1st Qu.: 93.0 akron : 0 1st Qu.:34.02 1st Qu.:-118.4
## Median : 257.0 albany : 0 Median :34.06 Median :-118.3
## Mean : 642.5 albuquerque: 0 Mean :34.07 Mean :-118.4
## 3rd Qu.: 679.0 alexandria : 0 3rd Qu.:34.15 3rd Qu.:-118.3
## Max. :22044.0 allentown : 0 Max. :34.32 Max. :-118.1
## (Other) : 0
## shop_name type
## Subway : 281 Mexican : 981
## Starbucks : 265 Fast Food : 646
## McDonald's : 162 Coffee Shop: 624
## Jack in the Box: 105 American : 592
## Burger King : 81 Pizza : 591
## Taco Bell : 79 Sandwiches : 558
## (Other) :9268 (Other) :6249
# Map the points, coloring according to use (# checkinsCount)
checkins.pal = colorNumeric(c('blue','purple','pink','yellow'),
domain=food_outlets_la$checkinsCount)
food_outlets_la %>%
arrange(checkinsCount) %>% # to show higher checkins more prominently
leaflet() %>%
addProviderTiles('CartoDB.Positron') %>%
setView(-118.3260,33.99938, zoom = 10) %>%
addCircles(~longitude, ~latitude, label=food_outlets_la$shop_name,
weight = 1, radius=20,
color=~checkins.pal(food_outlets_la$checkinsCount)) %>%
addLegend("bottomleft", pal=checkins.pal, values=food_outlets_la$checkinsCount,
title='Checkins', opacity=1)
# Log plot is better to see checkins
checkins.pal = colorNumeric(c('blue','purple','pink','yellow'),
domain=log(food_outlets_la$checkinsCount))
food_outlets_la %>%
arrange(checkinsCount) %>% # to show higher checkins more prominently
leaflet() %>%
addProviderTiles('CartoDB.DarkMatter') %>%
setView(-118.3260,33.99938, zoom = 10) %>%
addCircles(~longitude, ~latitude, label=food_outlets_la$shop_name,
weight = 1, radius=20,
color=~checkins.pal(log(food_outlets_la$checkinsCount))) %>%
addLegend("bottomleft", pal=checkins.pal, values=log(food_outlets_la$checkinsCount),
title='Log checkins', opacity=1)
# Filter to look only at specific types of places
food_outlets_la %>%
filter(food_outlets_la$type == "Korean") %>%
arrange(checkinsCount) %>% # to show higher checkins more prominently
leaflet() %>%
addProviderTiles('CartoDB.Positron') %>%
setView(-118.3260,33.99938, zoom = 10) %>%
addCircles(~longitude, ~latitude, weight = 1, radius=20,
color=~checkins.pal(log(food_outlets_la$checkinsCount))) %>%
addLegend("bottomleft", pal=checkins.pal, values=log(food_outlets_la$checkinsCount),
title='Log checkins', opacity=1)
proj4string <- "+proj=utm +zone=11 +north +ellps=WGS84 +datum=WGS84 +units=m +no_defs "
newcoords<-project(as.matrix(cbind(food_outlets_la$longitude, food_outlets_la$latitude)), proj=proj4string)
food_outlets_la$x<-newcoords[,1]
food_outlets_la$y<-newcoords[,2]
plot(x = food_outlets_la$x, y = food_outlets_la$y, ylab = "N-S",
xlab = "E-W", main = "Food outlets proj", cex = 0.25)
# Focus on the area of greatest density:
food.dens <- food_outlets_la[(food_outlets_la$y < 3775000 & food_outlets_la$y >
3750000) & (food_outlets_la$x > 360000 & food_outlets_la$x < 385000),]
plot(x = food.dens$x, y = food.dens$y, ylab = "N-S",
xlab = "E-W", main = "Food outlets highest density", cex = 0.25)
# 1) Define the Window: SS: Use convexhull.xy() which traces the outermost
## points of a scatter plot. Give the function the x and y coordinates of
## the points, and it returns an owin object – which is necessary for
## making a point pattern object.
food.dens.chull <- convexhull.xy(x = food.dens$x, y = food.dens$y)
# 2) Create a ppp object using x and y points from above, window just defined
food.ppp <- ppp(x = food.dens$x, y = food.dens$y, window = food.dens.chull)
## Warning: data contain duplicated points
##Assign names to units
unitname(food.ppp) <- c("meter", "meters")
# To change the window of an existing ppp object use:
## food.ppp[newWINDOWobject]
## food.ppp <- food.ppp[food.win]
# Visualize ppp object:
p1 <- plot.ppp(food.ppp, main = "Food Outlets as ppp object")
# Create points
food.ppp.pts<-as.points(food.ppp$x,food.ppp$y)
Tests for CSR use this basic approach: compare our empirical (sampled) data to a theoretical (modeled) ideal and see if the two are significantly different. In our case, the theoretical model is a CSR processes.
# Evaluate the sample data based on the "x" coordinate
ks <- cdf.test(food.ppp, "x")
## Warning in ks.test(U, "punif", ...): ties should not be present for the
## Kolmogorov-Smirnov test
plot(ks)
pval <- ks$p.value
pval
## [1] 2.198242e-14
# To evaluate the overall spatial randomness, input a density function (see below) as a covariate:
ds <- density(food.ppp)
ks <- cdf.test(food.ppp, ds)
plot(ks)
pval <- ks$p.value
pval
## [1] 0
## In both cases, the low (or 0) p-values suggest that the food outlet data is not drawn from a population distributed according to the assumptions of CSR. We can also see this in our plots - the observed and expected (if our data were CSR) cumulative distributions are pretty different.
# G test: Nearest neighbor distribution test
Gtest<-Gest(food.ppp)
plot(envelope(food.ppp,Gest))
## Generating 99 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99.
##
## Done.
## For all values of r (distances between points) our empirical function is greater than the theoretical one. This suggests that nearest neighbor distances in the point pattern are shorter than for a Poisson process, suggesting a clustered pattern.
# Ripley's K test
K<-Kest(food.ppp)
## number of data points exceeds 3000 - computing border correction estimate only
plot(K)
plot(K, cbind(r, sqrt(border/pi)) ~ r)
plot(K, border - theo ~ r)
## takes too long
#plot(envelope(food.ppp,Kest))
Q <- quadratcount(food.ppp, nx = 20, ny = 20)
plot(food.ppp.pts, cex = 0.5, pch="+", main="Food Outlets in Quadrats", xlab="X",ylab="Y")
plot(Q, add = TRUE, cex = .5)
hist(Q, 100, main = "Distribution of food outlets in quadrats")
mean(Q)
## [1] 15.57101
# Use density to plot kernel density
## The density function creates a kernel density surface which is stored in R as spatstat class im or image. This is how spatstat handles rasters.
ds <- density(food.ppp)
class(ds)
## [1] "im"
# Plot the density surface:
plot(ds, main = "Food outlet density")
# Plot a color image of the kernel estimate ds with the point data superimposed.
plot(ds, main = "Food outlet density")
plot(food.ppp, add = TRUE, cols = "white", cex = 0.1, pch = 16)
# Get the maximum and minimum values of the intensity estimate over the study region
range(ds)
## [1] 1.204507e-06 2.878417e-05
summary(ds)
## real-valued pixel image
## 128 x 128 pixel array (ny, nx)
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## dimensions of each pixel: 195 x 193.6689 meters
## Image is defined on a subset of the rectangular grid
## Subset area = 480298384.956008 square meters
## Subset area fraction = 0.777
## Pixel values (inside window):
## range = [1.204507e-06, 2.878417e-05]
## integral = 5266.945
## mean = 1.096599e-05
plot(dirichlet(food.ppp), main="Dirichlet Tesselation of Food Outlets")
## Warning: 4 duplicated points were removed
##
## PLEASE NOTE: The components "delsgs" and "summary" of the
## object returned by deldir() are now DATA FRAMES rather than
## matrices (as they were prior to release 0.0-18).
## See help("deldir").
##
## PLEASE NOTE: The process that deldir() uses for determining
## duplicated points has changed from that used in version
## 0.0-9 of this package (and previously). See help("deldir").
# Fit a model with dependence on x
food.fit1<-ppm(food.ppp,~log(x)+log(y))
summary(food.fit1)
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = food.ppp, trend = ~log(x) + log(y))
## Edge correction: "border"
## [border correction distance r = 0 ]
## ---------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
##
## Data pattern:
## Planar point pattern: 5263 points
## Average intensity 1.1e-05 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## Window area = 480124000 square meters
## Unit of length: 1 meter
## Fraction of frame area: 0.777
##
## Dummy quadrature points:
## 150 x 150 grid of dummy points, plus 4 corner points
## dummy spacing: 166.1640 x 165.2641 meters
##
## Original dummy parameters: =
## Planar point pattern: 17491 points
## Average intensity 3.64e-05 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## Window area = 480124000 square meters
## Unit of length: 1 meter
## Fraction of frame area: 0.777
## Quadrature weights:
## (counting weights based on 150 x 150 array of rectangular tiles)
## All weights:
## range: [947, 27500] total: 4.79e+08
## Weights on data points:
## range: [947, 13700] total: 36900000
## Weights on dummy points:
## range: [947, 27500] total: 4.42e+08
## ---------------------------------------------------------------------------
## FITTED MODEL:
##
## Nonstationary Poisson process
##
## ---- Intensity: ----
##
## Log intensity: ~log(x) + log(y)
##
## Fitted trend coefficients:
## (Intercept) log(x) log(y)
## -6819.213321 -3.648892 452.709025
##
## Estimate S.E. CI95.lo CI95.hi Ztest
## (Intercept) -6819.213321 147.6230117 -7108.549107 -6529.877535 ***
## log(x) -3.648892 0.8111888 -5.238793 -2.058992 ***
## log(y) 452.709025 9.7836776 433.533370 471.884681 ***
## Zval
## (Intercept) -46.193430
## log(x) -4.498204
## log(y) 46.271867
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept) log(x) log(y)
## -6819.213321 -3.648892 452.709025
##
## Fitted exp(theta):
## (Intercept) log(x) log(y)
## 0.000000e+00 2.601993e-02 4.064729e+196
coef(food.fit1)
## (Intercept) log(x) log(y)
## -6819.213321 -3.648892 452.709025
plot(food.fit1) # this plots the density
AIC(food.fit1)
## [1] 128162.2
# Plot the fitted model intensity (using plot(predict(fit)))
#par(mar=c(0,3))
plot(predict(food.fit1), main = "density of prediction")
plot(density(food.ppp), main = "density of data")
plot(predict(food.fit1, se=TRUE)$se, main = "SE")
# Generate 10 simulated realisations of the fitted model, and plot them, using plot(simulate(fit, nsim=10)) where fit is the fitted model.
par(mar=rep(0.5,4))
plot(simulate(food.fit1, nsim=10), main = "")
## Generating 10 simulated patterns ...1, 2, 3, 4, 5, 6, 7, 8, 9, 10.
# Generate simulation envelopes of the G function from this fitted model. Do they suggest the model is plausible?
plot(envelope(food.fit1, Gest, nsim = 39, global = TRUE))
## Generating 78 simulated realisations of fitted Poisson model (39 to
## estimate the mean and 39 to calculate envelopes) ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78.
##
## Done.
# Fit Poisson Cluster Process by minimum contrast
pcp.mod1<-kppm(food.ppp,method="mincon")
summary(pcp.mod1)
## Stationary cluster point process model
## Fitted to point pattern dataset 'food.ppp'
## Fitted by minimum contrast
## Summary statistic: K-function
## Minimum contrast fit (object of class "minconfit")
## Model: Thomas process
## Fitted by matching theoretical K function to food.ppp
##
## Internal parameters fitted by minimum contrast ($par):
## kappa sigma2
## 3.175541e-08 1.511772e+06
##
## Fitted cluster parameters:
## kappa scale
## 3.175541e-08 1.229541e+03
## Mean cluster size: 346.1318 points
##
## Converged successfully after 185 function evaluations
##
## Starting values of parameters:
## kappa sigma2
## 1.096176e-05 1.460337e+04
## Domain of integration: [ 0 , 5389 ]
## Exponents: p= 2, q= 0.25
##
## ----------- TREND MODEL -----
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = X, trend = trend, rename.intercept = FALSE, covariates = covariates,
## covfunargs = covfunargs, use.gam = use.gam, forcefit = TRUE,
## nd = nd, eps = eps)
## Edge correction: "border"
## [border correction distance r = 0 ]
## ---------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
##
## Data pattern:
## Planar point pattern: 5263 points
## Average intensity 1.1e-05 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## Window area = 480124000 square meters
## Unit of length: 1 meter
## Fraction of frame area: 0.777
##
## Dummy quadrature points:
## 150 x 150 grid of dummy points, plus 4 corner points
## dummy spacing: 166.1640 x 165.2641 meters
##
## Original dummy parameters: =
## Planar point pattern: 17491 points
## Average intensity 3.64e-05 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## Window area = 480124000 square meters
## Unit of length: 1 meter
## Fraction of frame area: 0.777
## Quadrature weights:
## (counting weights based on 150 x 150 array of rectangular tiles)
## All weights:
## range: [947, 27500] total: 4.79e+08
## Weights on data points:
## range: [947, 13700] total: 36900000
## Weights on dummy points:
## range: [947, 27500] total: 4.42e+08
## ---------------------------------------------------------------------------
## FITTED MODEL:
##
## Stationary Poisson process
##
## ---- Intensity: ----
##
##
## Uniform intensity:
## [1] 1.099156e-05
##
## Estimate S.E. CI95.lo CI95.hi Ztest Zval
## (Intercept) -11.41838 0.01378426 -11.4454 -11.39137 *** -828.3641
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept)
## -11.41838
##
## Fitted exp(theta):
## (Intercept)
## 1.099156e-05
##
## ----------- CLUSTER MODEL -----------
## Model: Thomas process
##
## Fitted cluster parameters:
## kappa scale
## 3.175541e-08 1.229541e+03
## Mean cluster size: 346.1318 points
##
## Final standard error and CI
## (allowing for correlation of cluster process):
## Estimate S.E. CI95.lo CI95.hi Ztest Zval
## (Intercept) -11.41838 0.2400639 -11.8889 -10.94787 *** -47.56394
parameters(pcp.mod1)
## $trend
## [1] 1.099156e-05
##
## $kappa
## [1] 3.175541e-08
##
## $scale
## [1] 1229.541
##
## $mu
## [1] 346.1318
plot(pcp.mod1)
plot(simulate(pcp.mod1, drop = TRUE), main = "")
plot(envelope(pcp.mod1, Gest, nsim = 39, global = TRUE))
## Generating 78 simulated realisations of fitted cluster model (39 to
## estimate the mean and 39 to calculate envelopes) ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78.
##
## Done.
# Fit Log Gaussian Cox process to simulated data
lgcp.mod1<-kppm(food.ppp,clusters="LGCP",method="mincon")
summary(lgcp.mod1)
## Stationary Cox point process model
## Fitted to point pattern dataset 'food.ppp'
## Fitted by minimum contrast
## Summary statistic: K-function
## Minimum contrast fit (object of class "minconfit")
## Model: Log-Gaussian Cox process
## Covariance model: exponential
## Fitted by matching theoretical K function to food.ppp
##
## Internal parameters fitted by minimum contrast ($par):
## sigma2 alpha
## 1.443587 1953.699010
##
## Fitted covariance parameters:
## var scale
## 1.443587 1953.699010
## Fitted mean of log of random intensity: -12.14018
##
## Converged successfully after 117 function evaluations
##
## Starting values of parameters:
## sigma2 alpha
## 1.0000 120.8444
## Domain of integration: [ 0 , 5389 ]
## Exponents: p= 2, q= 0.25
##
## ----------- TREND MODEL -----
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = X, trend = trend, rename.intercept = FALSE, covariates = covariates,
## covfunargs = covfunargs, use.gam = use.gam, forcefit = TRUE,
## nd = nd, eps = eps)
## Edge correction: "border"
## [border correction distance r = 0 ]
## ---------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
##
## Data pattern:
## Planar point pattern: 5263 points
## Average intensity 1.1e-05 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## Window area = 480124000 square meters
## Unit of length: 1 meter
## Fraction of frame area: 0.777
##
## Dummy quadrature points:
## 150 x 150 grid of dummy points, plus 4 corner points
## dummy spacing: 166.1640 x 165.2641 meters
##
## Original dummy parameters: =
## Planar point pattern: 17491 points
## Average intensity 3.64e-05 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774990] meters
## Window area = 480124000 square meters
## Unit of length: 1 meter
## Fraction of frame area: 0.777
## Quadrature weights:
## (counting weights based on 150 x 150 array of rectangular tiles)
## All weights:
## range: [947, 27500] total: 4.79e+08
## Weights on data points:
## range: [947, 13700] total: 36900000
## Weights on dummy points:
## range: [947, 27500] total: 4.42e+08
## ---------------------------------------------------------------------------
## FITTED MODEL:
##
## Stationary Poisson process
##
## ---- Intensity: ----
##
##
## Uniform intensity:
## [1] 1.099156e-05
##
## Estimate S.E. CI95.lo CI95.hi Ztest Zval
## (Intercept) -11.41838 0.01378426 -11.4454 -11.39137 *** -828.3641
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept)
## -11.41838
##
## Fitted exp(theta):
## (Intercept)
## 1.099156e-05
##
## ----------- COX MODEL -----------
## Model: log-Gaussian Cox process
##
## Covariance model: exponential
## Fitted covariance parameters:
## var scale
## 1.443587 1953.699010
## Fitted mean of log of random intensity: -12.14018
##
## Final standard error and CI
## (allowing for correlation of Cox process):
## Estimate S.E. CI95.lo CI95.hi Ztest Zval
## (Intercept) -11.41838 0.2615944 -11.9311 -10.90567 *** -43.64919
plot(lgcp.mod1) # is this on residuals?
plot(envelope(lgcp.mod1, Gest, nsim = 39, global = TRUE))
## Generating 78 simulated realisations of fitted cluster model (39 to
## estimate the mean and 39 to calculate envelopes) ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78.
##
## Done.
# Plot 11 simulations of the fit process
XX <- simulate(lgcp.mod1, nsim = 11)
## Generating 11 simulations... 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11.
## Done.
XX[[12]] <- food.ppp
plot(XX, main = "", main.panel = "")
# Filter to the 6 categories above
food_outlets_la_filt <-
food_outlets_la %>%
filter(food_outlets_la$type == "Fast Food" | type == "Burgers" | type == "American" | type == "Coffee Shop" | type == "Korean" | type =="Mexican")
# Print out and read back in filtered data --> hack to force only these 6 categories
write.csv(food_outlets_la_filt, file = paste0(data.dir, "food_outlets_la_filt.csv"))
food.outlets.filt = read.csv(paste0(data.dir, "food_outlets_la_filt.csv"))
# Focus on the area of greatest density again
food.dens.filt <- food.outlets.filt[(food.outlets.filt$y < 3775000 & food.outlets.filt$y >
3750000) & (food.outlets.filt$x > 360000 & food.outlets.filt$x < 385000),]
plot(x = food.dens.filt$x, y = food.dens.filt$y, ylab = "N-S",
xlab = "E-W", main = "Food outlets highest density (type filtered)", cex = 0.25)
food.dens.chull <- convexhull.xy(x = food.dens.filt$x, y = food.dens.filt$y)
# Make the point pattern
food.ppp.marks <- ppp(x = food.dens.filt$x, y = food.dens.filt$y, window = food.dens.chull, marks = food.dens.filt$type)
# Plot the point pattern with all marks
p1 <- plot.ppp(food.ppp.marks, which.marks = "type", main = "Food Outlets as ppp object") #which.marks isn't necessary here
# Plot each outlet type
plot(split(food.ppp.marks), main = "")
# Plot density of each outlet type
plot(density(split(food.ppp.marks)), main = "")
#To extract one of the patterns:
spp <- split(food.ppp.marks)
Mexican <- spp$Mexican
Korean <- spp$Korean
fastfood <- spp$`Fast Food`
plot(fastfood)
plot(density(fastfood), main = "Fast food outlet density")
plot(fastfood, add = TRUE, cols = "white", cex = 0.25, pch = 16)
plot(dirichlet(fastfood), main="Dirichlet Tesselation of Fast Food Outlets")
# To study the relationship between two or more density plots use pairs() to show how two density surfaces interact. Think of this as a 3D covariance matrix.
pairs(density(spp[c(2,3,5)]))
# calculating the Lest difference in the lung and larynx point patterns
Kdiff<-function(Xpp,r,cr="border"){
k1<-Kest(fastfood,r=r,correction=cr)
k2<-Kest(Mexican,r=r,correction= cr)
res<-data.frame(r=r, D=k1[[cr]]-k2[[cr]])
fv(res, valu="D",fname="D")
}
r<-seq(0,0.15,by=0.001)
envKdif <- envelope(fastfood, Kdiff, r=r, nsim=99, cr="border", nrank=2, savefuns=TRUE)
## Generating 99 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99.
##
## Done.
plot(envKdif, main="Difference between Fast-Food and Mexican Food Outlet Point Processes")
## Error in yp[whole] <- yp[whole] + epsilon :
## NAs are not allowed in subscripted assignments
# Fit a model with dependence on x
fastfood.fit1<-ppm(fastfood,~log(x)+log(y))
summary(fastfood.fit1)
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = fastfood, trend = ~log(x) + log(y))
## Edge correction: "border"
## [border correction distance r = 0 ]
## ---------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
##
## Data pattern:
## Planar point pattern: 264 points
## Average intensity 5.58e-07 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774988] units
## Window area = 473074000 square units
## Fraction of frame area: 0.766
##
## Dummy quadrature points:
## 64 x 64 grid of dummy points, plus 4 corner points
## dummy spacing: 389.4469 x 387.3027 units
##
## Original dummy parameters: =
## Planar point pattern: 3202 points
## Average intensity 6.77e-06 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774988] units
## Window area = 473074000 square units
## Fraction of frame area: 0.766
## Quadrature weights:
## (counting weights based on 64 x 64 array of rectangular tiles)
## All weights:
## range: [9670, 151000] total: 4.73e+08
## Weights on data points:
## range: [9670, 75400] total: 17200000
## Weights on dummy points:
## range: [21200, 151000] total: 4.55e+08
## ---------------------------------------------------------------------------
## FITTED MODEL:
##
## Nonstationary Poisson process
##
## ---- Intensity: ----
##
## Log intensity: ~log(x) + log(y)
##
## Fitted trend coefficients:
## (Intercept) log(x) log(y)
## -2506.88918 18.39645 149.02383
##
## Estimate S.E. CI95.lo CI95.hi Ztest Zval
## (Intercept) -2506.88918 552.623842 -3590.01201 -1423.76635 *** -4.536339
## log(x) 18.39645 3.883858 10.78423 26.00867 *** 4.736643
## log(y) 149.02383 36.335004 77.80853 220.23913 *** 4.101385
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept) log(x) log(y)
## -2506.88918 18.39645 149.02383
##
## Fitted exp(theta):
## (Intercept) log(x) log(y)
## 0.000000e+00 9.760591e+07 5.250807e+64
coef(fastfood.fit1)
## (Intercept) log(x) log(y)
## -2506.88918 18.39645 149.02383
plot(fastfood.fit1) # this plots the density
AIC(fastfood.fit1)
## [1] 8096.897
# Plot the fitted model intensity (using plot(predict(fit)))
#par(mar=c(0,3))
plot(predict(fastfood.fit1), main = "density of prediction")
plot(density(fastfood), main = "density of data")
plot(predict(fastfood.fit1, se=TRUE)$se, main = "SE")
par(mar=rep(0.5,4))
plot(simulate(fastfood.fit1, nsim=10), main = "")
## Generating 10 simulated patterns ...1, 2, 3, 4, 5, 6, 7, 8, 9, 10.
# Gest on fitted data
plot(envelope(fastfood.fit1, Gest, nsim = 39, global = TRUE))
## Generating 78 simulated realisations of fitted Poisson model (39 to
## estimate the mean and 39 to calculate envelopes) ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78.
##
## Done.
# Fit Poisson Cluster Process by minimum contrast
pcp.mod1<-kppm(fastfood,method="mincon")
summary(pcp.mod1)
## Stationary cluster point process model
## Fitted to point pattern dataset 'fastfood'
## Fitted by minimum contrast
## Summary statistic: K-function
## Minimum contrast fit (object of class "minconfit")
## Model: Thomas process
## Fitted by matching theoretical K function to fastfood
##
## Internal parameters fitted by minimum contrast ($par):
## kappa sigma2
## 1.025697e-07 7.403431e+05
##
## Fitted cluster parameters:
## kappa scale
## 1.025697e-07 8.604319e+02
## Mean cluster size: 5.446121 points
##
## Converged successfully after 129 function evaluations
##
## Starting values of parameters:
## kappa sigma2
## 5.580523e-07 7.863436e+05
## Domain of integration: [ 0 , 6197 ]
## Exponents: p= 2, q= 0.25
##
## ----------- TREND MODEL -----
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = X, trend = trend, rename.intercept = FALSE, covariates = covariates,
## covfunargs = covfunargs, use.gam = use.gam, forcefit = TRUE,
## nd = nd, eps = eps)
## Edge correction: "border"
## [border correction distance r = 0 ]
## ---------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
##
## Data pattern:
## Planar point pattern: 264 points
## Average intensity 5.58e-07 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774988] units
## Window area = 473074000 square units
## Fraction of frame area: 0.766
##
## Dummy quadrature points:
## 64 x 64 grid of dummy points, plus 4 corner points
## dummy spacing: 389.4469 x 387.3027 units
##
## Original dummy parameters: =
## Planar point pattern: 3202 points
## Average intensity 6.77e-06 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774988] units
## Window area = 473074000 square units
## Fraction of frame area: 0.766
## Quadrature weights:
## (counting weights based on 64 x 64 array of rectangular tiles)
## All weights:
## range: [9670, 151000] total: 4.73e+08
## Weights on data points:
## range: [9670, 75400] total: 17200000
## Weights on dummy points:
## range: [21200, 151000] total: 4.55e+08
## ---------------------------------------------------------------------------
## FITTED MODEL:
##
## Stationary Poisson process
##
## ---- Intensity: ----
##
##
## Uniform intensity:
## [1] 5.586069e-07
##
## Estimate S.E. CI95.lo CI95.hi Ztest Zval
## (Intercept) -14.39782 0.06154575 -14.51845 -14.27719 *** -233.9369
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept)
## -14.39782
##
## Fitted exp(theta):
## (Intercept)
## 5.586069e-07
##
## ----------- CLUSTER MODEL -----------
## Model: Thomas process
##
## Fitted cluster parameters:
## kappa scale
## 1.025697e-07 8.604319e+02
## Mean cluster size: 5.446121 points
##
## Final standard error and CI
## (allowing for correlation of cluster process):
## Estimate S.E. CI95.lo CI95.hi Ztest Zval
## (Intercept) -14.39782 0.1505921 -14.69297 -14.10266 *** -95.60809
parameters(pcp.mod1)
## $trend
## [1] 5.586069e-07
##
## $kappa
## [1] 1.025697e-07
##
## $scale
## [1] 860.4319
##
## $mu
## [1] 5.446121
plot(pcp.mod1)
plot(simulate(pcp.mod1, drop = TRUE), main = "")
plot(envelope(pcp.mod1, Gest, nsim = 39, global = TRUE))
## Generating 78 simulated realisations of fitted cluster model (39 to
## estimate the mean and 39 to calculate envelopes) ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78.
##
## Done.
# Fit Log Gaussian Poisson cluster process to simulated data
lgcp.mod1<-kppm(fastfood,clusters="LGCP",method="mincon")
summary(lgcp.mod1)
## Stationary Cox point process model
## Fitted to point pattern dataset 'fastfood'
## Fitted by minimum contrast
## Summary statistic: K-function
## Minimum contrast fit (object of class "minconfit")
## Model: Log-Gaussian Cox process
## Covariance model: exponential
## Fitted by matching theoretical K function to fastfood
##
## Internal parameters fitted by minimum contrast ($par):
## sigma2 alpha
## 1.24723 1075.38408
##
## Fitted covariance parameters:
## var scale
## 1.24723 1075.38408
## Fitted mean of log of random intensity: -15.02143
##
## Converged successfully after 77 function evaluations
##
## Starting values of parameters:
## sigma2 alpha
## 1.0000 886.7602
## Domain of integration: [ 0 , 6197 ]
## Exponents: p= 2, q= 0.25
##
## ----------- TREND MODEL -----
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = X, trend = trend, rename.intercept = FALSE, covariates = covariates,
## covfunargs = covfunargs, use.gam = use.gam, forcefit = TRUE,
## nd = nd, eps = eps)
## Edge correction: "border"
## [border correction distance r = 0 ]
## ---------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
##
## Data pattern:
## Planar point pattern: 264 points
## Average intensity 5.58e-07 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774988] units
## Window area = 473074000 square units
## Fraction of frame area: 0.766
##
## Dummy quadrature points:
## 64 x 64 grid of dummy points, plus 4 corner points
## dummy spacing: 389.4469 x 387.3027 units
##
## Original dummy parameters: =
## Planar point pattern: 3202 points
## Average intensity 6.77e-06 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 16 vertices
## enclosing rectangle: [360057.9, 384982.5] x [3750201, 3774988] units
## Window area = 473074000 square units
## Fraction of frame area: 0.766
## Quadrature weights:
## (counting weights based on 64 x 64 array of rectangular tiles)
## All weights:
## range: [9670, 151000] total: 4.73e+08
## Weights on data points:
## range: [9670, 75400] total: 17200000
## Weights on dummy points:
## range: [21200, 151000] total: 4.55e+08
## ---------------------------------------------------------------------------
## FITTED MODEL:
##
## Stationary Poisson process
##
## ---- Intensity: ----
##
##
## Uniform intensity:
## [1] 5.586069e-07
##
## Estimate S.E. CI95.lo CI95.hi Ztest Zval
## (Intercept) -14.39782 0.06154575 -14.51845 -14.27719 *** -233.9369
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept)
## -14.39782
##
## Fitted exp(theta):
## (Intercept)
## 5.586069e-07
##
## ----------- COX MODEL -----------
## Model: log-Gaussian Cox process
##
## Covariance model: exponential
## Fitted covariance parameters:
## var scale
## 1.24723 1075.38408
## Fitted mean of log of random intensity: -15.02143
##
## Final standard error and CI
## (allowing for correlation of Cox process):
## Estimate S.E. CI95.lo CI95.hi Ztest Zval
## (Intercept) -14.39782 0.1557738 -14.70313 -14.09251 *** -92.42773
plot(lgcp.mod1) # is this on residuals?
plot(envelope(lgcp.mod1, Gest, nsim = 39, global = TRUE))
## Generating 78 simulated realisations of fitted cluster model (39 to
## estimate the mean and 39 to calculate envelopes) ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78.
##
## Done.
# Plot 11 simulations of the fit process
XX <- simulate(lgcp.mod1, nsim = 11)
## Generating 11 simulations... 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11.
## Done.
XX[[12]] <- fastfood
plot(XX, main = "", main.panel = "")
Our dataset may also include covariates — any data that we treat as explanatory, rather than as part of the ‘response’. Any kind of data may be recruited as an explanatory variable: – Covariate data may be a spatial function Z(u) defined at all spatial locations u, e.g. altitude, soil pH, displayed as a pixel image or a contour plot: – Covariate data may be another spatial pattern such as another point pattern, or a line segment pattern – Typically the covariate pattern would be used to define a surrogate spatial function Z, for example, Z(u) may be the distance from u to the nearest point.
In analysis of a point pattern dataset with covariate data, we typically: – investigate whether the intensity depends on the covariates – allow for covariate effects on intensity before studying interaction between points
We can start answering very important and interesting questions once we bring in this additional data.
freeway.df = read.csv(paste0(data.dir, "Freeway_Exits.csv"))
#schools.df = read.csv(paste0(data.dir, "Public_High_Schools.csv"))
#hospitals.df = read.csv(paste0(data.dir, "Hospitals_and_Medical_Centers.csv"))
freeway.df %>%
leaflet() %>%
addProviderTiles('CartoDB.Positron') %>%
setView(-118.3260,33.99938, zoom = 10) %>%
addCircles(~longitude, ~latitude, label=food_outlets_la$Name,
weight = 1, radius=20)
proj4string <- "+proj=utm +zone=11 +north +ellps=WGS84 +datum=WGS84 +units=m +no_defs "
newcoords<-project(as.matrix(cbind(freeway.df$longitude, freeway.df$latitude)), proj=proj4string)
freeway.df$x<-newcoords[,1]
freeway.df$y<-newcoords[,2]
# Map to same area as food outlet data:
freeway.df.dens <- freeway.df[(freeway.df$y < 3775000 & freeway.df$y >
3750000) & (freeway.df$x > 360000 & freeway.df$x < 385000),]
plot(x = freeway.df.dens$x, y = freeway.df.dens$y, ylab = "N-S",
xlab = "E-W", main = "Freeway exits", cex = 0.25)
freeway.dens.chull <- convexhull.xy(x = freeway.df.dens$x, y = freeway.df.dens$y)
freeway.ppp <- ppp(x = freeway.df.dens$x, y = freeway.df.dens$y, window = freeway.dens.chull)
##Assign names to units
unitname(food.ppp) <- c("meter", "meters")
# Visualize ppp object:
p1 <- plot.ppp(freeway.ppp, main = "Freeway exits as ppp object")
# Create points object
freeway.ppp.pts<-as.points(freeway.ppp$x,freeway.ppp$y)
# Compute the distance to the nearest freeway exit for each fast food outlet
cross.food.freeway <- nncross(fastfood,freeway.ppp)
head(cross.food.freeway)
## dist which
## 1 189.1351 57
## 2 666.7974 17
## 3 436.4762 11
## 4 2447.1386 44
## 5 1900.4874 50
## 6 943.1941 20
# Define a covariate for fast food as the distance to the nearest freeway exit
dist <- cross.food.freeway$dist
plot(dist)
# Add the covariate into the fastfood ppp
fastfood$dist<-dist
# Isn't working
# f <- nncross(fastfood,freeway.ppp)
# fit <- ppm(fastfood, ~D, covariates=list(D=f))
# Get a rough understanding of how a covariates relates to a point process:
data(bei) #in spatstat package
slope <- bei.extra$grad
b <- quantile(slope, probs = (0:4)/4)
slope_break <- cut(slope, breaks = b, labels = 1:4)
v <- tess(image = slope_break)
plot(v)
plot(bei, add=T, pch = "+")
#We create an object b that stores sample quantiles corresponding to the given probabilities (probs), in our case, 0%, 25%, 50%, 75%, and 100%, of slope having a certain value. We then use cut() from the raster package to reclassify the raster slope object with values 1 to 4 based on which quantile of data they belong to. The tess() or tessellation function creates a collection of disjoint spatial regions, in this case, each region is one of the four categories in the raster image we have created. We can then count the number of points in each tessellation. If the point pattern process was uniform, we’d expect a near-equal number of points in each slope category (1, 2, 3, 4).
#We can use the quadratcount() function to count the number of points in each category of slope:
qb <- quadratcount(bei, tess=v)
qb
## tile
## 1 2 3 4
## 271 984 1028 1321
#Looks like there are big differences in tree locations. It appears that trees in this dataset prefer steeper slopes (3 and 4).
# Compute a nonparametric estimate of the function ρ
# In other words, estimate, across a gradient of covariate values, the prevalence of points
#Here’s some quick math to explain this. Imagine the intensity of a point process is, in fact, a function of a covariate (i.e. tree density is a function of our slope). Cool! How do we test this empirically? One quick way is to compute a function that tells us how the intensity of points depends on the value of the covariates. At any spatial location u, let λ(u) describe the intensity of the point process and Z(u) be the value of the covariate (slope, in our case). Then what we are interested in is this relationship:
# λ(u)=ρ(Z(u))
#where ρ is the function of interest. The spatstat function estimates the intensity as a function of a covariate using the rhohat() function:
plot(rhohat(bei, slope))
# Compute the predicted intensity based on this estimate of ρ.
prh <- predict(rhohat(bei, slope))
plot(prh, main = "")
plot(bei, add = TRUE, cols = "white", cex = .2, pch = 16)
# Use K-S test to formally test whether the point-pattern intensity depends on a specific covariate.
#In the example above, we’ve collected evidence above that suggests that tropical tree location depends on slope. We can formally test this by applying the K-S test using slope as our covariate (instead of coordinates or density as we did earlier):
ks <- cdf.test(bei, slope)
plot(ks)
# Remembering the plot (and the large cluster)
checkins.pal = colorNumeric(c('blue','purple','pink','yellow'),
domain=log(food_outlets_la$checkinsCount))
# Filter to look only at specific types of places
food_outlets_la %>%
filter(food_outlets_la$type == "Korean") %>%
arrange(checkinsCount) %>% # to show higher checkins more prominently
leaflet() %>%
addProviderTiles('CartoDB.Positron') %>%
setView(-118.3260,33.99938, zoom = 10) %>%
addCircles(~longitude, ~latitude, weight = 1, radius=20,
color=~checkins.pal(log(food_outlets_la$checkinsCount))) %>%
addLegend("bottomleft", pal=checkins.pal, values=log(food_outlets_la$checkinsCount),
title='Log checkins', opacity=1)
korean.pts<-as.points(Korean$x, Korean$y)
#dbscan
db.clust = dbscan(korean.pts/1000, minPts = 5, eps=4) # what is epsilon (units?)
hullplot(korean.pts/1000,db.clust)
hdb.clust = hdbscan(korean.pts/1000, minPts = 5)
#par(mfrow=c(1,2))
plot(hdb.clust)
hullplot(korean.pts/1000,hdb.clust)
## Warning in hullplot(korean.pts/1000, hdb.clust): Not enough colors. Some
## colors will be reused.
# cluster membership assignment for each
hdb.clust$cluster
## [1] 11 0 10 7 3 5 11 5 7 1 7 0 5 2 2 8 0 3 0 5 11 10 0
## [24] 4 8 0 11 5 5 11 0 6 4 0 4 7 10 7 6 4 11 11 0 0 11 0
## [47] 7 10 3 11 7 0 0 7 0 9 1 4 9 5 10 0 5 4 5 7 0 0 4
## [70] 10 5 0 11 3 6 11 10 1 10 6 6 4 0 7 6 4 8 11 8 10 7 7
## [93] 10 9 0 0 9 11 9 0 9 11 0 0 7 0 0 6 9 9 0 11 0 0 11
## [116] 0 5 11 1 8 4 7 0 0 0 2 0 9 3 11 0 5 9 0 9 7 9 10
## [139] 7 0 3 0 0 4 5 4 0 6 0 0 7 0 6 2 0 0 9 4 0 0 1
## [162] 1 5 7 6 10 3 0 8 6 0 0 11 8 0 0 0 0 2 3 0 0 7 0
## [185] 6 10 11 0 7 5 6 0 0 3 11 6 11 4 0 3 0 10 0 6 7 4 6
## [208] 0 11 7 3 10 2 7 0 0 11 0 7
# cluster membership probability of each
hdb.clust$membership_prob
## [1] 0.77762650 0.00000000 0.30301284 0.40874662 0.03624360 0.40639025
## [7] 0.43761574 0.47539925 0.62650684 0.00000000 0.00000000 0.00000000
## [13] 0.10223180 0.08715812 0.30997178 0.12183873 0.00000000 0.64294444
## [19] 0.00000000 0.45891791 0.51404479 0.44620324 0.00000000 0.48470401
## [25] 0.19462002 0.00000000 0.71652865 0.00000000 0.36889827 0.63301869
## [31] 0.00000000 0.33557244 0.18821475 0.00000000 0.32154573 0.79153912
## [37] 0.76013943 0.12692369 0.58456615 0.18821475 0.75097927 0.16667865
## [43] 0.00000000 0.00000000 0.35855668 0.00000000 0.31270755 0.75099619
## [49] 0.51220795 0.71959145 0.40403988 0.00000000 0.00000000 0.34431033
## [55] 0.00000000 0.56255251 0.26341467 0.85924638 0.77726211 0.44675980
## [61] 0.78083454 0.00000000 0.11335110 0.88995330 0.44449831 0.67699088
## [67] 0.00000000 0.00000000 0.04345776 0.07295445 0.30853172 0.00000000
## [73] 0.64905286 0.76851832 0.63113284 0.72871816 0.30713488 0.38259474
## [79] 0.68893861 0.30617395 0.30448185 0.89158004 0.00000000 0.50854829
## [85] 0.42467196 0.00000000 0.19462002 0.73275948 0.29870597 0.00000000
## [91] 0.75135031 0.79153912 0.30301284 0.70855637 0.00000000 0.00000000
## [97] 0.76684260 0.55091697 0.00000000 0.00000000 0.76684260 0.72871816
## [103] 0.00000000 0.00000000 0.58985169 0.00000000 0.00000000 0.43194717
## [109] 0.77416918 0.19701337 0.00000000 0.73169785 0.00000000 0.00000000
## [115] 0.62338205 0.00000000 0.25090014 0.00000000 0.38526305 0.00000000
## [121] 0.85924638 0.54583530 0.00000000 0.00000000 0.00000000 0.00000000
## [127] 0.00000000 0.77726211 0.80470558 0.37352965 0.00000000 0.33880974
## [133] 0.66212635 0.00000000 0.63789577 0.62133852 0.12401551 0.17593821
## [139] 0.57991033 0.00000000 0.70826506 0.00000000 0.00000000 0.84482217
## [145] 0.44675980 0.84946708 0.00000000 0.16569791 0.00000000 0.00000000
## [151] 0.50078138 0.00000000 0.58456615 0.08715812 0.00000000 0.00000000
## [157] 0.55742486 0.84924262 0.00000000 0.00000000 0.33069365 0.35514116
## [163] 0.33743474 0.79716095 0.00000000 0.83801008 0.00000000 0.00000000
## [169] 0.30282867 0.43194717 0.00000000 0.00000000 0.29923108 0.16222972
## [175] 0.00000000 0.00000000 0.00000000 0.00000000 0.08361805 0.50459973
## [181] 0.00000000 0.00000000 0.06637030 0.00000000 0.48554157 0.81535311
## [187] 0.74878650 0.00000000 0.81690519 0.36889827 0.61853249 0.00000000
## [193] 0.00000000 0.70826506 0.75097927 0.61787241 0.81603615 0.39783121
## [199] 0.00000000 0.06903857 0.00000000 0.51588977 0.00000000 0.48554157
## [205] 0.59486590 0.58921518 0.12547817 0.00000000 0.67709633 0.72814059
## [211] 0.32742238 0.32671453 0.06553281 0.76670898 0.00000000 0.00000000
## [217] 0.45374324 0.00000000 0.56406909
# outlier score for each point, *not* complement of membership scor
hdb.clust$outlier_scores
## [1] 0.000000000 0.912984186 0.735078486 0.656933792 0.759813078
## [6] 0.088488539 0.557205365 0.000000000 0.456913606 0.382594739
## [11] 0.797160946 0.830137077 0.397303128 0.000000000 0.000000000
## [16] 0.201406328 0.993000712 0.351692827 0.993239516 0.000000000
## [21] 0.487564435 0.666580038 0.969882511 0.786439834 0.129238307
## [26] 0.841696990 0.121531202 0.458917912 0.142638872 0.321434824
## [31] 0.990542573 0.425870426 0.864438658 0.989701556 0.837797914
## [36] 0.026968235 0.230190723 0.767673167 0.081761115 0.864438658
## [41] 0.000000000 0.701170820 0.927756239 0.989815936 0.611780609
## [46] 0.995090482 0.704872282 0.258457546 0.525450067 0.046960146
## [51] 0.659643240 0.952023159 0.994086628 0.690647784 0.909263634
## [56] 0.490823697 0.161800766 0.218160812 0.000000000 0.021976187
## [61] 0.157500039 0.966541126 0.389744816 0.000000000 0.142638872
## [66] 0.372033041 0.901487927 0.994190120 0.884953645 0.800822199
## [71] 0.217488202 0.828397499 0.290432361 0.000000000 0.000000000
## [76] 0.014897127 0.733502397 0.000000000 0.406397256 0.450197192
## [81] 0.451534782 0.000000000 0.994541494 0.587265544 0.105799827
## [86] 0.889953303 0.129238307 0.000000000 0.000000000 0.815353107
## [91] 0.184237670 0.026968235 0.735078486 0.235742802 0.920001078
## [96] 0.853291190 0.044688712 0.445490657 0.777262106 0.865460974
## [101] 0.044688712 0.014897127 0.865063822 0.907775601 0.505449495
## [106] 0.941457287 0.994350091 0.094347560 0.013695779 0.722613198
## [111] 0.936452661 0.071864545 0.994350091 0.907261007 0.338797484
## [116] 0.993499717 0.277690313 0.750979265 0.000000000 0.298705966
## [121] 0.218160812 0.553379967 0.910652029 0.951439128 0.988449235
## [126] 0.087158122 0.901532818 0.000000000 0.000000000 0.602501964
## [131] 0.949790568 0.181654477 0.340765717 0.840556320 0.384879056
## [136] 0.464326148 0.745728497 0.775930768 0.517152961 0.841701257
## [141] 0.206534259 0.880647236 0.990827198 0.290834929 0.021976187
## [146] 0.268952633 0.956876375 0.542770518 0.915742419 0.864988165
## [151] 0.593686926 0.933061840 0.081761115 0.000000000 0.959496460
## [156] 0.981897969 0.496722991 0.270041061 0.820754495 0.942220080
## [161] 0.077544589 0.042573011 0.183352765 0.000000000 0.618532488
## [166] 0.000000000 0.768518317 0.921924116 0.000000000 0.094347560
## [171] 0.986968030 0.922755467 0.644646434 0.162904137 0.927006052
## [176] 0.836246599 0.825958098 0.903920245 0.003863096 0.532738075
## [181] 0.985946000 0.994190120 0.782741430 0.957335927 0.000000000
## [186] 0.000000000 0.008728703 0.902740390 0.000000000 0.142638872
## [191] 0.000000000 0.928565694 0.838748185 0.206534259 0.000000000
## [196] 0.001727372 0.000000000 0.817249416 0.963424548 0.751352016
## [201] 0.862611633 0.666580038 0.990710895 0.000000000 0.499328608
## [206] 0.732106225 0.563798756 0.941898385 0.172383131 0.253882530
## [211] 0.655829044 0.725752448 0.023141868 0.130532088 0.973598920
## [216] 0.899676857 0.602501964 0.924759000 0.534699075
# cluster score for each *cluster*
hdb.clust$cluster_scores
## 1 2 3 4 5 6
## 0.7039292 4.5059414 109.4420685 196.9262994 45.0862737 47.1334557
## 7 8 9 10 11
## 264.7428749 18.3059513 164.6276994 131.8197700 107.8656648
# plot hierarchical tree
plot(hdb.clust)
plot(hdb.clust, show_flat = TRUE)