library(sf)
library(ggplot2)
library(osmdata)
library(readxl)
library(dplyr)
library(rnaturalearth)
library(rnaturalearthdata)
library(viridis)
library(tidyr) # For pivoting
library(ggforce) # For pie charts
library(ggrepel)
library(stringr)
library(scatterpie)
Geospatial plotting of ANI connections between strains from Jakarta, Indonesia
1. Setting up R
2. Loading data
# 1. Read data
= read.csv("../data/metadata_all_edit2.csv")
metadata = read_excel("../data/GPS Data of Environment sampling site_TC.xlsx") locations
3. Wrangle data
# Remove number before locations
= locations %>%
locations mutate(site = sub("^[0-9]+\\.", "", Location))
$Location == "Pasar Sawah Barat", "Longitude"] = 106.93
locations[locations
# create a new column with mutations
= metadata %>%
metadata ::mutate(site = sub("^[0-9]+\\.", "", site))
dplyr
# Merge metadata and coordinates
= metadata %>%
metadata_mlst ::left_join(locations, by = c("site" = "site"))
dplyr
# Choose
= metadata_mlst %>%
metadata_mlst_2 ::filter(sector == "Human" | sector == "Environment" | sector == "Animal")
dplyr
write.csv(metadata_mlst_2, "metadata_combined_with_mlst_human_animal_env.csv")
4. Making the Jakarta Map
# boundaries
= c(xmin = 106.5, ymin = -6.7, xmax = 107.2, ymax = -5.9)
gj_bbox
# Query for city/regency boundaries (admin level 5)
= opq(bbox = gj_bbox) %>%
greater_jakarta_admin add_osm_feature(key = "admin_level", value = "5") %>%
osmdata_sf()
= greater_jakarta_admin$osm_multipolygons
greater_jakarta_map
#plot(st_geometry(greater_jakarta_map))
unique(greater_jakarta_map$name)
[1] "Jakarta Selatan" "Jakarta Timur" "Kepulauan Seribu"
[4] "Jakarta Pusat" "Jakarta Barat" "Jakarta Utara"
[7] "Tangerang Selatan" "Tangerang" "Kabupaten Tangerang"
[10] "Bekasi" "Depok" "Bogor"
[13] "Kab Bogor" "Kab Bekasi" "Cianjur"
[16] "Karawang"
unique(greater_jakarta_map$admin_level)
[1] "5"
unique(greater_jakarta_map$type)
[1] "boundary"
= greater_jakarta_map %>%
gj_filtered filter(grepl("Jakarta|Bogor|Bekasi|Depok|Tangerang", name))
= gj_filtered %>%
labels_df st_point_on_surface() %>% # safer than centroid for oddly shaped areas
select(name) %>%
mutate(lon = st_coordinates(.)[,1],
lat = st_coordinates(.)[,2])
Warning: st_point_on_surface assumes attributes are constant over geometries
Warning in st_point_on_surface.sfc(st_geometry(x)): st_point_on_surface may not
give correct results for longitude/latitude data
5. Plotting ANI Connections
5.1 ANI connections with ST38 strains
This example is for ST38 but all the STs analysed in the study were run the same way.
You can use ctrl
+ F
to replace ST38
with any other ST of interest and the code will run the same.
# 1. Read data
= read.csv("../data/ST38_ANI_matrix_indonesia_trycycle.csv")
ani_mat_ST38 = read.csv("../data/metadata_combined_with_mlst_human_animal_env.csv")
coords
= colnames(ani_mat_ST38)
colnames rownames(ani_mat_ST38) = colnames
= ani_mat_ST38 %>% mutate(sample1 = rownames(ani_mat_ST38), .before = 1)
ani_mat_ST38
# 2. Prepare ANI long-format table
= ani_mat_ST38 %>%
ani_long_ST38 pivot_longer(cols = -sample1, names_to = "sample2", values_to = "ANI") %>%
filter(!is.na(ANI))
# 3. Merge coordinates for both ends of each pair
= ani_long_ST38 %>%
ani_pairs_ST38 # Join coords for sample1
::left_join(coords %>%
dplyrselect(sample1 = sample_name, lon1 = Longitude, lat1 = Latitude),
by = "sample1") %>%
# Join coords for sample2
::left_join(coords %>%
dplyrselect(sample2 = sample_name, lon2 = Longitude, lat2 = Latitude),
by = "sample2")
= ani_pairs_ST38 %>% filter(ANI != 100.00)
ani_pairs_ST38 = ani_pairs_ST38 %>% filter(ANI >= 99.5)
ani_pairs_ST38
= ani_pairs_ST38 %>%
ani_pairs_ST38_singles rowwise() %>%
mutate(
pair_min = min(c_across(c(sample1, sample2))),
pair_max = max(c_across(c(sample1, sample2)))
%>%
) ungroup() %>%
distinct(pair_min, pair_max, .keep_all = TRUE)
= ani_pairs_ST38_singles %>% filter(!(lon1 == lon2 & lat1 == lat2))
ani_pairs_ST38_singles
# 1) extract same‑site pairs, keep only the one with max ANI at each location
= ani_pairs_ST38_singles %>%
ani_pairs_ST38_best # 1) define an "unordered" site‐pair via min/max coords
mutate(
lon_min = pmin(lon1, lon2),
lon_max = pmax(lon1, lon2),
lat_min = pmin(lat1, lat2),
lat_max = pmax(lat1, lat2)
%>%
) # 2) group by that unordered pair
group_by(lon_min, lat_min, lon_max, lat_max) %>%
# 3) pick the single row with the highest ANI in each group
slice_max(order_by = ANI, n = 1, with_ties = FALSE) %>%
ungroup() %>%
# 4) drop the helper columns
select(-lon_min, -lon_max, -lat_min, -lat_max)
ani_pairs_ST38
# A tibble: 54 × 7
sample1 sample2 ANI lon1 lat1 lon2 lat2
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 X211_S1 X222_S11 99.9 NA NA NA NA
2 X211_S1 A1_S1 100. NA NA 107. -6.20
3 X211_S1 E113 100. NA NA 107. -6.11
4 X211_S1 E117 100. NA NA 107. -6.37
5 X211_S1 E79 99.7 NA NA 107. -6.28
6 X211_S1 H200 100. NA NA 107. -6.20
7 X222_S11 X211_S1 99.9 NA NA NA NA
8 X222_S11 A1_S1 100. NA NA 107. -6.20
9 X222_S11 E113 100. NA NA 107. -6.11
10 X222_S11 E117 100. NA NA 107. -6.37
# ℹ 44 more rows
ani_pairs_ST38_singles
# A tibble: 14 × 9
sample1 sample2 ANI lon1 lat1 lon2 lat2 pair_min pair_max
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 A1_S1 E113 100. 107. -6.20 107. -6.11 A1_S1 E113
2 A1_S1 E117 100. 107. -6.20 107. -6.37 A1_S1 E117
3 A1_S1 E79 99.7 107. -6.20 107. -6.28 A1_S1 E79
4 B4_S10 E136 99.9 107. -6.12 107. -6.28 B4_S10 E136
5 B4_S10 E177 99.9 107. -6.12 107. -6.12 B4_S10 E177
6 D5_S23 E136 99.8 107. -6.12 107. -6.28 D5_S23 E136
7 D5_S23 E177 99.9 107. -6.12 107. -6.12 D5_S23 E177
8 E113 E117 100. 107. -6.11 107. -6.37 E113 E117
9 E113 E79 99.7 107. -6.11 107. -6.28 E113 E79
10 E113 H200 100. 107. -6.11 107. -6.20 E113 H200
11 E117 E79 99.7 107. -6.37 107. -6.28 E117 E79
12 E117 H200 100. 107. -6.37 107. -6.20 E117 H200
13 E136 E177 100. 107. -6.28 107. -6.12 E136 E177
14 E79 H200 99.7 107. -6.28 107. -6.20 E79 H200
= 155
ST155 = 410
ST410 = 131
ST131 = 38
ST38 = 10
ST10 = 58
ST58
= coords %>% filter(ST == ST38)
coords_ST38
# Jakarta with ANI connections
= unique(coords_ST38[, c("sector", "sector_colour")])
sector_key_ST38
# Jakarta with ANI connections
# 4. Build the map
= ggplot() +
p_ST38 # Basemap
# gj_filtered has already been created above
geom_sf(data = gj_filtered, fill = "white", color = "grey80") +
geom_text(data = labels_df,
aes(x = lon, y = lat, label = name),
size = 3, fontface = "bold", color = "gray50") +
# Connections: curved segments colored by ANI
geom_curve(data = ani_pairs_ST38_best,
aes(x = lon1, y = lat1, xend = lon2, yend = lat2, color = ANI),
curvature = 0.2, size = 0.7, alpha = 0.8) +
scale_color_gradientn(
colors = c("#3B4DC0", "#6281EA", "#8DB0FE", "#B7CFF9", "#DCDDDD", "#F4C5AD", "#F4997B", "#DD5F4C", "#B40426"),
limits = c(99.50, 100.00),
name = "ANI (%)"
+
) # Sample points
geom_point(data = coords_ST38,
aes(x = Longitude, y = Latitude,
fill = sector_colour),
shape = 21, # circle with fill + border
color = "black", # border colour
size = 3) +
scale_fill_identity(
name = "Sector", # legend title
breaks = sector_key_ST38$sector_colour, # the hex‐codes to show
labels = sector_key_ST38$sector, # the text you want
guide = "legend"
+
) # Sample labels
geom_text_repel(data = coords_ST38,
aes(x = Longitude, y = Latitude, label = sample_name),
size = 3, bg.color = "white", max.overlaps = 20) +
coord_sf(
xlim = c(106.7, 107.1),
ylim = c(-6.4, -6.05),
expand = FALSE
+
) theme_minimal() +
theme(
panel.background = element_rect(fill = "#cceeff", color = NA)) +
theme(legend.position = "right",
panel.border = element_rect(colour = "black", fill=NA, linewidth=1)) +
labs(title = "ST38",
subtitle = " ",
x = "Longitude", y = "Latitude")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
# 5. Print the map
print(p_ST38)
ggsave("../imgs/Geospatial_ANI_connections_between_ST38_threshold=99.5_singles.svg", p_ST38, height = 8, width = 8)
ggsave("../imgs/Geospatial_ANI_connections_between_ST38_threshold=99.5_singles.png", p_ST38, height = 8, width = 8)