-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFEWSNET_processing.Rmd
More file actions
242 lines (199 loc) · 7.27 KB
/
FEWSNET_processing.Rmd
File metadata and controls
242 lines (199 loc) · 7.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
---
title: "FEWSNET_processing"
author: "Ferdinand Seyffer"
date: "2025-10-07"
output:
html_document:
code_folding: hide
toc: true
number_sections: true
toc_float:
collapsed: true
theme: cerulean
editor_options:
chunk_output_type: console
markdown:
wrap: sentence
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## Setup
Load necessary packages, if not installed install with "install.packages()
```{r message=F, warning=F}
library(sf)
library(dplyr)
library(stringr)
library(ggplot2)
library(purrr)
library(tidyr)
library(scales)
```
Setup data
```{r message=F, warning=F}
# Turn off s2 fo planar instead of spherical geodata handling
sf_use_s2(FALSE)
# Load Somalia districts
districts <- st_read("Admin/som_adm_ocha_20250108_ab_shp/som_admbnda_adm2_ocha_20250108.shp")
# List and load FEWSNET shapefiles
foodsec_files <- list.files(path = "FI_data/FEWSNET_EA_all/", pattern = "\\.shp$", full.names = TRUE)
```
## Preprocessing
Loop through FEWSNET files, intersect with ADM2
```{r message=F, warning=F}
intersections <- list()
for (fp in foodsec_files) {
food_sec <- st_read(fp, quiet = TRUE) # load food security file
# Reproject to CRS of districts file
if (is.na(st_crs(food_sec))) st_crs(food_sec) <- 4326
food_sec <- st_transform(food_sec, 4326)
# Fix geometries because of common invalid geometries with FEWSNET data
food_sec <- st_make_valid(food_sec)
# Intersect FS file with districts, keeping only column "CS" from FS layer
intersection <- st_intersection(districts, food_sec["CS"])
# extract base name (without .shp) for both naming + date
basename <- tools::file_path_sans_ext(basename(fp))
# add date column based on basename of FS files
intersection <- intersection %>%
mutate(
source = basename,
date = str_extract(basename, "\\d{6}") %>%
paste0("01") %>%
as.Date("%Y%m%d")
)
# Add each file to intersections list
intersections[[basename]] <- intersection
}
```
Convert distinct intersections to one dataframe
```{r message=F, warning=F}
all_intersections <- map_dfr(intersections, \(x) st_drop_geometry(x))|> # drop geometries and combine to dataframe
distinct(ADM2_EN, CS, date, .keep_all = TRUE) |> # keep just rows where ADM2_EN, CS and date are unique
# Add TRUE/FALSE columns for different conditions
mutate(
cs3 = CS == 3, # CS = 3
cs4 = CS == 4, # CS = 4
cs5 = CS == 5, # CS = 5
cs_gr2 = CS >= 3, # CS over 2
cs_gr3 = CS >= 4 # CS over 3
)
```
Convert distinct intersections to one dataframe
```{r message=F, warning=F}
clean_intersections <- all_intersections |>
# drop invalid/missing values
filter(CS != 99) |>
# keep only selected columns
select(ADM2_EN, date, ADM2_PCODE, CS, cs3, cs4, cs5, cs_gr2, cs_gr3)
```
Group by districts and calculate different basic stats
```{r message=F, warning=F}
fewsnet_stats <- clean_intersections |>
group_by(ADM2_PCODE, ADM2_EN) |> # group by districts
# Summarise per districts: times in specific cs, times in cs >2/ >3
summarise(
sum_cs3 = sum(cs3, na.rm = TRUE),
sum_cs4 = sum(cs4, na.rm = TRUE),
sum_cs5 = sum(cs5, na.rm = TRUE),
sum_cs_gr2 = sum(cs_gr2, na.rm = TRUE),
sum_cs_gr3 = sum(cs_gr3, na.rm = TRUE),
mean_CS = mean(CS, na.rm = TRUE),
.groups = "drop"
)
```
Join ipc stats back to district geometries (e.g. for import in QGIS)
```{r message=F, warning=F}
# Join data back to geometries
fewsnet_stats_sf <- districts |>
select(ADM2_PCODE, geometry) |> # keep only join key + geometry
left_join(fewsnet_stats, by = "ADM2_PCODE") |>
st_as_sf()
# Write file
st_write(fewsnet_stats_sf,
"FEWSNET_adm2_stats.gpkg",
delete_dsn = TRUE)
```
## Data Analysis
Timeseries plots for IPC for four selected districts
```{r message=F, warning=F}
# Enter districts of interest here
sel_districts <- c("Luuq", "Xudur", "Xarardheere", "Hobyo")
# Summarise per district & month
district_summary <- clean_intersections %>%
filter(ADM2_EN %in% sel_districts) %>%
group_by(ADM2_EN, date) %>%
summarise(
mean_CS = mean(CS, na.rm = TRUE),
max_CS = max(CS, na.rm = TRUE),
.groups = "drop"
)
# Calculate overall CS mean per district (for dashed mean line)
overall_means <- district_summary %>%
group_by(ADM2_EN) %>%
summarise(overall_mean = mean(mean_CS, na.rm = TRUE))
# Plot
ggplot(district_summary, aes(x = date, group = ADM2_EN)) +
geom_line(aes(y = mean_CS, color = "Mean CS"), linewidth = 1) +
geom_line(aes(y = max_CS, color = "Max CS"), linewidth = 1, linetype = "dashed") +
geom_hline(data = overall_means, aes(yintercept = overall_mean),
linetype = "dashed", color = "black") +
facet_wrap(~ ADM2_EN, ncol = 2) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
scale_color_manual(values = c("Mean CS" = "steelblue", "Max CS" = "red3")) +
labs(x = "Date", y = "CS", color = "Statistic") +
theme_minimal()
```
Barcharts for times in IPC ≥3/4 & mean IPC for all districts
```{r message=F, warning=F}
# --- CS ≥ 3 ---
ggplot(fewsnet_stats, aes(x = reorder(ADM2_EN, sum_cs_gr2))) +
geom_col(aes(y = sum_cs_gr2), fill = "red3") +
geom_line(aes(y = rescale(mean_CS, to = c(0, max(sum_cs_gr2)), from = c(2, 3)), group = 1),
color = "black") +
geom_point(aes(y = rescale(mean_CS, to = c(0, max(sum_cs_gr2)), from = c(2, 3))),
color = "black") +
scale_y_continuous(
name = "Count of CS ≥ 3",
sec.axis = sec_axis(~rescale(., from = c(0, max(fewsnet_stats$sum_cs_gr2)), to = c(2, 3)),
name = "Mean CS")
) +
coord_flip() +
labs(title = "Districts with CS ≥ 3") +
theme_minimal()
# --- CS ≥ 4 ---
ggplot(fewsnet_stats, aes(x = reorder(ADM2_EN, sum_cs_gr3))) +
geom_col(aes(y = sum_cs_gr3), fill = "steelblue") +
geom_line(aes(y = rescale(mean_CS, to = c(0, max(sum_cs_gr3)), from = c(2, 3)), group = 1),
color = "black") +
geom_point(aes(y = rescale(mean_CS, to = c(0, max(sum_cs_gr3)), from = c(2, 3))),
color = "black") +
scale_y_continuous(
name = "Count of CS ≥ 4",
sec.axis = sec_axis(~rescale(., from = c(0, max(fewsnet_stats$sum_cs_gr3)), to = c(2, 3)),
name = "Mean CS")
) +
coord_flip() +
labs(title = "Districts with CS ≥ 4") +
theme_minimal()
```
Histogramms for districts in IPC ≥3/4 over whole time period
```{r message=F, warning=F}
# group by date to get district nr. per period
counts <- clean_intersections %>%
group_by(date) %>%
summarise(
n_cs3plus = sum(cs_gr2, na.rm = TRUE),
n_cs4plus = sum(cs_gr3, na.rm = TRUE),
.groups = "drop"
)
# plot for CS ≥3
ggplot(counts, aes(x = date, y = n_cs3plus)) +
geom_col(fill = "red3") +
labs(title = "Districts in CS ≥ 3", x = "Date", y = "Count") +
theme_minimal()
# plot for CS ≥4
ggplot(counts, aes(x = date, y = n_cs4plus)) +
geom_col(fill = "steelblue") +
labs(title = "Districts in CS ≥ 4", x = "Date", y = "Count") +
theme_minimal()
```