pacman::p_load(tidyverse, dbplyr, gt)
theme_set(theme_bw())
theme_update(legend.position = "bottom", plot.title.position = "plot")
# Establish read connect to parfit
source("connect-parfit.R")
## Using stored credentials for authentication.
This is intended to calculate all non-pledge facilitated donations between 2023 and 2024. The result should:
Years where a donor gives more than $140K are excluded because 140K USD is the amount given by the smallest major non-pledge donor surveyed and this group reported no counterfactual influence of GWWC on their donations.
inflation_adjustments <- tbl(con, in_schema("impact_surveys","inflation_data"))
# Create a version of the Giving Report with inflation adjusted amounts
giving_report_inf_adj <- tbl(con, in_schema("reporting", "complete_giving_report")) %>%
# Get donation year
mutate(donation_year = year(donation_date)) %>%
# Join inflation adjustments and apply to USD normalised donations
left_join(inflation_adjustments, join_by(donation_year == year)) %>%
mutate(amount_adj_norm = amount_normalized * cumulative_inflation_to_2024)
total_non_pledge_donations_period <- giving_report_inf_adj %>%
filter(
is.na(pledge_id), # Exclude pledge donations
object_type == "payment", # Exclude reported donations
donation_year %in% 2023:2024, # Filter to include donations in period
sum(amount_adj_norm, na.rm = TRUE) <= 139e3, # Exclude where total donations exceed 140K...
.by = c(person_id, donation_year) # For a given person-year
) %>%
# Get sum of normalised, inflation adjusted donations
summarise(total_non_pledge_period = sum(amount_adj_norm, na.rm = T)) %>%
collect()
# Present in a table
total_non_pledge_donations_period %>%
gt() %>%
fmt_currency()
total_non_pledge_period |
---|
$23,485,123.97 |
This is intended to calculate all pledge (10% and Trial Pledge) donations (reported and facilitated) between 2023 and 2024. The result should:
# Get active 10% pledges that started between 2009 and 2024
active_10pledges <- tbl(con, in_schema("pledges","verified_active_pledge")) %>%
mutate(start_date = lower(period),
pledge_cohort = year(start_date)) %>%
filter(pledge_type == "giving_what_we_can",
between(year(start_date),2009, 2024)
)%>%
select(pledge_id = id, start_date, pledge_cohort)
# Get 10% pledgers who give on average 1M per year
average_1M_pledgers <- giving_report_inf_adj %>%
# Get total donations by pledge id
summarise(total_donations = sum(amount_adj_norm, na.rm = T),
.by = c(pledge_id,person_id)) %>%
# Include active 10 percent pledges only
inner_join(
active_10pledges,
by = "pledge_id"
) %>%
mutate(
# Get Pledge age
pledge_age = as.numeric(today() - start_date) / 365.25,
# Get total pledge donations
average_annual_donation = total_donations / pledge_age
) %>%
# Get cases where average annual donations exceed 1M
filter(average_annual_donation >1e6) %>%
collect()
donations_pledge_in_period <- giving_report_inf_adj %>%
# Only include verified pledges with an inner join
inner_join(tbl(con, in_schema("pledges","verified_pledge")), by = join_by(pledge_id == id)) %>%
filter(
between(year(donation_date), 2023,2024), # Include donations in period
lower(period) <= donation_date, # Only include donations made after the pledge
# Exclude very large pledgers with no GWWC influence
!pledge_id %in% average_1M_pledgers$pledge_id
) %>%
summarise(total_pledge_donations_period = sum(amount_adj_norm)) %>%
collect()
## Warning: Missing values are always removed in SQL aggregation functions.
## Use `na.rm = TRUE` to silence this warning
## This warning is displayed once every 8 hours.
donations_pledge_in_period %>%
gt() %>%
fmt_currency()
total_pledge_donations_period |
---|
$66,943,813.08 |
This is intended to calculate the number of new pledges (10% and Trial Pledges) in 2023 and 2024, calculate the number of pledges that ended in this period and the net change in the number of these pledges. Ultimately, I am interested in the net change in 10% pledges and the number of new trial pledges. Both figures should:
net_change_in_pledges <- tbl(con, in_schema("pledges","verified_pledge")) %>%
mutate(start_year = year(lower(period)),
end_year = year(upper(period)),
end_in_period = end_year %in% 2023:2024,
created_year = year(created_at),
created_in_period = created_year %in% 2023:2024
& start_year <= 2025
) %>%
summarise(
new_pledges_in_period = sum(as.integer(created_in_period)),
pledges_ending_in_period = sum(as.integer(end_in_period)),
.by = pledge_type) %>%
mutate(net_change_in_pledges = new_pledges_in_period - pledges_ending_in_period) %>%
collect()
net_change_in_pledges %>%
gt(caption = "New Trial Pledges and net change in 10% Pledges between 2023 and 2024")
pledge_type | new_pledges_in_period | pledges_ending_in_period | net_change_in_pledges |
---|---|---|---|
giving_what_we_can | 1450 | 103 | 1347 |
try_giving | 1259 | 1325 | -66 |
Here I want to estimate the average lifetime donations (facilitated and reported) for a trial pledge. This figure should:
completed_trial_pledges <- tbl(con, in_schema("pledges","verified_pledge")) %>%
filter(pledge_type == "try_giving",
upper(period) < today(), # Trial Pledges Completed Prior to today
year(lower(period)) >= 2014, # Start year after 2014
sql("lower(period) <= current_date - interval '1 year'") # Pledge must have started at least one year ago
)
trial_pledge_donations <- giving_report_inf_adj %>%
inner_join(completed_trial_pledges, by = join_by(pledge_id == id)) %>%
filter(
lower(period) <= donation_date, # Only include donations made after the pledge
) %>%
# Get total donations by Trial Pledge
summarise(total_donations = sum(amount_adj_norm, na.rm = T), .by = pledge_id) %>%
# Join Trial pledges with $0 donated
right_join(completed_trial_pledges, by = join_by(pledge_id == id)) %>%
mutate(
# Set missing total donations to zero
total_donations = coalesce(total_donations, 0),
start_year = year(lower(period)),
pledge_duration_years = as.numeric(upper(period) - lower(period)) / 365.25,
average_donations_per_year = total_donations / pledge_duration_years,
.keep = "used"
) %>%
collect() %>%
# Pledges started at least one year ago
filter(
average_donations_per_year < 1e6 # Annual donation rate should be less than $1M
)
trial_pledge_donations %>%
mutate(start_year = as.character(start_year)) %>%
# Get average across all Trial pledges
bind_rows(
mutate(trial_pledge_donations, start_year = "All")
) %>%
# Get average_since2020 for conservative estimate
bind_rows(mutate(
filter(trial_pledge_donations, start_year>= 2020),
start_year = "From 2020–2023"
)) %>%
summarise(mean_trial_pledge_donations = mean(total_donations),
num_pledges_completed = n(),
.by = start_year) %>%
arrange(start_year) %>%
gt(caption = "Average donations per completed trial pledge") %>%
fmt_currency(mean_trial_pledge_donations)
start_year | mean_trial_pledge_donations | num_pledges_completed |
---|---|---|
2014 | $1,293.98 | 123 |
2015 | $1,412.64 | 286 |
2016 | $4,967.38 | 500 |
2017 | $1,284.38 | 370 |
2018 | $2,153.41 | 382 |
2019 | $2,192.66 | 332 |
2020 | $2,331.18 | 743 |
2021 | $2,149.08 | 915 |
2022 | $1,787.42 | 867 |
2023 | $1,607.85 | 409 |
2024 | $1,570.93 | 182 |
All | $2,204.80 | 5109 |
From 2020–2023 | $1,987.06 | 3116 |
Estimate the proportion of Trial Pledge takers who go on to take a 10% Pledge. This figure should:
I aim to count the number of people who took a Trial Pledge who later took a 10% Pledge, and to separately count the number of people who took a Trial Pledge who took a 10% Pledge within 1 year of their Trial Pledge ending.
active_10pct_pledges <- tbl(con, in_schema("pledges","verified_active_pledge")) %>%
filter(pledge_type == "giving_what_we_can") %>%
select(person_id, id_10pct = id, period_10pct = period,
created_at_10pct = created_at)
trial_pledge_conversion <- tbl(con, in_schema("pledges","verified_pledge")) %>%
filter(pledge_type == "try_giving",
year(upper(period)) < 2025, # Trial Pledges Completed prior to 2025
between(year(lower(period)), 2014, 2023), # Start year after 2014
) %>%
mutate(trial_end_date = upper(period)) %>%
# Join 10% pledges by person_id
left_join(active_10pct_pledges, by = "person_id") %>%
# Get conversion time in years
mutate(
conversion_time_years = (lower(period_10pct) - trial_end_date)/365.25,
start_year = year(lower(period))
) %>%
collect()
trial_pledge_conversion %>%
summarise(
# Number of trial pledges
trial_pledges = n(),
# Unique persons who took GWWC pledge after completion of a trial pledge,
# Taking unique persons prevents double counting those with multiple trial pledges
conversions = n_distinct(person_id[!is.na(conversion_time_years)]),
conversions_in_year = n_distinct(person_id[conversion_time_years <= 1]),
.by = start_year
) %>%
# Add summary row on the bottom
bind_rows(summarise(., start_year = 9999, across(where(is.numeric), sum))) %>%
arrange(start_year) %>%
# Get conversion rates
mutate(
conversion_rate_ever = conversions / trial_pledges,
conversion_rate_within_year = conversions_in_year / trial_pledges
) %>%
# Format in table
gt(caption = "Trial pledge conversion by year of Trial Pledge start") %>%
fmt_percent(contains("rate")) %>%
text_replace("9999", "Total")
start_year | trial_pledges | conversions | conversions_in_year | conversion_rate_ever | conversion_rate_within_year |
---|---|---|---|---|---|
2014 | 123 | 9 | 4 | 7.32% | 3.25% |
2015 | 286 | 23 | 14 | 8.04% | 4.90% |
2016 | 500 | 48 | 30 | 9.60% | 6.00% |
2017 | 370 | 26 | 11 | 7.03% | 2.97% |
2018 | 381 | 44 | 17 | 11.55% | 4.46% |
2019 | 331 | 41 | 31 | 12.39% | 9.37% |
2020 | 736 | 91 | 71 | 12.36% | 9.65% |
2021 | 915 | 93 | 72 | 10.16% | 7.87% |
2022 | 852 | 70 | 60 | 8.22% | 7.04% |
2023 | 398 | 45 | 42 | 11.31% | 10.55% |
Total | 4892 | 490 | 352 | 10.02% | 7.20% |
Plot of pledge conversion for interest:
trial_pledge_conversion %>%
filter(
!is.na(conversion_time_years), # Only successful conversions
# Only conversion for the most recent trial pledge for the person
conversion_time_years == min(conversion_time_years),
.by = person_id) %>%
# Plot
count(conversion_time_years) %>%
arrange(conversion_time_years) %>%
mutate(cum_percent = cumsum(n)/sum(n)) %>%
ggplot(aes(x=conversion_time_years, y =cum_percent)) +
geom_line() +
labs(x = "Trial pledge conversion time (years since end of most recent trial pledge)",
y = NULL,
subtitle = "Cumulative proportion of all conversions") +
ylim(0,1)
In order to determine how the recording adjustments from the PCV/PRAC survey and MPCV surveys should be weighted, we need to understand the relative contribution of both of these groups to total recorded donations.
Here I aim to estimate the proportion of donations that those surveyed in the major pledge counterfactual value survey contributed to total recorded pledge donations in 2023. I will exclude:
sample_pop <- tbl(con, in_schema("pledges","verified_active_pledge")) %>%
filter(
pledge_type == "giving_what_we_can",
between(year(lower(period)), 2009, 2022)
)
giving_report_inf_adj %>%
# Only include verified pledges with an inner join
inner_join(
sample_pop,
by = join_by(pledge_id == id)
) %>%
filter(
year(donation_date) == 2023, # Include donations in 2023
# Exclude very large pledgers with no GWWC influence
!pledge_id %in% average_1M_pledgers$pledge_id
) %>%
left_join(
# Join MPCV Sample to Identify these donors
tbl(con, in_schema("impact_surveys","mpcv2025_sample")),
by = join_by(pledge_id)
) %>%
# Tag these as major pledge donors
mutate(major_pledger_group = !is.na(`2023_top_10`)) %>%
# Get total donated by group
summarise(pledge_donations_2023 = sum(amount_adj_norm),
mean_donations = mean(amount_adj_norm),
number_pledgers = n_distinct(pledge_id), .by = major_pledger_group) %>%
mutate(proportion_donations = pledge_donations_2023/sum(pledge_donations_2023)) %>%
collect() %>%
gt(caption = "Proportion of 2023 donations from the major pledger group (excluding those with >$1 M donations per year)") %>%
fmt_currency(2:3, suffixing = T) %>%
fmt_percent(5)
major_pledger_group | pledge_donations_2023 | mean_donations | number_pledgers | proportion_donations |
---|---|---|---|---|
FALSE | $22.31M | $274.74 | 2826 | 78.30% |
TRUE | $6.18M | $16.44K | 18 | 21.70% |
Here I want to identify the average age of pledgers whose pledges commenced in 2023 and 2024. First I will import age at pledging for all verified pledgers. Because a small minority of pledger ages seem unreliable (e.g., 0 or 115), I will filter to just include those between 16 and 100 years of age at pledging. I am most interested in 10% Pledgers, but I will also analyse Trial Pledgers just for interest.
pledge_age_by_cohort_both <- tbl(con, in_schema("pledges", "verified_pledge")) %>%
left_join(
tbl(con, in_schema("people", "person")),
by = join_by(person_id == id)
) %>%
mutate(pledge_start = lower(period),
pledge_cohort = year(pledge_start)) %>%
filter(between(pledge_cohort,2009,2024)) %>%
mutate(
age_at_pledge = sql("(pledge_start - birth_date) / 365.25")
) %>%
select(age_at_pledge, pledge_cohort, pledge_type) %>%
collect() %>%
# Recode pledgers younger than 15 or older than 100
mutate(age_at_pledge = case_when(between(age_at_pledge, 16,100) ~ age_at_pledge))
pledge_age_by_cohort <- pledge_age_by_cohort_both %>%
filter(pledge_type == "giving_what_we_can")
First let’s visualise age at pledging as a histogram (all pledges):
pledge_age_by_cohort %>%
filter(!is.na(age_at_pledge)) %>%
ggplot(aes(age_at_pledge, fill = pledge_type)) +
geom_histogram(binwidth = 2.5,boundary = 0, fill = "#e86f2b", colour = "black") +
labs(
y = NULL,
subtitle = "Count",
x = "Age at pledge (Years)"
) +
scale_y_continuous(expand = expansion(c(0,0.05))) +
scale_x_continuous(breaks = \(x) seq(0,x[2], 5))
Across all 10% pledgers, the mean age at pledge is 28.9007951 years.
Next, we will estimate the average age at pledging by pledger cohort:
pledge_age_summary <- pledge_age_by_cohort %>%
summarise(
number_pledges = n(),
number_age_provided = sum(!is.na(age_at_pledge)),
percent_age_provided = number_age_provided/number_pledges,
mean_age = mean(age_at_pledge, na.rm = T),
median_age = median(age_at_pledge, na.rm = T),
.by = pledge_cohort) %>%
arrange(pledge_cohort)
pledge_age_summary %>%
ggplot(aes(x = pledge_cohort)) +
geom_line(aes(y = mean_age, colour = "Mean"), linewidth = 1) +
geom_line(aes(y = median_age, colour = "Median"), linewidth = 1) +
labs(
subtitle = "Age (years)",
y = NULL,
x = "Year pledged",
colour = "Statistic"
) +
scale_y_continuous(limits = c(0,NA), breaks = seq(0,35,5))+
scale_x_continuous(breaks = \(x) 2009:x[2])
pledge_age_summary %>%
gt(caption = "Pledge age statistics by 10% Pledge start year") %>%
cols_label_with(fn = ~str_to_sentence(str_replace_all(.x,"_"," "))) %>%
fmt_percent(contains("percent")) %>%
fmt_number(ends_with("_age"))
Pledge cohort | Number pledges | Number age provided | Percent age provided | Mean age | Median age |
---|---|---|---|---|---|
2009 | 33 | 24 | 72.73% | 32.49 | 28.99 |
2010 | 37 | 35 | 94.59% | 28.11 | 25.60 |
2011 | 98 | 89 | 90.82% | 29.91 | 24.74 |
2012 | 102 | 78 | 76.47% | 30.08 | 24.88 |
2013 | 117 | 103 | 88.03% | 27.59 | 24.19 |
2014 | 399 | 387 | 96.99% | 26.03 | 23.66 |
2015 | 687 | 678 | 98.69% | 26.65 | 24.51 |
2016 | 979 | 965 | 98.57% | 27.07 | 24.46 |
2017 | 920 | 893 | 97.07% | 27.49 | 25.15 |
2018 | 618 | 587 | 94.98% | 26.41 | 25.03 |
2019 | 533 | 528 | 99.06% | 27.24 | 26.04 |
2020 | 1043 | 1032 | 98.95% | 30.20 | 27.78 |
2021 | 1231 | 1216 | 98.78% | 30.66 | 27.92 |
2022 | 1520 | 1444 | 95.00% | 31.04 | 28.64 |
2023 | 584 | 578 | 98.97% | 29.64 | 26.75 |
2024 | 790 | 711 | 90.00% | 30.15 | 27.41 |
Finally, lets get the mean age of pledgers who pledged in 2023 and 2024:
pledge_age_by_cohort %>%
filter(pledge_cohort %in% c(2023,2024)) %>%
summarise(
number_pledges = n(),
number_age_provided = sum(!is.na(age_at_pledge)),
percent_age_provided = number_age_provided/number_pledges,
mean_age = mean(age_at_pledge, na.rm = T),
median_age = median(age_at_pledge, na.rm = T)
) %>%
gt(
caption = "Pledge age statistics for 10% Pledges starting in 2023 and 2024"
)%>%
cols_label_with(fn = ~str_to_sentence(str_replace_all(.x,"_"," "))) %>%
fmt_percent(contains("percent")) %>%
fmt_number(ends_with("_age"))
Number pledges | Number age provided | Percent age provided | Mean age | Median age |
---|---|---|---|---|
1374 | 1289 | 93.81% | 29.92 | 27.16 |
pledge_age_by_cohort_trial <- pledge_age_by_cohort_both %>%
filter(pledge_type == "try_giving", between(pledge_cohort,2014,2024))
pledge_age_by_cohort_trial %>%
filter(!is.na(age_at_pledge)) %>%
ggplot(aes(age_at_pledge, fill = pledge_type)) +
geom_histogram(binwidth = 2.5,boundary = 0, fill = "#0074ba", colour = "black") +
labs(
y = NULL,
subtitle = "Count",
x = "Age at pledge (Years)"
) +
scale_y_continuous(expand = expansion(c(0,0.05)))+
scale_x_continuous(breaks = \(x) seq(0,x[2], 5))
Across all Trial pledgers, the mean age at pledge is 30.8355689 years.
Next, we will estimate the average age at pledging by pledger cohort:
pledge_age_summary_trial <- pledge_age_by_cohort_trial %>%
summarise(
number_pledges = n(),
number_age_provided = sum(!is.na(age_at_pledge)),
percent_age_provided = number_age_provided/number_pledges,
mean_age = mean(age_at_pledge, na.rm = T),
median_age = median(age_at_pledge, na.rm = T),
.by = pledge_cohort) %>%
arrange(pledge_cohort)
pledge_age_summary_trial %>%
ggplot(aes(x = pledge_cohort)) +
geom_line(aes(y = mean_age, colour = "Mean"), linewidth = 1) +
geom_line(aes(y = median_age, colour = "Median"), linewidth = 1) +
labs(
subtitle = "Age (years)",
y = NULL,
x = "Year pledged",
colour = "Statistic"
) +
scale_y_continuous(limits = c(0,NA), breaks = seq(0,35,5))+
scale_x_continuous(breaks = \(x) 2009:x[2])
pledge_age_summary_trial %>%
gt(caption = "Pledge age statistics by Trial Pledge start year") %>%
cols_label_with(fn = ~str_to_sentence(str_replace_all(.x,"_"," "))) %>%
fmt_percent(contains("percent")) %>%
fmt_number(ends_with("_age"))
Pledge cohort | Number pledges | Number age provided | Percent age provided | Mean age | Median age |
---|---|---|---|---|---|
2014 | 125 | 104 | 83.20% | 28.43 | 25.69 |
2015 | 292 | 290 | 99.32% | 29.13 | 26.79 |
2016 | 506 | 503 | 99.41% | 28.43 | 26.19 |
2017 | 377 | 368 | 97.61% | 28.52 | 26.78 |
2018 | 383 | 367 | 95.82% | 27.98 | 26.32 |
2019 | 335 | 329 | 98.21% | 29.43 | 27.56 |
2020 | 783 | 780 | 99.62% | 31.28 | 29.45 |
2021 | 998 | 995 | 99.70% | 31.73 | 29.59 |
2022 | 978 | 943 | 96.42% | 33.19 | 30.67 |
2023 | 481 | 464 | 96.47% | 31.38 | 29.26 |
2024 | 781 | 709 | 90.78% | 31.68 | 29.25 |
Here, I aim to model how average annual donations of 10% pledgers change over time. In our previous evaluation, we concluded from the available data that there was no decay in average pledge donations over time. Here we seek to re-investigate this.
First let’s create a dataframe with one row for each full year that has been experienced by pledgers who pledged between 2009 and 2023. We will only include up to pledge years that started in 2023. As usual, we will exclude pledgers whose average annual recorded donations exceed $1M
# Identify pledges we want to include
filtered_pledges <- tbl(con, in_schema("pledges", "verified_active_pledge")) %>%
mutate(
start_date = lower(period),
pledge_cohort = year(start_date)
) %>%
filter(
pledge_type == "giving_what_we_can", # 10% Pledges
between(pledge_cohort,2009, 2023),
!id %in% average_1M_pledgers$pledge_id
)
# Get donations by pledge year for each of these pledges
pledger_donations_by_year_since_pledge <- filtered_pledges %>%
inner_join(giving_report_inf_adj, by = join_by(id == pledge_id)) %>%
filter(donation_date >=start_date) %>%
mutate(years_since_pledge = date_part("year", age(donation_date, start_date))) %>%
summarise(
amount_adj_norm = sum(amount_adj_norm, na.rm = T),
.by = c(years_since_pledge, id)
) %>%
collect()
# Create a dataframe that includes all years for all pledgers and join the yearly donations
pledge_years <- filtered_pledges %>%
select(id, pledge_cohort) %>%
collect() %>%
reframe(years_since_pledge = 0:(2023-pledge_cohort),.by = c(id, pledge_cohort)) %>%
left_join(pledger_donations_by_year_since_pledge, by = join_by(id, years_since_pledge)) %>%
# Where no donations are recorded for a pledger in a year, set the amount to 0
mutate(amount_adj_norm = coalesce(amount_adj_norm,0))
Next, let’s report average donations by pledge cohort and years since pledging. First I’ll create a table with this raw data, for reference:
donations_by_year <- pledge_years %>%
summarise(
average_donations_all = mean(amount_adj_norm),
average_donations_recorded = mean(amount_adj_norm[amount_adj_norm>0]),
num_pledgers = n(),
num_any_donations_recorded = sum(amount_adj_norm > 0),
percent_any_donations_recorded = num_any_donations_recorded/num_pledgers,
.by = c(pledge_cohort, years_since_pledge)
) %>%
arrange(pledge_cohort, years_since_pledge)
donations_by_year %>%
gt() %>%
cols_label_with(fn=\(x) str_to_sentence(str_replace_all(x,"_"," "))) %>%
fmt_percent(percent_any_donations_recorded) %>%
fmt_currency(starts_with("average_donations"), suffixing = T) %>%
cols_label_with(starts_with("average_donations"), ~ str_c(.x," (2024 USD)"))
Pledge cohort | Years since pledge | Average donations all (2024 USD) | Average donations recorded (2024 USD) | Num pledgers | Num any donations recorded | Percent any donations recorded |
---|---|---|---|---|---|---|
2009 | 0 | $5.77K | $17.30K | 33 | 11 | 33.33% |
2009 | 1 | $7.11K | $21.32K | 33 | 11 | 33.33% |
2009 | 2 | $13.82K | $20.73K | 33 | 22 | 66.67% |
2009 | 3 | $22.88K | $34.32K | 33 | 22 | 66.67% |
2009 | 4 | $14.82K | $25.74K | 33 | 19 | 57.58% |
2009 | 5 | $23.94K | $41.59K | 33 | 19 | 57.58% |
2009 | 6 | $31.07K | $51.26K | 33 | 20 | 60.61% |
2009 | 7 | $21.69K | $55.05K | 33 | 13 | 39.39% |
2009 | 8 | $22.81K | $53.77K | 33 | 14 | 42.42% |
2009 | 9 | $23.06K | $54.36K | 33 | 14 | 42.42% |
2009 | 10 | $31.18K | $68.59K | 33 | 15 | 45.45% |
2009 | 11 | $24.93K | $68.55K | 33 | 12 | 36.36% |
2009 | 12 | $24.12K | $56.86K | 33 | 14 | 42.42% |
2009 | 13 | $18.69K | $56.06K | 33 | 11 | 33.33% |
2009 | 14 | $15.17K | $71.51K | 33 | 7 | 21.21% |
2010 | 0 | $2.55K | $11.49K | 36 | 8 | 22.22% |
2010 | 1 | $6.40K | $10.47K | 36 | 22 | 61.11% |
2010 | 2 | $5.03K | $7.87K | 36 | 23 | 63.89% |
2010 | 3 | $4.10K | $7.03K | 36 | 21 | 58.33% |
2010 | 4 | $3.30K | $6.98K | 36 | 17 | 47.22% |
2010 | 5 | $2.75K | $6.20K | 36 | 16 | 44.44% |
2010 | 6 | $5.83K | $17.48K | 36 | 12 | 33.33% |
2010 | 7 | $2.39K | $8.60K | 36 | 10 | 27.78% |
2010 | 8 | $2.64K | $9.51K | 36 | 10 | 27.78% |
2010 | 9 | $1.90K | $6.23K | 36 | 11 | 30.56% |
2010 | 10 | $2.33K | $7.00K | 36 | 12 | 33.33% |
2010 | 11 | $2.48K | $7.43K | 36 | 12 | 33.33% |
2010 | 12 | $1.71K | $6.84K | 36 | 9 | 25.00% |
2010 | 13 | $1.49K | $5.35K | 36 | 10 | 27.78% |
2011 | 0 | $4.50K | $7.17K | 94 | 59 | 62.77% |
2011 | 1 | $7.54K | $11.82K | 94 | 60 | 63.83% |
2011 | 2 | $9.85K | $18.52K | 94 | 50 | 53.19% |
2011 | 3 | $6.78K | $12.50K | 94 | 51 | 54.26% |
2011 | 4 | $6.38K | $12.75K | 94 | 47 | 50.00% |
2011 | 5 | $15.39K | $46.66K | 94 | 31 | 32.98% |
2011 | 6 | $5.47K | $17.13K | 94 | 30 | 31.91% |
2011 | 7 | $4.43K | $14.89K | 94 | 28 | 29.79% |
2011 | 8 | $7.07K | $24.62K | 94 | 27 | 28.72% |
2011 | 9 | $5.40K | $20.32K | 94 | 25 | 26.60% |
2011 | 10 | $13.43K | $46.76K | 94 | 27 | 28.72% |
2011 | 11 | $7.17K | $25.91K | 94 | 26 | 27.66% |
2011 | 12 | $12.77K | $42.86K | 94 | 28 | 29.79% |
2012 | 0 | $3.23K | $6.66K | 97 | 47 | 48.45% |
2012 | 1 | $10.70K | $17.90K | 97 | 58 | 59.79% |
2012 | 2 | $9.17K | $15.07K | 97 | 59 | 60.82% |
2012 | 3 | $11.02K | $19.44K | 97 | 55 | 56.70% |
2012 | 4 | $9.44K | $20.81K | 97 | 44 | 45.36% |
2012 | 5 | $13.43K | $33.40K | 97 | 39 | 40.21% |
2012 | 6 | $25.37K | $61.53K | 97 | 40 | 41.24% |
2012 | 7 | $21.70K | $50.12K | 97 | 42 | 43.30% |
2012 | 8 | $23.81K | $55.00K | 97 | 42 | 43.30% |
2012 | 9 | $77.53K | $183.43K | 97 | 41 | 42.27% |
2012 | 10 | $17.63K | $43.85K | 97 | 39 | 40.21% |
2012 | 11 | $6.49K | $17.48K | 97 | 36 | 37.11% |
2013 | 0 | $7.02K | $11.88K | 110 | 65 | 59.09% |
2013 | 1 | $8.72K | $15.99K | 110 | 60 | 54.55% |
2013 | 2 | $8.50K | $19.07K | 110 | 49 | 44.55% |
2013 | 3 | $9.89K | $25.90K | 110 | 42 | 38.18% |
2013 | 4 | $8.21K | $22.58K | 110 | 40 | 36.36% |
2013 | 5 | $2.55K | $8.51K | 110 | 33 | 30.00% |
2013 | 6 | $5.41K | $16.08K | 110 | 37 | 33.64% |
2013 | 7 | $5.81K | $18.79K | 110 | 34 | 30.91% |
2013 | 8 | $12.38K | $38.92K | 110 | 35 | 31.82% |
2013 | 9 | $3.40K | $12.89K | 110 | 29 | 26.36% |
2013 | 10 | $4.49K | $15.93K | 110 | 31 | 28.18% |
2014 | 0 | $12.60K | $22.16K | 394 | 224 | 56.85% |
2014 | 1 | $20.21K | $48.55K | 394 | 164 | 41.62% |
2014 | 2 | $12.78K | $35.47K | 394 | 142 | 36.04% |
2014 | 3 | $5.86K | $17.90K | 394 | 129 | 32.74% |
2014 | 4 | $7.48K | $24.17K | 394 | 122 | 30.96% |
2014 | 5 | $4.11K | $13.83K | 394 | 117 | 29.70% |
2014 | 6 | $4.26K | $13.87K | 394 | 121 | 30.71% |
2014 | 7 | $5.78K | $19.64K | 394 | 116 | 29.44% |
2014 | 8 | $5.90K | $22.37K | 394 | 104 | 26.40% |
2014 | 9 | $4.94K | $18.71K | 394 | 104 | 26.40% |
2015 | 0 | $3.73K | $7.89K | 673 | 318 | 47.25% |
2015 | 1 | $3.39K | $9.39K | 673 | 243 | 36.11% |
2015 | 2 | $4.33K | $12.28K | 673 | 237 | 35.22% |
2015 | 3 | $3.87K | $11.57K | 673 | 225 | 33.43% |
2015 | 4 | $4.61K | $14.70K | 673 | 211 | 31.35% |
2015 | 5 | $3.20K | $10.87K | 673 | 198 | 29.42% |
2015 | 6 | $3.63K | $12.40K | 673 | 197 | 29.27% |
2015 | 7 | $2.83K | $10.03K | 673 | 190 | 28.23% |
2015 | 8 | $3.16K | $11.80K | 673 | 180 | 26.75% |
2016 | 0 | $3.18K | $6.61K | 960 | 462 | 48.12% |
2016 | 1 | $2.99K | $8.56K | 960 | 335 | 34.90% |
2016 | 2 | $4.71K | $13.91K | 960 | 325 | 33.85% |
2016 | 3 | $7.72K | $23.23K | 960 | 319 | 33.23% |
2016 | 4 | $3.04K | $9.82K | 960 | 297 | 30.94% |
2016 | 5 | $3.13K | $10.50K | 960 | 286 | 29.79% |
2016 | 6 | $2.25K | $7.76K | 960 | 278 | 28.96% |
2016 | 7 | $2.86K | $10.64K | 960 | 258 | 26.88% |
2017 | 0 | $2.59K | $5.58K | 900 | 418 | 46.44% |
2017 | 1 | $2.19K | $6.60K | 900 | 299 | 33.22% |
2017 | 2 | $2.63K | $8.05K | 900 | 294 | 32.67% |
2017 | 3 | $2.82K | $9.00K | 900 | 282 | 31.33% |
2017 | 4 | $3.80K | $12.78K | 900 | 268 | 29.78% |
2017 | 5 | $3.17K | $11.40K | 900 | 250 | 27.78% |
2017 | 6 | $2.66K | $10.35K | 900 | 231 | 25.67% |
2018 | 0 | $2.69K | $4.62K | 605 | 352 | 58.18% |
2018 | 1 | $2.72K | $6.64K | 605 | 248 | 40.99% |
2018 | 2 | $3.70K | $10.93K | 605 | 205 | 33.88% |
2018 | 3 | $5.06K | $16.47K | 605 | 186 | 30.74% |
2018 | 4 | $2.39K | $9.05K | 605 | 160 | 26.45% |
2018 | 5 | $1.99K | $8.49K | 605 | 142 | 23.47% |
2019 | 0 | $5.09K | $7.89K | 521 | 336 | 64.49% |
2019 | 1 | $7.37K | $16.33K | 521 | 235 | 45.11% |
2019 | 2 | $3.34K | $8.49K | 521 | 205 | 39.35% |
2019 | 3 | $3.18K | $9.80K | 521 | 169 | 32.44% |
2019 | 4 | $2.71K | $9.29K | 521 | 152 | 29.17% |
2020 | 0 | $5.64K | $7.90K | 1018 | 727 | 71.41% |
2020 | 1 | $4.72K | $9.32K | 1018 | 516 | 50.69% |
2020 | 2 | $3.49K | $8.33K | 1018 | 426 | 41.85% |
2020 | 3 | $3.26K | $8.91K | 1018 | 372 | 36.54% |
2021 | 0 | $8.33K | $11.72K | 1207 | 858 | 71.09% |
2021 | 1 | $4.96K | $9.62K | 1207 | 622 | 51.53% |
2021 | 2 | $4.20K | $9.66K | 1207 | 525 | 43.50% |
2022 | 0 | $3.55K | $5.65K | 1468 | 922 | 62.81% |
2022 | 1 | $3.16K | $7.27K | 1468 | 637 | 43.39% |
2023 | 0 | $3.99K | $6.35K | 571 | 359 | 62.87% |
In our previous evaluation, we attempted to determine the trend in average pledge donations over time by focusing on the overall average in pledge donations across all pledgers (including those who do not report). Here we attempt to separate out the average pledge donations among pledgers who record donations and the percentage of pledgers who report donations and model these separately.
First we can plot how average donations trended over time for those pledgers who recorded donations. We will separae pledgers into groups of cohorts based on when they started their pledge:
slice_cohorts <- function(x, cut_years = c(2008,2016,2019,2023)) {
upper_bounds <- tail(cut_years, -1)
lower_bounds <- head(cut_years, -1)+1
cut_labels <- sapply(1:length(upper_bounds), \(i) str_c(lower_bounds[i], "–", upper_bounds[i]))
cut(x, cut_years, cut_labels)
}
donations_by_year_cohort <- pledge_years %>%
mutate(pledge_cohort_group = slice_cohorts(pledge_cohort, c(2008,2012,2016,2019,2023))) %>%
bind_rows(pledge_years) %>%
mutate(pledge_cohort_group = fct_na_value_to_level(pledge_cohort_group, "All")) %>%
summarise(
average_donations_all = mean(amount_adj_norm),
average_donations_recorded = mean(amount_adj_norm[amount_adj_norm>0]),
num_pledgers = n(),
num_any_donations_recorded = sum(amount_adj_norm > 0),
percent_any_donations_recorded = num_any_donations_recorded/num_pledgers,
.by = c(years_since_pledge,pledge_cohort_group)
)
donations_by_year_cohort %>%
ggplot(aes(x = years_since_pledge, y = average_donations_recorded, colour = pledge_cohort_group)) +
geom_line()+
geom_point() +
geom_line() +
scale_y_continuous(
limits = c(0,NA),
labels = scales::label_currency(scale = 1e-3, suffix = "K"),
breaks = \(x) seq(0,x[2],5e3)) +
labs(x = "Years since pledge", y = NULL,
subtitle = "Average annual donations among those who recorded any [2024 USD]",
colour = "Cohorts")
The 2009 to 2012 cohorts are based on much smaller numbers and see much more dramatic swings. They don’t seem very representative of the later cohorts. Let’s look at this plot again, without this group:
donations_by_year_cohort %>%
filter(!pledge_cohort_group %in% c("2009–2012", "All")) %>%
ggplot(aes(x = years_since_pledge, y = average_donations_recorded, colour = pledge_cohort_group)) +
geom_line()+
geom_point() +
geom_line() +
scale_y_continuous(
limits = c(0,NA),
labels = scales::label_currency(scale = 1e-3, suffix = "K"),
breaks = \(x) seq(0,x[2],5e3)) +
labs(x = "Years since pledge", y = NULL,
subtitle = "Average annual donations among those who recorded any [2024 USD]",
colour = "Cohorts") +
scale_x_continuous(breaks = \(x) seq(0,x[2],1))
Looking at this, it appears there is was an initial increase in average annual donations among pledgers who record donations for the 2013–2016 group and the 2017–2019 group. This increase was less noticeable for the 2020–2023 group of cohorts, which are likely the most representative of the current generation of pledgers.
Let’s look at the more recent cohorts on a cohort level rather than in aggregate:
donations_by_year <- pledge_years %>%
summarise(
average_donations_all = mean(amount_adj_norm),
average_donations_recorded = mean(amount_adj_norm[amount_adj_norm>0]),
num_pledgers = n(),
num_any_donations_recorded = sum(amount_adj_norm > 0),
percent_any_donations_recorded = num_any_donations_recorded/num_pledgers,
.by = c(years_since_pledge,pledge_cohort)
)
donations_by_year%>%
filter(pledge_cohort >= 2017) %>%
mutate(pledge_cohort_group = slice_cohorts(pledge_cohort, c(2008,2012,2016,2019,2023))) %>%
ggplot(aes(x = years_since_pledge, y = average_donations_recorded,
colour = pledge_cohort_group, group = factor(pledge_cohort))) +
geom_line()+
geom_point() +
geom_line() +
scale_y_continuous(
limits = c(0,NA),
labels = scales::label_currency(scale = 1e-3, suffix = "K"),
breaks = \(x) seq(0,x[2],5e3)) +
scale_x_continuous(breaks = \(x) seq(0,x[2],1))+
labs(x = "Years since pledge", y = NULL,
subtitle = "Average annual donations among those who recorded any [2024 USD]",
colour = "Cohort group")
Eyeballing these results, average donations among recording pledgers for the most recent cohorts do seem notably more stable than they did for the 2017 to 2019 cohorts. Based on these results, I will assume that average donations among recording donations are relatively stable for reporting donors. As such, I will model the average amount that recording pledgers give annually as a constant. I will calculate this constant as the average the annual donations of recording active 10% pledgers who pledged between 2020 and 2023.
pledge_years %>%
filter(between(pledge_cohort,2020,2023)) %>%
summarise(
average_donations_recorded = mean(amount_adj_norm[amount_adj_norm>0])
) %>%
gt(caption = "Average annual donations among recording pledgers (pledge cohorts 2020–2023)") %>%
cols_label(1 ~ "Average annual donations (2024 USD)") %>%
fmt_currency()
Average annual donations (2024 USD) |
---|
$8,490.77 |
The second component of pledge value over time is the proportion of pledgers for whom GWWC is recording donations in any given year. To get an idea of what this looks like, first we will plot the proportion of pledgers in each cohort group who reported a donation by the years since they pledged.
donations_by_year %>%
ggplot(aes(x = years_since_pledge, y = percent_any_donations_recorded, colour = factor(pledge_cohort))) +
geom_line()+
geom_point() +
geom_line() +
scale_y_continuous(
limits = c(0,NA),
labels = scales::label_percent(),
breaks = \(x) seq(0,x[2],0.1)) +
scale_x_continuous(breaks = \(x) seq(0,x[2], 1)) +
labs(x = "Years since pledge", y = NULL,
subtitle = "Percentage of pledgers recording donations",
colour = "Cohorts")
When we include all cohorts, the plot is messy and it is difficult to identify a consistent trend. But when we separate pledgers who pledged prior to 2012, a relatively consistent trend emerges across the more recent cohorts:
donations_by_year %>%
mutate(pledge_batch = if_else(pledge_cohort >=2013, "2013–2023", "2009–2012")) %>%
ggplot(aes(x = years_since_pledge, y = percent_any_donations_recorded,
colour = pledge_batch, group = factor(pledge_cohort))) +
geom_line()+
geom_point() +
geom_line() +
scale_y_continuous(
limits = c(0,NA),
labels = scales::label_percent(),
breaks = \(x) seq(0,x[2],0.1)) +
labs(x = "Years since pledge", y = NULL,
subtitle = "Percentage of pledgers recording donations",
colour = "Cohorts")
In this trend, about 50% or more of pledgers are reporting in their first year, but this quickly drops off at a slowing rate. possibly stabilising around 25% for most cohorts.
This is even seen more clearly when we plot by the cohort groups that we used earlier:
donations_by_year_cohort %>%
filter(!pledge_cohort_group %in% c("All", "2009–2012")) %>%
ggplot(aes(x = years_since_pledge, y = percent_any_donations_recorded, colour = pledge_cohort_group)) +
geom_line()+
geom_point() +
geom_line() +
scale_y_continuous(
limits = c(0,NA),
labels = scales::label_percent(),
breaks = \(x) seq(0,x[2],0.1)) +
scale_x_continuous(breaks = \(x) seq(0,x[2], 1)) +
labs(x = "Years since pledge", y = NULL,
subtitle = "Percentage of pledgers recording donations",
colour = "Cohorts")
This looks like a trend that we can model as part of our estimation of pledge lifetime value.
The trends between the different cohorts are sufficiently similar for me to feel comfortable aggregating the cohorts when modelling the decay, which will provide us with more years of data to work with.
First, let’s plot the percentage of pledges reporting donations over time across all of these cohorts.
gwwc_colours <- list(yellow = "#E86F2B", orange = "#CC4115", red = "#BA2934", pink = "#DA3552", purple = "#BA175B")
recent_pledges <- pledge_years %>%
filter(pledge_cohort >=2013) %>%
summarise(
average_donations_all = mean(amount_adj_norm),
average_donations_recorded = mean(amount_adj_norm[amount_adj_norm>0]),
num_pledgers = n(),
num_any_donations_recorded = sum(amount_adj_norm > 0),
percent_any_donations_recorded = num_any_donations_recorded/num_pledgers,
.by = c(years_since_pledge)
)
y_axis2_scaling = 5e-5
recent_pledges %>%
ggplot(aes(x = years_since_pledge, y = percent_any_donations_recorded)) +
geom_col(aes(y = num_pledgers*y_axis2_scaling,
fill = "Number of pledgers who experienced year")) +
geom_point(aes(colour = "Percentage of pledgers reporting")) +
geom_line(aes(colour = "Percentage of pledgers reporting"), linewidth = 1) +
scale_y_continuous(
limits = c(0,NA),
labels = scales::label_percent(),
breaks = seq(0,1,0.1),
sec.axis = sec_axis(
transform = ~.x/y_axis2_scaling,
labels = scales::label_number(),
breaks = \(x) seq(0,x[2],1e3),
name = "Number"
)) +
scale_x_continuous(breaks = \(x) seq(0,x[2], 1)) +
scale_colour_manual(
values = c(
"Number of pledgers who experienced year" = gwwc_colours$yellow,
"Percentage of pledgers reporting" = gwwc_colours$red
)
,aesthetics = c("colour", "fill")) +
labs(
title = "Percentage of pledges with recorded donations by year since pledging",
subtitle = "Pledges started in 2013 or later",
x = "Years since pledge",
y = "Percentage",
colour = NULL,
fill = NULL
)
Now, from this data, let’s model the change in the percentage who donated over time. From the data we have to-date, it looks like the donations could follow an exponential decay function towards a floor. Let’s fit a model to this data using a nonlinear least square method, weighting each year by the number of pledgers:
# Choose start values
start_vals <- list(y0 = 0.6, r = 0.5, c = 0.2)
# Model average donations as an exponential decay with a floor, using years since pledge as predictor
nls_model <- nls(
percent_any_donations_recorded ~ c + (y0-c) * (1-r) ^ years_since_pledge,
data = recent_pledges,
weights = num_pledgers,
start = start_vals
)
Here are the summary statistics for this model:
# Summarise model
summary(nls_model)
##
## Formula: percent_any_donations_recorded ~ c + (y0 - c) * (1 - r)^years_since_pledge
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## y0 0.594388 0.007593 78.28 7.90e-13 ***
## r 0.479817 0.027550 17.42 1.20e-07 ***
## c 0.277945 0.007635 36.41 3.55e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7102 on 8 degrees of freedom
##
## Number of iterations to convergence: 5
## Achieved convergence tolerance: 8.452e-06
# Get coefficients
coefs <- coef(nls_model)
Results:
All three of these parameters are highly statistically significant.
If we plot our residuals, all are relatively small in magnitude and they seem reasonably balanced around 0
# Plot residuals of the model
plot(recent_pledges$years_since_pledge, resid(nls_model),
main = "Residuals vs Years", xlab = "Years Since Pledge", ylab = "Residuals",
pch = 16)
abline(h = 0, col = "red")
Let’s plot the predicted values from this model against our observed values from the pledge data we have. Let’s plot the predicted values all the way out to 35 years:
# Predict donations over time
predicted_data <- tibble(
years_since_pledge = seq(0, 35, length.out = 100),
) %>%
mutate(predicted_percentage_donating = coefs["c"] + (coefs["y0"]-coefs["c"]) * (1-coefs["r"]) ^ years_since_pledge)
recent_pledges %>%
ggplot(aes(x = years_since_pledge, y = percent_any_donations_recorded)) +
# Plot predicted
geom_line(
data = predicted_data,
aes(y = predicted_percentage_donating, colour = "Predicted"),
linewidth = 1) +
# Plot observed
geom_point(aes(colour = "Observed [2013 to 2023 pledgers]")) +
# Format plot
scale_y_continuous(
limits = c(0,NA),
labels = scales::label_percent(),
breaks = seq(0,1,0.1)
) +
scale_x_continuous(breaks = \(x) seq(0,x[2], 5)) +
scale_colour_manual(
values = c(
"Predicted" = gwwc_colours$yellow,
"Observed [2013 to 2023 pledgers]" = gwwc_colours$red
)
,aesthetics = c("colour", "fill")) +
labs(
title = "Modelling percentage of pledges with recorded donations by year since pledging",
subtitle = "Pledges started in 2013 or later",
x = "Years since pledge",
y = "Percentage pledgers recording donations",
colour = NULL,
fill = NULL
)
We can’t be certain that this pattern will hold — it is entirely possible that the % of reporting pledgers will drop again after a certain number of years. However, we also think it is possible that the average donations of recording pledgers could go up for recent pledgers, as has been observed for past pledgers. Rather than trying to deal with these competing considerations, we are assuming that these uncertainties approximately cancel out.
Thus, our the inputs we will use for modelling pledge value over time are:
tibble(
Names = c("Estimated % of 10% Pledgers recording donations in their first year",
"Estimated annual decay rate in % of 10% Pledgers recording donations",
"Estimated floor in % of 10% Pledgers recording donations"),
Coefficients = coefs
) %>%
gt(caption = "Parameters for predicting percentage of 10% Pledgers recording donations by year since pledging") %>%
fmt_percent()
Names | Coefficients |
---|---|
Estimated % of 10% Pledgers recording donations in their first year | 59.44% |
Estimated annual decay rate in % of 10% Pledgers recording donations | 47.98% |
Estimated floor in % of 10% Pledgers recording donations | 27.79% |
knitr::knit_exit()