pacman::p_load(tidyverse, dbplyr, gt)
theme_set(theme_bw())
theme_update(legend.position = "bottom", plot.title.position = "plot")
# Establish read connect to parfit
source("connect-parfit.R")
## Using stored credentials for authentication.

Non-pledge donations 2023–2024

This is intended to calculate all non-pledge facilitated donations between 2023 and 2024. The result should:

Years where a donor gives more than $140K are excluded because 140K USD is the amount given by the smallest major non-pledge donor surveyed and this group reported no counterfactual influence of GWWC on their donations.

inflation_adjustments <- tbl(con, in_schema("impact_surveys","inflation_data"))

# Create a version of the Giving Report with inflation adjusted amounts
giving_report_inf_adj <- tbl(con, in_schema("reporting", "complete_giving_report")) %>% 
  # Get donation year
  mutate(donation_year = year(donation_date)) %>% 
  # Join inflation adjustments and apply to USD normalised donations
  left_join(inflation_adjustments, join_by(donation_year == year)) %>% 
  mutate(amount_adj_norm = amount_normalized * cumulative_inflation_to_2024) 

total_non_pledge_donations_period <- giving_report_inf_adj %>% 
  filter(
    is.na(pledge_id), # Exclude pledge donations
    object_type == "payment",  # Exclude reported donations
    donation_year %in% 2023:2024, # Filter to include donations in period
    sum(amount_adj_norm, na.rm = TRUE) <= 139e3, # Exclude where total donations exceed 140K...
    .by = c(person_id, donation_year) # For a given person-year
    )  %>% 
  # Get sum of normalised, inflation adjusted donations
  summarise(total_non_pledge_period = sum(amount_adj_norm, na.rm = T)) %>% 
  collect() 

# Present in a table
total_non_pledge_donations_period %>% 
  gt() %>% 
  fmt_currency()
total_non_pledge_period
$23,485,123.97

Pledge donations in 2023–2024

This is intended to calculate all pledge (10% and Trial Pledge) donations (reported and facilitated) between 2023 and 2024. The result should:

# Get active 10% pledges that started between 2009 and 2024
active_10pledges <- tbl(con, in_schema("pledges","verified_active_pledge")) %>% 
  mutate(start_date = lower(period),
         pledge_cohort = year(start_date)) %>% 
  filter(pledge_type == "giving_what_we_can",
         between(year(start_date),2009, 2024)
         )%>% 
  select(pledge_id = id, start_date, pledge_cohort)


# Get 10% pledgers who give on average 1M per year
average_1M_pledgers <- giving_report_inf_adj %>% 
  # Get total donations by pledge id
  summarise(total_donations = sum(amount_adj_norm, na.rm = T), 
            .by = c(pledge_id,person_id)) %>% 
  # Include active 10 percent pledges only
  inner_join(
   active_10pledges,
    by = "pledge_id"
  ) %>% 
  mutate(
    # Get Pledge age
    pledge_age = as.numeric(today() - start_date) / 365.25,
    # Get total pledge donations
    average_annual_donation = total_donations / pledge_age
    ) %>% 
  # Get cases where average annual donations exceed 1M
  filter(average_annual_donation >1e6) %>%  
  collect() 
donations_pledge_in_period <- giving_report_inf_adj %>% 
  # Only include verified pledges with an inner join
  inner_join(tbl(con, in_schema("pledges","verified_pledge")), by = join_by(pledge_id == id)) %>% 
  filter(
    between(year(donation_date), 2023,2024), # Include donations in period
    lower(period) <= donation_date, # Only include donations made after the pledge
    # Exclude very large pledgers with no GWWC influence
    !pledge_id %in% average_1M_pledgers$pledge_id
    ) %>% 
  summarise(total_pledge_donations_period = sum(amount_adj_norm)) %>% 
  collect()
## Warning: Missing values are always removed in SQL aggregation functions.
## Use `na.rm = TRUE` to silence this warning
## This warning is displayed once every 8 hours.
donations_pledge_in_period %>% 
  gt() %>% 
  fmt_currency()
total_pledge_donations_period
$66,943,813.08

Number of new pledges in period

This is intended to calculate the number of new pledges (10% and Trial Pledges) in 2023 and 2024, calculate the number of pledges that ended in this period and the net change in the number of these pledges. Ultimately, I am interested in the net change in 10% pledges and the number of new trial pledges. Both figures should:

net_change_in_pledges <- tbl(con, in_schema("pledges","verified_pledge")) %>% 
  mutate(start_year = year(lower(period)),
         end_year = year(upper(period)),
         end_in_period = end_year %in% 2023:2024,
         created_year = year(created_at),
         created_in_period = created_year %in% 2023:2024 
         & start_year <= 2025
        ) %>% 
  summarise(
    new_pledges_in_period = sum(as.integer(created_in_period)),
    pledges_ending_in_period = sum(as.integer(end_in_period)),
    .by = pledge_type) %>% 
  mutate(net_change_in_pledges = new_pledges_in_period - pledges_ending_in_period) %>% 
  collect() 


net_change_in_pledges %>%
  gt(caption = "New Trial Pledges and net change in 10% Pledges between 2023 and 2024")
New Trial Pledges and net change in 10% Pledges between 2023 and 2024
pledge_type new_pledges_in_period pledges_ending_in_period net_change_in_pledges
giving_what_we_can 1450 103 1347
try_giving 1259 1325 -66

Average lifetime trial pledge donations

Here I want to estimate the average lifetime donations (facilitated and reported) for a trial pledge. This figure should:

completed_trial_pledges <- tbl(con, in_schema("pledges","verified_pledge")) %>% 
  filter(pledge_type == "try_giving",
         upper(period) < today(), # Trial Pledges Completed Prior to today
         year(lower(period)) >= 2014, # Start year after 2014
         sql("lower(period) <= current_date - interval '1 year'") # Pledge must have started at least one year ago
         ) 

trial_pledge_donations <- giving_report_inf_adj %>% 
  inner_join(completed_trial_pledges, by = join_by(pledge_id == id)) %>% 
  filter(
     lower(period) <= donation_date, # Only include donations made after the pledge
  ) %>% 
  # Get total donations by Trial Pledge
  summarise(total_donations = sum(amount_adj_norm, na.rm = T), .by = pledge_id) %>% 
  # Join Trial pledges with $0 donated
  right_join(completed_trial_pledges, by = join_by(pledge_id == id)) %>% 
  mutate(
    # Set missing total donations to zero
    total_donations = coalesce(total_donations, 0),
    start_year = year(lower(period)),
    pledge_duration_years =  as.numeric(upper(period) - lower(period)) / 365.25,
    average_donations_per_year = total_donations / pledge_duration_years,
    .keep = "used"
    ) %>% 
  collect() %>% 
  # Pledges started at least one year ago
  filter(
    average_donations_per_year < 1e6 # Annual donation rate should be less than $1M 
    ) 
  
trial_pledge_donations %>% 
  mutate(start_year = as.character(start_year)) %>% 
  # Get average across all Trial pledges
  bind_rows(
    mutate(trial_pledge_donations, start_year = "All")
  ) %>% 
  # Get average_since2020 for conservative estimate
  bind_rows(mutate(
    filter(trial_pledge_donations, start_year>= 2020), 
    start_year = "From 2020–2023"
    )) %>% 
  summarise(mean_trial_pledge_donations = mean(total_donations), 
            num_pledges_completed = n(),
            .by = start_year) %>% 
  arrange(start_year) %>% 
  gt(caption = "Average donations per completed trial pledge") %>% 
  fmt_currency(mean_trial_pledge_donations)
Average donations per completed trial pledge
start_year mean_trial_pledge_donations num_pledges_completed
2014 $1,293.98 123
2015 $1,412.64 286
2016 $4,967.38 500
2017 $1,284.38 370
2018 $2,153.41 382
2019 $2,192.66 332
2020 $2,331.18 743
2021 $2,149.08 915
2022 $1,787.42 867
2023 $1,607.85 409
2024 $1,570.93 182
All $2,204.80 5109
From 2020–2023 $1,987.06 3116

Trial Pledge conversion rate

Estimate the proportion of Trial Pledge takers who go on to take a 10% Pledge. This figure should:

I aim to count the number of people who took a Trial Pledge who later took a 10% Pledge, and to separately count the number of people who took a Trial Pledge who took a 10% Pledge within 1 year of their Trial Pledge ending.

active_10pct_pledges <- tbl(con, in_schema("pledges","verified_active_pledge")) %>% 
  filter(pledge_type == "giving_what_we_can") %>% 
  select(person_id, id_10pct = id, period_10pct = period, 
         created_at_10pct = created_at)

trial_pledge_conversion <-  tbl(con, in_schema("pledges","verified_pledge")) %>% 
  filter(pledge_type == "try_giving",
         year(upper(period)) < 2025, # Trial Pledges Completed prior to 2025
         between(year(lower(period)), 2014, 2023), # Start year after 2014
         )  %>% 
  mutate(trial_end_date = upper(period)) %>% 
  # Join 10% pledges by person_id
  left_join(active_10pct_pledges, by = "person_id") %>% 
  # Get conversion time in years
  mutate(
    conversion_time_years = (lower(period_10pct) - trial_end_date)/365.25,
    start_year = year(lower(period))
    ) %>% 
  collect()
  
trial_pledge_conversion %>% 
  summarise(
    # Number of trial pledges
    trial_pledges = n(),
    # Unique persons who took GWWC pledge after completion of a trial pledge, 
    # Taking unique persons prevents double counting those with multiple trial pledges
    conversions = n_distinct(person_id[!is.na(conversion_time_years)]),
    conversions_in_year = n_distinct(person_id[conversion_time_years <= 1]),
    .by = start_year
  ) %>% 
  # Add summary row on the bottom
  bind_rows(summarise(., start_year = 9999, across(where(is.numeric), sum))) %>% 
  arrange(start_year) %>% 
  # Get conversion rates
  mutate(
    conversion_rate_ever = conversions / trial_pledges,
    conversion_rate_within_year = conversions_in_year / trial_pledges
    ) %>% 
  # Format in table
  gt(caption = "Trial pledge conversion by year of Trial Pledge start") %>% 
  fmt_percent(contains("rate")) %>% 
  text_replace("9999", "Total")
Trial pledge conversion by year of Trial Pledge start
start_year trial_pledges conversions conversions_in_year conversion_rate_ever conversion_rate_within_year
2014 123 9 4 7.32% 3.25%
2015 286 23 14 8.04% 4.90%
2016 500 48 30 9.60% 6.00%
2017 370 26 11 7.03% 2.97%
2018 381 44 17 11.55% 4.46%
2019 331 41 31 12.39% 9.37%
2020 736 91 71 12.36% 9.65%
2021 915 93 72 10.16% 7.87%
2022 852 70 60 8.22% 7.04%
2023 398 45 42 11.31% 10.55%
Total 4892 490 352 10.02% 7.20%

Plot of pledge conversion for interest:

trial_pledge_conversion %>% 
  filter(
    !is.na(conversion_time_years), # Only successful conversions
    # Only conversion for the most recent trial pledge for the person
    conversion_time_years == min(conversion_time_years),
    .by = person_id) %>% 
  # Plot
  count(conversion_time_years) %>%
  arrange(conversion_time_years) %>% 
  mutate(cum_percent = cumsum(n)/sum(n)) %>% 
  ggplot(aes(x=conversion_time_years, y =cum_percent)) +
  geom_line() +
  labs(x = "Trial pledge conversion time (years since end of most recent trial pledge)",
       y = NULL,
       subtitle = "Cumulative proportion of all conversions") +
  ylim(0,1) 

Fraction of 2023 pledge donations from surveyed large donors

In order to determine how the recording adjustments from the PCV/PRAC survey and MPCV surveys should be weighted, we need to understand the relative contribution of both of these groups to total recorded donations.

Here I aim to estimate the proportion of donations that those surveyed in the major pledge counterfactual value survey contributed to total recorded pledge donations in 2023. I will exclude:

sample_pop <- tbl(con, in_schema("pledges","verified_active_pledge")) %>% 
  filter(
    pledge_type == "giving_what_we_can",
    between(year(lower(period)), 2009, 2022)
  )


giving_report_inf_adj %>% 
  # Only include verified pledges with an inner join
  inner_join(
    sample_pop,
    by = join_by(pledge_id == id)
    ) %>% 
  filter(
    year(donation_date) == 2023, # Include donations in 2023
    # Exclude very large pledgers with no GWWC influence
    !pledge_id %in% average_1M_pledgers$pledge_id
    ) %>% 
  left_join(
    # Join MPCV Sample to Identify these donors
    tbl(con, in_schema("impact_surveys","mpcv2025_sample")), 
    by = join_by(pledge_id)
  ) %>% 
  # Tag these as major pledge donors
  mutate(major_pledger_group = !is.na(`2023_top_10`)) %>% 
  # Get total donated by group
  summarise(pledge_donations_2023 = sum(amount_adj_norm),
            mean_donations = mean(amount_adj_norm),
            number_pledgers = n_distinct(pledge_id), .by = major_pledger_group) %>% 
  mutate(proportion_donations = pledge_donations_2023/sum(pledge_donations_2023)) %>% 
  collect() %>% 
  gt(caption = "Proportion of 2023 donations from the major pledger group (excluding those with >$1 M donations per year)") %>% 
  fmt_currency(2:3, suffixing = T) %>% 
  fmt_percent(5)
Proportion of 2023 donations from the major pledger group (excluding those with >$1 M donations per year)
major_pledger_group pledge_donations_2023 mean_donations number_pledgers proportion_donations
FALSE $22.31M $274.74 2826 78.30%
TRUE $6.18M $16.44K 18 21.70%

Average age at pledge

Here I want to identify the average age of pledgers whose pledges commenced in 2023 and 2024. First I will import age at pledging for all verified pledgers. Because a small minority of pledger ages seem unreliable (e.g., 0 or 115), I will filter to just include those between 16 and 100 years of age at pledging. I am most interested in 10% Pledgers, but I will also analyse Trial Pledgers just for interest.

pledge_age_by_cohort_both <- tbl(con, in_schema("pledges", "verified_pledge")) %>% 
  left_join(
    tbl(con, in_schema("people", "person")),
    by = join_by(person_id == id)
  ) %>% 
  mutate(pledge_start = lower(period), 
         pledge_cohort = year(pledge_start)) %>% 
  filter(between(pledge_cohort,2009,2024)) %>% 
  mutate(
    age_at_pledge = sql("(pledge_start - birth_date) / 365.25")
  ) %>% 
  select(age_at_pledge, pledge_cohort, pledge_type) %>% 
  collect() %>% 
  # Recode pledgers younger than 15 or older than 100
  mutate(age_at_pledge = case_when(between(age_at_pledge, 16,100) ~ age_at_pledge))

10% Pledge

pledge_age_by_cohort <- pledge_age_by_cohort_both %>% 
  filter(pledge_type == "giving_what_we_can")

First let’s visualise age at pledging as a histogram (all pledges):

pledge_age_by_cohort %>% 
  filter(!is.na(age_at_pledge)) %>% 
  ggplot(aes(age_at_pledge, fill = pledge_type)) +
  geom_histogram(binwidth = 2.5,boundary = 0, fill = "#e86f2b", colour = "black") +
  labs(
    y = NULL,
    subtitle = "Count",
    x = "Age at pledge (Years)"
  ) +
  scale_y_continuous(expand = expansion(c(0,0.05))) +
  scale_x_continuous(breaks = \(x) seq(0,x[2], 5))

Across all 10% pledgers, the mean age at pledge is 28.9007951 years.

Next, we will estimate the average age at pledging by pledger cohort:

pledge_age_summary <- pledge_age_by_cohort %>% 
  summarise(
    number_pledges = n(),
    number_age_provided = sum(!is.na(age_at_pledge)),
    percent_age_provided = number_age_provided/number_pledges,
    mean_age = mean(age_at_pledge, na.rm = T),
    median_age = median(age_at_pledge, na.rm = T),
    .by = pledge_cohort) %>% 
  arrange(pledge_cohort)

pledge_age_summary %>% 
  ggplot(aes(x = pledge_cohort)) +
  geom_line(aes(y = mean_age, colour = "Mean"), linewidth = 1) +
  geom_line(aes(y = median_age, colour = "Median"), linewidth = 1) +
  labs(
    subtitle = "Age (years)",
    y = NULL,
    x = "Year pledged",
    colour = "Statistic"
  ) +
  scale_y_continuous(limits = c(0,NA), breaks = seq(0,35,5))+
  scale_x_continuous(breaks = \(x) 2009:x[2])

pledge_age_summary %>% 
  gt(caption = "Pledge age statistics by 10% Pledge start year") %>% 
  cols_label_with(fn = ~str_to_sentence(str_replace_all(.x,"_"," "))) %>% 
  fmt_percent(contains("percent")) %>% 
  fmt_number(ends_with("_age"))
Pledge age statistics by 10% Pledge start year
Pledge cohort Number pledges Number age provided Percent age provided Mean age Median age
2009 33 24 72.73% 32.49 28.99
2010 37 35 94.59% 28.11 25.60
2011 98 89 90.82% 29.91 24.74
2012 102 78 76.47% 30.08 24.88
2013 117 103 88.03% 27.59 24.19
2014 399 387 96.99% 26.03 23.66
2015 687 678 98.69% 26.65 24.51
2016 979 965 98.57% 27.07 24.46
2017 920 893 97.07% 27.49 25.15
2018 618 587 94.98% 26.41 25.03
2019 533 528 99.06% 27.24 26.04
2020 1043 1032 98.95% 30.20 27.78
2021 1231 1216 98.78% 30.66 27.92
2022 1520 1444 95.00% 31.04 28.64
2023 584 578 98.97% 29.64 26.75
2024 790 711 90.00% 30.15 27.41

Finally, lets get the mean age of pledgers who pledged in 2023 and 2024:

pledge_age_by_cohort %>% 
  filter(pledge_cohort %in% c(2023,2024)) %>% 
  summarise(
    number_pledges = n(),
    number_age_provided = sum(!is.na(age_at_pledge)),
    percent_age_provided = number_age_provided/number_pledges,
    mean_age = mean(age_at_pledge, na.rm = T),
    median_age = median(age_at_pledge, na.rm = T)
    ) %>% 
  gt(
    caption = "Pledge age statistics for 10% Pledges starting in 2023 and 2024"
  )%>% 
  cols_label_with(fn = ~str_to_sentence(str_replace_all(.x,"_"," "))) %>% 
  fmt_percent(contains("percent")) %>% 
  fmt_number(ends_with("_age"))
Pledge age statistics for 10% Pledges starting in 2023 and 2024
Number pledges Number age provided Percent age provided Mean age Median age
1374 1289 93.81% 29.92 27.16

Trial pledge

pledge_age_by_cohort_trial <- pledge_age_by_cohort_both %>% 
  filter(pledge_type == "try_giving", between(pledge_cohort,2014,2024)) 
pledge_age_by_cohort_trial %>% 
  filter(!is.na(age_at_pledge)) %>% 
  ggplot(aes(age_at_pledge, fill = pledge_type)) +
  geom_histogram(binwidth = 2.5,boundary = 0, fill = "#0074ba", colour = "black") +
  labs(
    y = NULL,
    subtitle = "Count",
    x = "Age at pledge (Years)"
  ) +
  scale_y_continuous(expand = expansion(c(0,0.05)))+
  scale_x_continuous(breaks = \(x) seq(0,x[2], 5))

Across all Trial pledgers, the mean age at pledge is 30.8355689 years.

Next, we will estimate the average age at pledging by pledger cohort:

pledge_age_summary_trial <- pledge_age_by_cohort_trial %>% 
  summarise(
    number_pledges = n(),
    number_age_provided = sum(!is.na(age_at_pledge)),
    percent_age_provided = number_age_provided/number_pledges,
    mean_age = mean(age_at_pledge, na.rm = T),
    median_age = median(age_at_pledge, na.rm = T),
    .by = pledge_cohort) %>% 
  arrange(pledge_cohort)

pledge_age_summary_trial %>% 
  ggplot(aes(x = pledge_cohort)) +
  geom_line(aes(y = mean_age, colour = "Mean"), linewidth = 1) +
  geom_line(aes(y = median_age, colour = "Median"), linewidth = 1) +
  labs(
    subtitle = "Age (years)",
    y = NULL,
    x = "Year pledged",
    colour = "Statistic"
  ) +
  scale_y_continuous(limits = c(0,NA), breaks = seq(0,35,5))+
  scale_x_continuous(breaks = \(x) 2009:x[2])

pledge_age_summary_trial %>% 
  gt(caption = "Pledge age statistics by Trial Pledge start year") %>% 
  cols_label_with(fn = ~str_to_sentence(str_replace_all(.x,"_"," "))) %>% 
  fmt_percent(contains("percent")) %>% 
  fmt_number(ends_with("_age"))
Pledge age statistics by Trial Pledge start year
Pledge cohort Number pledges Number age provided Percent age provided Mean age Median age
2014 125 104 83.20% 28.43 25.69
2015 292 290 99.32% 29.13 26.79
2016 506 503 99.41% 28.43 26.19
2017 377 368 97.61% 28.52 26.78
2018 383 367 95.82% 27.98 26.32
2019 335 329 98.21% 29.43 27.56
2020 783 780 99.62% 31.28 29.45
2021 998 995 99.70% 31.73 29.59
2022 978 943 96.42% 33.19 30.67
2023 481 464 96.47% 31.38 29.26
2024 781 709 90.78% 31.68 29.25

Average annual 10% pledge donations

Here, I aim to model how average annual donations of 10% pledgers change over time. In our previous evaluation, we concluded from the available data that there was no decay in average pledge donations over time. Here we seek to re-investigate this.

First let’s create a dataframe with one row for each full year that has been experienced by pledgers who pledged between 2009 and 2023. We will only include up to pledge years that started in 2023. As usual, we will exclude pledgers whose average annual recorded donations exceed $1M

# Identify pledges we want to include
filtered_pledges <- tbl(con, in_schema("pledges", "verified_active_pledge")) %>% 
  mutate(
     start_date = lower(period),
    pledge_cohort = year(start_date)
        ) %>% 
  filter(
    pledge_type == "giving_what_we_can", # 10% Pledges
    between(pledge_cohort,2009, 2023),
    !id %in% average_1M_pledgers$pledge_id
  ) 

# Get donations by pledge year for each of these pledges
pledger_donations_by_year_since_pledge <- filtered_pledges %>% 
  inner_join(giving_report_inf_adj, by = join_by(id == pledge_id)) %>% 
  filter(donation_date >=start_date) %>% 
  mutate(years_since_pledge = date_part("year", age(donation_date, start_date))) %>% 
  summarise(
    amount_adj_norm = sum(amount_adj_norm, na.rm = T),
    .by = c(years_since_pledge, id)
  ) %>% 
  collect()


# Create a dataframe that includes all years for all pledgers and join the yearly donations
pledge_years <- filtered_pledges %>% 
  select(id, pledge_cohort) %>% 
  collect() %>% 
  reframe(years_since_pledge = 0:(2023-pledge_cohort),.by = c(id, pledge_cohort)) %>% 
  left_join(pledger_donations_by_year_since_pledge, by = join_by(id, years_since_pledge)) %>%
  # Where no donations are recorded for a pledger in a year, set the amount to 0
  mutate(amount_adj_norm = coalesce(amount_adj_norm,0))

Next, let’s report average donations by pledge cohort and years since pledging. First I’ll create a table with this raw data, for reference:

donations_by_year <- pledge_years %>% 
  summarise(
    average_donations_all = mean(amount_adj_norm),
    average_donations_recorded = mean(amount_adj_norm[amount_adj_norm>0]),
    num_pledgers = n(),
    num_any_donations_recorded = sum(amount_adj_norm > 0),
    percent_any_donations_recorded = num_any_donations_recorded/num_pledgers,
    .by = c(pledge_cohort, years_since_pledge)
    ) %>% 
  arrange(pledge_cohort, years_since_pledge)


donations_by_year %>% 
  gt() %>% 
  cols_label_with(fn=\(x) str_to_sentence(str_replace_all(x,"_"," "))) %>% 
  fmt_percent(percent_any_donations_recorded) %>% 
  fmt_currency(starts_with("average_donations"), suffixing = T) %>% 
  cols_label_with(starts_with("average_donations"), ~ str_c(.x," (2024 USD)"))
Pledge cohort Years since pledge Average donations all (2024 USD) Average donations recorded (2024 USD) Num pledgers Num any donations recorded Percent any donations recorded
2009 0 $5.77K $17.30K 33 11 33.33%
2009 1 $7.11K $21.32K 33 11 33.33%
2009 2 $13.82K $20.73K 33 22 66.67%
2009 3 $22.88K $34.32K 33 22 66.67%
2009 4 $14.82K $25.74K 33 19 57.58%
2009 5 $23.94K $41.59K 33 19 57.58%
2009 6 $31.07K $51.26K 33 20 60.61%
2009 7 $21.69K $55.05K 33 13 39.39%
2009 8 $22.81K $53.77K 33 14 42.42%
2009 9 $23.06K $54.36K 33 14 42.42%
2009 10 $31.18K $68.59K 33 15 45.45%
2009 11 $24.93K $68.55K 33 12 36.36%
2009 12 $24.12K $56.86K 33 14 42.42%
2009 13 $18.69K $56.06K 33 11 33.33%
2009 14 $15.17K $71.51K 33 7 21.21%
2010 0 $2.55K $11.49K 36 8 22.22%
2010 1 $6.40K $10.47K 36 22 61.11%
2010 2 $5.03K $7.87K 36 23 63.89%
2010 3 $4.10K $7.03K 36 21 58.33%
2010 4 $3.30K $6.98K 36 17 47.22%
2010 5 $2.75K $6.20K 36 16 44.44%
2010 6 $5.83K $17.48K 36 12 33.33%
2010 7 $2.39K $8.60K 36 10 27.78%
2010 8 $2.64K $9.51K 36 10 27.78%
2010 9 $1.90K $6.23K 36 11 30.56%
2010 10 $2.33K $7.00K 36 12 33.33%
2010 11 $2.48K $7.43K 36 12 33.33%
2010 12 $1.71K $6.84K 36 9 25.00%
2010 13 $1.49K $5.35K 36 10 27.78%
2011 0 $4.50K $7.17K 94 59 62.77%
2011 1 $7.54K $11.82K 94 60 63.83%
2011 2 $9.85K $18.52K 94 50 53.19%
2011 3 $6.78K $12.50K 94 51 54.26%
2011 4 $6.38K $12.75K 94 47 50.00%
2011 5 $15.39K $46.66K 94 31 32.98%
2011 6 $5.47K $17.13K 94 30 31.91%
2011 7 $4.43K $14.89K 94 28 29.79%
2011 8 $7.07K $24.62K 94 27 28.72%
2011 9 $5.40K $20.32K 94 25 26.60%
2011 10 $13.43K $46.76K 94 27 28.72%
2011 11 $7.17K $25.91K 94 26 27.66%
2011 12 $12.77K $42.86K 94 28 29.79%
2012 0 $3.23K $6.66K 97 47 48.45%
2012 1 $10.70K $17.90K 97 58 59.79%
2012 2 $9.17K $15.07K 97 59 60.82%
2012 3 $11.02K $19.44K 97 55 56.70%
2012 4 $9.44K $20.81K 97 44 45.36%
2012 5 $13.43K $33.40K 97 39 40.21%
2012 6 $25.37K $61.53K 97 40 41.24%
2012 7 $21.70K $50.12K 97 42 43.30%
2012 8 $23.81K $55.00K 97 42 43.30%
2012 9 $77.53K $183.43K 97 41 42.27%
2012 10 $17.63K $43.85K 97 39 40.21%
2012 11 $6.49K $17.48K 97 36 37.11%
2013 0 $7.02K $11.88K 110 65 59.09%
2013 1 $8.72K $15.99K 110 60 54.55%
2013 2 $8.50K $19.07K 110 49 44.55%
2013 3 $9.89K $25.90K 110 42 38.18%
2013 4 $8.21K $22.58K 110 40 36.36%
2013 5 $2.55K $8.51K 110 33 30.00%
2013 6 $5.41K $16.08K 110 37 33.64%
2013 7 $5.81K $18.79K 110 34 30.91%
2013 8 $12.38K $38.92K 110 35 31.82%
2013 9 $3.40K $12.89K 110 29 26.36%
2013 10 $4.49K $15.93K 110 31 28.18%
2014 0 $12.60K $22.16K 394 224 56.85%
2014 1 $20.21K $48.55K 394 164 41.62%
2014 2 $12.78K $35.47K 394 142 36.04%
2014 3 $5.86K $17.90K 394 129 32.74%
2014 4 $7.48K $24.17K 394 122 30.96%
2014 5 $4.11K $13.83K 394 117 29.70%
2014 6 $4.26K $13.87K 394 121 30.71%
2014 7 $5.78K $19.64K 394 116 29.44%
2014 8 $5.90K $22.37K 394 104 26.40%
2014 9 $4.94K $18.71K 394 104 26.40%
2015 0 $3.73K $7.89K 673 318 47.25%
2015 1 $3.39K $9.39K 673 243 36.11%
2015 2 $4.33K $12.28K 673 237 35.22%
2015 3 $3.87K $11.57K 673 225 33.43%
2015 4 $4.61K $14.70K 673 211 31.35%
2015 5 $3.20K $10.87K 673 198 29.42%
2015 6 $3.63K $12.40K 673 197 29.27%
2015 7 $2.83K $10.03K 673 190 28.23%
2015 8 $3.16K $11.80K 673 180 26.75%
2016 0 $3.18K $6.61K 960 462 48.12%
2016 1 $2.99K $8.56K 960 335 34.90%
2016 2 $4.71K $13.91K 960 325 33.85%
2016 3 $7.72K $23.23K 960 319 33.23%
2016 4 $3.04K $9.82K 960 297 30.94%
2016 5 $3.13K $10.50K 960 286 29.79%
2016 6 $2.25K $7.76K 960 278 28.96%
2016 7 $2.86K $10.64K 960 258 26.88%
2017 0 $2.59K $5.58K 900 418 46.44%
2017 1 $2.19K $6.60K 900 299 33.22%
2017 2 $2.63K $8.05K 900 294 32.67%
2017 3 $2.82K $9.00K 900 282 31.33%
2017 4 $3.80K $12.78K 900 268 29.78%
2017 5 $3.17K $11.40K 900 250 27.78%
2017 6 $2.66K $10.35K 900 231 25.67%
2018 0 $2.69K $4.62K 605 352 58.18%
2018 1 $2.72K $6.64K 605 248 40.99%
2018 2 $3.70K $10.93K 605 205 33.88%
2018 3 $5.06K $16.47K 605 186 30.74%
2018 4 $2.39K $9.05K 605 160 26.45%
2018 5 $1.99K $8.49K 605 142 23.47%
2019 0 $5.09K $7.89K 521 336 64.49%
2019 1 $7.37K $16.33K 521 235 45.11%
2019 2 $3.34K $8.49K 521 205 39.35%
2019 3 $3.18K $9.80K 521 169 32.44%
2019 4 $2.71K $9.29K 521 152 29.17%
2020 0 $5.64K $7.90K 1018 727 71.41%
2020 1 $4.72K $9.32K 1018 516 50.69%
2020 2 $3.49K $8.33K 1018 426 41.85%
2020 3 $3.26K $8.91K 1018 372 36.54%
2021 0 $8.33K $11.72K 1207 858 71.09%
2021 1 $4.96K $9.62K 1207 622 51.53%
2021 2 $4.20K $9.66K 1207 525 43.50%
2022 0 $3.55K $5.65K 1468 922 62.81%
2022 1 $3.16K $7.27K 1468 637 43.39%
2023 0 $3.99K $6.35K 571 359 62.87%

In our previous evaluation, we attempted to determine the trend in average pledge donations over time by focusing on the overall average in pledge donations across all pledgers (including those who do not report). Here we attempt to separate out the average pledge donations among pledgers who record donations and the percentage of pledgers who report donations and model these separately.

Component 1: Average annual recorded donations among recorded pledgers

First we can plot how average donations trended over time for those pledgers who recorded donations. We will separae pledgers into groups of cohorts based on when they started their pledge:

slice_cohorts <- function(x, cut_years = c(2008,2016,2019,2023)) {
  
  upper_bounds <- tail(cut_years, -1)
  lower_bounds <- head(cut_years, -1)+1
  
  cut_labels <- sapply(1:length(upper_bounds), \(i) str_c(lower_bounds[i], "–", upper_bounds[i]))

  cut(x, cut_years, cut_labels)
  }


donations_by_year_cohort <- pledge_years %>% 
  mutate(pledge_cohort_group = slice_cohorts(pledge_cohort, c(2008,2012,2016,2019,2023))) %>% 
  bind_rows(pledge_years) %>% 
  mutate(pledge_cohort_group = fct_na_value_to_level(pledge_cohort_group, "All")) %>% 
   summarise(
    average_donations_all = mean(amount_adj_norm),
    average_donations_recorded = mean(amount_adj_norm[amount_adj_norm>0]),
    num_pledgers = n(),
    num_any_donations_recorded = sum(amount_adj_norm > 0),
    percent_any_donations_recorded = num_any_donations_recorded/num_pledgers,
    .by = c(years_since_pledge,pledge_cohort_group)
    )
donations_by_year_cohort %>% 
  ggplot(aes(x = years_since_pledge, y = average_donations_recorded, colour = pledge_cohort_group)) +
  geom_line()+
  geom_point() +
  geom_line() +
  scale_y_continuous(
    limits = c(0,NA), 
    labels = scales::label_currency(scale = 1e-3, suffix = "K"),
    breaks = \(x) seq(0,x[2],5e3)) +
  labs(x = "Years since pledge", y = NULL, 
       
       subtitle = "Average annual donations among those who recorded any [2024 USD]",
       colour = "Cohorts")

The 2009 to 2012 cohorts are based on much smaller numbers and see much more dramatic swings. They don’t seem very representative of the later cohorts. Let’s look at this plot again, without this group:

donations_by_year_cohort %>% 
  filter(!pledge_cohort_group %in% c("2009–2012", "All")) %>% 
  ggplot(aes(x = years_since_pledge, y = average_donations_recorded, colour = pledge_cohort_group)) +
  geom_line()+
  geom_point() +
  geom_line() +
  scale_y_continuous(
    limits = c(0,NA), 
    labels = scales::label_currency(scale = 1e-3, suffix = "K"),
    breaks = \(x) seq(0,x[2],5e3)) +
  labs(x = "Years since pledge", y = NULL, 
       subtitle = "Average annual donations among those who recorded any [2024 USD]",
       colour = "Cohorts") +
  scale_x_continuous(breaks = \(x) seq(0,x[2],1))

Looking at this, it appears there is was an initial increase in average annual donations among pledgers who record donations for the 2013–2016 group and the 2017–2019 group. This increase was less noticeable for the 2020–2023 group of cohorts, which are likely the most representative of the current generation of pledgers.

Let’s look at the more recent cohorts on a cohort level rather than in aggregate:

donations_by_year <- pledge_years %>% 
   summarise(
    average_donations_all = mean(amount_adj_norm),
    average_donations_recorded = mean(amount_adj_norm[amount_adj_norm>0]),
    num_pledgers = n(),
    num_any_donations_recorded = sum(amount_adj_norm > 0),
    percent_any_donations_recorded = num_any_donations_recorded/num_pledgers,
    .by = c(years_since_pledge,pledge_cohort)
    )
donations_by_year%>% 
  filter(pledge_cohort >= 2017) %>% 
  mutate(pledge_cohort_group = slice_cohorts(pledge_cohort, c(2008,2012,2016,2019,2023))) %>% 

  ggplot(aes(x = years_since_pledge, y = average_donations_recorded, 
             colour = pledge_cohort_group, group = factor(pledge_cohort))) +
  geom_line()+
  geom_point() +
  geom_line() +
  scale_y_continuous(
    limits = c(0,NA), 
    labels = scales::label_currency(scale = 1e-3, suffix = "K"),
    breaks = \(x) seq(0,x[2],5e3)) +
  scale_x_continuous(breaks = \(x) seq(0,x[2],1))+
  labs(x = "Years since pledge", y = NULL, 
       subtitle = "Average annual donations among those who recorded any [2024 USD]",
       colour = "Cohort group")

Eyeballing these results, average donations among recording pledgers for the most recent cohorts do seem notably more stable than they did for the 2017 to 2019 cohorts. Based on these results, I will assume that average donations among recording donations are relatively stable for reporting donors. As such, I will model the average amount that recording pledgers give annually as a constant. I will calculate this constant as the average the annual donations of recording active 10% pledgers who pledged between 2020 and 2023.

pledge_years %>% 
  filter(between(pledge_cohort,2020,2023)) %>% 
   summarise(
    average_donations_recorded = mean(amount_adj_norm[amount_adj_norm>0])
    ) %>% 
  gt(caption = "Average annual donations among recording pledgers (pledge cohorts 2020–2023)") %>% 
  cols_label(1 ~ "Average annual donations (2024 USD)") %>% 
  fmt_currency()
Average annual donations among recording pledgers (pledge cohorts 2020–2023)
Average annual donations (2024 USD)
$8,490.77

Component 2: Proportion of pledgers recording donations

The second component of pledge value over time is the proportion of pledgers for whom GWWC is recording donations in any given year. To get an idea of what this looks like, first we will plot the proportion of pledgers in each cohort group who reported a donation by the years since they pledged.

donations_by_year %>% 
  ggplot(aes(x = years_since_pledge, y = percent_any_donations_recorded, colour = factor(pledge_cohort))) +
  geom_line()+
  geom_point() +
  geom_line() +
  scale_y_continuous(
    limits = c(0,NA), 
    labels = scales::label_percent(),
    breaks = \(x) seq(0,x[2],0.1)) +
  scale_x_continuous(breaks = \(x) seq(0,x[2], 1)) +
  labs(x = "Years since pledge", y = NULL, 
       subtitle = "Percentage of pledgers recording donations",
       colour = "Cohorts")

When we include all cohorts, the plot is messy and it is difficult to identify a consistent trend. But when we separate pledgers who pledged prior to 2012, a relatively consistent trend emerges across the more recent cohorts:

donations_by_year %>% 
  mutate(pledge_batch = if_else(pledge_cohort >=2013, "2013–2023", "2009–2012")) %>% 
  ggplot(aes(x = years_since_pledge, y = percent_any_donations_recorded, 
             colour  = pledge_batch, group = factor(pledge_cohort))) +
  geom_line()+
  geom_point() +
  geom_line() +
  scale_y_continuous(
    limits = c(0,NA), 
    labels = scales::label_percent(),
    breaks = \(x) seq(0,x[2],0.1)) +
  labs(x = "Years since pledge", y = NULL, 
       
       subtitle = "Percentage of pledgers recording donations",
       colour = "Cohorts")

In this trend, about 50% or more of pledgers are reporting in their first year, but this quickly drops off at a slowing rate. possibly stabilising around 25% for most cohorts.

This is even seen more clearly when we plot by the cohort groups that we used earlier:

donations_by_year_cohort %>% 
  filter(!pledge_cohort_group %in% c("All", "2009–2012")) %>% 
  ggplot(aes(x = years_since_pledge, y = percent_any_donations_recorded, colour = pledge_cohort_group)) +
  geom_line()+
  geom_point() +
  geom_line() +
  scale_y_continuous(
    limits = c(0,NA), 
    labels = scales::label_percent(),
    breaks = \(x) seq(0,x[2],0.1)) +
  scale_x_continuous(breaks = \(x) seq(0,x[2], 1)) +
  labs(x = "Years since pledge", y = NULL, 
       subtitle = "Percentage of pledgers recording donations",
       colour = "Cohorts")

This looks like a trend that we can model as part of our estimation of pledge lifetime value.

The trends between the different cohorts are sufficiently similar for me to feel comfortable aggregating the cohorts when modelling the decay, which will provide us with more years of data to work with.

First, let’s plot the percentage of pledges reporting donations over time across all of these cohorts.

gwwc_colours <- list(yellow = "#E86F2B", orange = "#CC4115", red = "#BA2934", pink = "#DA3552", purple = "#BA175B")


recent_pledges <- pledge_years %>% 
  filter(pledge_cohort >=2013) %>% 
   summarise(
    average_donations_all = mean(amount_adj_norm),
    average_donations_recorded = mean(amount_adj_norm[amount_adj_norm>0]),
    num_pledgers = n(),
    num_any_donations_recorded = sum(amount_adj_norm > 0),
    percent_any_donations_recorded = num_any_donations_recorded/num_pledgers,
    .by = c(years_since_pledge)
    )

y_axis2_scaling = 5e-5

recent_pledges %>% 
  ggplot(aes(x = years_since_pledge, y = percent_any_donations_recorded)) +
  geom_col(aes(y = num_pledgers*y_axis2_scaling, 
               fill = "Number of pledgers who experienced year")) +
  geom_point(aes(colour = "Percentage of pledgers reporting")) +
  geom_line(aes(colour = "Percentage of pledgers reporting"), linewidth = 1) +
  scale_y_continuous(
    limits = c(0,NA), 
    labels = scales::label_percent(),
    breaks = seq(0,1,0.1), 
    sec.axis = sec_axis(
      transform = ~.x/y_axis2_scaling,
      labels = scales::label_number(),
      breaks = \(x) seq(0,x[2],1e3),
      name = "Number"
  )) +
  scale_x_continuous(breaks = \(x) seq(0,x[2], 1)) +
  scale_colour_manual(
    values = c(
      "Number of pledgers who experienced year" = gwwc_colours$yellow,
      "Percentage of pledgers reporting" = gwwc_colours$red
      )
    ,aesthetics = c("colour", "fill")) +
  labs(
    title = "Percentage of pledges with recorded donations by year since pledging",
    subtitle = "Pledges started in 2013 or later",
    x = "Years since pledge", 
    y = "Percentage", 
    colour = NULL,
    fill = NULL
    )

Now, from this data, let’s model the change in the percentage who donated over time. From the data we have to-date, it looks like the donations could follow an exponential decay function towards a floor. Let’s fit a model to this data using a nonlinear least square method, weighting each year by the number of pledgers:

# Choose start values
start_vals <- list(y0 = 0.6, r = 0.5, c = 0.2)

# Model average donations as an exponential decay with a floor, using years since pledge as predictor
nls_model <- nls(
  percent_any_donations_recorded ~ c + (y0-c) * (1-r) ^ years_since_pledge,
  data = recent_pledges,
  weights = num_pledgers,
  start = start_vals
)

Here are the summary statistics for this model:

# Summarise model
summary(nls_model)
## 
## Formula: percent_any_donations_recorded ~ c + (y0 - c) * (1 - r)^years_since_pledge
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## y0 0.594388   0.007593   78.28 7.90e-13 ***
## r  0.479817   0.027550   17.42 1.20e-07 ***
## c  0.277945   0.007635   36.41 3.55e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7102 on 8 degrees of freedom
## 
## Number of iterations to convergence: 5 
## Achieved convergence tolerance: 8.452e-06
# Get coefficients
coefs <- coef(nls_model)

Results:

  • y0 ≈ 59.4%: our estimated initial % of pledgers recording donations
  • r ≈ 0.48: an effective annual decay rate of ~48% in the non-floor part
  • c ≈ 27.8%: long-run asymptote (your floor)

All three of these parameters are highly statistically significant.

If we plot our residuals, all are relatively small in magnitude and they seem reasonably balanced around 0

# Plot residuals of the model
plot(recent_pledges$years_since_pledge, resid(nls_model),
     main = "Residuals vs Years", xlab = "Years Since Pledge", ylab = "Residuals",
     pch = 16)
abline(h = 0, col = "red")

Let’s plot the predicted values from this model against our observed values from the pledge data we have. Let’s plot the predicted values all the way out to 35 years:

# Predict donations over time


predicted_data <- tibble(
  years_since_pledge = seq(0, 35, length.out = 100),
  ) %>% 
  mutate(predicted_percentage_donating = coefs["c"] + (coefs["y0"]-coefs["c"]) * (1-coefs["r"]) ^ years_since_pledge)
recent_pledges %>% 
  ggplot(aes(x = years_since_pledge, y = percent_any_donations_recorded)) +
  # Plot predicted
  geom_line(
    data = predicted_data, 
    aes(y = predicted_percentage_donating, colour = "Predicted"),
    linewidth = 1) +
  
  # Plot observed
  geom_point(aes(colour = "Observed [2013 to 2023 pledgers]")) +

  # Format plot
  scale_y_continuous(
    limits = c(0,NA), 
    labels = scales::label_percent(),
    breaks = seq(0,1,0.1)
    ) +
  scale_x_continuous(breaks = \(x) seq(0,x[2], 5)) +
  scale_colour_manual(
    values = c(
      "Predicted" = gwwc_colours$yellow,
      "Observed [2013 to 2023 pledgers]" = gwwc_colours$red
      )
    ,aesthetics = c("colour", "fill")) +
  labs(
    title = "Modelling percentage of pledges with recorded donations by year since pledging",
    subtitle = "Pledges started in 2013 or later",
    x = "Years since pledge", 
    y = "Percentage pledgers recording donations", 
    colour = NULL,
    fill = NULL
    )

We can’t be certain that this pattern will hold — it is entirely possible that the % of reporting pledgers will drop again after a certain number of years. However, we also think it is possible that the average donations of recording pledgers could go up for recent pledgers, as has been observed for past pledgers. Rather than trying to deal with these competing considerations, we are assuming that these uncertainties approximately cancel out.

Thus, our the inputs we will use for modelling pledge value over time are:

tibble(
  Names = c("Estimated % of 10% Pledgers recording donations in their first year",
            "Estimated annual decay rate in % of 10% Pledgers recording donations",
            "Estimated floor in % of 10% Pledgers recording donations"),
  Coefficients = coefs 
) %>% 
  gt(caption = "Parameters for predicting percentage of 10% Pledgers recording donations by year since pledging") %>% 
  fmt_percent()
Parameters for predicting percentage of 10% Pledgers recording donations by year since pledging
Names Coefficients
Estimated % of 10% Pledgers recording donations in their first year 59.44%
Estimated annual decay rate in % of 10% Pledgers recording donations 47.98%
Estimated floor in % of 10% Pledgers recording donations 27.79%
knitr::knit_exit()