I believe I figured this out. Household-level data extracts have no PERNUM variable, but by keeping only one record per SERIAL value (i.e., dropping duplicates) I got a weighted household count that falls within the margin of error of the ACS 5-year estimate. See below:
library(ipumsr)
library(dplyr)
# Locations of the IPUMS extract: the DDI codebook and the data file.
# Both are left blank here and must be filled in before running.
ddi <- ""
ipums_path <- ""

# Load the microdata extract described by the DDI.
ipums_data <- read_ipums_micro(
  ddi,
  data_file = ipums_path
)
# Weighted household count from the IPUMS microdata.
# - filter() keeps rows with GQ == 1 and STATEFIP == 17 (presumably
#   "household" group-quarters status and Illinois; confirm against the
#   extract's codebook).
# - distinct() keeps a single row per SERIAL so that each household's
#   weight (HHWT) enters the sum exactly once.
# NOTE(review): this assumes SERIAL is unique across the whole extract; if
# several samples are combined, deduplicate on sample + SERIAL -- confirm.
IL_hh_pums <- ipums_data %>%
  filter(GQ == 1, STATEFIP == 17) %>%
  distinct(SERIAL, .keep_all = TRUE) %>%
  group_by(STATEFIP) %>%
  summarize(households = sum(HHWT))
# Published ACS 5-year (2019) state-level figure for Illinois, table cell
# B11012_001, fetched through tidycensus in wide format for comparison with
# the microdata-based count.
IL_hh_acs5 <- tidycensus::get_acs(
  geography = "state",
  state = "IL",
  variables = c(households = "B11012_001"),
  year = 2019,
  survey = "acs5",
  output = "wide"
)
# Print both results side by side; the trailing comments record the values
# observed when this was run.
IL_hh_pums # 4,844,000 (weighted sum of HHWT over distinct SERIAL values)
IL_hh_acs5 # 4,846,134; moe 10,459