I suggest using district and state names instead of codes and matching them with fuzzy string matching, because in some instances the DHS codes differ from the census codes.
The following code does this for the DHS 2015-16 round and the 2011 Census. It yields 95 percent correct matches; the remaining ones can then be checked manually.
# Resolve the relative '../Output/...' paths used below against the folder of
# this script. The active-document lookup only works inside an RStudio
# session, so it is skipped elsewhere (e.g. under Rscript); in that case the
# current working directory is used unchanged.
# The workspace is deliberately NOT cleared: every object used below is
# assigned from scratch, so wiping the caller's environment is unnecessary.
if (requireNamespace("rstudioapi", quietly = TRUE) &&
      rstudioapi::isAvailable()) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
}
library(tidyverse)
# Load the DHS district file and build the normalised "state district" key
# (name_dhs) that is fuzzy-matched against the census names.
dhs <- foreign::read.dta(file = "../Output/dhs_india_p7_dist_names.dta") %>%
  mutate(
    # Lower-case both names so the comparison ignores capitalisation.
    state_name = tolower(state_name),
    district_name = tolower(district_name),
    # Spell out '&' as 'and', exactly as is done for the census names, so both
    # keys are normalised the same way (deleting '&' left a double space and
    # made otherwise identical names look different).
    state_name = gsub("&", "and", state_name, fixed = TRUE),
    district_name = gsub("&", "and", district_name, fixed = TRUE),
    # NOTE(review): presumably Telangana is folded into Andhra Pradesh because
    # the 2011 census predates the state's creation - confirm.
    state_name = gsub("telangana", "andhra pradesh", state_name, fixed = TRUE),
    # Matching key: state and district separated by a single space.
    name_dhs = paste(state_name, district_name)
  )
# Load the India Census 2011 file (it carries historical population density)
# and derive the lower-cased "state district" key name_cen used for matching.
dcen <- foreign::read.dta(
  file = "../Output/population_density/popdens_with_attributes_1951_2011.dta"
) %>%
  mutate(
    # Lower-case, then spell out '&' as 'and'.
    district11 = gsub("&", "and", tolower(district11)),
    state11 = gsub("&", "and", tolower(state11)),
    # Split the one-word spelling of the state into two words.
    state11 = gsub("tamilnadu", "tamil nadu", state11)
  ) %>%
  # Keep only the columns needed for the match table.
  select(state11, district11, district_code11, popdens11) %>%
  # Drop rows whose lower-cased state name is the literal string "na".
  filter(state11 != "na") %>%
  mutate(name_cen = paste(state11, district11))
# For every DHS district, find the three census districts whose
# "state district" key is closest in Jaro-Winkler distance, and write the
# candidates side by side so the best match can be verified manually.

# Number of nearest census candidates kept per DHS district.
n_keep <- 3L
n_dhs <- nrow(dhs)

# Preallocate: row i holds the census row indices / distances of the n_keep
# closest candidates for DHS row i (NA when fewer candidates exist).
best_idx <- matrix(NA_integer_, nrow = n_dhs, ncol = n_keep)
best_dist <- matrix(NA_real_, nrow = n_dhs, ncol = n_keep)

# seq_len() keeps the loop empty when dhs has no rows (1:0 would not).
for (nr in seq_len(n_dhs)) {
  # One vectorised call compares this DHS name with every census name.
  d <- stringdist::stringdist(dhs$name_dhs[nr], dcen$name_cen, method = "jw")
  idx <- order(d)[seq_len(n_keep)]
  best_idx[nr, ] <- idx
  best_dist[nr, ] <- d[idx]
}

# Assemble the result column by column so that codes, population densities and
# distances keep their native types instead of being coerced to character
# (which is what combining them with c() and rbind() did).
dmatch <- data.frame(
  v024 = dhs$v024,
  sdistri = dhs$sdistri,
  name_dhs = dhs$name_dhs,
  cen11_code1 = dcen$district_code11[best_idx[, 1]],
  cen11_name1 = dcen$name_cen[best_idx[, 1]],
  cen11_popdens1 = dcen$popdens11[best_idx[, 1]],
  cen11_code2 = dcen$district_code11[best_idx[, 2]],
  cen11_name2 = dcen$name_cen[best_idx[, 2]],
  cen11_popdens2 = dcen$popdens11[best_idx[, 2]],
  cen11_code3 = dcen$district_code11[best_idx[, 3]],
  cen11_name3 = dcen$name_cen[best_idx[, 3]],
  cen11_popdens3 = dcen$popdens11[best_idx[, 3]],
  dist1 = best_dist[, 1],
  dist2 = best_dist[, 2],
  dist3 = best_dist[, 3],
  stringsAsFactors = FALSE
)

foreign::write.dta(
  dmatch,
  file = "../Output/matched_dhs_name_census_name.dta"
)