# Supplementary code: produces some of the results in the RDU Methods and Quality Report: # "Which differences in ethnic group data are real?" # This is illustrative code to replicate the figures in the report install.packages("here") install.packages("dplyr") install.packages("survey") install.packages("DescTools") # Load up some packages that are used later. library(dplyr) library(here) library(survey) library(DescTools) # Overrides scientific notation so numbers produced later are in double format. options(scipen = 999) # Data is 2017/18 from the Crime Survey for England and Wales, received under end-user licence from the UK Data Service # Import the data. Uses here() package, so the raw data should be stored in the same file. Changed .tab file from UK Data Service download to .txt. csew1718 <- read.delim(here("csew_apr17mar18_nvf.txt"),header=TRUE,sep="\t") # Select a subset of the columns needed: # rowlabel <- identifier # nsethgrp <- harmonised 5+1 ethnic group # delibvio <- If anyone has deliberately used force/violence on adult respondent (1= Yes, 2 = No) # C11IndivWgt <- weight variable # Put these into a subset of data for analysis. Just one year of data so doesn't include year variable. csew_summary <- csew1718 %>% select(rowlabel,nsethgrp,delibvio,C11IndivWgt) %>% # Select out some columns filter(nsethgrp >=0 & delibvio <=2 ) %>% # Remove NAs for the purpose of this analysis group_by(nsethgrp,delibvio) %>% # Group by two variables summarise(sum = sum(C11IndivWgt),count=n()) %>% # Do some aggregations to look at frequencies, both weighted and unweighted rename(ethnicity = nsethgrp,violence = delibvio,weighted_total = sum, unweighted_count = count) %>% # Rename some columns mutate(ethnicity = as.character(ethnicity),violence = as.character(violence)) %>% # Change character types of columns so they rename below mutate(ethnicity=dplyr::recode(ethnicity, '1' = 'White', '2' = 'Mixed', '3' = 'Asian', '4' = 'Black', '5' = 'Other')) %>% mutate(violence=dplyr::recode(violence, '1' = 'Yes', '2' = 'No')) csew_final <- csew_summary %>% group_by(ethnicity) %>% # Group by two variable summarise(ethnic_group_weighted_total = sum(weighted_total),ethnic_group_unweighted_total=sum(unweighted_count)) %>% # calculate totals left_join(csew_summary, csew_final,by = "ethnicity") %>% # join tables together mutate(percent_violence = weighted_total/ethnic_group_weighted_total*100) %>% # calculate percentages select(ethnicity,violence,percent_violence,unweighted_count,ethnic_group_unweighted_total) %>% # tidy up some of the columns filter(violence == "Yes") # Filters out the 'No" responses for tidiness. ####### Calculations for Table 1 and Figure 2 using the R Survey package ########## csew_subset <- csew1718 %>% select(ONSpsuid,fin_stra4,rowlabel,nsethgrp,delibvio,C11IndivWgt) # Subset the data again for use in the survey package # Set up the survey design mydesign <- svydesign( id = ~ONSpsuid, data = csew_subset , weight = ~ C11IndivWgt , strata = ~ fin_stra4 ) # Need to subset the population to calculate the percentages and the confidence intervals # Use the subset command to maintain the original survey design as described here: https://r-survey.r-forge.r-project.org/survey/html/subset.survey.design.html mydesign_white <- subset( mydesign , nsethgrp == 1,delibvio<8) mydesign_mixed <- subset( mydesign , nsethgrp == 2,delibvio<8) mydesign_asian <- subset( mydesign , nsethgrp == 3,delibvio<8) mydesign_black <- subset( mydesign , nsethgrp == 4,delibvio<8) mydesign_other <- subset( mydesign , nsethgrp == 5,delibvio<8) # Output estimates and confidence intervals white_mean <- svymean(~delibvio==1,mydesign_white,decimals = 5)*100 mixed_mean <- svymean(~delibvio==1,mydesign_mixed,decimals = 5)*100 asian_mean <- svymean(~delibvio==1,mydesign_asian,decimals = 5)*100 black_mean <- svymean(~delibvio==1,mydesign_black,decimals = 5)*100 other_mean <- svymean(~delibvio==1,mydesign_other,decimals = 5)*100 white_ci <- confint(svymean(~delibvio==1,mydesign_white,decimals = 5))*100 mixed_ci <- confint(svymean(~delibvio==1,mydesign_mixed,decimals = 5))*100 asian_ci <- confint(svymean(~delibvio==1,mydesign_asian,decimals = 5))*100 black_ci <- confint(svymean(~delibvio==1,mydesign_black,decimals = 5))*100 other_ci <- confint(svymean(~delibvio==1,mydesign_other,decimals = 5))*100 ######## Code for Wilson score intervals ########## # BinomCI function in "DescTools" calculates Wilson Score intervals with continuity correction # Joins to original table to get full set of figures csew_confidence_intervals <- cbind(csew_final,BinomCI((csew_final$percent_violence/100)*csew_final$ethnic_group_unweighted_total, csew_final$ethnic_group_unweighted_total, conf.level = 0.95, sides = c("two.sided"), method = c("wilsoncc"),rand = 123, tol = 1e-05)) %>% rename(lower_bound = lwr.ci) %>% rename(upper_bound = upr.ci) %>% select(-est) %>% mutate(lower_bound = lower_bound * 100,upper_bound = upper_bound *100) # Calculate the difference and check significance of the difference between Asian group and all the other groups. # Code uses the Binomial difference Confidence Intervals in DescTools package asian_percent <- csew_final %>% filter(ethnicity == "Asian") %>% select(percent_violence) # Get the Asian estimate asian_n <- csew_final %>% filter(ethnicity == "Asian") %>% select(ethnic_group_unweighted_total) # Get the Asian sample size csew_difference_asian <- as.data.frame(BinomDiffCI((csew_final$percent_violence/100)*csew_final$ethnic_group_unweighted_total,csew_final$ethnic_group_unweighted_total, (asian_percent$percent_violence/100)*asian_n$ethnic_group_unweighted_total, asian_n$ethnic_group_unweighted_total, conf.level = 0.95, sides = c("two.sided"),method = c("scorecc"))) %>% # calculate differences and confidence intervals rename(lower_bound = lwr.ci, upper_bound = upr.ci, difference_percent_violence = est) %>% # some renaming mutate(difference_percent_violence = difference_percent_violence * 100,lower_bound = lower_bound * 100,upper_bound = upper_bound *100) # convert to percentages row.names(csew_difference_asian) <- c("Asian","Black","Mixed","Other","White") # Add row names for clarity