P2D6: Open programming time
The age variable
[1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1126, 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 4001, 4002, 4003, 4004, 4005, 4006, 4007, 4008, 4009, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4017, 4018, 4019, 4020, 4021, 4022, 4023, 4024, 4025, 4026, 4027, 5001, 5002, 5003, 5004, 5005, 5006, 5007, 5008, 5009, 5010, 5011, 5012, 5013, 5014, 5015, 5016, 5017, 5018, 5019, 5020, 5021, 5022, 5023, 5999, 6001, 6002, 6003, 6004, 6005, 6006, 6007, 6008, 6009, 6010, 6011, 6012, 6013, 6014, 6015, 6016, 6017, 6018, 6019, 6020, 6021, 6022, 6023, 6024, 6025, 6026, 6027, 6028, 6029, 6030, 6031, 6032, 6033, 6034, 6035, 6036, 6037, 6038, 6039, 6040, 6041, 6042, 6043, 6044, 6045, 6046, 6047, 6048, 6049, 6050, 6051, 6052, 6053, 6054, 6055, 6056, 6057, 6058, 6059, 6999, 9999]
R code
Don’t try to mimic the code. Try to write out the logic.
- What numbers are an age?
- What numbers are mapped to zero?
- What numbers are mapped to missing?
538 Code
# Recode the coded `detail_age` column into a numeric `age` column in two
# passes. NOTE(review): no data argument is visible, so this call presumably
# sits at the end of a pipe -- confirm against the surrounding script.
# Pass 1: if the first character of detail_age is "1", characters 2-4 are
# converted to a number; the code 9999 becomes NA; every other code becomes 0.
mutate(age = ifelse(
substr(detail_age, 1, 1) == "1",
as.numeric(substr(detail_age, 2, 4)), # Year
ifelse(detail_age == 9999, NA, 0)),
# Pass 2: a pass-1 result of 999 (e.g. from the code 1999) is also set to NA.
age = ifelse(age == 999, NA, age))
Hathaway’s code
# Age recode written with dplyr::case_when. These lines are the arguments of a
# call whose opening is not shown here (presumably mutate() -- confirm).
age = dplyr::case_when(
# First character "1": characters 2-4 are converted to a number.
stringr::str_sub(detail_age, 1, 1) == "1" ~
as.numeric(stringr::str_sub(detail_age, 2, 4)),
# The code 9999 becomes a (double) missing value.
detail_age == 9999 ~ NA_real_,
# Every remaining code becomes 0.
TRUE ~ 0),
# Second pass: a recoded value of 999 (e.g. from the code 1999) becomes NA.
age = ifelse(age == 999, NA, age),
# NOTE(review): `year` may be an existing column or a variable from the
# calling environment; which one cannot be told from this fragment -- confirm.
year = year)
Python
Saving the pickle
Some Python snippets
Setting up the conditions to use with .assign()
.
# Underlying-cause codes that mark a record as a gun death
# (these look like ICD-10 codes -- confirm against the data dictionary).
gun_ids = ["W32", "W33", "W34", "X72", "X73", "X74", "U014", "X93", "X94", "X95", "Y22", "Y23", "Y24", "Y350"]

# Boolean masks over raw_file.underlying_cause, one per intent label.
# The order here must match the order of intent_val below.
intent_cond = [
    raw_file.underlying_cause.isin(["W32", "W33", "W34"]),
    raw_file.underlying_cause.isin(["X72", "X73", "X74"]),
    # gun_ids spells this code "U014", while this list previously held only
    # "*U01.4"; both spellings are accepted so that a row selected through
    # gun_ids is not left without an intent label.
    raw_file.underlying_cause.isin(["*U01.4", "U014", "X93", "X94", "X95",
                                    "Y350"]),
    raw_file.underlying_cause.isin(["Y22", "Y23", "Y24"]),
]
intent_val = ["Accidental", "Suicide", "Homicide", "Undetermined"]

# Boolean masks for the weapon label, in the same order as weapon_val.
# Codes that match neither mask are left to whatever default the consumer
# of these lists supplies.
weapon_cond = [
    raw_file.underlying_cause.isin(["W32", "X72", "X93", "Y22"]),
    raw_file.underlying_cause.isin(["W33", "X73", "X94", "Y23"]),
]
weapon_val = ["Handgun", "Rifle"]
Our DataFrame of file save paths
# %%
# Settings: the data year and the folder the output files live in.
year = 2014
folder_path = "death_data"
# %%
# Everything after the first two characters of the year ("14" for 2014).
l2 = str(year)[2:]
fnames = ["deaths", "guns", "suicide"]

# One row per output file; the scalar folder and year are repeated on each row.
tnames = pd.DataFrame({"folder": folder_path, "start_name": fnames, "year": l2})
# Base file name, e.g. "deaths_14".
tnames["name"] = tnames["start_name"] + "_" + tnames["year"]
# Full paths for the two storage formats.
_path_stem = tnames["folder"] + "/" + tnames["name"]
tnames["file_path_feather"] = _path_stem + ".feather"
tnames["file_path_pickle"] = _path_stem + ".pkl"
del _path_stem
Pulling value from Pandas DataFrame
# Keep the rows whose start_name is "suicide" and take the first pickle path
# as a plain value rather than a one-element Series.
tnames.query('start_name == "suicide"')["file_path_pickle"].iloc[0]