-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path46587X.R
325 lines (227 loc) · 15.1 KB
/
46587X.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# 1. validate/normalize agency files
# 2. put them in to_import folder
# a. create files2.txt - used for batch import
# b. create csv_files.txt - used to drive per-file summary generation (columns: name, isoPeriod, agency)
# 3. get latest mechanism listing from: https://www.datim.org/api/sqlViews/fgUtV6e9YIX/data.csv to Mechanisms partners agencies OUS Start End.csv
# 3.1 get data elements in csv format
# 4. get previously imported data from: https://www.datim.org/api/dataValueSets.csv?dataSet=O392zMXtwar&dataSet=rnEToFucnJ9&orgUnit=ybg3MO3hcf4&children=true&startDate=2020-07-01&endDate=2020-09-30 to fy20q4.csv
#
#
# 5. load sites
# 6. read in mechanisms
# 7.
#
#
#download mechanism values:
#
# to targets_and_results.csv
# Load sqldf for SQL queries over in-memory data frames.
# library() errors immediately if the package is missing, unlike require(),
# which only returns FALSE and would let the script fail much later.
library(sqldf)
# NOTE(review): the original cleared the workspace with rm(list = ls()) here;
# removed, since the script is intended to run top-to-bottom in a fresh session
# and wiping the global environment is an anti-pattern in scripts.

# Working locations and DATIM credentials.
# NOTE(review): hard-coded absolute user paths — consider externalizing.
base_path <- "/Users/41250/Documents/SIMS4/2024/Q4/2/combined/"
config_path <- "/Users/41250/Documents/datim-validation/datim.json"

# Authenticate; creates d2_default_session, whose handle carries the session
# cookie for every httr::GET below.
datimutils::loginToDATIM(config_path)

# Mechanism targets (W8imnja2Owd) and results (Jh0jDM5yQ2E) for period 2023Oct,
# keyed by mechanism code (outputIdScheme=CODE).
mechanism_targets_and_results_data_url <- "https://www.datim.org/api/analytics.csv?dimension=SH885jaRe0o&dimension=IeMmjHyBUpi:Jh0jDM5yQ2E;W8imnja2Owd&filter=ou:ybg3MO3hcf4&filter=pe:2023Oct&displayProperty=SHORTNAME&outputIdScheme=CODE"
httr::GET(mechanism_targets_and_results_data_url, httr::write_disk(paste0(base_path, "targets_and_results.csv"), overwrite=TRUE), handle = d2_default_session$handle)

# All mechanisms: used later to map attributeOptionCombo UIDs to agency/partner.
mechanism_list_url <- "https://www.datim.org/api/sqlViews/fgUtV6e9YIX/data.csv"
httr::GET(mechanism_list_url, httr::write_disk(paste0(base_path, "mechanisms.csv"), overwrite=TRUE), handle = d2_default_session$handle)

# Data elements (id, name, code) for the SIMS data set dT9xKGbcXLK.
data_elements_url <- "https://www.datim.org/api/dataElements.csv?paging=false&fields=id,name,code&filter=dataSetElements.dataSet.id:in:[dT9xKGbcXLK]"
httr::GET(data_elements_url, httr::write_disk(paste0(base_path, "dataElements.csv"), overwrite=TRUE), handle = d2_default_session$handle)

# previous_data_url <- "https://www.datim.org/api/dataValueSets.csv?dataSet=lvfFcexh1nB&dataSet=VP0uG6tzB5l&orgUnit=ybg3MO3hcf4&children=true&startDate=2022-07-01&endDate=2022-09-30"
# httr::GET(previous_data_url, httr::write_disk(paste0(base_path, "previously_imported_data.csv"), overwrite=TRUE), handle = d2_default_session$handle)
# Generate summaries.
# Org units: this is a long-running request, so try to reuse `sites` as much
# as possible. Fetch every org unit with its full ancestry in a single
# unpaged call; the 480 s timeout accommodates the large response.
org_units_url <- paste0(d2_default_session$base_url, "api/organisationUnits.json?fields=ancestors[name,code,id],id,name,code,level&paging=false")
org_units_response <- httr::GET(org_units_url, httr::timeout(480), handle = d2_default_session$handle)
org_units_json <- httr::content(org_units_response, "text")
# The first (and only) element of the parsed payload is the org-unit table.
sites <- jsonlite::fromJSON(org_units_json, flatten = TRUE)[[1]]
# Flatten ancestry into operating-unit columns: org units at level 1-3 are at
# or above operating-unit level, so they use their own code/id/name; deeper
# units take their third ancestor (level 3 = operating unit).
# BUG FIX: the original indexed sites$ancestor, which only resolved through
# R's partial matching of $ against the real list column, sites$ancestors.
for (x in seq_along(sites$id)) {
  if (sites$level[[x]] < 4) {
    sites$operating_unit_code[x] <- sites$code[[x]]
    sites$operating_unit_id[x] <- sites$id[[x]]
    sites$operating_unit_name[x] <- sites$name[[x]]
  } else {
    sites$operating_unit_code[x] <- sites$ancestors[[x]]$code[3]
    sites$operating_unit_id[x] <- sites$ancestors[[x]]$id[3]
    sites$operating_unit_name[x] <- sites$ancestors[[x]]$name[3]
  }
}
# The ancestry list column is no longer needed once flattened.
sites$ancestors <- NULL
#d <- read.csv(paste0(base_path, "previously_imported_data.csv"), stringsAsFactors = FALSE)
# Mechanism listing: maps attributeOptionCombo UID -> code/partner/agency/OU.
mechz <- read.csv(paste0(base_path, "mechanisms.csv"), stringsAsFactors = FALSE)
# UPDATE agencies whose data needs to be deleted/kept
#to_delete <- sqldf("select * from d where attributeoptioncombo in (select uid from mechz where agency in ('HHS/CDC', 'USAID')) and (period >= 20210401 and period < 20210701)")
#write.csv(to_delete, paste0(base_path, "/to_delete/to_delete.csv"), row.names=FALSE, na = '')
#writeLines("to_delete.csv", file(paste0(base_path, "/to_delete/to_delete.txt")))
#to_keep <- sqldf("select dataelement,period,orgunit,categoryoptioncombo,attributeoptioncombo,value,storedby,lastupdated,comment from d where attributeoptioncombo not in (select uid from mechz where agency in ('HHS/CDC', 'USAID')) ")
dir <- paste0(base_path, "to_import/")
#all_data <<- rbind(all_data, to_keep)
# Manifest of normalized agency files staged for import (columns used here
# and below: name, isoPeriod, agency).
files <- read.csv(paste0(dir, "csv_files.txt"), stringsAsFactors = FALSE)
# Read every staged file and stack them into one data frame.
# lapply + a single do.call(rbind, ...) replaces the original
# apply(files, 1, ...) + <<- pattern, which coerced `files` to a character
# matrix and grew all_data one rbind at a time (O(n^2) copying).
all_data <- do.call(rbind, lapply(seq_len(nrow(files)), function(i) {
  filename <- files$name[i]
  print(paste0(dir, filename))
  read.csv(paste0(dir, filename), stringsAsFactors = FALSE)
}))
# Data element metadata (id, name, code) for joining on dataElement UIDs.
des <- read.csv(paste0(base_path, "dataElements.csv"), stringsAsFactors = FALSE)
# Annotate every record with its reporting period (derived from the yyyymmdd
# period value), operating unit (via sites) and agency/mechanism (via mechz).
# The 8-deep nested CASE is flattened to a single searched CASE (SQLite
# evaluates WHENs in order, so semantics are unchanged), and the misspelled
# fallback 'Unkown' is corrected to 'Unknown'.
all_data2 <- sqldf("
select
case
  when period >= 20221001 and period < 20230101 then 'FY23Q1'
  when period >= 20230101 and period < 20230401 then 'FY23Q2'
  when period >= 20230401 and period < 20230701 then 'FY23Q3'
  when period >= 20230701 and period < 20231001 then 'FY23Q4'
  when period >= 20231001 and period < 20240101 then 'FY24Q1'
  when period >= 20240101 and period < 20240401 then 'FY24Q2'
  when period >= 20240401 and period < 20240701 then 'FY24Q3'
  when period >= 20240701 and period < 20241001 then 'FY24Q4'
  else 'Unknown'
end as reporting_period,
operating_unit_code,
operating_unit_id as operating_unit_uid,
orgunit,
agency,
mechz.code as mechanism,
comment as assessment_id,
case when value is null then 'dummy' else value end as v,
case when storedby is null then '' else 'existing' end as type
from all_data left join mechz on all_data.attributeoptioncombo = mechz.uid left join sites on all_data.orgunit = sites.id
")
# Map each assessment (keyed by its comment / assessment id) to a readable
# assessment type from the SIMS.CS_ASMT_TYPE data element value.
# The nested CASE is flattened to a searched CASE and the misspelled
# fallback 'Uknown' is corrected to 'Unknown'.
assessment_type <-
sqldf("
select comment,
case
  when value = '1' then 'Comprehensive'
  when value = '2' then 'Follow Up'
  when value = '3' then 'Concentrated'
  else 'Unknown'
end as asmt_type
from all_data join des on all_data.dataElement = des.id
where des.name = 'SIMS.CS_ASMT_TYPE'
")
# Attach the assessment type to every record of the matching assessment.
all_data2 <- sqldf("select ad2.*, asmt_type from all_data2 ad2 left join assessment_type at on ad2.assessment_id = at.comment")
# Per-assessment flags: does the assessment contain coversheet data elements
# (names like 'SIMS.CS_...') and/or assessment-detail elements
# ('SIMS.S_...' / 'SIMS.AS_...')?
# NOTE(review): this top-level result is not referenced by the code below —
# a per-file version is recomputed later inside the apply() loop; confirm
# whether this copy is still needed.
assessments_with_coversheet <-
sqldf("
select comment, max(has_cs) as has_cs, max(has_assessment_details) as has_assessment_details from (
select comment,
case when des.name like 'SIMS.CS_%' then 1 else 0 end as has_cs,
case when des.name like 'SIMS.S_%' or des.name like 'SIMS.AS_%' then 1 else 0 end as has_assessment_details
from all_data join des on all_data.dataElement = des.id
) group by comment
")
# case when des.name like 'SIMS.CS%' then 1 else 0 end as coversheet,
# case when des.name not like 'SIMS.CS%' then 1 else 0 end as core
# Summary 1: assessment/record counts by period, agency and record type
# ('existing' vs newly staged).
counts <- sqldf("select reporting_period, agency, count(distinct(assessment_id)) assessment_count, count(*) as record_count, type from all_data2 group by reporting_period, agency, type order by reporting_period, agency")
# Summary 2: same counts broken down by mechanism.
by_mechanism <- sqldf("select agency, reporting_period, mechanism, count(distinct(assessment_id)) assessment_count, count(*) as record_count from all_data2 group by agency, reporting_period, mechanism order by reporting_period, agency, mechanism")
# Summary 3: same counts broken down by operating unit.
by_operating_unit <- sqldf("select agency, reporting_period, operating_unit_code, operating_unit_uid, count(distinct(assessment_id)) assessment_count, count(*) as record_count from all_data2 group by agency, reporting_period, operating_unit_code, operating_unit_uid order by reporting_period, agency, operating_unit_code")
# NOTE(review): na="" vs na="NA" differ between these writes — presumably
# intentional for the OU file, but worth confirming.
write.csv(counts, paste0(dir, "by_agency_summary.csv"), row.names=FALSE, na="")
write.csv(by_mechanism, paste0(dir, "by_mechanism_summary.csv"), row.names=FALSE, na="")
write.csv(by_operating_unit, paste0(dir, "by_operating_unit_summary.csv"), row.names=FALSE, na="NA")
# Same three summaries, additionally split by assessment type (asmt_type).
counts <- sqldf("select reporting_period, agency, asmt_type, count(distinct(assessment_id)) assessment_count, count(*) as record_count, type from all_data2 group by reporting_period, agency, asmt_type, type order by reporting_period, agency")
by_mechanism <- sqldf("select agency, reporting_period, asmt_type, mechanism, count(distinct(assessment_id)) assessment_count, count(*) as record_count from all_data2 group by agency, reporting_period, asmt_type, mechanism order by reporting_period, agency, mechanism")
by_operating_unit <- sqldf("select agency, reporting_period, asmt_type, operating_unit_code, operating_unit_uid, count(distinct(assessment_id)) assessment_count, count(*) as record_count from all_data2 group by agency, reporting_period, asmt_type, operating_unit_code, operating_unit_uid order by reporting_period, agency, operating_unit_code")
write.csv(counts, paste0(dir, "by_agency_by_type_summary.csv"), row.names=FALSE, na="")
write.csv(by_mechanism, paste0(dir, "by_mechanism_by_type_summary.csv"), row.names=FALSE, na="")
write.csv(by_operating_unit, paste0(dir, "by_operating_unit_by_type_summary.csv"), row.names=FALSE, na="NA")
# missing MER data:
# tool type data element: RpqaKUXGtDS
# assessment type data element: axQrLFHH0Nl
# Targets/results downloaded earlier; columns: mechanism code, target/result
# dimension UID, value.
mech_data <- read.csv(paste0(base_path, "targets_and_results.csv"), stringsAsFactors = FALSE)
colnames(mech_data) <- c("mech", "tr", "value")
# tr values: result = Jh0jDM5yQ2E, target = W8imnja2Owd
m <- datimvalidation::getMechanismsMap()
# One row per assessment+mechanism with tool type, assessment type, and flags
# for whether the mechanism has any MER targets/results.
has_mer <- sqldf("select m2.agency as agency, s.operating_unit_name as site_ou, m2.ou as mechanism_ou, d2.comment as assesment_id,
case when d3.value = '1' then 'site' else case when d3.value = '3' then 'above site' else 'unknown' end end as tool_type,
case when d4.value = '1' then 'full assessment' else case when d4.value = '2' then 'follow-up' else case when d4.value = '3' then 'concentrated' else 'unknown' end end end as assessment_type,
m.name as mechanism_name,
case when sum(t.value) is NULL then 'No' else 'Yes' end as has_targets,
case when sum(r.value) is NULL then 'No' else 'Yes' end as has_results
from all_data d2 join m on d2.attributeOptionCombo = m.id
left join mech_data t on m.code = t.mech and t.tr='W8imnja2Owd'
left join mech_data r on m.code = r.mech and r.tr='Jh0jDM5yQ2E'
left join mechz m2 on m.id = m2.uid
left join sites s on s.id = d2.orgUnit
left join all_data d3 on d2.comment = d3.comment and d3.dataElement = 'RpqaKUXGtDS'
left join all_data d4 on d2.comment = d4.comment and d4.dataElement = 'axQrLFHH0Nl'
group by d2.comment, m.name
")
# Assessments whose mechanism has neither targets nor results.
# BUG FIX: the original filtered on has_mer$has_result / has_mer$has_target,
# which only worked through $ partial matching; use the real column names.
missing_mer <- has_mer[has_mer$has_results != 'Yes' & has_mer$has_targets != 'Yes', ]
if (nrow(missing_mer) != 0) {
  write.csv(missing_mer, file = paste0(dir, "assessments_missing_mer_data.csv"), row.names = FALSE)
}
# Generate per-file summaries (counts, by-mechanism, by-operating-unit).
# get mechanism map (for codes)
#mech_map <- datimvalidation::getMechanismsMap()
mech_map <- m  # reuse the map fetched above for the MER check
# list of files in csv format. header: path, isoPeriod (fy period), agency
# files in the folder are expected to be the normalized ones, ready for import
# - with normalized headers and UIDs for all metadata
files <- read.csv(paste0(dir, "csv_files.txt"), stringsAsFactors = FALSE)
# Process each staged file, returning its three summaries as a named list.
# lapply over row indices replaces apply(files, 1, ...), which coerces the
# manifest to a character matrix (format-padding values), and the <<- rbind
# accumulators, which grew three data frames one rbind at a time.
per_file <- lapply(seq_len(nrow(files)), function(i) {
  filename <- files$name[i]
  isoPeriod <- files$isoPeriod[i]
  agency <- files$agency[i]
  d <- read.csv(paste0(dir, filename), stringsAsFactors = FALSE)
  # Per-assessment flags: coversheet (CS_) vs assessment-detail (S_/AS_)
  # data elements present in this file.
  assessments_with_coversheet <-
    sqldf("
    select comment, max(has_cs) as has_cs, max(has_assessment_details) as has_assessment_details from (
    select d.comment,
    case when des.name like 'SIMS.CS_%' then 1 else 0 end as has_cs,
    case when des.name like 'SIMS.S_%' or des.name like 'SIMS.AS_%' then 1 else 0 end as has_assessment_details
    from d join des on d.dataElement = des.id
    ) group by comment
    ")
  # File-level counts; 'Yes' when coversheet and assessment-detail flags do
  # not balance across the file (i.e. some assessment is incomplete).
  sql <- paste0("select '", agency, "' as file, '", isoPeriod, "' as pe, count(*) as record_count, count(distinct(d.comment)) as assessment_count,
case when sum(has_cs) - sum(has_assessment_details) = 0 then 'No' else 'Yes' end as has_incomplete_assessments from d join assessments_with_coversheet a on d.comment = a.comment")
  print(sql)
  list(
    counts = sqldf(sql),
    mechs = sqldf(paste0("select '", agency, "' as agency, '", isoPeriod, "' as reporting_period, m.code as mechanism, count(distinct(comment)) as assessment_count, count(case when value is null then 1 else value end) as record_count from d left join mech_map m on d.attributeOptionCombo = m.id group by m.code")),
    operating_units = sqldf(paste0("select '", agency, "' as agency, '", isoPeriod, "' as reporting_period, o.operating_unit_code as operating_unit_code, o.operating_unit_id as operating_unit_uid, count(distinct(comment)) as assessment_count, count(case when value is null then 1 else value end) as record_count from d left join sites o on d.orgUnit = o.id group by o.operating_unit_code"))
  )
})
# Bind once at the end instead of growing inside the loop.
mechs <- do.call(rbind, lapply(per_file, `[[`, "mechs"))
operating_units <- do.call(rbind, lapply(per_file, `[[`, "operating_units"))
counts <- do.call(rbind, lapply(per_file, `[[`, "counts"))
# NOTE(review): these overwrite the by_mechanism_summary.csv and
# by_operating_unit_summary.csv produced from all_data2 earlier in the
# script — confirm that is intentional.
write.csv(mechs, paste0(dir, "by_mechanism_summary.csv"), row.names=FALSE, na="")
write.csv(operating_units, paste0(dir, "by_operating_unit_summary.csv"), row.names=FALSE, na="NA")
write.csv(counts, paste0(dir, "_counts.csv"), row.names=FALSE, na="")
# Generate a human-readable per-country assessment count listing.
# Org-unit name lookup: this request only fetches id/name/code for levels 3-4
# (no ancestry), and is much cheaper than the earlier full org-unit pull.
url <- paste0(d2_default_session$base_url, "api/organisationUnits.json?filter=level:in:[3,4]&fields=id,name,code&paging=false")
r <- httr::GET(url, httr::timeout(120), handle = d2_default_session$handle)
r <- httr::content(r, "text")
operating_unit_names<-jsonlite::fromJSON(r,flatten=TRUE)[[1]]
# Re-read the by-OU-by-type summary written earlier in the script.
# NOTE(review): `summary` shadows base::summary for the rest of the session.
summary <- read.csv(paste0(dir, "by_operating_unit_by_type_summary.csv"), stringsAsFactors = FALSE)
# One "Name (count)" row per operating unit plus a trailing Total row;
# SortOrder (1 vs 99) forces the total to the bottom.
country_count_summary <- sqldf("
select ou as 'Operating Unit' from (
select o.name || ' (' || sum(assessment_count) || ')' as ou, 1 as SortOrder from operating_unit_names o join summary s on o.id=s.operating_unit_uid group by operating_unit_uid
union select 'Total (' || sum(assessment_count) || ')', 99 as SortOrder from summary
) order by SortOrder, ou
")
# Plain-text, unquoted output suitable for pasting into reports.
write.table(country_count_summary, file=paste0(dir, "_country_counts_pretty.txt"), row.names=FALSE, quote=FALSE)
# Cross-check: list distinct records whose mechanism's agency (from mechz)
# differs from the agency declared for the file in csv_files.txt.
# BUG FIX: the original returned the per-file results from apply() and relied
# on top-level auto-printing; collect and print them explicitly so the check
# also surfaces when the script is source()d.
files <- read.csv(paste0(dir, "csv_files.txt"), stringsAsFactors = FALSE)
agency_mismatches <- do.call(rbind, lapply(seq_len(nrow(files)), function(i) {
  filename <- files$name[i]
  agency <- files$agency[i]
  print(paste0(dir, filename))
  d <- read.csv(paste0(dir, filename), stringsAsFactors = FALSE)
  sqldf(paste0("select distinct d.comment, m.code, m.partner, m.agency from d join mechz m on d.attributeOptionCombo = m.uid where m.agency != '", agency, "'"))
}))
print(agency_mismatches)