From f72600b06ccba27b8598ac4056d3659caee035a0 Mon Sep 17 00:00:00 2001 From: Warsame Yusuf Date: Mon, 23 Sep 2019 14:26:30 -0400 Subject: [PATCH] [Feature] preparing website for gh-pages set up --- .DS_Store | Bin 12292 -> 12292 bytes .gitignore | 3 +- DESCRIPTION | 2 +- README.md | 6 +- Vignettes/usingcchsflow.Rmd | 94 +++++++-- Vignettes/variableDetails.Rmd | 89 ++++----- Vignettes/variablesSheet.Rmd | 14 +- _pkgdown.yml | 20 ++ docs/404.html | 12 +- docs/CONTRIBUTING.html | 12 +- docs/articles/index.html | 16 +- docs/articles/usingcchsflow.html | 301 ++++++----------------------- docs/articles/variableDetails.html | 204 +++++++++---------- docs/articles/variablesSheet.html | 50 ++--- docs/authors.html | 14 +- docs/index.html | 22 ++- docs/reference/index.html | 12 +- 17 files changed, 378 insertions(+), 493 deletions(-) diff --git a/.DS_Store b/.DS_Store index 8d042af3f849efa2f749b78fe7ad7419cbf24ad3..95386cc845ad34f0e38763a623d0f48c37689e32 100644 GIT binary patch delta 213 zcmZokXi3;$#lp_T&Ew|o!8Q4Sa5!f~W^#UBaAsBNWJea^%@Y;%8D*3m3>d(GlYxUl zl0lEbfgyw;h9Q-qfT417ooFNDk;#p03X_kp-kn^>t~J?AAchC1oDrgjgM0Hu32~Oq z>OA~li(K8@u~~FZObow8lO*aHS5Iz`P~O}sG>LDs6-zo3$hOU90)b46f2y)CZWmi2>?dnG$sH5 delta 89 zcmZokXi3;$#j;skF`a3$v*?4#jqK`^L&Rcu7#J8BffxulxHn&v5NFw}&cn~Ld4+Ny o)8=-e34EJ(3TH8aRBsLu^Jm)pU6qM_a=W1YK1ONa4 diff --git a/.gitignore b/.gitignore index 2118edac..79a894b3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .Rproj.user .Rhistory .RData -.Ruserdata \ No newline at end of file +.Ruserdata +.DS_Store \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index cb57e923..c21d21d2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,7 @@ Authors@R: c( comment = c(ORCID = "0000-0003-0912-0845")), person(given = "Warsame", family = "Yusuf", - role = c("aut"), + role = c("aut", "cre"), email = "waryusuf@ohri.ca"), person(given = "Rostyslav", family = "Vyuha", diff --git a/README.md b/README.md index 1d666335..8a8f9bba 100644 --- a/README.md +++ 
b/README.md @@ -1,8 +1,8 @@ # cchsflow -This repository contains supports the use of the Canadian Community Health Survey (CCHS). The current focus is transformation of harmonized variables across surveys from 2001 to 2014. +This package supports the use of the Canadian Community Health Survey (CCHS). The current focus is transformation of harmonized variables across surveys from 2001 to 2014. At the heart of `cchsflow` are two worksheets (CSV files) that describe how to transform variables different CCHS cycles into common variables: `variables.csv` and `variableDetails.csv`. -Documents include: +Documents in the repository include: 1. `variables.csv` - a list of variables that can be transformed across CCHS surveys. The default variable name corresponds to 2007 CCHS. 2. `variableDetails.csv` - information that describes how the variables are recoded. @@ -10,7 +10,7 @@ Documents include: ## Important notes -Care must be taken to understand how your specific use of variable transformation and harmonization may result in misclassfication error and other forms of bias. Most variables have had some change in wording and category responses across the lifetime of the CCHS from 2001 to 2013. Furthermore, there have been changes in survey sampling, response rates, weighting methods and other survey design changes that affect responses. +Care must be taken to understand how your specific use of variable transformation and harmonization may result in misclassification error and other forms of bias. Most variables have had some change in wording and category responses across the lifetime of the CCHS from 2001 to 2014. Furthermore, there have been changes in survey sampling, response rates, weighting methods and other survey design changes that affect responses. The transformations that are described in this repository have been used in several research projects (see reference list) but no guarantees are made regarding the accuracy or appropriate uses. 
diff --git a/Vignettes/usingcchsflow.Rmd b/Vignettes/usingcchsflow.Rmd index 260c15c3..6ce948b1 100644 --- a/Vignettes/usingcchsflow.Rmd +++ b/Vignettes/usingcchsflow.Rmd @@ -1,6 +1,5 @@ --- title: "Using cchsflow" -date: 'Last updated: 2019-08-27' output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{1 - using cchsflow} @@ -21,7 +20,7 @@ The `RecWTable` and `SetDataLabels` functions are part of the [bllflow](https:// ```{r eval= FALSE} install.packages("devtools") library(devtools) -install_github("Big-Life-Lab/bllflow", ref = "recode-with-table-patch") +install_github("Big-Life-Lab/bllflow") ``` ```{r results= 'hide', message = FALSE, warning=FALSE} library(bllflow) @@ -51,9 +50,9 @@ cchsMock2001 <- data.frame(DHHA_SEX = c(2, 1, 1, 6), DHHAGAGE = c(3, 4, 6, 6), F cchsMock2013 <- data.frame(DHH_SEX = c(1, 2, 1), DHHGAGE = c(2, 1, 1), FVCDTOT = c(25, 15, 6)) ``` -Did you notice that the names for the variables are slightly different in the two mock databases? That isn't a mistake: in the cchs2001 the variable for sex is `DHHA_SEX` and in CCHS2013 the variable is `DHH_SEX`. +Did you notice that the names for the variables are slightly different in the two mock databases? That isn't a mistake: in the 2001 CCHS the variable for sex is `DHHA_SEX` and in the 2013 CCHS the variable is `DHH_SEX`. -Don't worry, `cchsflow` is here to help! `variableDetails.csv` contains the rules to harmonize those two variables into a common variable name. In the CCHS, the categories for `sex` are consistient: 1 = males, 2 = females. If the category values or labels changed, `variableDetails.csv` would provide instructions for how to harmonize them. You can learn more about `variables.csv` and `variableDetails.csv` in later vignettes. see ... and ... +Don't worry, `cchsflow` is here to help! `variableDetails.csv` contains the rules to harmonize those two variables into a common variable name. In the CCHS, the categories for `sex` are consistent: 1 = males, 2 = females. 
If the category values or labels changed, `variableDetails.csv` would provide instructions for how to harmonize them. You can learn more about these in later vignettes. ```{r, echo=FALSE, message=FALSE, warning=FALSE} library(DT) @@ -93,7 +92,8 @@ In this example, the sex variable in the 2001 CCHS cycle is transformed. ```{r, warning=FALSE} sex2001 <- RecWTable(dataSource = cchsMock2001, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2001-c1-1-general-file", log = TRUE, variables = c("DHH_SEX")) - +``` +```{r, echo=FALSE} datatable(sex2001, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:1)), dom = 't')) ``` @@ -109,12 +109,24 @@ This example shows how you can transform and combine a variable across multiple ```{r, warning = FALSE} sex2001 <- RecWTable(dataSource = cchsMock2001, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2001-c1-1-general-file", appendToData = FALSE, log = TRUE, variables = c("DHH_SEX")) +``` + +```{r, echo=FALSE} datatable(sex2001, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:1)), dom = 't')) +``` +```{r, warning=FALSE} sex2013 <- RecWTable(dataSource = cchsMock2013, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2013-2014-Annual-component", appendToData = FALSE, log = TRUE, variables = c("DHH_SEX")) +``` +```{r, echo=FALSE} datatable(sex2013, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:1)), dom = 't')) +``` +```{r, warning=FALSE} combinedSex <- bind_rows(sex2001, sex2013) +``` + +```{r, echo=FALSE} datatable(combinedSex, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:1)), dom = 't')) ``` @@ -127,10 +139,18 @@ There are many variables in the CCHS that changes in categories between cycles i The categories in `age` variable in the CCHS changed in 2005 and therefore it is not possible to have the same `age` categories across all CCHS cycels. `cchsflow` offers two optins. 
The first option is to transform the `age` variable into two variables. `DHHGAGE_A` is the age variable for CCHS cycles 2001-2003, and `DHHGAGE_B` is the age variable for CCHS cycles 2005-2014. With this option, you cannot combine `DHHGAGE_A` and `DHHGAGE_B` into a single dataset. ```{r, warning=FALSE} -age2001 <- RecWTable(dataSource = cchsMock2001, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2001-c1-1-general-file", log = TRUE, variables = c("DHHGAGE_A")) +age2001 <- RecWTable(dataSource = cchsMock2001, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2001-c1-1-general-file", variables = c("DHHGAGE_A")) +``` + +```{r, echo=FALSE} datatable(age2001, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:1)), dom = 't')) +``` + +```{r, warning=FALSE} +age2013 <- RecWTable(dataSource = cchsMock2013, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2013-2014-Annual-component", variables = c("DHHGAGE_B")) +``` -age2013 <- RecWTable(dataSource = cchsMock2013, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2013-2014-Annual-component", log = TRUE, variables = c("DHHGAGE_B")) +```{r, echo=FALSE} datatable(age2013, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:1)), dom = 't')) ``` @@ -140,13 +160,26 @@ datatable(age2013, options = list(columnDefs = list(list(className = 'dt-center' The categorical `age` variable can also be transformed into a single continuous `age` variable. This variable takes the midpoint age of each category for all CCHS cycles. With this option, the age category variable from all CCHS cycles can be combined into a single dataset. 
```{r, warning=FALSE} -age2001_cont <- RecWTable(dataSource = cchsMock2001, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2001-c1-1-general-file", log = TRUE, variables = c("DHHGAGE_cont")) +age2001_cont <- RecWTable(dataSource = cchsMock2001, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2001-c1-1-general-file", variables = c("DHHGAGE_cont")) +``` + +```{r, echo=FALSE} datatable(age2001_cont, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:1)), dom = 't')) +``` -age2013_cont <- RecWTable(dataSource = cchsMock2013, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2013-2014-Annual-component", log = TRUE, variables = c("DHHGAGE_cont")) +```{r, warning=FALSE} +age2013_cont <- RecWTable(dataSource = cchsMock2013, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2013-2014-Annual-component", variables = c("DHHGAGE_cont")) +``` + +```{r, echo=FALSE} datatable(age2013_cont, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:1)), dom = 't')) +``` +```{r, warning= FALSE} combinedAge_cont <- bind_rows(age2001_cont, age2013_cont) +``` + +```{r, echo=FALSE} datatable(combinedAge_cont, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:1)), dom = 't')) ``` @@ -173,6 +206,9 @@ In the above code, varLabels is called in `RecWTable()` to label the age and sex ```{r, warning=FALSE} combinedAgeSex <- bind_rows(agesex_2001, agesex_2013) labelledCombinedAgeSex <- SetDataLabels(dataToLabel = combinedAgeSex, variableDetails = varDetails, variablesSheet = varSheet) +``` + +```{r, echo=FALSE} datatable(labelledCombinedAgeSex, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:2)), dom = 't')) ``` @@ -188,23 +224,53 @@ For more information on `get_label()` and other label helper functions, please r All the variables listed in `varDetails.csv` will be transformed if the variables argument in `RecWTable()` is not specified. 
In this example, all of the variables in our mock 2001 and 2013 datasets will be transformed, combined, and labelled. -```{r, warning=FALSE} +```{r, echo=FALSE} options(htmlwidgets.TOJSON_ARGS = list(na = 'string')) -transformed2001 <- RecWTable(dataSource = cchsMock2001, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2001-c1-1-general-file", log = TRUE) +``` + +```{r, warning=FALSE} +transformed2001 <- RecWTable(dataSource = cchsMock2001, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2001-c1-1-general-file") +``` + +```{r, echo=FALSE} datatable(transformed2001, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:4)), dom = 't')) +``` + +```{r, warning=FALSE} +transformed2013 <- RecWTable(dataSource = cchsMock2013, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2013-2014-Annual-component") +``` -transformed2013 <- RecWTable(dataSource = cchsMock2013, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2013-2014-Annual-component", log = TRUE) +```{r, echo=FALSE} datatable(transformed2013, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:4)), dom = 't')) +``` +```{r, warning=FALSE} combinedCCHS <- bind_rows(transformed2001, transformed2013) labelledCombinedCCHS <- SetDataLabels(dataToLabel = combinedCCHS, variableDetails = varDetails, variablesSheet = varSheet) +``` + +```{r, echo=FALSE} datatable(labelledCombinedCCHS, options = list(columnDefs = list(list(className = 'dt-center', targets = 0:5)), dom = 't')) ``` +```{r, warning=FALSE} +get_label(labelledCombinedCCHS) +``` + ### Step 4. Warning messages Warning messages will appear when the variables in your dataset do not match the variables in your two worksheets. In our example, our mock CCHS datasets only contain a two of variables from `variables.csv` and `varDetails.csv`. As such, warning messages about variables not included in our datasets will be printed. 
```{r, echo=FALSE, results="hide", warning=2} -transformed2001 <- RecWTable(dataSource = cchsMock2001, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2001-c1-1-general-file", log = TRUE) -``` \ No newline at end of file +transformed2001 <- RecWTable(dataSource = cchsMock2001, variableDetails = varDetails, datasetName = "cchs-82M0013-E-2001-c1-1-general-file") +``` + +## Using your own CCHS dataset to transform variables + +As mentioned previously, CCHS datasets cannot be shared publicly. But that does not mean you cannot use a saved CCHS dataset on your computer to transform variables. The code below illustrates how to load a CCHS dataset into your R environment. + +```{r, eval=FALSE} +cchsDataset <- read.csv("~/Documents/cchsdataset.csv") +``` + +You can copy this code to your clipboard and modify the path to where your dataset is saved on your computer. \ No newline at end of file diff --git a/Vignettes/variableDetails.Rmd b/Vignettes/variableDetails.Rmd index e0aa62cf..41762bfb 100644 --- a/Vignettes/variableDetails.Rmd +++ b/Vignettes/variableDetails.Rmd @@ -1,6 +1,5 @@ --- -title: "`variableDetails.csv` worksheet" -date: 'Last updated: 2019-09-14' +title: "variableDetails.csv" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{3 - variableDetails.csv} @@ -15,15 +14,19 @@ knitr::opts_chunk$set( ) ``` -## `variableDetails.csv` -The **variableDetails** worksheet contain details for the variables in `variables.csv`. Information from `variableDetails.csv` worksheet is used by the `RecWTable()` function of the `bllflow` package to transform variables identifed in `variableDetails$variableFrom` to the newly transformed variable in `variableDetails$variable`. +## Introduction +The **variableDetails.csv** worksheet contains details for the variables in `variables.csv`. 
Information from the `variableDetails.csv` worksheet is used by the `RecWTable()` function of the `bllflow` package to transform variables identified in `variableDetails$variableStart` to the newly transformed variable in `variableDetails$variable`. ```{r Read variables.csv, echo=FALSE, message=FALSE, warning=FALSE} library(readr) +library(DT) varDetails <- read.csv(file.path(getwd(), '../inst/extdata/variableDetails.csv')) +datatable(varDetails, options = list(pageLength = 5)) cat("In the `variableDetails.csv` worksheet there are", nrow(varDetails), "rows and", ncol(varDetails), "columns", "\n\n") ``` +## Structure of variableDetails.csv + ### Rows Each row in `variableDetails.csv` holds the recode rules for transforming a single category for a variable in `variables.csv`. An exception to this rule are the "don't know", "refusal", and "not stated" categories, which are combined as a single missing category. For each unique variable, an `else` row is used to assign values not identified in other rows. We recommend not combining variables across the CCHS if variable has an important change between CCHS cycles `variableDetails$notes` is used to identify issues that may be relevant when transforming the variable or category. @@ -44,26 +47,26 @@ If a categorical variable has 4 distinct categories, along with a "not applicabl `RecWTable()` uses the `tagged_na()` function from the [haven](https://www.rdocumentation.org/packages/haven/versions/2.1.1) package to tag not applicable responses as `NA(a)`, and missing values (don't know, refusal, not stated) as `NA(b)`. As you will see later, not applicable values are transformed to `NA::a`, and missing values are transformed to `NA::b`. ### Columns -The following are the columns that are listed in `variableDetails.csv`. Many of these columns need to be specified in order for `RecWTable` to be functional: +The following are the columns that are listed in `variableDetails.csv`. 
Many of these columns need to be specified in order for `RecWTable` to be functional. We will use the `sex` variable to illustrate how each column is specified: 1. **variable:** the name of the final transformed variable. In `variableDetails.csv`, we have designated the variable names used in CCHS cycles from 2007 to 2014 as the final transformed variable name. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, filter = 'top', options=list(columnDefs = list(list(visible=FALSE, targets=c(2:16)), list(className = 'dt-center', targets = 0:1)), pageLength = 5)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(2:16)), list(className = 'dt-center', targets = 0:1)), pageLength = 5)) ``` 2. **dummyVariable:** the dummy variable for each category in a transformed categorical variable. This is only applicable for categorical variables; for continuous variables it is set as `N/A`. The name of a dummy variable consists of the final variable name, the number of categories in the variable, and the category level for each category. Note that this column is not necessary for `RecWTable`. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets=c(3:16)), list(className = 'dt-center', targets = 0:2)), pageLength = 5)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(3:16)), list(className = 'dt-center', targets = 0:2)), pageLength = 5)) ``` 3. **toType:** the variable type of the final transformed variable. In this column, a transformed variable that is categorical will be specified as `cat`; while a transformed variable that is continuous will be specified as `cont`. 
```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, filter = 'top', options=list(columnDefs = list(list(visible=FALSE, targets=c(4:16)), list(className = 'dt-center', targets = 0:3)), pageLength = 5)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(4:16)), list(className = 'dt-center', targets = 0:3)), pageLength = 5)) ``` 4. **databaseStart:** the CCHS surveys that contain the variable of interest, separated by commas. Each CCHS survey contains a unique identifier in DDI document. @@ -79,14 +82,14 @@ cat('abstract: ', unlist(CCHS2001_DDI$ddiObject$codeBook$stdyDscr$stdyInfo$abstr ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets=c(5:16)), list(className = 'dt-center', targets = 0:4)), pageLength = 5)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(5:16)), list(className = 'dt-center', targets = 0:4)), pageLength = 5)) ``` 5. **variableStart:** the original names of the variables as they are listed in each respective CCHS cycle, separated by commas. If the variable name in a particular CCHS survey is different from the transformed variable name, write out the CCHS survey identifier, add two colons, and write out the original variable name for that cycle. If the variable name in a particular CCHS survey is the same as the transformed variable name, the variable name is written out surrounded by square brackets. Note: this only needs to be written out **once**. 
```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets=c(6:16)), list(className = 'dt-center', targets = 0:5)), pageLength = 5)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(6:16)), list(className = 'dt-center', targets = 0:5)), pageLength = 5)) ``` + The categorical `age` variable in the 2001 CCHS survey is `DHHAGAGE`. If the final variable name for categorical age in the **variable** column is `DHHGAGE`, you would write the following in this column: `cchs-82M0013-E-2001-c1-1-general-file::DHHAGAGE` @@ -97,14 +100,14 @@ datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, filter = 'top', options=list(columnDefs = list(list(visible=FALSE, targets=c(7:16)), list(className = 'dt-center', targets = 0:6)), pageLength = 5, scrollX = TRUE)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(7:16)), list(className = 'dt-center', targets = 0:6)), pageLength = 5, scrollX = TRUE)) ``` 7. **recTo:** the value you would like to recode each category value to. For continuous variables that are not transformed in type, you would write in this column `copy` so that the function copies the values without any transformations. For the not applicable category, write `NA::a`. For missing & else categories, write `NA::b` ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets=c(8:16)), list(className = 'dt-center', targets = 0:7)), pageLength = 5, scrollX = TRUE)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(8:16)), list(className = 'dt-center', targets = 0:7)), pageLength = 5, scrollX = TRUE)) ``` + For categorical variables that are not changing variable types (i.e. 
cat to cat), it is ideal to retain the same values as indicated in each CCHS survey. But for transformed categorical variables that have changed in type (i.e cat to cont), you will have to develop values that make the most sense to your analysis. In `variableDetails.csv`, variables that have gone from cat to cont have used midpoints of each category. @@ -113,28 +116,28 @@ datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets=c(9:16)), list(className = 'dt-center', targets = 0:8)), pageLength = 5, scrollX = TRUE)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(9:16)), list(className = 'dt-center', targets = 0:8)), pageLength = 5, scrollX = TRUE)) ``` 9. **catLabel:** short form label describing the category of a particular variable. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets=c(10:16)), list(className = 'dt-center', targets = 0:9)), pageLength = 5, scrollX = TRUE)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(10:16)), list(className = 'dt-center', targets = 0:9)), pageLength = 5, scrollX = TRUE)) ``` 10. **catLabelLong:** more detailed label describing the category of a particular variable. This label should be identical to what is shown in the CCHS data documentation, unless you are creating derived variables and would like to create your own label for it. 
```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets=c(11:16)), list(className = 'dt-center', targets = 0:10)), pageLength = 5, scrollX = TRUE)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(11:16)), list(className = 'dt-center', targets = 0:10)), pageLength = 5, scrollX = TRUE)) ``` 11. **units:** the units of a particular variable. If there are no units for the variable, write `N/A`. Note, the function will not work if there different units between the rows of a variable. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets=c(12:16)), list(className = 'dt-center', targets = 0:11)), pageLength = 5, scrollX = TRUE)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(12:16)), list(className = 'dt-center', targets = 0:11)), pageLength = 5, scrollX = TRUE)) ``` 12. **recFrom:** the range of values for a particular category in a variable as indicated in the CCHS. See CCHS data documentation for each survey cycle and use the smallest and large values as your range to capture all values between the survey years. @@ -150,176 +153,176 @@ The rules for each category of a new variable are a string in `recFrom` and valu ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets=c(13:16)), list(className = 'dt-center', targets = 0:12)), pageLength = 5, scrollX = TRUE)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(13:16)), list(className = 'dt-center', targets = 0:12)), pageLength = 5, scrollX = TRUE)) ``` 13. **catStartLabel:** label describing each category. This label should be identical to what is shown in the CCHS data documentation. 
For the missing row, each missing category is described along with their coded values. You can import labels from the CCHS DDI files using `bllflow` helper functions. See [bllflow documentation](http://bllflow.projectbiglife.ca). ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets=c(14:16)), list(className = 'dt-center', targets = 0:13)), pageLength = 5, scrollX = TRUE)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(14:16)), list(className = 'dt-center', targets = 0:13)), pageLength = 5, scrollX = TRUE)) ``` 14. **variableStartShortLabel:** short form label describing the variable. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets=c(15:16)), list(className = 'dt-center', targets = 0:14)), pageLength = 5, scrollX = TRUE)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(15:16)), list(className = 'dt-center', targets = 0:14)), pageLength = 5, scrollX = TRUE)) ``` 15. **variableStartLabel:** more detailed label describing the variable. This label should be identical to what is shown in the CCHS data documentation. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(visible=FALSE, targets=c(16)), list(className = 'dt-center', targets = 0:15)), pageLength = 5, scrollX = TRUE)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(16)), list(className = 'dt-center', targets = 0:15)), pageLength = 5, scrollX = TRUE)) ``` 16. **notes: **any relevant notes to inform the user running the `recode-with-table` function. Things to include here would be changes in wording between CCHS surveys, missing/changes in categories, and changes in variable type between CCHS surveys. 
```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails, options=list(columnDefs = list(list(className = 'dt-center', targets = 0:16)), pageLength = 5, scrollX = TRUE)) +datatable(varDetails[c(75:79), ], options=list(columnDefs = list(list(className = 'dt-center', targets = 0:16)), pageLength = 5, scrollX = TRUE)) ``` -### Example: Body mass index (BMI) +## Example: Body mass index (BMI) This example will show how the transformed BMI variable was developed using `variableDetails.csv`. This is a continuous variable that has remained fairly constant in CCHS cycles between 2001 and 2014. -#### Rows +### Rows * For this variable, there are 4 rows, 1 for the continuous "category", 1 for not applicable, 1 for missing, and 1 for else. However, CCHS 2001 and 2003 code not applicable and the missing categories differently from other cycles so two extra rows will be created to account for this. In many instances there are changes in how variable categories are coded between CCHS cycles. But since the overall variable structure remains intact, extra rows can be used to help rectify this issue to make sure all values feed into the newly transformed variable. -#### Columns +### Columns 1. **variable:** the most common variable name for BMI is `HWTGBMI`. This should be written for each row. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(2:16)), list(className = 'dt-center', targets = 0:1)), dom = 't')) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(2:16)), list(className = 'dt-center', targets = 0:1)), dom = 't')) ``` 2. **dummyVariable:** BMI is a continuous variable, so it does not have dummy variables. 
```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(3:16)), list(className = 'dt-center', targets = 0:2)), dom = 't')) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(3:16)), list(className = 'dt-center', targets = 0:2)), dom = 't')) ``` 3. **toType:** BMI was captured in the CCHS as a continuous variable. It does not make much sense to transform it into a categorical variable, so the toType should be `cont` in each row of BMI. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(4:16)), list(className = 'dt-center', targets = 0:3)), dom = 't')) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(4:16)), list(className = 'dt-center', targets = 0:3)), dom = 't')) ``` 4. **databaseStart:** BMI was captured in all CCHS surveys between 2001 and 2014, so in the first row with the continuous "category" and the else row, the CCHS identifers will be listed this column: ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338, 343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(5:16)), list(className = 'dt-center', targets = 0:4)), dom = 't')) +datatable(varDetails[c(392, 397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(5:16)), list(className = 'dt-center', targets = 0:4)), dom = 't')) ``` + For the not applicable and missing rows that pertain to the 2001 and 2003 CCHS surveys, only write the 2001 and 2003 identifiers in this column. For the not applicable and missing rows that pertain to the 2005 CCHS survey and onwards, write the identifiers for CCHS 2005 onwards. This is because the not applicable category and the missing categories are coded differently. 
```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(339:342), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(5:16)), list(className = 'dt-center', targets = 0:4)), dom = 't')) +datatable(varDetails[c(393:396), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(5:16)), list(className = 'dt-center', targets = 0:4)), dom = 't')) ``` 5. **variableStart:** In the 2001, 2003, and 2005 CCHS surveys the BMI variable differs from the common name, while in the CCHS surveys from 2007-2014, the BMI variable is the same as the common name. However, the values for not applicable and missing categories changes after 2003. Therefore for the first & else rows, the variableStart column will look like this: ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338, 343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(6:16)), list(className = 'dt-center', targets = 0:5)), dom = 't')) +datatable(varDetails[c(392, 397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(6:16)), list(className = 'dt-center', targets = 0:5)), dom = 't')) ``` + For the not applicable and missing rows that pertain to the 2001 and 2003 CCHS surveys, the variable names for those two cycles will be written. 
```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(339:340), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(6:16)), list(className = 'dt-center', targets = 0:5)), dom = 't')) +datatable(varDetails[c(393:394), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(6:16)), list(className = 'dt-center', targets = 0:5)), dom = 't')) ``` + For the not applicable and missing rows that pertain to the 2005 CCHS surveys onwards, the column will look like this: ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(341:342), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(6:16)), list(className = 'dt-center', targets = 0:5)), dom = 't')) +datatable(varDetails[c(395:396), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(6:16)), list(className = 'dt-center', targets = 0:5)), dom = 't')) ``` 6. **fromType:** As mentioned previously, BMI was measured as a continuous variable in the CCHS, so the fromType should be `cont` in each row of BMI. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(7:16)), list(className = 'dt-center', targets = 0:6)), dom = 't', scrollX = TRUE)) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(7:16)), list(className = 'dt-center', targets = 0:6)), dom = 't', scrollX = TRUE)) ``` 7. **recTo:** Since this is a continuous variable, the first row (the main "category") has `copy` written. For the not applicable rows `NA::a` is written. For the missing and else rows `NA::b` is written. 
```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(8:16)), list(className = 'dt-center', targets = 0:7)), dom = 't', scrollX = TRUE)) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(8:16)), list(className = 'dt-center', targets = 0:7)), dom = 't', scrollX = TRUE)) ``` 8. **numValidCat:** Since this is a continuous variable, there are no actual categories; so `N/A` is written in each row. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(9:16)), list(className = 'dt-center', targets = 0:8)), dom = 't', scrollX = TRUE)) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(9:16)), list(className = 'dt-center', targets = 0:8)), dom = 't', scrollX = TRUE)) ``` 9. **catLabel:** For the first row `BMI` is written. Not applicable rows `not applicable` is written. Missing rows: `missing`. Else row: `else` ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(10:16)), list(className = 'dt-center', targets = 0:9)), dom = 't', scrollX = TRUE)) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(10:16)), list(className = 'dt-center', targets = 0:9)), dom = 't', scrollX = TRUE)) ``` 10. **catLabelLong:** For the first row, `body mass index` is written to give further detail on what BMI is. The other rows remain the same. 
```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(11:16)), list(className = 'dt-center', targets = 0:10)), dom = 't', scrollX = TRUE)) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(11:16)), list(className = 'dt-center', targets = 0:10)), dom = 't', scrollX = TRUE)) ``` 11. **units:** BMI is measured in kg/m^2^, so `kg/m2` is written in each row. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(12:16)), list(className = 'dt-center', targets = 0:11)), dom = 't', scrollX = TRUE)) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(12:16)), list(className = 'dt-center', targets = 0:11)), dom = 't', scrollX = TRUE)) ``` 12. **recFrom:** Going through the CCHS data documentation from 2001 to 2014, it was found that the lowest BMI value was 11.91 and the highest BMI value was 57.9. Therefore the recFrom for the first row is written as `11.91:57.9`. In the 2001 and 2003 CCHS surveys not applicable was coded as 999.6 so the recFrom for this row would be `999.6:999.6`. Similarly, in the 2001 and 2003 CCHS surveys don't know was coded as 999.7, refusal was coded as 999.8, and not stated was coded as 999.9. Therefore the recFrom for the missing row for CCHS 2001 and 2003 would be `999.7:999.9`. In the not applicable row for the 2005 CCHS survey onwards, the recFrom is `999.96:999.96`. In the missing row for CCHS 2005 onwards, the recFrom is `999.97:999.99`. For the else row, just write `else`. 
```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(13:16)), list(className = 'dt-center', targets = 0:12)), dom = 't', scrollX = TRUE)) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(13:16)), list(className = 'dt-center', targets = 0:12)), dom = 't', scrollX = TRUE)) ``` 13. **catStartLabel:** For the first row, `BMI / self-report (D,G)` is written as it is written in CCHS documentation. The other rows remain the same, and the values for each missing category are stated in the missing rows. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(14:16)), list(className = 'dt-center', targets = 0:13)), dom = 't', scrollX = TRUE)) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(14:16)), list(className = 'dt-center', targets = 0:13)), dom = 't', scrollX = TRUE)) ``` 14. **variableStartShortLabel:** Writing `BMI` for each row is sufficient for this variable. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(15:16)), list(className = 'dt-center', targets = 0:14)), dom = 't', scrollX = TRUE)) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(15:16)), list(className = 'dt-center', targets = 0:14)), dom = 't', scrollX = TRUE)) ``` 15. **variableStartLabel:** As per CCHS documentation, the label for this variable is `BMI / self-report - (D,G)`. 
```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(16)), list(className = 'dt-center', targets = 0:15)), dom = 't', scrollX = TRUE)) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(visible=FALSE, targets=c(16)), list(className = 'dt-center', targets = 0:15)), dom = 't', scrollX = TRUE)) ``` 16. **notes: ** As described previously, there are differences between CCHS surveys with regards to coding the not applicable and missing categories. These are documented in this section. Aside from this, there are other changes and differences that should also be documented. In the 2001 CCHS survey, this variable was restricted to participants aged 20-64. As well, don't know (999.97) and refusal (999.98) were not asked in this survey. ```{r, echo=FALSE, warning=FALSE} library(DT) -datatable(varDetails[c(338:343), ], options=list(columnDefs = list(list(className = 'dt-center', targets = 0:16)), pageLength = 5, scrollX = TRUE)) +datatable(varDetails[c(392:397), ], options=list(columnDefs = list(list(className = 'dt-center', targets = 0:16)), pageLength = 5, scrollX = TRUE)) ``` diff --git a/Vignettes/variablesSheet.Rmd b/Vignettes/variablesSheet.Rmd index 14e6e58c..44e1b8ed 100644 --- a/Vignettes/variablesSheet.Rmd +++ b/Vignettes/variablesSheet.Rmd @@ -1,6 +1,5 @@ --- -title: "`Variables.csv` worksheet" -date: 'Last updated: 2019-08-30' +title: "variables.csv" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{2 - variables.csv} @@ -15,16 +14,9 @@ knitr::opts_chunk$set( ) ``` -## **Introduction** -At the heart of `cchsflow` are two worksheets (CSV files) that describe how to transform variables different CCHS cycles into common variables: `variables.csv` and `variableDetails.csv`. +## Introduction -## **Two worksheets in cchsFlow** -1) `variables.csv` --- a list of all variable transformation that are currently included in `cchsflow`. 
Also included labels, sections (groups of variables in common themes), variable type, and units. -2) `variableDetails.csv` --- details on how all variables are transformed. Included is information on categories for all variables for all CCHS cycles from 2001 to 2014. Also included are category labels for both the new transformed variable and also the original variables. Notes are included when variables cannot be easily combined together because categories are not identical between surveys. - -This vignette describes `variables.csv` worksheet. See also the vignette `variableDetails.csv`. The vig `Transform CCHS variables` provides examples of how to use the two worksheeets with the [`bllflow` package](http://bllflow.projectbiglife.ca) to transform variables. - -## `variables.csv` +This vignette describes the `variables.csv` worksheet. See also the vignette `variableDetails.csv`. The vignette `Transform CCHS variables` provides examples of how to use the two worksheets with the [`bllflow` package](http://bllflow.projectbiglife.ca) to transform variables. Read `variables.csv` diff --git a/_pkgdown.yml b/_pkgdown.yml index e35327dc..8f6cb025 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -7,3 +7,23 @@ template: development: mode: auto +home: + links: + - text: Calculators + href: https://www.projectbiglife.ca + +navbar: + structure: + left: [home, articles, reference] + right: github + components: + articles: + text: Worksheets + menu: + - text: Using cchsflow + href: articles/usingcchsflow.html + - text: variables.csv sheet + href: articles/variablesSheet.html + - text: variableDetails.csv sheet + href: articles/variableDetails.html + diff --git a/docs/404.html b/docs/404.html index c46c9b7a..5266089d 100644 --- a/docs/404.html +++ b/docs/404.html @@ -75,12 +75,9 @@ -
  • - Reference -
  • - `variableDetails.csv` worksheet + variables.csv sheet
  • - `Variables.csv` worksheet + variableDetails.csv sheet
  • + +
  • + Reference
  • diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html index 8c9c9df5..9f345cc2 100644 --- a/docs/CONTRIBUTING.html +++ b/docs/CONTRIBUTING.html @@ -75,12 +75,9 @@ -
  • - Reference -
  • - `variableDetails.csv` worksheet + variables.csv sheet
  • - `Variables.csv` worksheet + variableDetails.csv sheet
  • + +
  • + Reference
  • diff --git a/docs/articles/index.html b/docs/articles/index.html index 871fa9c8..dfd28991 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -75,12 +75,9 @@ -
  • - Reference -
  • - `variableDetails.csv` worksheet + variables.csv sheet
  • - `Variables.csv` worksheet + variableDetails.csv sheet
  • + +
  • + Reference
  • @@ -127,8 +127,8 @@

    All vignettes

    diff --git a/docs/articles/usingcchsflow.html b/docs/articles/usingcchsflow.html index 9aa40b60..f5be3f2d 100644 --- a/docs/articles/usingcchsflow.html +++ b/docs/articles/usingcchsflow.html @@ -43,12 +43,9 @@ -
  • - Reference -
  • - `variableDetails.csv` worksheet + variables.csv sheet
  • - `Variables.csv` worksheet + variableDetails.csv sheet
  • + +
  • + Reference