From 321885f221038c388810130771372ff875e59728 Mon Sep 17 00:00:00 2001 From: Jarno Rantanen Date: Fri, 24 Apr 2020 11:45:30 +0300 Subject: [PATCH 1/3] Add stringLiteralUnionFields helper to model module. --- src/common/io.ts | 6 ++++++ src/common/model.ts | 26 +++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/common/io.ts b/src/common/io.ts index 92a3fccd..c5d7a1b5 100644 --- a/src/common/io.ts +++ b/src/common/io.ts @@ -1,4 +1,5 @@ import * as t from 'io-ts'; +import { LiteralType } from 'io-ts'; function defineRegexValidatedStringType(name: string, regex: RegExp) { const guard = (input: unknown): input is string => typeof input === 'string' && !!input.match(regex); @@ -41,3 +42,8 @@ export const generalWellbeing = t.union([t.literal('fine'), t.literal('impaired' // range of 0–99 days export const duration = defineRegexValidatedStringType('symptomsDuration', /^[0-9]{1,2}$/); + +// @example isStringLiteralType(fever) => false +// @example isStringLiteralType(fever.types[0]) => true +export const isStringLiteralType = (x: unknown): x is LiteralType<string> => + x instanceof LiteralType && typeof x.value === 'string'; diff --git a/src/common/model.ts b/src/common/model.ts index f03bf78e..b83a7e3a 100644 --- a/src/common/model.ts +++ b/src/common/model.ts @@ -1,5 +1,6 @@ import { isRight } from 'fp-ts/lib/Either'; import * as t from 'io-ts'; +import { UnionType } from 'io-ts'; import { PathReporter } from 'io-ts/lib/PathReporter'; import { AbuseScore } from '../backend/abuseDetection'; import { @@ -10,16 +11,18 @@ import { gender, generalWellbeing, iso8601DateString, + isStringLiteralType, notAnswered, postalCode, uuidString, yesOrNo, } from './io'; +import { nonNullable } from './types'; // Because AWS Athena prefers lower-case column names (https://docs.aws.amazon.com/athena/latest/ug/tables-databases-columns-names.html), // we use snake case for some of these models, instead of camel case 
(https://en.wikipedia.org/wiki/Letter_case#Special_case_styles). -const responseFields = { +export const responseFields = { fever: fever, cough: cough, breathing_difficulties: yesOrNo, @@ -38,6 +41,7 @@ const responseFields = { gender: gender, postal_code: postalCode, }; +export const responseFieldKeys = (Object.keys(responseFields) as any) as Array<keyof typeof responseFields>; // this is theoretically unsafe (https://stackoverflow.com/a/55012175) but practically a lot safer than going with string[] ¯\_(ツ)_/¯ export const FrontendResponseModel = t.strict( { @@ -84,3 +88,23 @@ export function assertIs>(codec: C): (x: unknown) => t.T } }; } + +// Defines tuples describing all response fields that are simple unions of string literals. +// @example [ +// [ 'healthcare_contact', [ 'yes', 'no' ] ], +// [ 'general_wellbeing', [ 'fine', 'impaired', 'bad' ] ], +// ... +// ] +// What makes these fields special is that their values are easy to GROUP BY, SUM() etc in queries. +export const stringLiteralUnionFields: [string, string[]][] = responseFieldKeys + .map(key => + responseFields[key] instanceof UnionType + ? ([ + key, + (responseFields[key] as any).types // TODO: Assert that responseFields[key] is UnionType<LiteralType<string>> instead (how?), to get rid of the awkward any + .map((t: unknown) => (isStringLiteralType(t) ? t.value : null)) + .filter(nonNullable), + ] as [string, string[]]) + : null, + ) + .filter(nonNullable); From d915503501c119e659cceda3e287eab194972ebd Mon Sep 17 00:00:00 2001 From: Jarno Rantanen Date: Fri, 24 Apr 2020 11:47:15 +0300 Subject: [PATCH 2/3] Add query for getting the daily totals from Athena. 
--- src/backend/queries.ts | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/backend/queries.ts b/src/backend/queries.ts index 05151da9..36d0a9a5 100644 --- a/src/backend/queries.ts +++ b/src/backend/queries.ts @@ -1,3 +1,6 @@ +import { flatten } from 'lodash'; +import { stringLiteralUnionFields } from '../common/model'; + export const totalResponsesQuery = 'SELECT COUNT(*) as total_responses FROM responses'; export const postalCodeLevelDataQuery = `SELECT postal_code, @@ -34,3 +37,32 @@ export const postalCodeLevelDataQuery = `SELECT postal_code, FROM responses WHERE (country_code = 'FI' or country_code = '') GROUP BY postal_code ORDER BY responses DESC`; + +export const dailyTotalsQuery = ` +SELECT + day, + COUNT(*) AS total, + ${flatten( + stringLiteralUnionFields.map(([field, values]) => + values.map(value => `SUM(${field}_${value}) AS ${field}_${value}`), + ), + ).join(',\n')} +FROM + ( + SELECT + SUBSTR(timestamp, 1, 10) AS day, + ${flatten( + stringLiteralUnionFields.map(([field, values]) => + values.map(value => `IF(${field} = '${value}', 1, 0) AS ${field}_${value}`), + ), + ).join(',\n')} + FROM + responses + WHERE + country_code = 'FI' + OR + country_code = '' + ) +GROUP BY day +ORDER BY day +`; From aaeb0d49ca025a37ced3af504a76329ca5442eca Mon Sep 17 00:00:00 2001 From: Jarno Rantanen Date: Fri, 24 Apr 2020 11:47:48 +0300 Subject: [PATCH 3/3] Include population of daily_totals.json in storeDataDumpsToS3(). 
--- src/backend/main.ts | 60 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/src/backend/main.ts b/src/backend/main.ts index 55a1effc..0281999f 100644 --- a/src/backend/main.ts +++ b/src/backend/main.ts @@ -2,8 +2,15 @@ import AthenaExpress from 'athena-express'; import * as AWS from 'aws-sdk'; import { createHash } from 'crypto'; import { readFileSync } from 'fs'; +import { fromPairs } from 'lodash'; import { v4 as uuidV4 } from 'uuid'; -import { assertIs, BackendResponseModel, BackendResponseModelT, FrontendResponseModelT } from '../common/model'; +import { + assertIs, + BackendResponseModel, + BackendResponseModelT, + FrontendResponseModelT, + stringLiteralUnionFields, +} from '../common/model'; import { AbuseFingerprint, AbuseScore, @@ -12,7 +19,7 @@ import { performAbuseDetection, } from './abuseDetection'; import { mapPostalCode } from './postalCode'; -import { postalCodeLevelDataQuery, totalResponsesQuery } from './queries'; +import { dailyTotalsQuery, postalCodeLevelDataQuery, totalResponsesQuery } from './queries'; import { getSecret } from './secrets'; export const APP_VERSION = process.env.AWS_EXECUTION_ENV @@ -134,9 +141,10 @@ export async function storeDataDumpsToS3() { // // Perform Athena queries in parallel - const [totalResponsesResult, postalCodeLevelDataResponse] = await Promise.all([ + const [totalResponsesResult, postalCodeLevelDataResponse, dailyTotalsResponse] = await Promise.all([ athenaExpress.query({ sql: totalResponsesQuery, db }), athenaExpress.query({ sql: postalCodeLevelDataQuery, db }), + athenaExpress.query({ sql: dailyTotalsQuery, db }), ]); // @@ -147,6 +155,12 @@ export async function storeDataDumpsToS3() { const cityLevelData = await mapPostalCodeLevelToCityLevelData(postalCodeLevelDataResponse.Items, bucket); + const dailyTotalsData = dailyTotalsResponse.Items.map((item: any) => ({ + day: item.day, + total: item.total, + ...collateDailyTotalItem(item), + })); + // // 
Push data to S3 @@ -178,12 +192,26 @@ export async function storeDataDumpsToS3() { data: cityLevelData, }, }), + + s3PutJsonHelper({ + Bucket: bucket, + Key: 'daily_totals.json', + Body: { + meta: { + description: 'Total responses and field-specific totals per each day.', + generated: new Date().toISOString(), + link: `https://${domain}/daily_totals.json`, + }, + data: dailyTotalsData, + }, + }), ]); } const openDataFileNames = [ 'total_responses', 'city_level_general_results', + 'daily_totals', 'low_population_postal_codes', 'population_per_city', 'postalcode_city_mappings', @@ -312,6 +340,32 @@ async function mapPostalCodeLevelToCityLevelData(postalCodeLevelData: any[], buc return Object.values(resultsByCity); } +// @example collateDailyTotalItem({ +// day: '2020-03-26', +// total: '5823', +// fever_no: '5126', +// fever_slight: '623', +// fever_high: '74', +// ... +// }) => { +// day: '2020-03-26', +// total: 5823, +// fever: { no: 5126, slight: 623, high: 74 }, +// ... +// } +function collateDailyTotalItem(item: any) { + return { + day: item.day, + total: Number(item.total), + ...fromPairs( + stringLiteralUnionFields.map(([field, values]) => [ + field, + fromPairs(values.map(value => [value, Number(item[`${field}_${value}`])])), + ]), + ), + }; +} + // // S3 helpers