Skip to content

Commit

Permalink
Merge pull request #266 from futurice/add-daily-totals-data-dump
Browse files: browse the repository at this point in the history
Add daily totals data dump
  • Loading branch information
jareware authored Apr 24, 2020
2 parents 3b1ff00 + aaeb0d4 commit ab7fad9
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 4 deletions.
60 changes: 57 additions & 3 deletions src/backend/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,15 @@ import AthenaExpress from 'athena-express';
import * as AWS from 'aws-sdk';
import { createHash } from 'crypto';
import { readFileSync } from 'fs';
import { fromPairs } from 'lodash';
import { v4 as uuidV4 } from 'uuid';
import { assertIs, BackendResponseModel, BackendResponseModelT, FrontendResponseModelT } from '../common/model';
import {
assertIs,
BackendResponseModel,
BackendResponseModelT,
FrontendResponseModelT,
stringLiteralUnionFields,
} from '../common/model';
import {
AbuseFingerprint,
AbuseScore,
Expand All @@ -12,7 +19,7 @@ import {
performAbuseDetection,
} from './abuseDetection';
import { mapPostalCode } from './postalCode';
import { postalCodeLevelDataQuery, totalResponsesQuery } from './queries';
import { dailyTotalsQuery, postalCodeLevelDataQuery, totalResponsesQuery } from './queries';
import { getSecret } from './secrets';

export const APP_VERSION = process.env.AWS_EXECUTION_ENV
Expand Down Expand Up @@ -131,9 +138,10 @@ export async function storeDataDumpsToS3() {
//
// Perform Athena queries in parallel

const [totalResponsesResult, postalCodeLevelDataResponse] = await Promise.all([
const [totalResponsesResult, postalCodeLevelDataResponse, dailyTotalsResponse] = await Promise.all([
athenaExpress.query({ sql: totalResponsesQuery, db }),
athenaExpress.query({ sql: postalCodeLevelDataQuery, db }),
athenaExpress.query({ sql: dailyTotalsQuery, db }),
]);

//
Expand All @@ -144,6 +152,12 @@ export async function storeDataDumpsToS3() {

const cityLevelData = await mapPostalCodeLevelToCityLevelData(postalCodeLevelDataResponse.Items, bucket);

const dailyTotalsData = dailyTotalsResponse.Items.map((item: any) => ({
day: item.day,
total: item.total,
...collateDailyTotalItem(item),
}));

//
// Push data to S3

Expand Down Expand Up @@ -175,12 +189,26 @@ export async function storeDataDumpsToS3() {
data: cityLevelData,
},
}),

s3PutJsonHelper({
Bucket: bucket,
Key: 'daily_totals.json',
Body: {
meta: {
description: 'Total responses and field-specific totals per each day.',
generated: new Date().toISOString(),
link: `https://${domain}/daily_totals.json`,
},
data: dailyTotalsData,
},
}),
]);
}

const openDataFileNames = [
'total_responses',
'city_level_general_results',
'daily_totals',
'low_population_postal_codes',
'population_per_city',
'postalcode_city_mappings',
Expand Down Expand Up @@ -309,6 +337,32 @@ async function mapPostalCodeLevelToCityLevelData(postalCodeLevelData: any[], buc
return Object.values(resultsByCity);
}

// Turns one flat daily-totals row into a nested object: for every entry of
// stringLiteralUnionFields, the "<field>_<value>" columns are gathered under a
// single "<field>" key, and every count is passed through Number().
// The "day" property is copied over as-is.
// @example collateDailyTotalItem({
//   day: '2020-03-26',
//   total: '5823',
//   fever_no: '5126',
//   fever_slight: '623',
//   fever_high: '74',
//   ...
// }) => {
//   day: '2020-03-26',
//   total: 5823,
//   fever: { no: 5126, slight: 623, high: 74 },
//   ...
// }
function collateDailyTotalItem(item: any) {
  // One [field, { value: count, ... }] pair per string-literal-union field
  const countsPerField = stringLiteralUnionFields.map(([field, values]): [string, Record<string, number>] => {
    const countsPerValue = values.map((value): [string, number] => {
      const columnName = `${field}_${value}`;
      return [value, Number(item[columnName])];
    });
    return [field, fromPairs(countsPerValue)];
  });

  return {
    day: item.day,
    total: Number(item.total),
    ...fromPairs(countsPerField),
  };
}

//
// S3 helpers

Expand Down
32 changes: 32 additions & 0 deletions src/backend/queries.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import { flatten } from 'lodash';
import { stringLiteralUnionFields } from '../common/model';

export const totalResponsesQuery = 'SELECT COUNT(*) as total_responses FROM responses';

export const postalCodeLevelDataQuery = `SELECT postal_code,
Expand Down Expand Up @@ -34,3 +37,32 @@ export const postalCodeLevelDataQuery = `SELECT postal_code,
FROM responses WHERE (country_code = 'FI' or country_code = '')
GROUP BY postal_code
ORDER BY responses DESC`;

// Renders one SQL column expression per (field, value) pair found in
// stringLiteralUnionFields, and joins the expressions with ",\n".
const renderLiteralColumns = (toColumn: (field: string, value: string) => string): string =>
  flatten(stringLiteralUnionFields.map(([field, values]) => values.map(value => toColumn(field, value)))).join(',\n');

// Outer SELECT list: sums each 0/1 flag column produced by the inner query
const dailySumColumns = renderLiteralColumns((field, value) => `SUM(${field}_${value}) AS ${field}_${value}`);

// Inner SELECT list: a 0/1 flag per (field, value), set when the row's field equals the value
const dailyFlagColumns = renderLiteralColumns(
  (field, value) => `IF(${field} = '${value}', 1, 0) AS ${field}_${value}`,
);

// Query for per-day totals: groups responses by the first 10 characters of
// their timestamp and, for each day, counts all rows plus the rows matching
// each value of each string-literal-union field. Only rows whose country_code
// is 'FI' or empty are included.
export const dailyTotalsQuery = `
SELECT
day,
COUNT(*) AS total,
${dailySumColumns}
FROM
(
SELECT
SUBSTR(timestamp, 1, 10) AS day,
${dailyFlagColumns}
FROM
responses
WHERE
country_code = 'FI'
OR
country_code = ''
)
GROUP BY day
ORDER BY day
`;
6 changes: 6 additions & 0 deletions src/common/io.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import * as t from 'io-ts';
import { LiteralType } from 'io-ts';

function defineRegexValidatedStringType(name: string, regex: RegExp) {
const guard = (input: unknown): input is string => typeof input === 'string' && !!input.match(regex);
Expand Down Expand Up @@ -41,3 +42,8 @@ export const generalWellbeing = t.union([t.literal('fine'), t.literal('impaired'

// range of 0–99 days
export const duration = defineRegexValidatedStringType('symptomsDuration', /^[0-9]{1,2}$/);

// Type guard: true only for io-ts LiteralType instances whose literal value is a string.
// @example isStringLiteralType(fever) => false
// @example isStringLiteralType(fever.types[0]) => true
export const isStringLiteralType = (x: unknown): x is LiteralType<string> => {
  if (!(x instanceof LiteralType)) {
    return false;
  }
  return typeof x.value === 'string';
};
26 changes: 25 additions & 1 deletion src/common/model.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { isRight } from 'fp-ts/lib/Either';
import * as t from 'io-ts';
import { UnionType } from 'io-ts';
import { PathReporter } from 'io-ts/lib/PathReporter';
import { AbuseScore } from '../backend/abuseDetection';
import {
Expand All @@ -10,16 +11,18 @@ import {
gender,
generalWellbeing,
iso8601DateString,
isStringLiteralType,
notAnswered,
postalCode,
uuidString,
yesOrNo,
} from './io';
import { nonNullable } from './types';

// Because AWS Athena prefers lower-case column names (https://docs.aws.amazon.com/athena/latest/ug/tables-databases-columns-names.html),
// we use snake case for some of these models, instead of camel case (https://en.wikipedia.org/wiki/Letter_case#Special_case_styles).

const responseFields = {
export const responseFields = {
fever: fever,
cough: cough,
breathing_difficulties: yesOrNo,
Expand All @@ -38,6 +41,7 @@ const responseFields = {
gender: gender,
postal_code: postalCode,
};
// The keys of responseFields, typed as the union of its property names.
// Asserting the result of Object.keys() like this is theoretically unsafe (https://stackoverflow.com/a/55012175),
// but practically a lot safer than going with string[] ¯\_(ツ)_/¯
export const responseFieldKeys = Object.keys(responseFields) as Array<keyof typeof responseFields>;

export const FrontendResponseModel = t.strict(
{
Expand Down Expand Up @@ -84,3 +88,23 @@ export function assertIs<C extends t.ExactC<any>>(codec: C): (x: unknown) => t.T
}
};
}

// Lists every response field whose codec is a union, paired with the string literal values of that union
// (union members that are not string literals are left out).
// @example [
//   [ 'healthcare_contact', [ 'yes', 'no' ] ],
//   [ 'general_wellbeing', [ 'fine', 'impaired', 'bad' ] ],
//   ...
// ]
// These fields are singled out because their values are easy to GROUP BY, SUM() etc in queries.
export const stringLiteralUnionFields: [string, string[]][] = (() => {
  const collected: [string, string[]][] = [];
  for (const key of responseFieldKeys) {
    const codec = responseFields[key];
    if (!(codec instanceof UnionType)) {
      continue; // not a union codec, so there is nothing to enumerate
    }
    // TODO: Assert that codec is UnionType<Array<LiteralType>> instead (how?), to get rid of the awkward any
    const literalValues: string[] = (codec as any).types
      .map((member: unknown) => (isStringLiteralType(member) ? member.value : null))
      .filter(nonNullable);
    collected.push([key, literalValues]);
  }
  return collected;
})();

0 comments on commit ab7fad9

Please sign in to comment.