-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpg_diffix--fir.sql
491 lines (422 loc) · 13.9 KB
/
pg_diffix--fir.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION pg_diffix" to load this file. \quit
-- Lock down the extension schema (@extschema@ is substituted by CREATE EXTENSION):
-- first strip every privilege PUBLIC holds on the schema and on the tables it contains...
REVOKE ALL ON SCHEMA @extschema@ FROM PUBLIC;
REVOKE ALL ON ALL TABLES IN SCHEMA @extschema@ FROM PUBLIC;
-- ...then hand back USAGE only, so that any role can still reference objects in the schema.
GRANT USAGE ON SCHEMA @extschema@ TO PUBLIC;
DO $$
BEGIN
  -- Generate a random salt for the current database and persist it as the
  -- database-level default of the pg_diffix.salt setting.
  --
  -- format() is used instead of plain concatenation: %I quotes the database
  -- name as an identifier (names with upper-case letters, dashes, spaces, ...
  -- would otherwise produce a broken or misdirected ALTER DATABASE), and %L
  -- quotes the generated UUID as a string literal.
  EXECUTE format(
    'ALTER DATABASE %I SET pg_diffix.salt TO %L',
    current_database(),
    gen_random_uuid()
  );
END
$$ LANGUAGE plpgsql;
/* ----------------------------------------------------------------
* Internal functions
* ----------------------------------------------------------------
*/
-- placeholder_func: C-language stubs exported by the extension library.
-- Both overloads return the type of their first argument (anyelement); the
-- two-argument form additionally accepts one value of any type. They serve as
-- the state-transition function of the placeholder aggregates declared in this
-- script (count_noise, sum_noise, avg_noise, is_suppress_bin).
-- What the C implementation does is not visible from this file.
CREATE FUNCTION placeholder_func(anyelement)
  RETURNS anyelement
  LANGUAGE C
  IMMUTABLE
  STRICT
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';

CREATE FUNCTION placeholder_func(anyelement, "any")
  RETURNS anyelement
  LANGUAGE C
  IMMUTABLE
  STRICT
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';
-- internal_qual_wrapper(boolean) -> boolean: C-language function from the
-- extension library, declared VOLATILE and not STRICT.
-- NOTE(review): presumably wraps a qualifier expression for internal use by
-- the extension; confirm against the C sources.
CREATE FUNCTION internal_qual_wrapper(boolean)
  RETURNS boolean
  LANGUAGE C
  VOLATILE
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';
/* ----------------------------------------------------------------
* Utilities
* ----------------------------------------------------------------
*/
-- access_level() -> text: C-language utility from the extension library,
-- declared STABLE.
-- NOTE(review): presumably reports the access level in effect for the current
-- session; confirm against the C sources.
CREATE FUNCTION access_level()
  RETURNS text
  LANGUAGE C
  STABLE
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';
-- show_settings(): lists name, current value and short description of every
-- configuration parameter whose name starts with the literal prefix
-- "pg_diffix.", as reported by pg_settings.
CREATE FUNCTION show_settings()
RETURNS table(name text, setting text, short_desc text)
LANGUAGE SQL
AS $$
  SELECT
    s.name,
    s.setting,
    s.short_desc
  FROM pg_catalog.pg_settings AS s
  -- starts_with() instead of LIKE 'pg_diffix.%': inside a LIKE pattern the
  -- underscore is a single-character wildcard, so the old filter would also
  -- accept names such as "pgXdiffix.foo".
  WHERE starts_with(s.name, 'pg_diffix.');
$$
SECURITY INVOKER SET search_path = '';
-- show_labels(): lists every security label registered under the pg_diffix
-- provider. Rows are ordered tables first, then columns, then roles (any other
-- object type sorts last because the CASE yields NULL), and by object name
-- within each group.
CREATE FUNCTION show_labels()
RETURNS table(objtype text, objname text, label text)
LANGUAGE SQL
AS $$
  SELECT
    l.objtype,
    l.objname,
    l.label
  FROM pg_catalog.pg_seclabels AS l
  WHERE l.provider = 'pg_diffix'
  ORDER BY
    CASE l.objtype
      WHEN 'table' THEN 1
      WHEN 'column' THEN 2
      WHEN 'role' THEN 3
    END,
    l.objname;
$$
SECURITY INVOKER SET search_path = '';
-- unnest_histogram(a): returns one row per slice of the first dimension of
-- `a`, each row being an array one dimension lower than the input (FOREACH
-- ... SLICE 1). An empty array produces no rows; STRICT makes a NULL input
-- produce no rows as well.
CREATE FUNCTION unnest_histogram(a ANYARRAY, OUT a_1d ANYARRAY)
RETURNS SETOF ANYARRAY
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE STRICT AS
$$
  BEGIN
    -- array_length() is NULL for an empty array; treat that like "no rows".
    IF coalesce(array_length(a, 1), 0) <= 0 THEN
      RETURN;
    END IF;

    FOREACH a_1d SLICE 1 IN ARRAY a LOOP
      -- Emits the current value of the OUT parameter a_1d.
      RETURN NEXT;
    END LOOP;
  END
$$;
-- mark_personal(table_name, aid_columns...): labels a table as 'personal' for
-- the pg_diffix provider and labels each listed column as 'aid'.
--
-- table_name   Name of an existing relation (optionally schema-qualified), in
--              the syntax accepted by the regclass input function.
-- aid_columns  One or more column names written in SQL identifier syntax
--              (unquoted names are case-folded, "Quoted" names are taken
--              verbatim), exactly as they were interpreted before.
--
-- Raises an error if the relation does not exist, if an AID column name is not
-- a single valid identifier, or if a SECURITY LABEL statement is rejected.
CREATE PROCEDURE mark_personal(table_name text, variadic aid_columns text[])
AS $$
  DECLARE
    -- Resolve the relation once; the text form of a regclass is always a
    -- correctly quoted (and, if needed, schema-qualified) name, so it is safe
    -- to splice into dynamic SQL.
    target_table regclass := table_name::regclass;
    aid_column text;
    aid_ident text[];
  BEGIN
    -- Drop existing AID labels of this table's columns. classoid is part of
    -- the key of pg_seclabel: OIDs are only unique within one catalog, so
    -- objoid alone could match an unrelated object.
    DELETE FROM pg_catalog.pg_seclabel
    WHERE provider = 'pg_diffix'
      AND classoid = 'pg_catalog.pg_class'::regclass
      AND objoid = target_table::oid
      AND label = 'aid';

    EXECUTE format(
      'SECURITY LABEL FOR pg_diffix ON TABLE %s IS %L',
      target_table, 'personal'
    );

    FOREACH aid_column IN ARRAY aid_columns LOOP
      -- parse_ident() applies the parser's identifier rules (case folding,
      -- quote removal) and rejects anything that is not an identifier, which
      -- keeps arbitrary SQL out of the statement built below.
      aid_ident := parse_ident(aid_column);
      IF array_length(aid_ident, 1) IS DISTINCT FROM 1 THEN
        RAISE EXCEPTION 'Invalid AID column name `%`.', aid_column;
      END IF;

      EXECUTE format(
        'SECURITY LABEL FOR pg_diffix ON COLUMN %s.%I IS %L',
        target_table, aid_ident[1], 'aid'
      );
    END LOOP;
  END;
$$ LANGUAGE plpgsql;
-- mark_public(table_name): removes the 'aid' labels from the columns of the
-- given table and labels the table itself as 'public' for the pg_diffix
-- provider.
--
-- table_name  Name of an existing relation (optionally schema-qualified), in
--             the syntax accepted by the regclass input function.
CREATE PROCEDURE mark_public(table_name text)
AS $$
  DECLARE
    -- Resolve the relation once; the text form of a regclass is a correctly
    -- quoted name and therefore safe to splice into dynamic SQL.
    target_table regclass := table_name::regclass;
  BEGIN
    -- classoid is part of the key of pg_seclabel: OIDs are only unique within
    -- one catalog, so objoid alone could match an unrelated object.
    DELETE FROM pg_catalog.pg_seclabel
    WHERE provider = 'pg_diffix'
      AND classoid = 'pg_catalog.pg_class'::regclass
      AND objoid = target_table::oid
      AND label = 'aid';

    EXECUTE format(
      'SECURITY LABEL FOR pg_diffix ON TABLE %s IS %L',
      target_table, 'public'
    );
  END;
$$ LANGUAGE plpgsql;
-- unmark_table(table_name): deletes every pg_diffix security label attached to
-- the given table or to any of its columns.
--
-- table_name  Name of an existing relation (optionally schema-qualified), in
--             the syntax accepted by the regclass input function.
CREATE PROCEDURE unmark_table(table_name text)
AS $$
  BEGIN
    -- classoid is part of the key of pg_seclabel: OIDs are only unique within
    -- one catalog, so matching on objoid alone could delete the label of an
    -- unrelated object that happens to share the table's OID.
    DELETE FROM pg_catalog.pg_seclabel
    WHERE provider = 'pg_diffix'
      AND classoid = 'pg_catalog.pg_class'::regclass
      AND objoid = table_name::regclass::oid;
  END;
$$ LANGUAGE plpgsql;
-- Access levels that mark_role() can assign to a role. The order of the labels
-- defines the sort order of the enum values.
CREATE TYPE AccessLevel AS ENUM (
  'direct',
  'anonymized_trusted',
  'anonymized_untrusted'
);
-- mark_role(role_name, access_level): attaches the given access level to a
-- role as its pg_diffix security label.
--
-- role_name     Raw role name; it is quoted with quote_ident() before use.
-- access_level  One of the AccessLevel enum values; its text form becomes the
--               label.
CREATE PROCEDURE mark_role(role_name text, access_level AccessLevel)
AS $$
  DECLARE
    label_stmt text;
  BEGIN
    -- Assemble the statement first, then run it; a NULL argument yields a NULL
    -- statement, which EXECUTE rejects.
    label_stmt := 'SECURITY LABEL FOR pg_diffix ON ROLE '
      || quote_ident(role_name)
      || ' IS '''
      || access_level
      || '''';

    EXECUTE label_stmt;
  END;
$$ LANGUAGE plpgsql;
-- unmark_role(role_name): removes the pg_diffix security label from a role by
-- setting it to NULL.
--
-- role_name  Raw role name; it is quoted with quote_ident() before use.
CREATE PROCEDURE unmark_role(role_name text)
AS $$
  DECLARE
    label_stmt text;
  BEGIN
    label_stmt := 'SECURITY LABEL FOR pg_diffix ON ROLE '
      || quote_ident(role_name)
      || ' IS NULL';

    EXECUTE label_stmt;
  END;
$$ LANGUAGE plpgsql;
-- assert_column_is_not_aid(table_name, column_name): raises an exception when
-- the given column carries the pg_diffix 'aid' label; returns silently
-- otherwise.
--
-- table_name   Name of an existing relation, in regclass input syntax.
-- column_name  Exact (already case-resolved, unquoted) name of a live column
--              of that relation, as stored in pg_attribute.attname.
--
-- Raises NO_DATA_FOUND when the column does not exist (SELECT ... INTO STRICT).
CREATE PROCEDURE assert_column_is_not_aid(table_name text, column_name text)
AS $$
  DECLARE
    column_subid integer;
    -- OIDs are unsigned 32-bit values; keep them in an oid variable rather
    -- than squeezing them through a signed integer.
    table_id oid;
    is_aid_column boolean;
  BEGIN
    table_id := table_name::regclass::oid;

    -- Static SQL with bound variables; nothing here needs dynamic EXECUTE.
    -- The cast to name keeps the comparison semantics of the catalog column.
    SELECT a.attnum
    INTO STRICT column_subid
    FROM pg_catalog.pg_attribute AS a
    WHERE a.attrelid = table_id
      AND NOT a.attisdropped
      AND a.attname = column_name::name;

    -- classoid is part of the key of pg_seclabel: OIDs are only unique within
    -- one catalog, so restrict the lookup to labels on relations.
    is_aid_column := EXISTS (
      SELECT 1
      FROM pg_catalog.pg_seclabel AS l
      WHERE l.provider = 'pg_diffix'
        AND l.classoid = 'pg_catalog.pg_class'::regclass
        AND l.objoid = table_id
        AND l.objsubid = column_subid
        AND l.label = 'aid'
    );

    IF is_aid_column THEN
      RAISE EXCEPTION 'Column `%` is already marked as an AID.', column_name;
    END IF;
  END;
$$ LANGUAGE plpgsql;
-- mark_not_filterable(table_name, column_name): labels a column as
-- 'not_filterable' for the pg_diffix provider, after checking that it is not
-- an AID column.
--
-- table_name   Name of an existing relation, in regclass input syntax.
-- column_name  Exact column name as stored in the catalog (the preceding
--              assertion looks it up verbatim in pg_attribute).
--
-- NOTE(review): the helper is called through the literal schema name "diffix",
-- while the top of this script refers to @extschema@; confirm the extension is
-- always installed into that schema.
CREATE PROCEDURE mark_not_filterable(table_name text, column_name text)
AS $$
  BEGIN
    CALL diffix.assert_column_is_not_aid(table_name, column_name);

    -- %s on a regclass yields a correctly quoted relation name; %I quotes the
    -- column name, so mixed-case or otherwise special column names address the
    -- same column the assertion just verified.
    EXECUTE format(
      'SECURITY LABEL FOR pg_diffix ON COLUMN %s.%I IS %L',
      table_name::regclass, column_name, 'not_filterable'
    );
  END;
$$ LANGUAGE plpgsql;
-- mark_filterable(table_name, column_name): clears the pg_diffix label of a
-- column (sets it to NULL), after checking that it is not an AID column.
--
-- table_name   Name of an existing relation, in regclass input syntax.
-- column_name  Exact column name as stored in the catalog (the preceding
--              assertion looks it up verbatim in pg_attribute).
--
-- NOTE(review): the helper is called through the literal schema name "diffix",
-- while the top of this script refers to @extschema@; confirm the extension is
-- always installed into that schema.
CREATE PROCEDURE mark_filterable(table_name text, column_name text)
AS $$
  BEGIN
    CALL diffix.assert_column_is_not_aid(table_name, column_name);

    -- %s on a regclass yields a correctly quoted relation name; %I quotes the
    -- column name, so mixed-case or otherwise special column names address the
    -- same column the assertion just verified.
    EXECUTE format(
      'SECURITY LABEL FOR pg_diffix ON COLUMN %s.%I IS NULL',
      table_name::regclass, column_name
    );
  END;
$$ LANGUAGE plpgsql;
/* ----------------------------------------------------------------
* Common aggregation interface
* ----------------------------------------------------------------
*/
/*
* AnonAggState is a pointer in disguise. We want Postgres to pass it by value to avoid unintended data copying.
*
* The AnonAggState data is supposed to be finalized by the parent BucketScan.
* As a raw value, it cannot be used in expressions as it does not support any operators/functions.
* Projections of original aggregates are delayed until after finalization in the BucketScan node.
*
* However, an AnonAggState retrieved by a direct call to anonymizing aggregators may be inspected in the query output.
* Serialization is handled by `anon_agg_state_output`, which forwards it to the aggregate's explain implementation.
* The parse function `anon_agg_state_input` is a stub which will always throw an error.
*
* If anonymizing aggregators are invoked directly by SQL in a non-anonymizing query, then the AnonAggState
* will be allocated in the aggregation context of the current Agg node. Passing an AnonAggState up
* (for example from a subquery) outside of the intended scope may result in memory corruption.
*
* See `aggregation/common.h` for more info.
*/
-- Shell declaration: registers the type name only, so that the I/O functions
-- declared next can mention AnonAggState in their signatures before the full
-- type definition is supplied.
CREATE TYPE AnonAggState;
-- anon_agg_state_input(cstring) -> AnonAggState: text-input routine of the
-- AnonAggState type, implemented in C. According to the type's header comment
-- in this script it is a stub that always throws an error.
CREATE FUNCTION anon_agg_state_input(cstring)
  RETURNS AnonAggState
  LANGUAGE C
  STABLE
  STRICT
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';
-- anon_agg_state_output(AnonAggState) -> cstring: text-output routine of the
-- AnonAggState type, implemented in C. According to the type's header comment
-- in this script it forwards to the aggregate's explain implementation.
CREATE FUNCTION anon_agg_state_output(AnonAggState)
  RETURNS cstring
  LANGUAGE C
  STABLE
  STRICT
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';
-- Full definition of AnonAggState, completing the shell type declared earlier.
-- INPUT/OUTPUT bind the text conversion routines; LIKE = internal copies the
-- physical representation (length, by-value passing, alignment, storage) from
-- the built-in "internal" type, in line with the header comment's intent of
-- having Postgres pass the state by value.
CREATE TYPE AnonAggState (
INPUT = anon_agg_state_input,
OUTPUT = anon_agg_state_output,
LIKE = internal
);
-- anon_agg_state_transfn: C transition functions shared by the anonymizing
-- aggregates of this script. The three overloads cover aggregates with zero,
-- one or two leading arguments ahead of the variadic AID list. They are not
-- STRICT, so they are also invoked for NULL inputs.
CREATE FUNCTION anon_agg_state_transfn(AnonAggState, VARIADIC aids "any")
  RETURNS AnonAggState
  LANGUAGE C
  STABLE
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';

CREATE FUNCTION anon_agg_state_transfn(AnonAggState, value "any", VARIADIC aids "any")
  RETURNS AnonAggState
  LANGUAGE C
  STABLE
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';

CREATE FUNCTION anon_agg_state_transfn(AnonAggState, arg1 "any", arg2 "any", VARIADIC aids "any")
  RETURNS AnonAggState
  LANGUAGE C
  STABLE
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';
-- anon_agg_state_finalfn: C final functions shared by the anonymizing
-- aggregates of this script. The aggregates set finalfunc_extra, so each
-- overload takes the state followed by parameters mirroring the aggregate's
-- own argument list (zero, one or two leading arguments plus the variadic AID
-- list). The result is again an AnonAggState.
CREATE FUNCTION anon_agg_state_finalfn(AnonAggState, VARIADIC aids "any")
  RETURNS AnonAggState
  LANGUAGE C
  STABLE
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';

CREATE FUNCTION anon_agg_state_finalfn(AnonAggState, value "any", VARIADIC aids "any")
  RETURNS AnonAggState
  LANGUAGE C
  STABLE
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';

CREATE FUNCTION anon_agg_state_finalfn(AnonAggState, arg1 "any", arg2 "any", VARIADIC aids "any")
  RETURNS AnonAggState
  LANGUAGE C
  STABLE
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';
/* ----------------------------------------------------------------
* Non-anonymizing aggregators
* ----------------------------------------------------------------
*/
-- count_noise(*) / count_noise(value): aggregates declared with the
-- placeholder transition function and a float8 state starting at 0.0.
-- NOTE(review): presumably the real computation is substituted by the
-- extension at query time; confirm against the C sources.
CREATE AGGREGATE count_noise(*) (
  STYPE = float8,
  SFUNC = placeholder_func,
  INITCOND = 0.0
);

CREATE AGGREGATE count_noise(value "any") (
  STYPE = float8,
  SFUNC = placeholder_func,
  INITCOND = 0.0
);
-- sum_noise(value): aggregate declared with the placeholder transition
-- function and a float8 state starting at 0.0.
-- NOTE(review): presumably the real computation is substituted by the
-- extension at query time; confirm against the C sources.
CREATE AGGREGATE sum_noise(value "any") (
  STYPE = float8,
  SFUNC = placeholder_func,
  INITCOND = 0.0
);
-- avg_noise(value): aggregate declared with the placeholder transition
-- function and a float8 state starting at 0.0.
-- NOTE(review): presumably the real computation is substituted by the
-- extension at query time; confirm against the C sources.
CREATE AGGREGATE avg_noise(value "any") (
  STYPE = float8,
  SFUNC = placeholder_func,
  INITCOND = 0.0
);
/*
* count_histogram
*/
-- count_histogram_transfn: C transition functions of the count_histogram
-- aggregates. The state is an opaque "internal" value; the second overload
-- additionally receives a bin size.
CREATE FUNCTION count_histogram_transfn(internal, value "any")
  RETURNS internal
  LANGUAGE C
  STABLE
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';

CREATE FUNCTION count_histogram_transfn(internal, value "any", bin_size bigint)
  RETURNS internal
  LANGUAGE C
  STABLE
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';
-- count_histogram_finalfn(internal) -> bigint[][]: C final function of the
-- count_histogram aggregates; turns the opaque transition state into a bigint
-- array.
CREATE FUNCTION count_histogram_finalfn(internal)
  RETURNS bigint[][]
  LANGUAGE C
  STABLE
  SECURITY INVOKER
  SET search_path = ''
  AS 'MODULE_PATHNAME';
-- count_histogram(value [, bin_size]): aggregates built from the
-- count_histogram_transfn / count_histogram_finalfn C functions, with an
-- opaque "internal" state. The result type is that of the final function
-- (a bigint array).
CREATE AGGREGATE count_histogram(value "any") (
  STYPE = internal,
  SFUNC = count_histogram_transfn,
  FINALFUNC = count_histogram_finalfn
);

CREATE AGGREGATE count_histogram(value "any", bin_size bigint) (
  STYPE = internal,
  SFUNC = count_histogram_transfn,
  FINALFUNC = count_histogram_finalfn
);
/* ----------------------------------------------------------------
* Anonymizing aggregators
* ----------------------------------------------------------------
*/
/*
* Aggregates sharing the same inputs and transition functions can get merged
* into a single transition calculation. We mark finalfunc_modify=read_write
* to force a unique state for each anonymizing aggregator.
*/
-- low_count(aids...): aggregate over a variadic list of AID values. Its state
-- is an AnonAggState driven by the shared anon_agg_state_* C functions.
-- FINALFUNC_EXTRA hands the aggregate's argument signature to the final
-- function; FINALFUNC_MODIFY = READ_WRITE keeps the state from being shared
-- with another aggregate (see the comment heading this section).
CREATE AGGREGATE low_count(VARIADIC aids "any") (
  STYPE = AnonAggState,
  SFUNC = anon_agg_state_transfn,
  FINALFUNC = anon_agg_state_finalfn,
  FINALFUNC_EXTRA,
  FINALFUNC_MODIFY = READ_WRITE
);
-- anon_count_distinct(value, aids...): anonymizing aggregate over one value
-- plus a variadic list of AID values. State is an AnonAggState driven by the
-- shared anon_agg_state_* C functions; FINALFUNC_EXTRA hands the argument
-- signature to the final function and FINALFUNC_MODIFY = READ_WRITE keeps the
-- state from being shared with another aggregate.
CREATE AGGREGATE anon_count_distinct(value "any", VARIADIC aids "any") (
  STYPE = AnonAggState,
  SFUNC = anon_agg_state_transfn,
  FINALFUNC = anon_agg_state_finalfn,
  FINALFUNC_EXTRA,
  FINALFUNC_MODIFY = READ_WRITE
);
-- anon_count_star(aids...): anonymizing aggregate over a variadic list of AID
-- values. State is an AnonAggState driven by the shared anon_agg_state_* C
-- functions; FINALFUNC_EXTRA hands the argument signature to the final
-- function and FINALFUNC_MODIFY = READ_WRITE keeps the state from being shared
-- with another aggregate.
CREATE AGGREGATE anon_count_star(VARIADIC aids "any") (
  STYPE = AnonAggState,
  SFUNC = anon_agg_state_transfn,
  FINALFUNC = anon_agg_state_finalfn,
  FINALFUNC_EXTRA,
  FINALFUNC_MODIFY = READ_WRITE
);
-- anon_count_value(value, aids...): anonymizing aggregate over one value plus
-- a variadic list of AID values. State is an AnonAggState driven by the shared
-- anon_agg_state_* C functions; FINALFUNC_EXTRA hands the argument signature
-- to the final function and FINALFUNC_MODIFY = READ_WRITE keeps the state from
-- being shared with another aggregate.
CREATE AGGREGATE anon_count_value(value "any", VARIADIC aids "any") (
  STYPE = AnonAggState,
  SFUNC = anon_agg_state_transfn,
  FINALFUNC = anon_agg_state_finalfn,
  FINALFUNC_EXTRA,
  FINALFUNC_MODIFY = READ_WRITE
);
-- anon_sum(value, aids...): anonymizing aggregate over one value plus a
-- variadic list of AID values. State is an AnonAggState driven by the shared
-- anon_agg_state_* C functions; FINALFUNC_EXTRA hands the argument signature
-- to the final function and FINALFUNC_MODIFY = READ_WRITE keeps the state from
-- being shared with another aggregate.
CREATE AGGREGATE anon_sum(value "any", VARIADIC aids "any") (
  STYPE = AnonAggState,
  SFUNC = anon_agg_state_transfn,
  FINALFUNC = anon_agg_state_finalfn,
  FINALFUNC_EXTRA,
  FINALFUNC_MODIFY = READ_WRITE
);
-- anon_count_histogram(aid_index, bin_size, aids...): anonymizing aggregate
-- taking an integer AID index, a bigint bin size and a variadic list of AID
-- values; it resolves to the two-leading-argument overloads of the shared
-- anon_agg_state_* C functions. FINALFUNC_EXTRA hands the argument signature
-- to the final function and FINALFUNC_MODIFY = READ_WRITE keeps the state from
-- being shared with another aggregate.
CREATE AGGREGATE anon_count_histogram(aid_index integer, bin_size bigint, VARIADIC aids "any") (
  STYPE = AnonAggState,
  SFUNC = anon_agg_state_transfn,
  FINALFUNC = anon_agg_state_finalfn,
  FINALFUNC_EXTRA,
  FINALFUNC_MODIFY = READ_WRITE
);
-- anon_count_distinct_noise(value, aids...): anonymizing aggregate over one
-- value plus a variadic list of AID values. State is an AnonAggState driven by
-- the shared anon_agg_state_* C functions; FINALFUNC_EXTRA hands the argument
-- signature to the final function and FINALFUNC_MODIFY = READ_WRITE keeps the
-- state from being shared with another aggregate.
CREATE AGGREGATE anon_count_distinct_noise(value "any", VARIADIC aids "any") (
  STYPE = AnonAggState,
  SFUNC = anon_agg_state_transfn,
  FINALFUNC = anon_agg_state_finalfn,
  FINALFUNC_EXTRA,
  FINALFUNC_MODIFY = READ_WRITE
);
-- anon_count_star_noise(aids...): anonymizing aggregate over a variadic list
-- of AID values. State is an AnonAggState driven by the shared
-- anon_agg_state_* C functions; FINALFUNC_EXTRA hands the argument signature
-- to the final function and FINALFUNC_MODIFY = READ_WRITE keeps the state from
-- being shared with another aggregate.
CREATE AGGREGATE anon_count_star_noise(VARIADIC aids "any") (
  STYPE = AnonAggState,
  SFUNC = anon_agg_state_transfn,
  FINALFUNC = anon_agg_state_finalfn,
  FINALFUNC_EXTRA,
  FINALFUNC_MODIFY = READ_WRITE
);
-- anon_count_value_noise(value, aids...): anonymizing aggregate over one value
-- plus a variadic list of AID values. State is an AnonAggState driven by the
-- shared anon_agg_state_* C functions; FINALFUNC_EXTRA hands the argument
-- signature to the final function and FINALFUNC_MODIFY = READ_WRITE keeps the
-- state from being shared with another aggregate.
CREATE AGGREGATE anon_count_value_noise(value "any", VARIADIC aids "any") (
  STYPE = AnonAggState,
  SFUNC = anon_agg_state_transfn,
  FINALFUNC = anon_agg_state_finalfn,
  FINALFUNC_EXTRA,
  FINALFUNC_MODIFY = READ_WRITE
);
-- anon_sum_noise(value, aids...): anonymizing aggregate over one value plus a
-- variadic list of AID values. State is an AnonAggState driven by the shared
-- anon_agg_state_* C functions; FINALFUNC_EXTRA hands the argument signature
-- to the final function and FINALFUNC_MODIFY = READ_WRITE keeps the state from
-- being shared with another aggregate.
CREATE AGGREGATE anon_sum_noise(value "any", VARIADIC aids "any") (
  STYPE = AnonAggState,
  SFUNC = anon_agg_state_transfn,
  FINALFUNC = anon_agg_state_finalfn,
  FINALFUNC_EXTRA,
  FINALFUNC_MODIFY = READ_WRITE
);
/* ----------------------------------------------------------------
* Bucket-specific aggregates
* ----------------------------------------------------------------
*/
-- is_suppress_bin(*): aggregate declared with the placeholder transition
-- function and a boolean state starting at false.
-- NOTE(review): presumably the real value is supplied by the extension at
-- query time; confirm against the C sources.
CREATE AGGREGATE is_suppress_bin(*) (
  STYPE = boolean,
  SFUNC = placeholder_func,
  INITCOND = false
);
/* ----------------------------------------------------------------
* Scalar functions
* ----------------------------------------------------------------
*/
-- round_by(value, amount): rounds `value` to the nearest multiple of `amount`.
-- Returns NULL when `amount` is zero or negative; being STRICT, it also
-- returns NULL when either argument is NULL.
CREATE FUNCTION round_by(value numeric, amount numeric)
RETURNS numeric AS $$
  DECLARE
    multiples numeric;
  BEGIN
    -- Reject non-positive step sizes before dividing by them.
    IF amount <= 0 THEN
      RETURN NULL;
    END IF;

    multiples := round(value / amount);
    RETURN multiples * amount;
  END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT
SECURITY INVOKER SET search_path = '';

-- Double precision variant of round_by; same contract as the numeric one.
CREATE FUNCTION round_by(value double precision, amount double precision)
RETURNS double precision AS $$
  DECLARE
    multiples double precision;
  BEGIN
    -- Reject non-positive step sizes before dividing by them.
    IF amount <= 0 THEN
      RETURN NULL;
    END IF;

    multiples := round(value / amount);
    RETURN multiples * amount;
  END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT
SECURITY INVOKER SET search_path = '';
-- ceil_by(value, amount): rounds `value` up to the next multiple of `amount`.
-- Returns NULL when `amount` is zero or negative; being STRICT, it also
-- returns NULL when either argument is NULL.
CREATE FUNCTION ceil_by(value numeric, amount numeric)
RETURNS numeric AS $$
  DECLARE
    multiples numeric;
  BEGIN
    -- Reject non-positive step sizes before dividing by them.
    IF amount <= 0 THEN
      RETURN NULL;
    END IF;

    multiples := ceil(value / amount);
    RETURN multiples * amount;
  END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT
SECURITY INVOKER SET search_path = '';

-- Double precision variant of ceil_by; same contract as the numeric one.
CREATE FUNCTION ceil_by(value double precision, amount double precision)
RETURNS double precision AS $$
  DECLARE
    multiples double precision;
  BEGIN
    -- Reject non-positive step sizes before dividing by them.
    IF amount <= 0 THEN
      RETURN NULL;
    END IF;

    multiples := ceil(value / amount);
    RETURN multiples * amount;
  END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT
SECURITY INVOKER SET search_path = '';
-- floor_by(value, amount): rounds `value` down to the previous multiple of
-- `amount`. Returns NULL when `amount` is zero or negative; being STRICT, it
-- also returns NULL when either argument is NULL.
CREATE FUNCTION floor_by(value numeric, amount numeric)
RETURNS numeric AS $$
  DECLARE
    multiples numeric;
  BEGIN
    -- Reject non-positive step sizes before dividing by them.
    IF amount <= 0 THEN
      RETURN NULL;
    END IF;

    multiples := floor(value / amount);
    RETURN multiples * amount;
  END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT
SECURITY INVOKER SET search_path = '';

-- Double precision variant of floor_by; same contract as the numeric one.
CREATE FUNCTION floor_by(value double precision, amount double precision)
RETURNS double precision AS $$
  DECLARE
    multiples double precision;
  BEGIN
    -- Reject non-positive step sizes before dividing by them.
    IF amount <= 0 THEN
      RETURN NULL;
    END IF;

    multiples := floor(value / amount);
    RETURN multiples * amount;
  END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT
SECURITY INVOKER SET search_path = '';