Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(audience): validate and anonymize reported audience #235

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
3 changes: 2 additions & 1 deletion modules/defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@
import { fflags } from './fflags.js';

// Names of the properties listed as "known" for RUM data.
// NOTE(review): how this list is consumed is not visible in this diff — confirm against callers.
export const KNOWN_PROPERTIES = ['weight', 'id', 'referer', 'checkpoint', 't', 'source', 'target', 'cwv', 'CLS', 'FID', 'LCP', 'INP', 'TTFB'];
// Checkpoint list as it appears to have been before this change (the diff reports one deletion).
export const DEFAULT_TRACKING_EVENTS = ['click', 'cwv', 'form', 'viewblock', 'viewmedia', 'loadresource', 'utm', 'paid', 'email', 'consent'];
// Checkpoint list after this change: same entries plus 'experiment' and 'audience'.
export const DEFAULT_TRACKING_EVENTS = ['click', 'cwv', 'form', 'viewblock', 'viewmedia', 'loadresource', 'utm', 'paid', 'email', 'consent', 'experiment', 'audience'];

// Hands fflags a callback that appends the 'example' checkpoint to the list above
// (presumably invoked only when the 'example' feature flag is on — confirm in fflags.js).
fflags.enabled('example', () => DEFAULT_TRACKING_EVENTS.push('example'));
50 changes: 49 additions & 1 deletion modules/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
/* eslint-env browser */

import { KNOWN_PROPERTIES, DEFAULT_TRACKING_EVENTS } from './defaults.js';
import { urlSanitizers } from './utils.js';
import { dataValidator, dataPreProcessor, urlSanitizers } from './utils.js';
import { targetSelector, sourceSelector } from './dom.js';
import {
addAdsParametersTracking,
Expand Down Expand Up @@ -227,6 +227,7 @@ function getIntersectionObsever(checkpoint) {
});
return observer;
}

function addViewBlockTracking(element) {
const blockobserver = getIntersectionObsever('viewblock');
if (blockobserver) {
Expand Down Expand Up @@ -256,6 +257,51 @@ function addFormTracking(parent) {
});
}

/**
 * Observes `document.body` (including its subtree) for child-list changes and for changes
 * to the given attributes, and reports a RUM checkpoint for each mutated node that passes
 * the condition.
 *
 * For every matching mutation the data object is built with `mapFn`, checked against
 * `dataValidator[checkpoint]` (when one exists), transformed by
 * `dataPreProcessor[checkpoint]` (when one exists) and finally handed to `sampleRUM`.
 * Nothing happens when the browser does not provide `MutationObserver`.
 *
 * @param {string} checkpoint The checkpoint name to report
 * @param {string[]} attrs The attribute names to observe (used as `attributeFilter`)
 * @param {Function} [conditionFn] Predicate receiving the mutated node; only nodes for
 *   which it returns a truthy value are reported. Defaults to accepting everything.
 * @param {Function} [mapFn] Maps the mutated node to the RUM data object. Defaults to
 *   the `targetSelector`/`sourceSelector` of the node.
 */
function addDataAttributeTracking(
  checkpoint,
  attrs,
  conditionFn = () => true,
  mapFn = (el) => ({ target: targetSelector(el), source: sourceSelector(el) }),
) {
  if (!window.MutationObserver) {
    return;
  }

  const handler = (mutations) => {
    mutations
      // The predicates are written against the element (e.g. `el.dataset`), so they must be
      // given the mutated node, not the MutationRecord (which has no `dataset`).
      .filter((m) => conditionFn(m.target))
      .forEach((m) => {
        const data = mapFn(m.target);
        const validate = dataValidator[checkpoint];
        if (validate && !validate(data)) {
          return;
        }
        const preProcess = dataPreProcessor[checkpoint];
        sampleRUM(checkpoint, preProcess ? preProcess(data) : data);
      });
  };

  new MutationObserver(handler).observe(document.body, {
    childList: true, subtree: true, attributes: true, attributeFilter: attrs,
  });
}

/**
 * Registers data-attribute tracking for the `experiment` checkpoint: watches the
 * `data-experiment` and `data-variant` attributes and reports the experiment name as
 * `source` and the variant as `target` for elements that carry both.
 */
function addExperimentTracking() {
  const watchedAttributes = ['data-experiment', 'data-variant'];

  // Only elements that expose both an experiment and a variant qualify.
  const hasExperimentAndVariant = (node) => {
    const { dataset } = node;
    return dataset && dataset.experiment && dataset.variant;
  };

  const toExperimentData = (node) => {
    const { experiment, variant } = node.dataset;
    return { source: experiment, target: variant };
  };

  addDataAttributeTracking(
    'experiment',
    watchedAttributes,
    hasExperimentAndVariant,
    toExperimentData,
  );
}

/**
 * Registers data-attribute tracking for the `audience` checkpoint by watching the
 * `data-audience` attribute.
 *
 * An element is reported only when it carries `data-audience` and the document body
 * carries `data-audiences`. The element's own audience is reported as `source` and the
 * body's audience list as `target`. This matches what `dataValidator.audience` accepts:
 * a single identifier as source that is either `default` or part of the comma-separated
 * target list.
 */
function addAudienceTracking() {
  addDataAttributeTracking(
    'audience',
    ['data-audience'],
    (el) => el.dataset && document.body.dataset.audiences && el.dataset.audience,
    // source: the audience on this element; target: the audience list on the body.
    (el) => ({ source: el.dataset.audience, target: document.body.dataset.audiences }),
  );
}

/**
 * Invokes `fn(block)` only when the checkpoint `ck` is listed in `DEFAULT_TRACKING_EVENTS`.
 *
 * @param {string} ck The checkpoint name
 * @param {Function} fn The function that sets up the tracking
 * @param {*} block The argument forwarded to `fn`
 * @returns {*} the result of `fn(block)`, or `false` when the checkpoint is not listed
 */
function addObserver(ck, fn, block) {
  const isTracked = DEFAULT_TRACKING_EVENTS.includes(ck);
  if (!isTracked) {
    return false;
  }
  return fn(block);
}
Expand Down Expand Up @@ -295,6 +341,8 @@ function addTrackingFromConfig() {
addCookieConsentTracking(sampleRUM);
addAdsParametersTracking(sampleRUM);
addEmailParameterTracking(sampleRUM);
addExperimentTracking();
addAudienceTracking();
fflags.enabled('language', () => {
const target = navigator.language;
const source = document.documentElement.lang;
Expand Down
45 changes: 45 additions & 0 deletions modules/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* governing permissions and limitations under the License.
*/

const DIFFERENTIAL_SELECTION_PROBABILITY = 0.6;
const { href } = window.location;

export const urlSanitizers = {
Expand Down Expand Up @@ -38,3 +39,47 @@ export const urlSanitizers = {
return `${u.origin}${u.pathname}`;
},
};

/** Matches a single identifier: one or more word characters or hyphens. */
const RUM_IDENTIFIER_PATTERN = /^[\w-]+$/;

/** Matches a comma-separated list of identifiers (hyphen placed last so it is a literal). */
const RUM_IDENTIFIER_LIST_PATTERN = /^[\w,-]+$/;

/**
 * Checks that a value is a string that fully matches the given pattern.
 * Non-string values (including `undefined` and `null`) never match.
 */
const matchesRumPattern = (value, pattern) => typeof value === 'string' && pattern.test(value);

/**
 * A map of validators that take the RUM data object as input and return a boolean indicating
 * whether the data is valid and should be submitted to the backend.
 *
 * Missing or non-string `source`/`target` values make the data invalid instead of throwing.
 *
 * - `audience`: `source` must be a single identifier, `target` a comma-separated list of
 *   identifiers, and `source` must be either `default` or one of the entries in `target`.
 * - `experiment`: both `source` and `target` must be single identifiers.
 */
export const dataValidator = {
  audience: (data) => matchesRumPattern(data?.source, RUM_IDENTIFIER_PATTERN)
    && matchesRumPattern(data?.target, RUM_IDENTIFIER_LIST_PATTERN)
    && ['default', ...data.target.split(',')].includes(data.source),
  experiment: (data) => matchesRumPattern(data?.source, RUM_IDENTIFIER_PATTERN)
    && matchesRumPattern(data?.target, RUM_IDENTIFIER_PATTERN),
};

/**
 * Randomly anonymize the audience to dilute potential PII.
 *
 * With a probability of `DIFFERENTIAL_SELECTION_PROBABILITY` the reported `source` is
 * replaced by an audience picked at random from all audiences that could have applied
 * (`default` plus every entry of `target`). Otherwise the original `source` is kept.
 * The `target` list is always normalised: `default` is put first, duplicates and empty
 * entries are dropped, and the entries are joined with `:`.
 *
 * @param {Object} data The RUM data for the audience
 * @param {string} data.source The audience that was resolved for the event
 * @param {string} data.target The comma-separated list of audiences for the event
 * @returns {{source: string, target: string}} the modified data
 */
function anonymizeAudience({ source, target } = {}) {
  // A missing target is treated as "no audiences configured" rather than throwing.
  const configuredAudiences = typeof target === 'string'
    ? target.split(',').filter(Boolean)
    : [];
  // The pool comes from the full list (target), not from the single resolved audience,
  // otherwise the random pick could only ever be `default` or the real audience.
  const allAudiences = [...new Set(['default', ...configuredAudiences])];

  const isRandomized = Math.random() < DIFFERENTIAL_SELECTION_PROBABILITY;
  const reportedSource = isRandomized
    ? allAudiences[Math.floor(Math.random() * allAudiences.length)]
    : source;

  return { source: reportedSource, target: allAudiences.join(':') };
}

/**
 * Per-checkpoint processors. Each entry receives the RUM data object and returns the
 * version that should be sent to the backend.
 *
 * - `audience`: delegates to `anonymizeAudience`.
 */
export const dataPreProcessor = {
  audience(data) {
    return anonymizeAudience(data);
  },
};
69 changes: 68 additions & 1 deletion test/unit/utils.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
*/

/* eslint-env mocha */
/* eslint-disable no-unused-expressions */

import { expect } from '@esm-bundle/chai';
import { urlSanitizers } from '../../modules/utils.js';
import { dataValidator, dataPreProcessor, urlSanitizers } from '../../modules/utils.js';

describe('test utils#urlSanitizers', () => {
it('urlSanitizers.full', () => {
Expand Down Expand Up @@ -81,3 +82,69 @@ describe('test utils#urlSanitizers', () => {
expect(urlSanitizers.path('http://localhost:3000/path/page.html?a=1&b=2')).to.be.equal('http://localhost:3000/path/page.html');
});
});

// Table-driven checks for the checkpoint validators: every `[source, target]` pair under
// `accepted` must validate to `true`, every pair under `rejected` to `false`.
describe('test utils#dataValidator', () => {
  const cases = {
    audience: {
      accepted: [
        ['foo', 'foo'],
        ['f-o-o', 'f-o-o'],
        ['f_o_o', 'f_o_o'],
        ['f00', 'f00'],
        ['foo', 'foo,bar,baz'],
        ['default', 'foo,bar,baz'],
      ],
      rejected: [
        ['foo', 'bar,baz'],
        ['foo bar', 'baz qux'],
        ['foo!', 'foo!'],
      ],
    },
    experiment: {
      accepted: [
        ['foo', 'bar'],
        ['f-o-o', 'b-a-r'],
        ['f_o_o', 'b_a_r'],
        ['f00', 'b4r'],
      ],
      rejected: [
        ['foo', 'bar:baz'],
        ['foo bar', 'baz qux'],
        ['foo!', 'bar?'],
      ],
    },
  };

  Object.entries(cases).forEach(([checkpoint, { accepted, rejected }]) => {
    describe(checkpoint, () => {
      it(`has a validator for the "${checkpoint}" checkpoint`, () => {
        expect(dataValidator[checkpoint]).to.be.ok;
      });

      it('validates that source and target are proper identifiers', () => {
        accepted.forEach(([source, target]) => {
          expect(dataValidator[checkpoint]({ source, target })).to.be.true;
        });

        rejected.forEach(([source, target]) => {
          expect(dataValidator[checkpoint]({ source, target })).to.be.false;
        });
      });
    });
  });
});

// Exercises the audience pre-processor with a stubbed `Math.random` so that both sides of
// the randomization threshold are covered deterministically.
describe('test utils#dataPreProcessor', () => {
  describe('audience', () => {
    let originalRandom;

    before(() => {
      // Keep a reference to the real function (not a sampled number) so that it can be
      // restored once the stubs below are no longer needed.
      originalRandom = Math.random;
    });

    after(() => {
      Math.random = originalRandom;
    });

    it('returns the original audience if we are above the randomization threshold', () => {
      Math.random = () => 0.6;
      const data = dataPreProcessor.audience({ source: 'foo', target: 'foo,bar' });
      expect(data.source).to.eq('foo');
      expect(data.target).to.eq('default:foo:bar');
    });

    it('returns a random audience if we are below the randomization threshold', () => {
      Math.random = () => 0.59;
      const data = dataPreProcessor.audience({ source: 'foo', target: 'foo,bar' });
      expect(['default', 'foo', 'bar'].includes(data.source)).to.be.true;
      expect(data.target).to.eq('default:foo:bar');
    });
  });
});
Loading