Skip to content

Commit

Permalink
Include user agent for pseudonymized usage measurement (#491)
Browse files Browse the repository at this point in the history
Adds a report_usage_id variable to the jobs service.
Uses this variable to determine whether or not to include client_info when creating the BigQuery client.
Adds a unit test to check that the opt-in/opt-out works as expected for the client_info.
  • Loading branch information
tyroneschiff authored Mar 4, 2024
1 parent dfa3539 commit 2696581
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 5 deletions.
17 changes: 13 additions & 4 deletions backend/jobs/workers/bigquery/bq_worker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020 Google Inc. All rights reserved.
# Copyright 2024 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -14,11 +14,11 @@

"""CRMint's abstract worker dealing with BigQuery."""


import os
import time

from google.api_core.client_info import ClientInfo
from google.cloud import bigquery

from jobs.workers import worker


Expand All @@ -42,7 +42,16 @@ class BQWorker(worker.Worker):
]

def _get_client(self):
  """Builds the BigQuery client used by this worker.

  Usage reporting is opt-in: the client is tagged with the CRMint usage
  user agent only when the `REPORT_USAGE_ID` environment variable is
  defined with a non-empty value. When the variable is missing or empty,
  `client_info` is passed as `None`.

  Returns:
    A `bigquery.Client` created with this worker's `_SCOPES` as client
    options.
  """
  client_info = None
  # The variable is only read once we know it is defined; a defined but
  # empty value counts as an opt-out.
  if 'REPORT_USAGE_ID' in os.environ and os.getenv('REPORT_USAGE_ID'):
    client_info = ClientInfo(user_agent='cloud-solutions/crmint-usage-v3')
  return bigquery.Client(
      client_options={'scopes': self._SCOPES},
      client_info=client_info,
  )

def _get_prefix(self):
  """Returns the `<pipeline_id>_<job_id>_<ClassName>` prefix for this worker."""
  worker_name = self.__class__.__name__
  return f'{self._pipeline_id}_{self._job_id}_{worker_name}'
Expand Down
46 changes: 46 additions & 0 deletions backend/tests/jobs/unit/workers/bq_worker_tests.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""Tests for bq_worker."""

from unittest import mock
import os

from absl.testing import absltest
from absl.testing import parameterized

from google.auth import credentials
from google.cloud import bigquery
from google.api_core.client_info import ClientInfo

from jobs.workers import worker
from jobs.workers.bigquery import bq_worker
Expand Down Expand Up @@ -82,6 +85,49 @@ def test_generates_proper_bq_table_name_from_params(self):
self.assertEqual('a_project.a_dataset_id.a_table_id',
worker._generate_qualified_bq_table_name())

class BQWorkerGetClientTest(parameterized.TestCase):
  """Checks that `BQWorker._get_client` honors the usage-reporting opt-in."""

  @parameterized.parameters(
      {
          'report_usage_id_present': True,
          'client_info_user_agent': 'cloud-solutions/crmint-usage-v3',
      },
      {
          # Variable defined but empty: must be treated as an opt-out.
          'report_usage_id_present': True,
          'client_info_user_agent': None,
          'report_usage_id': '',
      },
      {
          'report_usage_id_present': False,
          'client_info_user_agent': None,
      },
  )
  def test_get_client_handles_report_usage_id(
      self,
      report_usage_id_present,
      client_info_user_agent,
      report_usage_id='some-usage-id',
  ):
    """Verifies `client_info` is only set for a non-empty usage id.

    Args:
      report_usage_id_present: Whether `REPORT_USAGE_ID` is defined in the
        environment seen by the worker.
      client_info_user_agent: Expected user agent on the `client_info`
        passed to the BigQuery client, or `None` when no `client_info` is
        expected.
      report_usage_id: Value given to `REPORT_USAGE_ID` when it is defined.
    """
    if not report_usage_id_present:
      report_usage_id = ''
    with (
        # `patch.dict` restores `os.environ` on exit, so the in-context
        # mutations below cannot leak into other tests.
        mock.patch.dict(os.environ, {}),
        mock.patch('os.getenv', return_value=report_usage_id) as getenv_mock,
        mock.patch('google.cloud.bigquery.Client') as client_mock,
    ):
      if report_usage_id_present:
        os.environ['REPORT_USAGE_ID'] = report_usage_id
      else:
        # Drop any ambient value so the "absent" case does not depend on
        # the environment the test suite is launched from.
        os.environ.pop('REPORT_USAGE_ID', None)
      worker_inst = bq_worker.BQWorker({}, 0, 0)
      worker_inst._get_client()

    if report_usage_id_present:
      getenv_mock.assert_called_with('REPORT_USAGE_ID')
    else:
      getenv_mock.assert_not_called()

    client_mock.assert_called_once()
    _, kwargs = client_mock.call_args
    if client_info_user_agent is None:
      self.assertIsNone(kwargs.get('client_info'))
    else:
      self.assertIsInstance(kwargs['client_info'], ClientInfo)
      self.assertEqual(
          kwargs['client_info'].user_agent, client_info_user_agent
      )


# Run the tests in this module through absl's runner when executed directly.
if __name__ == '__main__':
  absltest.main()
2 changes: 1 addition & 1 deletion cli/appcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def _ask_permission(self):
fg='yellow')
msg += click.style(pkg_name, fg='red', bold=True)
msg += click.style(
' better! \nMay we anonymously report usage statistics to improve the'
' better! \nMay we anonymously report usage statistics to improve the '
'tool over time? \nMore info: https://github.com/google/crmint & '
'https://google.github.io/crmint',
fg='yellow')
Expand Down
4 changes: 4 additions & 0 deletions terraform/services.tf
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ resource "google_cloud_run_service" "jobs_run" {
name = "PUBSUB_VERIFICATION_TOKEN"
value = random_id.pubsub_verification_token.b64_url
}
env {
name = "REPORT_USAGE_ID"
value = var.report_usage_id
}
}

timeout_seconds = 900 # 15min
Expand Down

0 comments on commit 2696581

Please sign in to comment.