fix: partition Freshdesk ticket by month #98

Merged · 5 commits · Jan 15, 2025
2 changes: 1 addition & 1 deletion .github/workflows/docker-apply-production.yml
@@ -74,6 +74,6 @@ jobs:
           aws lambda update-function-code \
             --function-name ${{matrix.lambda}} \
             --image-uri $REGISTRY/${{matrix.lambda}}:${{ env.TAG_VERSION }} > /dev/null 2>&1
-          aws lambda wait function-active \
+          aws lambda wait function-updated \
             --function-name ${{matrix.lambda}}
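
Worth noting why this matters: `function-active` only waits for a newly created function's `State` to become `Active`, so after `update-function-code` it can return while the old image is still serving. `function-updated` polls `LastUpdateStatus` until the in-place update completes. The same deploy-then-wait pattern in boto3, with placeholder names (not values from this repo):

```python
import boto3

lambda_client = boto3.client("lambda")

# Placeholder names for illustration only.
FUNCTION_NAME = "platform-support-freshdesk-export"
IMAGE_URI = "123456789012.dkr.ecr.ca-central-1.amazonaws.com/freshdesk:latest"

# Push the new container image to the function.
lambda_client.update_function_code(FunctionName=FUNCTION_NAME, ImageUri=IMAGE_URI)

# "function_updated" blocks until LastUpdateStatus leaves "InProgress";
# "function_active" would only cover the initial State of a brand-new function.
waiter = lambda_client.get_waiter("function_updated")
waiter.wait(FunctionName=FUNCTION_NAME)
```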

121 changes: 70 additions & 51 deletions export/platform/support/freshdesk/main.py
@@ -141,51 +141,66 @@ def get_tickets(self):
"""Retrieve all tickets with product names and requester email types"""
all_tickets = []
yesterday = datetime.now() - timedelta(days=1)
url = f"{self.base_url}/api/v2/search/tickets?query=\"updated_at:'{yesterday.strftime('%Y-%m-%d')}'\""
response = requests.get(url, headers=self.headers)

if response.status_code == 200:
tickets = response.json()
for ticket in tickets.get("results", []):
status_num = ticket.get("status")
priority_num = ticket.get("priority")
source_num = ticket.get("source")
product_id = ticket.get("product_id")
requester_id = ticket.get("requester_id")
custom_fields = ticket.get("custom_fields", {})
conversations = self.get_ticket_conversations(ticket.get("id"))

filtered_ticket = {
"id": ticket.get("id"),
"status": status_num,
"status_label": STATUS_LOOKUP.get(status_num, "Unknown"),
"priority": priority_num,
"priority_label": PRIORITY_LOOKUP.get(priority_num, "Unknown"),
"source": source_num,
"source_label": SOURCE_LOOKUP.get(source_num, "Unknown"),
"created_at": ticket.get("created_at"),
"updated_at": ticket.get("updated_at"),
"due_by": ticket.get("due_by"),
"fr_due_by": ticket.get("fr_due_by"),
"is_escalated": ticket.get("is_escalated"),
"tags": ticket.get("tags", []),
"spam": ticket.get("spam", False),
"requester_email_suffix": self.get_requester_email_suffix(
requester_id
),
"type": ticket.get("type"),
"product_id": product_id,
"product_name": self.products_cache.get(str(product_id), "Unknown"),
"conversations_total_count": conversations.get("total_count"),
"conversations_reply_count": conversations.get("reply_count"),
"conversations_note_count": conversations.get("note_count"),
"language": custom_fields.get("cf_language"),
"province_or_territory": custom_fields.get("cf_provinceterritory"),
"organization": custom_fields.get("cf_organization"),
}
all_tickets.append(filtered_ticket)
else:
logger.error(f"Error fetching tickets: {response.status_code}")
page = 1
per_page = 30 # set by the API
while page <= 10: # API limit of 10 pages
url = f"{self.base_url}/api/v2/search/tickets?query=\"updated_at:'{yesterday.strftime('%Y-%m-%d')}'\"&page={page}"
response = requests.get(url, headers=self.headers)
if response.status_code == 200:
tickets = response.json()
for ticket in tickets.get("results", []):
status_num = ticket.get("status")
priority_num = ticket.get("priority")
source_num = ticket.get("source")
product_id = ticket.get("product_id")
requester_id = ticket.get("requester_id")
custom_fields = ticket.get("custom_fields", {})
conversations = self.get_ticket_conversations(ticket.get("id"))

filtered_ticket = {
"id": ticket.get("id"),
"status": status_num,
"status_label": STATUS_LOOKUP.get(status_num, "Unknown"),
"priority": priority_num,
"priority_label": PRIORITY_LOOKUP.get(priority_num, "Unknown"),
"source": source_num,
"source_label": SOURCE_LOOKUP.get(source_num, "Unknown"),
"created_at": ticket.get("created_at"),
"updated_at": ticket.get("updated_at"),
"due_by": ticket.get("due_by"),
"fr_due_by": ticket.get("fr_due_by"),
"is_escalated": ticket.get("is_escalated"),
"tags": ticket.get("tags", []),
"spam": ticket.get("spam", False),
"requester_email_suffix": self.get_requester_email_suffix(
requester_id
),
"type": ticket.get("type"),
"product_id": product_id,
"product_name": self.products_cache.get(
str(product_id), "Unknown"
),
"conversations_total_count": conversations.get("total_count"),
"conversations_reply_count": conversations.get("reply_count"),
"conversations_note_count": conversations.get("note_count"),
"language": custom_fields.get("cf_language"),
"province_or_territory": custom_fields.get(
"cf_provinceterritory"
),
"organization": custom_fields.get("cf_organization"),
}
all_tickets.append(filtered_ticket)

# Check if there are more tickets to fetch
if len(tickets.get("results", [])) == per_page:
logger.info(f"Fetching page {page + 1}...")
page += 1
else:
logger.info("All tickets fetched")
break
else:
logger.error(f"Error fetching tickets: {response.status_code}")
break

return all_tickets
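
For reference, the Freshdesk search API returns at most 30 results per page and serves at most 10 pages per query (the two limits the comments above refer to), so a single run can collect at most 300 tickets. A stripped-down sketch of the same pagination pattern, kept separate from the ticket-flattening logic:

```python
import requests

def search_all_tickets(base_url: str, query: str, headers: dict) -> list:
    """Collect every page of a Freshdesk ticket search (max 30/page, 10 pages)."""
    results: list = []
    page, per_page = 1, 30
    while page <= 10:
        response = requests.get(
            f"{base_url}/api/v2/search/tickets?query={query}&page={page}",
            headers=headers,
        )
        response.raise_for_status()
        batch = response.json().get("results", [])
        results.extend(batch)
        if len(batch) < per_page:  # a short page means nothing is left
            break
        page += 1
    return results
```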

@@ -194,8 +209,10 @@ def upload_to_s3(bucket, prefix, data):
"""Upload data to S3 bucket"""
s3_client = boto3.client("s3")

today = datetime.now().strftime("%Y-%m-%d")
key = f"{prefix}/{today}.json"
yesterday = datetime.now() - timedelta(days=1)
day = yesterday.strftime("%Y-%m-%d")
month = yesterday.strftime("%Y-%m")
key = f"{prefix}/MONTH={month}/{day}.json"

s3_client.put_object(
Bucket=bucket, Key=key, Body=json.dumps(data, ensure_ascii=False)
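
Two things changed here: the key now uses yesterday's date (matching the `updated_at` filter in `get_tickets`, where the old code stamped files with today's date), and it gains a Hive-style `MONTH=YYYY-MM` segment so Glue can treat the month as a partition column. A minimal sketch of the key construction, using a hypothetical `freshdesk` prefix:

```python
from datetime import datetime, timedelta

def partitioned_key(prefix: str, run_date: datetime) -> str:
    """Build a Hive-style partitioned key, e.g. freshdesk/MONTH=2025-01/2025-01-14.json."""
    day = run_date.strftime("%Y-%m-%d")
    month = run_date.strftime("%Y-%m")
    return f"{prefix}/MONTH={month}/{day}.json"

# The export covers yesterday's tickets, so day and month come from the same
# date and a file can never land in the wrong month's partition.
print(partitioned_key("freshdesk", datetime(2025, 1, 15) - timedelta(days=1)))
# freshdesk/MONTH=2025-01/2025-01-14.json
```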
@@ -222,7 +239,9 @@ def handler(_event, _context):
     client = FreshdeskClient(FRESHDESK_DOMAIN, freshdesk_api_key)
     tickets = client.get_tickets()
 
-    logger.info(f"Saving {len(tickets)} tickets")
-    s3_path = upload_to_s3(S3_BUCKET_NAME, S3_OBJECT_PREFIX, tickets)
-
-    logger.info(f"Tickets saved to {s3_path}")
+    if tickets:
+        logger.info(f"Saving {len(tickets)} tickets")
+        s3_path = upload_to_s3(S3_BUCKET_NAME, S3_OBJECT_PREFIX, tickets)
+        logger.info(f"Tickets saved to {s3_path}")
+    else:
+        logger.info("No tickets found")
39 changes: 35 additions & 4 deletions export/platform/support/freshdesk/main_test.py
@@ -133,6 +133,28 @@ def test_get_tickets(self, mock_freshdesk_client):
assert ticket["product_name"] == "Product 1"
assert ticket["conversations_total_count"] == 3

def test_get_tickets_pagination(self, mock_freshdesk_client):
with patch("requests.get") as mock_get:
mock_get.return_value.status_code = 200
mock_get.return_value.json.side_effect = [
{"results": [MOCK_TICKET for i in range(1, 31)]},
{"results": [MOCK_TICKET for i in range(1, 31)]},
{"results": [MOCK_TICKET for i in range(1, 5)]},
]

# Mock the conversation call
mock_freshdesk_client.get_ticket_conversations = Mock(
return_value={"total_count": 3, "reply_count": 2, "note_count": 1}
)

# Mock the requester email suffix
mock_freshdesk_client.get_requester_email_suffix = Mock(
return_value="external"
)

tickets = mock_freshdesk_client.get_tickets()
assert len(tickets) == 64


def test_upload_to_s3(mock_s3_client):
test_data = {"test": "data"}
@@ -151,16 +173,25 @@ def test_get_ssm_parameter(mock_ssm_client):
 )
 
 
-def test_handler(mock_ssm_client):
+def test_handler_tickets(mock_ssm_client):
     with patch("main.FreshdeskClient") as MockClient:
         mock_client = Mock()
         mock_client.get_tickets.return_value = [MOCK_TICKET]
         MockClient.return_value = mock_client
 
         with patch("main.upload_to_s3") as mock_upload:
             mock_upload.return_value = "s3://test-bucket/test.json"
 
             handler({}, {})
 
             mock_client.get_tickets.assert_called_once()
             mock_upload.assert_called_once()
+
+
+def test_handler_no_tickets(mock_ssm_client):
+    with patch("main.FreshdeskClient") as MockClient:
+        mock_client = Mock()
+        mock_client.get_tickets.return_value = []
+        MockClient.return_value = mock_client
+
+        with patch("main.upload_to_s3") as mock_upload:
+            handler({}, {})
+            mock_client.get_tickets.assert_called_once()
+            mock_upload.assert_not_called()
@@ -6,6 +6,7 @@ module "platform_support_freshdesk_export" {

   lambda_name                = local.freshdesk_lambda_name
   lambda_schedule_expression = "cron(0 5 * * ? *)" # 5am UTC every day
+  lambda_timeout             = "300"
   s3_arn_write_path          = "${var.raw_bucket_arn}/${local.freshdesk_export_path}/*"
 
   lambda_policies = [
5 changes: 4 additions & 1 deletion terragrunt/aws/glue/crawlers.tf
@@ -53,7 +53,7 @@ resource "aws_glue_crawler" "platform_support_freshdesk_production" {
   name          = "Platform / Support / Freshdesk"
   description   = "Classify the Platform Freshdesk support ticket data"
   database_name = aws_glue_catalog_database.platform_support_production_raw.name
-  table_prefix  = "freskdesk_"
+  table_prefix  = "platform_support_"
   classifiers   = [aws_glue_classifier.json_object_array.name]
 
   role = aws_iam_role.glue_crawler.arn
@@ -70,6 +70,9 @@
       TableThreshold = 1
     }
   }
+  Grouping = {
+    TableGroupingPolicy = "CombineCompatibleSchemas"
+  }
   CreatePartitionIndex = true
   Version              = 1
 })
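
If I read the crawler options right, `CombineCompatibleSchemas` should make the crawler fold the new `MONTH=YYYY-MM` folders into one table with `MONTH` as a partition key rather than creating one table per folder, and `CreatePartitionIndex` adds a partition index on top. A sketch for spot-checking the partitions after a crawl; the database and table names below are guesses from the Terraform resource names, not confirmed values:

```python
import boto3

glue = boto3.client("glue")

# Assumed names, inferred from the Terraform above; adjust to the real catalog.
DATABASE = "platform_support_production_raw"
TABLE = "platform_support_freshdesk"

# Each MONTH=YYYY-MM prefix should surface as one partition of a single table.
partitions = glue.get_partitions(DatabaseName=DATABASE, TableName=TABLE)
for partition in partitions["Partitions"]:
    print(partition["Values"])  # e.g. ["2025-01"]
```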