Skip to content

Commit

Permalink
Merge pull request #367 from simon04/caddy_json
Browse files Browse the repository at this point in the history
Add Caddy JSON log support
  • Loading branch information
michalkleiner authored Jan 31, 2024
2 parents 5624527 + e6e4d00 commit d1cd2fe
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 0 deletions.
49 changes: 49 additions & 0 deletions import_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,54 @@ def remove_ignored_groups(self, groups):
for group in groups:
del self.json[group]

class CaddyJsonFormat(BaseFormat):
    """Log format for Caddy access logs written as one JSON object per line.

    The parsed object of the current line is kept in ``self.json``.
    ``get_all()`` enriches it in place with flat keys (``date``, ``timezone``,
    ``length``, ``status``, ``generation_time_milli``, ``userid``, ``ip``,
    ``host``, ``method``, ``path``, ``referrer``, ``user_agent``) derived from
    Caddy's own fields.
    """

    def __init__(self, name):
        super(CaddyJsonFormat, self).__init__(name)
        self.json = None
        self.date_format = '%Y-%m-%dT%H:%M:%S.%f'
        # Groups dropped through remove_ignored_groups() for the current line.
        # They are remembered because get_all() rebuilds the derived keys on
        # every call and would otherwise bring them back.
        self._ignored_groups = set()

    def check_format_line(self, line):
        """Return True if ``line`` is a JSON object carrying the Caddy keys.

        A line qualifies when it decodes to a JSON object that contains the
        ``request``, ``user_id`` and ``resp_headers`` keys.
        """
        self._ignored_groups = set()
        try:
            self.json = json.loads(line)
        except (ValueError, TypeError):
            # json.JSONDecodeError is a ValueError; TypeError covers input
            # that is not str/bytes.
            return False
        if not isinstance(self.json, dict):
            # Valid JSON, but not an object (e.g. a number, list or string).
            return False
        return "request" in self.json and "user_id" in self.json and "resp_headers" in self.json

    def match(self, line):
        """Parse ``line`` and return ``self``, or None if it is not a JSON object."""
        self._ignored_groups = set()
        try:
            parsed = json.loads(line)
        except (ValueError, TypeError):
            parsed = None
        if not isinstance(parsed, dict):
            self.json = None
            return None
        self.json = parsed
        return self

    def get(self, key):
        """Return the value stored under ``key`` for the current line.

        Returns None when the key is absent from the (enriched) entry.

        Raises:
            BaseFormatException: if the entry lacks a field that get_all()
                needs, or one of those fields has an unexpected structure.
        """
        try:
            return self.get_all().get(key)
        except (KeyError, TypeError, AttributeError) as err:
            raise BaseFormatException() from err

    def get_all(self,):
        """Return the current entry enriched with the derived flat keys.

        The dictionary in ``self.json`` is updated in place and returned.
        Groups dropped through remove_ignored_groups() are left out.
        """
        entry = self.json
        request = entry['request']
        headers = request['headers']

        # 'ts' is converted as a Unix timestamp and rendered in UTC.
        moment = datetime.datetime.fromtimestamp(entry['ts'], tz=datetime.timezone.utc)
        entry['date'] = moment.strftime(self.date_format)
        entry['timezone'] = moment.strftime('%z')
        entry['length'] = str(entry['size'])
        entry['status'] = str(entry['status'])
        entry['generation_time_milli'] = str(entry['duration'] * 1000.)
        entry['userid'] = entry['user_id']
        entry['ip'] = request['client_ip']
        entry['host'] = request['host']
        entry['method'] = request['method']
        entry['path'] = request['uri']
        # Header values are lists; use the first value, or None when the
        # header is missing or empty.
        entry['referrer'] = next(iter(headers.get('Referer', [])), None)
        entry['user_agent'] = next(iter(headers.get('User-Agent', [])), None)

        # Re-apply the removals requested for this line.
        for group in self._ignored_groups:
            entry.pop(group, None)
        return entry

    def remove_ignored_groups(self, groups):
        """Drop ``groups`` from the current entry.

        Groups that are not present are skipped instead of raising, and the
        removal stays in effect for later get()/get_all() calls on this line.
        """
        for group in groups:
            self._ignored_groups.add(group)
            if isinstance(self.json, dict):
                self.json.pop(group, None)

class RegexFormat(BaseFormat):

def __init__(self, name, regex, date_format=None):
Expand Down Expand Up @@ -590,6 +638,7 @@ def get(self, key):
'elb': RegexFormat('elb', _ELB_LOG_FORMAT, '%Y-%m-%dT%H:%M:%S'),
'traefik_json': TraefikJsonFormat('traefik_json'),
'nginx_json': NginxJsonFormat('nginx_json'),
'caddy_json': CaddyJsonFormat('caddy_json'),
'ovh': RegexFormat('ovh', _OVH_FORMAT),
'haproxy': RegexFormat('haproxy', _HAPROXY_FORMAT, '%d/%b/%Y:%H:%M:%S.%f'),
'gandi': RegexFormat('gandi', _GANDI_SIMPLE_HOSTING_FORMAT, '%d/%b/%Y:%H:%M:%S')
Expand Down
3 changes: 3 additions & 0 deletions tests/logs/caddy_json.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"level":"info","ts":1703373474.8155608,"logger":"http.log.access","msg":"handled request","request":{"remote_ip":"1.2.3.4","remote_port":"64985","client_ip":"1.2.3.4","proto":"HTTP/2.0","method":"GET","host":"example.com","uri":"/beta/","headers":{"Te":["trailers"],"User-Agent":["Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0"],"Accept":["text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/jxl,image/webp,*/*;q=0.8"],"Upgrade-Insecure-Requests":["1"],"Sec-Fetch-Mode":["navigate"],"Sec-Fetch-Dest":["document"],"Sec-Fetch-Site":["none"],"Sec-Fetch-User":["?1"],"Accept-Language":["en-IE"],"Accept-Encoding":["gzip, deflate, br"],"Dnt":["1"],"Sec-Gpc":["1"]},"tls":{"resumed":false,"version":772,"cipher_suite":4865,"proto":"h2","server_name":"example.com"}},"bytes_read":0,"user_id":"","duration":0.001335486,"size":3609,"status":200,"resp_headers":{"Content-Encoding":["gzip"],"Vary":["Accept-Encoding"],"Server":["Caddy","nginx/1.20.1"],"Alt-Svc":["h3=\":443\"; ma=2592000"],"Date":["Sat, 23 Dec 2023 23:17:54 GMT"],"Content-Type":["text/html"],"Access-Control-Allow-Methods":["GET"],"Access-Control-Allow-Origin":["*"]}}
{"level":"info","ts":1703373474.9011197,"logger":"http.log.access","msg":"handled request","request":{"remote_ip":"1.2.3.4","remote_port":"64985","client_ip":"1.2.3.4","proto":"HTTP/2.0","method":"GET","host":"example.com","uri":"/beta/assets/index-mMaLXldj.css","headers":{"Accept-Encoding":["gzip, deflate, br"],"Dnt":["1"],"Sec-Fetch-Dest":["style"],"User-Agent":["Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0"],"Accept":["text/css,*/*;q=0.1"],"Referer":["https://example.com/beta/"],"Sec-Fetch-Mode":["cors"],"Sec-Fetch-Site":["same-origin"],"Te":["trailers"],"Accept-Language":["en-IE"],"Sec-Gpc":["1"]},"tls":{"resumed":false,"version":772,"cipher_suite":4865,"proto":"h2","server_name":"example.com"}},"bytes_read":0,"user_id":"","duration":0.00143684,"size":34534,"status":200,"resp_headers":{"Content-Length":["34534"],"Last-Modified":["Sat, 23 Dec 2023 12:27:10 GMT"],"Etag":["\"6586d21e-86e6\""],"Vary":["Accept-Encoding"],"Content-Encoding":["br"],"Accept-Ranges":["bytes"],"Date":["Sat, 23 Dec 2023 23:17:54 GMT"],"Content-Type":["text/css"],"Server":["Caddy","nginx/1.20.1"],"Alt-Svc":["h3=\":443\"; ma=2592000"]}}
{"level":"info","ts":1703373475.141868,"logger":"http.log.access","msg":"handled request","request":{"remote_ip":"1.2.3.4","remote_port":"64985","client_ip":"1.2.3.4","proto":"HTTP/2.0","method":"GET","host":"example.com","uri":"/beta/assets/index-HIcRLzdf.js","headers":{"Accept":["*/*"],"Accept-Language":["en-IE"],"Accept-Encoding":["gzip, deflate, br"],"Sec-Fetch-Dest":["script"],"Sec-Fetch-Site":["same-origin"],"Te":["trailers"],"User-Agent":["Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0"],"Sec-Gpc":["1"],"Referer":["https://example.com/beta/"],"Sec-Fetch-Mode":["cors"],"Dnt":["1"]},"tls":{"resumed":false,"version":772,"cipher_suite":4865,"proto":"h2","server_name":"example.com"}},"bytes_read":0,"user_id":"","duration":0.242570094,"size":217912,"status":200,"resp_headers":{"Server":["Caddy","nginx/1.20.1"],"Alt-Svc":["h3=\":443\"; ma=2592000"],"Content-Type":["application/javascript"],"Accept-Ranges":["bytes"],"Content-Length":["217912"],"Etag":["\"6586d21e-35338\""],"Content-Encoding":["br"],"Vary":["Accept-Encoding"],"Date":["Sat, 23 Dec 2023 23:17:54 GMT"],"Last-Modified":["Sat, 23 Dec 2023 12:27:10 GMT"]}}
53 changes: 53 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ def _test_junk(format_name, log_file = None):
assert(format.name == format_name)

def _test_multiple_spaces(format_name, log_file = None):
if format_name == 'caddy_json':
return
if log_file is None:
log_file = 'logs/%s.log' % format_name

Expand Down Expand Up @@ -416,6 +418,22 @@ def check_traefik_json_groups(groups):
assert groups['userid'] == '-'
assert groups['user_agent'] == 'Prometheus/2.40.5'

def check_caddy_json_groups(groups):
    """Assert that ``groups`` holds the values expected for the first entry
    of the Caddy JSON test log.

    Args:
        groups: mapping of group name to parsed value.

    Raises:
        AssertionError: if any value differs from the expected one.
        KeyError: if an expected group is missing from ``groups``.
    """
    assert groups['ts'] == 1703373474.8155608
    assert groups['duration'] == 0.001335486
    assert groups['date'] == '2023-12-23T23:17:54.815561'
    assert groups['timezone'] == '+0000'
    assert groups['generation_time_milli'] == '1.3354860000000002'
    assert groups['host'] == 'example.com'
    assert groups['ip'] == '1.2.3.4'
    assert groups['length'] == '3609'
    assert groups['method'] == 'GET'
    assert groups['path'] == '/beta/'
    # The first entry carries no Referer header; identity check, not equality.
    assert groups['referrer'] is None
    assert groups['status'] == '200'
    assert groups['userid'] == ''
    assert groups['user_agent'] == 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0'

def check_icecast2_groups(groups):
check_ncsa_extended_groups(groups)

Expand Down Expand Up @@ -466,6 +484,41 @@ def _test_with_junk(format_name, path):
# 'Testing parsing of format "common" with ncsa_extended log'
_test( 'common', 'logs/ncsa_extended.log')

def test_caddy_json_parsing():
    """Parse logs/caddy_json.log and check the first recorded hit."""

    file_ = 'logs/caddy_json.log'

    # Fresh importer state; format is left unset (None) for this run.
    import_logs.stats = import_logs.Statistics()
    import_logs.config = Config()
    options = import_logs.config.options
    options.enable_static = False
    options.replay_tracking = False
    import_logs.config.format = None
    import_logs.resolver = Resolver()
    import_logs.parser = import_logs.Parser()
    import_logs.Recorder = Recorder()
    Recorder.recorders = []
    import_logs.parser.parse(file_)

    hits = [hit.__dict__ for hit in Recorder.recorders]
    first_hit = hits[0]

    # (attribute, expected value) pairs, checked in this order.
    expectations = (
        ('status', '200'),
        ('is_error', False),
        ('extension', '/beta/'),
        ('is_download', False),
        ('referrer', ''),
        ('generation_time_milli', 1.3354860000000002),
        ('host', 'foo'),
        ('filename', 'logs/caddy_json.log'),
        ('is_redirect', False),
        ('date', datetime.datetime(2023, 12, 23, 23, 17, 54, 815561)),
        ('lineno', 0),
        ('ip', '1.2.3.4'),
        ('path', '/beta/'),
        ('is_robot', False),
        ('full_path', '/beta/'),
        ('user_agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0'),
    )
    for attribute, wanted in expectations:
        assert first_hit[attribute] == wanted

def test_iis_custom_format():
"""test IIS custom format name parsing."""

Expand Down

0 comments on commit d1cd2fe

Please sign in to comment.