From 08b4e06f100f76d83f0b792281cf098452ebb118 Mon Sep 17 00:00:00 2001 From: Thor77 <thor77@thor77.org> Date: Sun, 23 Jul 2017 17:31:45 +0200 Subject: [PATCH] Refactor and simplify log-parsing * _parse_line parses one line at a time for simplified testing and return a list of event.Event's instead of applying changes directly to a Clients-obj * parse_log just bundles the logs (using _bundle_logs), opens them, parses them (using _parse_line) and applies returned events to a Clients-obj in the correct order With these changes some sort of caching is possible because events are not bound to a specific client-object and are easily sortable due to their attached timestamp. --- tsstats/log.py | 177 ++++++++++++++++++------------------------------- 1 file changed, 63 insertions(+), 114 deletions(-) diff --git a/tsstats/log.py b/tsstats/log.py index ca28156..5e605ad 100644 --- a/tsstats/log.py +++ b/tsstats/log.py @@ -1,16 +1,17 @@ # -*- coding: utf-8 -*- - +# TODO: Implemented online_dc again +import itertools import logging import re from codecs import open from collections import namedtuple from glob import glob from os.path import basename -from time import time import pendulum -from tsstats.client import Client, Clients +from tsstats import events +from tsstats.client import Clients re_log_filename = re.compile(r'ts3server_(?P<date>\d{4}-\d\d-\d\d)' '__(?P<time>\d\d_\d\d_\d\d.\d+)_(?P<sid>\d).log') @@ -26,41 +27,9 @@ re_disconnect_invoker = re.compile( TimedLog = namedtuple('TimedLog', ['path', 'timestamp']) Server = namedtuple('Server', ['sid', 'clients']) - logger = logging.getLogger('tsstats') -def parse_logs(log_glob, ident_map=None, online_dc=True, *args, **kwargs): - ''' - parse logs from `log_glob` - - :param log_glob: path to server-logs (supports globbing) - :param ident_map: identmap used for Client-initializations - - :type log_glob: str - :type ident_map: dict - - :return: clients bundled by virtual-server - :rtype: tsstats.log.Server - ''' - server = [] - 
for virtualserver_id, logs in\ - _bundle_logs(glob(log_glob)).items(): - clients = Clients(ident_map) - # keep last log out of the iteration for now - for log in logs[:-1]: - # don't reconnect connected clients for all logs except last one - # because that would lead to insane onlinetimes - _parse_details(log.path, clients=clients, online_dc=False, - *args, **kwargs) - # now parse details of last log with correct online_dc set - _parse_details(logs[-1].path, clients=clients, online_dc=online_dc, - *args, **kwargs) - if len(clients) >= 1: - server.append(Server(virtualserver_id, clients)) - return server - - def _bundle_logs(logs): ''' bundle `logs` by virtualserver-id @@ -103,84 +72,64 @@ def _bundle_logs(logs): return vserver_logfiles -def _parse_details(log_path, ident_map=None, clients=None, online_dc=True): - ''' - extract details from log-files - - detailed parsing is done here: onlinetime, kicks, pkicks, bans, pbans - - :param log_path: path to log-file - :param ident_map: :doc:`identmap` - :param clients: clients-object to add parsing-results to - :param online_cd: disconnect online clients after parsing - - :type log_path: str - :type ident_map: dict - :type clients: tsstats.client.Clients - :type online_cd: bool - - :return: parsed clients - :rtype: tsstats.client.Clients - ''' - start_time = time() - if clients is None: - clients = Clients(ident_map) - log_file = open(log_path, encoding='utf-8') - # process lines - logger.debug('Started parsing of %s', log_file.name) - for line in log_file: - match = re_log_entry.match(line) +def _parse_line(line): + parsed_events = [] + match = re_log_entry.match(line) + if not match: + logger.debug('No match: "%s"', line) + return [] + match = match.groupdict() + logdatetime = pendulum.parse(match['timestamp']) + message = match['message'] + if message.startswith('client'): + match = re_dis_connect.match(message) if not match: - logger.debug('No match: "%s"', line) - continue - match = match.groupdict() - logdatetime = 
pendulum.parse(match['timestamp']) - message = match['message'] - if message.startswith('client'): - match = re_dis_connect.match(message) - if not match: - logger.debug('Not supported client action: "%s"', message) - continue - nick, clid = match.group('nick'), match.group('clid') - client = clients.setdefault( - clid, Client(clients.ident_map.get(clid, clid), nick) - ) - # set current nick - client.nick = nick - # add nick to history - client.nick_history.add(nick) + logger.debug('Unsupported client action: "%s"', message) + return [] + nick, clid = match.group('nick'), match.group('clid') - action = match.group('action') - if action == 'connected': - client.connect(logdatetime) - elif action == 'disconnected': - client.disconnect(logdatetime) - if 'invokeruid' in message: - re_disconnect_data = re_disconnect_invoker.findall( - message) - invokernick, invokeruid = re_disconnect_data[0] - invoker = clients.setdefault(invokeruid, - Client(invokeruid)) - invoker.nick = invokernick - if 'bantime' in message: - invoker.ban(client) - else: - invoker.kick(client) - elif message == 'stopped': - # make sure all clients are disconnected at server stop - [ - client.disconnect(logdatetime) - for client in clients - if client.connected - ] - if online_dc: - def _reconnect(client): - client.disconnect(pendulum.now()) - client.connected += 1 - [_reconnect(client) for client in clients if client.connected] - logger.debug( - 'Finished parsing of %s in %s seconds', - log_file.name, time() - start_time - ) - log_file.close() - return clients + parsed_events.append(events.nick(logdatetime, clid, nick)) + + action = match.group('action') + if action == 'connected': + parsed_events.append(events.connect(logdatetime, clid)) + elif action == 'disconnected': + parsed_events.append(events.disconnect(logdatetime, clid)) + if 'invokeruid' in message: + re_disconnect_data = re_disconnect_invoker.findall( + message) + invokernick, invokeruid = re_disconnect_data[0] + parsed_events.append( + 
events.nick(logdatetime, invokeruid, invokernick)
+            )
+            if 'bantime' in message:
+                parsed_events.append(
+                    events.ban(logdatetime, invokeruid, clid)
+                )
+            else:
+                parsed_events.append(
+                    events.kick(logdatetime, invokeruid, clid)
+                )
+    return parsed_events
+
+
+def parse_logs(log_glob, ident_map=None, online_dc=True, *args, **kwargs):
+    '''
+    parse logs from `log_glob`
+
+    :param log_glob: path to server-logs (supports globbing)
+    :param ident_map: identmap used for Client-initializations
+
+    :type log_glob: str
+    :type ident_map: dict
+    '''
+    for virtualserver_id, logs in _bundle_logs(glob(log_glob)).items():
+        clients = Clients(ident_map)
+        for log in logs:
+            with open(log.path, encoding='utf-8') as f:
+                # parse logfile line by line and filter lines without events
+                events = filter(None, map(_parse_line, f))
+                # chain apply events to Client-obj
+                clients.apply_events(itertools.chain.from_iterable(events))
+        # assemble Server-obj and yield
+        yield Server(virtualserver_id, clients)