Refactor and simplify log-parsing

* _parse_line parses one line at a time for simplified testing
and returns a list of event.Event objects instead of applying changes
directly to a Clients object
* parse_log just bundles the logs (using _bundle_logs), opens them,
parses them (using _parse_line) and applies returned events to a
Clients-obj in the correct order

With these changes some sort of caching becomes possible, because events are
not bound to a specific client object and are easily sortable thanks to
their attached timestamp.
This commit is contained in:
Thor77 2017-07-23 17:31:45 +02:00
parent 20d40c8890
commit 08b4e06f10
1 changed files with 63 additions and 114 deletions

View File

@ -1,16 +1,17 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# TODO: Implement online_dc again
import itertools
import logging import logging
import re import re
from codecs import open from codecs import open
from collections import namedtuple from collections import namedtuple
from glob import glob from glob import glob
from os.path import basename from os.path import basename
from time import time
import pendulum import pendulum
from tsstats.client import Client, Clients from tsstats import events
from tsstats.client import Clients
re_log_filename = re.compile(r'ts3server_(?P<date>\d{4}-\d\d-\d\d)' re_log_filename = re.compile(r'ts3server_(?P<date>\d{4}-\d\d-\d\d)'
'__(?P<time>\d\d_\d\d_\d\d.\d+)_(?P<sid>\d).log') '__(?P<time>\d\d_\d\d_\d\d.\d+)_(?P<sid>\d).log')
@ -26,41 +27,9 @@ re_disconnect_invoker = re.compile(
TimedLog = namedtuple('TimedLog', ['path', 'timestamp']) TimedLog = namedtuple('TimedLog', ['path', 'timestamp'])
Server = namedtuple('Server', ['sid', 'clients']) Server = namedtuple('Server', ['sid', 'clients'])
logger = logging.getLogger('tsstats') logger = logging.getLogger('tsstats')
def parse_logs(log_glob, ident_map=None, online_dc=True, *args, **kwargs):
    '''
    parse logs from `log_glob`

    :param log_glob: path to server-logs (supports globbing)
    :param ident_map: identmap used for Client-initializations
    :param online_dc: disconnect still-online clients after parsing

    :type log_glob: str
    :type ident_map: dict
    :type online_dc: bool

    :return: clients bundled by virtual-server
    :rtype: list of tsstats.log.Server
    '''
    server = []
    for virtualserver_id, logs in _bundle_logs(glob(log_glob)).items():
        # guard: an empty bundle has no "last log" to parse
        if not logs:
            continue
        clients = Clients(ident_map)
        # parse all but the last log with online_dc=False: disconnecting and
        # reconnecting still-online clients per intermediate log would lead
        # to insane onlinetimes
        for log in logs[:-1]:
            _parse_details(log.path, clients=clients, online_dc=False,
                           *args, **kwargs)
        # only the most recent log honors the caller-supplied online_dc
        _parse_details(logs[-1].path, clients=clients, online_dc=online_dc,
                       *args, **kwargs)
        if len(clients) >= 1:
            server.append(Server(virtualserver_id, clients))
    return server
def _bundle_logs(logs): def _bundle_logs(logs):
''' '''
bundle `logs` by virtualserver-id bundle `logs` by virtualserver-id
@ -103,84 +72,64 @@ def _bundle_logs(logs):
return vserver_logfiles return vserver_logfiles
def _parse_details(log_path, ident_map=None, clients=None, online_dc=True): def _parse_line(line):
''' parsed_events = []
extract details from log-files match = re_log_entry.match(line)
if not match:
detailed parsing is done here: onlinetime, kicks, pkicks, bans, pbans logger.debug('No match: "%s"', line)
return []
:param log_path: path to log-file match = match.groupdict()
:param ident_map: :doc:`identmap` logdatetime = pendulum.parse(match['timestamp'])
:param clients: clients-object to add parsing-results to message = match['message']
:param online_cd: disconnect online clients after parsing if message.startswith('client'):
match = re_dis_connect.match(message)
:type log_path: str
:type ident_map: dict
:type clients: tsstats.client.Clients
:type online_cd: bool
:return: parsed clients
:rtype: tsstats.client.Clients
'''
start_time = time()
if clients is None:
clients = Clients(ident_map)
log_file = open(log_path, encoding='utf-8')
# process lines
logger.debug('Started parsing of %s', log_file.name)
for line in log_file:
match = re_log_entry.match(line)
if not match: if not match:
logger.debug('No match: "%s"', line) logger.debug('Unsupported client action: "%s"', message)
continue return []
match = match.groupdict() nick, clid = match.group('nick'), match.group('clid')
logdatetime = pendulum.parse(match['timestamp'])
message = match['message']
if message.startswith('client'):
match = re_dis_connect.match(message)
if not match:
logger.debug('Not supported client action: "%s"', message)
continue
nick, clid = match.group('nick'), match.group('clid')
client = clients.setdefault(
clid, Client(clients.ident_map.get(clid, clid), nick)
)
# set current nick
client.nick = nick
# add nick to history
client.nick_history.add(nick)
action = match.group('action') parsed_events.append(events.nick(logdatetime, clid, nick))
if action == 'connected':
client.connect(logdatetime) action = match.group('action')
elif action == 'disconnected': if action == 'connected':
client.disconnect(logdatetime) parsed_events.append(events.connect(logdatetime, clid))
if 'invokeruid' in message: elif action == 'disconnected':
re_disconnect_data = re_disconnect_invoker.findall( parsed_events.append(events.disconnect(logdatetime, clid))
message) if 'invokeruid' in message:
invokernick, invokeruid = re_disconnect_data[0] re_disconnect_data = re_disconnect_invoker.findall(
invoker = clients.setdefault(invokeruid, message)
Client(invokeruid)) invokernick, invokeruid = re_disconnect_data[0]
invoker.nick = invokernick parsed_events.append(
if 'bantime' in message: events.nick(logdatetime, invokeruid, invokernick)
invoker.ban(client) )
else: if 'bantime' in message:
invoker.kick(client) parsed_events.append(
elif message == 'stopped': events.ban(logdatetime, invokeruid, clid)
# make sure all clients are disconnected at server stop )
[ else:
client.disconnect(logdatetime) parsed_events.append(
for client in clients events.kick(logdatetime, invokeruid, clid)
if client.connected )
] return parsed_events
if online_dc:
def _reconnect(client):
client.disconnect(pendulum.now()) def parse_logs(log_glob, ident_map=None, online_dc=True, *args, **kwargs):
client.connected += 1 '''
[_reconnect(client) for client in clients if client.connected] parse logs from `log_glob`
logger.debug(
'Finished parsing of %s in %s seconds', :param log_glob: path to server-logs (supports globbing)
log_file.name, time() - start_time :param ident_map: identmap used for Client-initializations
)
log_file.close() :type log_glob: str
return clients :type ident_map: dict
'''
for virtualserver_id, logs in _bundle_logs(glob(log_glob)).items():
clients = Clients(ident_map)
for log in logs:
with open(log.path, encoding='utf-8') as f:
# parse logfile line by line and filter lines without events
events = filter(None, map(_parse_line, f))
# chain apply events to Client-obj
clients.apply_events(itertools.chain.from_iterable(events))
# assemble Server-obj and yield
yield Server(virtualserver_id, clients)