Refactor and simplify log-parsing

* _parse_line parses one line at a time for simplified testing
and return a list of event.Event's instead of applying changes directly
to a Clients-obj
* parse_log just bundles the logs (using _bundle_logs), opens them,
parses them (using _parse_line) and applies returned events to a
Clients-obj in the correct order

With these changes some sort of caching is possible because events are
not bound to a specific client-object and are easily sortable due to
their attached timestamp.
This commit is contained in:
Thor77 2017-07-23 17:31:45 +02:00
parent 20d40c8890
commit 08b4e06f10
1 changed files with 63 additions and 114 deletions

View File

@ -1,16 +1,17 @@
# -*- coding: utf-8 -*-
# TODO: Implemented online_dc again
import itertools
import logging
import re
from codecs import open
from collections import namedtuple
from glob import glob
from os.path import basename
from time import time
import pendulum
from tsstats.client import Client, Clients
from tsstats import events
from tsstats.client import Clients
re_log_filename = re.compile(r'ts3server_(?P<date>\d{4}-\d\d-\d\d)'
'__(?P<time>\d\d_\d\d_\d\d.\d+)_(?P<sid>\d).log')
@ -26,41 +27,9 @@ re_disconnect_invoker = re.compile(
TimedLog = namedtuple('TimedLog', ['path', 'timestamp'])
Server = namedtuple('Server', ['sid', 'clients'])
logger = logging.getLogger('tsstats')
def parse_logs(log_glob, ident_map=None, online_dc=True, *args, **kwargs):
'''
parse logs from `log_glob`
:param log_glob: path to server-logs (supports globbing)
:param ident_map: identmap used for Client-initializations
:type log_glob: str
:type ident_map: dict
:return: clients bundled by virtual-server
:rtype: tsstats.log.Server
'''
server = []
for virtualserver_id, logs in\
_bundle_logs(glob(log_glob)).items():
clients = Clients(ident_map)
# keep last log out of the iteration for now
for log in logs[:-1]:
# don't reconnect connected clients for all logs except last one
# because that would lead to insane onlinetimes
_parse_details(log.path, clients=clients, online_dc=False,
*args, **kwargs)
# now parse details of last log with correct online_dc set
_parse_details(logs[-1].path, clients=clients, online_dc=online_dc,
*args, **kwargs)
if len(clients) >= 1:
server.append(Server(virtualserver_id, clients))
return server
def _bundle_logs(logs):
'''
bundle `logs` by virtualserver-id
@ -103,84 +72,64 @@ def _bundle_logs(logs):
return vserver_logfiles
def _parse_details(log_path, ident_map=None, clients=None, online_dc=True):
'''
extract details from log-files
detailed parsing is done here: onlinetime, kicks, pkicks, bans, pbans
:param log_path: path to log-file
:param ident_map: :doc:`identmap`
:param clients: clients-object to add parsing-results to
:param online_cd: disconnect online clients after parsing
:type log_path: str
:type ident_map: dict
:type clients: tsstats.client.Clients
:type online_cd: bool
:return: parsed clients
:rtype: tsstats.client.Clients
'''
start_time = time()
if clients is None:
clients = Clients(ident_map)
log_file = open(log_path, encoding='utf-8')
# process lines
logger.debug('Started parsing of %s', log_file.name)
for line in log_file:
def _parse_line(line):
parsed_events = []
match = re_log_entry.match(line)
if not match:
logger.debug('No match: "%s"', line)
continue
return []
match = match.groupdict()
logdatetime = pendulum.parse(match['timestamp'])
message = match['message']
if message.startswith('client'):
match = re_dis_connect.match(message)
if not match:
logger.debug('Not supported client action: "%s"', message)
continue
logger.debug('Unsupported client action: "%s"', message)
return []
nick, clid = match.group('nick'), match.group('clid')
client = clients.setdefault(
clid, Client(clients.ident_map.get(clid, clid), nick)
)
# set current nick
client.nick = nick
# add nick to history
client.nick_history.add(nick)
parsed_events.append(events.nick(logdatetime, clid, nick))
action = match.group('action')
if action == 'connected':
client.connect(logdatetime)
parsed_events.append(events.connect(logdatetime, clid))
elif action == 'disconnected':
client.disconnect(logdatetime)
parsed_events.append(events.disconnect(logdatetime, clid))
if 'invokeruid' in message:
re_disconnect_data = re_disconnect_invoker.findall(
message)
invokernick, invokeruid = re_disconnect_data[0]
invoker = clients.setdefault(invokeruid,
Client(invokeruid))
invoker.nick = invokernick
if 'bantime' in message:
invoker.ban(client)
else:
invoker.kick(client)
elif message == 'stopped':
# make sure all clients are disconnected at server stop
[
client.disconnect(logdatetime)
for client in clients
if client.connected
]
if online_dc:
def _reconnect(client):
client.disconnect(pendulum.now())
client.connected += 1
[_reconnect(client) for client in clients if client.connected]
logger.debug(
'Finished parsing of %s in %s seconds',
log_file.name, time() - start_time
parsed_events.append(
events.nick(logdatetime, invokeruid, invokernick)
)
log_file.close()
return clients
if 'bantime' in message:
parsed_events.append(
events.ban(logdatetime, invokeruid, clid)
)
else:
parsed_events.append(
events.kick(logdatetime, invokeruid, clid)
)
return parsed_events
def parse_logs(log_glob, ident_map=None, online_dc=True, *args, **kwargs):
'''
parse logs from `log_glob`
:param log_glob: path to server-logs (supports globbing)
:param ident_map: identmap used for Client-initializations
:type log_glob: str
:type ident_map: dict
'''
for virtualserver_id, logs in _bundle_logs(glob(log_glob)).items():
clients = Clients(ident_map)
for log in logs:
with open(log.path, encoding='utf-8') as f:
# parse logfile line by line and filter lines without events
events = filter(None, map(_parse_line, f))
# chain apply events to Client-obj
clients.apply_events(itertools.chain.from_iterable(events))
# assemble Server-obj and yield
yield Server(virtualserver_id, clients)