From 08b4e06f100f76d83f0b792281cf098452ebb118 Mon Sep 17 00:00:00 2001
From: Thor77 <thor77@thor77.org>
Date: Sun, 23 Jul 2017 17:31:45 +0200
Subject: [PATCH] Refactor and simplify log-parsing

* _parse_line parses one line at a time for simplified testing
and returns a list of event.Event objects instead of applying changes
directly to a Clients-obj
* parse_logs just bundles the logs (using _bundle_logs), opens them,
parses them (using _parse_line) and applies the returned events to a
Clients-obj in the correct order

With these changes some sort of caching is possible because events are
not bound to a specific client-object and are easily sortable due to
their attached timestamp.
---
 tsstats/log.py | 177 ++++++++++++++++++-------------------------------
 1 file changed, 63 insertions(+), 114 deletions(-)

diff --git a/tsstats/log.py b/tsstats/log.py
index ca28156..5e605ad 100644
--- a/tsstats/log.py
+++ b/tsstats/log.py
@@ -1,16 +1,17 @@
 # -*- coding: utf-8 -*-
-
+# TODO: Implement online_dc again
+import itertools
 import logging
 import re
 from codecs import open
 from collections import namedtuple
 from glob import glob
 from os.path import basename
-from time import time
 
 import pendulum
 
-from tsstats.client import Client, Clients
+from tsstats import events
+from tsstats.client import Clients
 
 re_log_filename = re.compile(r'ts3server_(?P<date>\d{4}-\d\d-\d\d)'
                              '__(?P<time>\d\d_\d\d_\d\d.\d+)_(?P<sid>\d).log')
@@ -26,41 +27,9 @@ re_disconnect_invoker = re.compile(
 TimedLog = namedtuple('TimedLog', ['path', 'timestamp'])
 Server = namedtuple('Server', ['sid', 'clients'])
 
-
 logger = logging.getLogger('tsstats')
 
 
-def parse_logs(log_glob, ident_map=None, online_dc=True, *args, **kwargs):
-    '''
-    parse logs from `log_glob`
-
-    :param log_glob: path to server-logs (supports globbing)
-    :param ident_map: identmap used for Client-initializations
-
-    :type log_glob: str
-    :type ident_map: dict
-
-    :return: clients bundled by virtual-server
-    :rtype: tsstats.log.Server
-    '''
-    server = []
-    for virtualserver_id, logs in\
-            _bundle_logs(glob(log_glob)).items():
-        clients = Clients(ident_map)
-        # keep last log out of the iteration for now
-        for log in logs[:-1]:
-            # don't reconnect connected clients for all logs except last one
-            # because that would lead to insane onlinetimes
-            _parse_details(log.path, clients=clients, online_dc=False,
-                           *args, **kwargs)
-        # now parse details of last log with correct online_dc set
-        _parse_details(logs[-1].path, clients=clients, online_dc=online_dc,
-                       *args, **kwargs)
-        if len(clients) >= 1:
-            server.append(Server(virtualserver_id, clients))
-    return server
-
-
 def _bundle_logs(logs):
     '''
     bundle `logs` by virtualserver-id
@@ -103,84 +72,64 @@ def _bundle_logs(logs):
     return vserver_logfiles
 
 
-def _parse_details(log_path, ident_map=None, clients=None, online_dc=True):
-    '''
-    extract details from log-files
-
-    detailed parsing is done here: onlinetime, kicks, pkicks, bans, pbans
-
-    :param log_path: path to log-file
-    :param ident_map: :doc:`identmap`
-    :param clients: clients-object to add parsing-results to
-    :param online_cd: disconnect online clients after parsing
-
-    :type log_path: str
-    :type ident_map: dict
-    :type clients: tsstats.client.Clients
-    :type online_cd: bool
-
-    :return: parsed clients
-    :rtype: tsstats.client.Clients
-    '''
-    start_time = time()
-    if clients is None:
-        clients = Clients(ident_map)
-    log_file = open(log_path, encoding='utf-8')
-    # process lines
-    logger.debug('Started parsing of %s', log_file.name)
-    for line in log_file:
-        match = re_log_entry.match(line)
+def _parse_line(line):
+    parsed_events = []
+    match = re_log_entry.match(line)
+    if not match:
+        logger.debug('No match: "%s"', line)
+        return []
+    match = match.groupdict()
+    logdatetime = pendulum.parse(match['timestamp'])
+    message = match['message']
+    if message.startswith('client'):
+        match = re_dis_connect.match(message)
         if not match:
-            logger.debug('No match: "%s"', line)
-            continue
-        match = match.groupdict()
-        logdatetime = pendulum.parse(match['timestamp'])
-        message = match['message']
-        if message.startswith('client'):
-            match = re_dis_connect.match(message)
-            if not match:
-                logger.debug('Not supported client action: "%s"', message)
-                continue
-            nick, clid = match.group('nick'), match.group('clid')
-            client = clients.setdefault(
-                clid, Client(clients.ident_map.get(clid, clid), nick)
-            )
-            # set current nick
-            client.nick = nick
-            # add nick to history
-            client.nick_history.add(nick)
+            logger.debug('Unsupported client action: "%s"', message)
+            return []
+        nick, clid = match.group('nick'), match.group('clid')
 
-            action = match.group('action')
-            if action == 'connected':
-                client.connect(logdatetime)
-            elif action == 'disconnected':
-                client.disconnect(logdatetime)
-                if 'invokeruid' in message:
-                    re_disconnect_data = re_disconnect_invoker.findall(
-                        message)
-                    invokernick, invokeruid = re_disconnect_data[0]
-                    invoker = clients.setdefault(invokeruid,
-                                                 Client(invokeruid))
-                    invoker.nick = invokernick
-                    if 'bantime' in message:
-                        invoker.ban(client)
-                    else:
-                        invoker.kick(client)
-        elif message == 'stopped':
-            # make sure all clients are disconnected at server stop
-            [
-                client.disconnect(logdatetime)
-                for client in clients
-                if client.connected
-            ]
-    if online_dc:
-        def _reconnect(client):
-            client.disconnect(pendulum.now())
-            client.connected += 1
-        [_reconnect(client) for client in clients if client.connected]
-    logger.debug(
-        'Finished parsing of %s in %s seconds',
-        log_file.name, time() - start_time
-    )
-    log_file.close()
-    return clients
+        parsed_events.append(events.nick(logdatetime, clid, nick))
+
+        action = match.group('action')
+        if action == 'connected':
+            parsed_events.append(events.connect(logdatetime, clid))
+        elif action == 'disconnected':
+            parsed_events.append(events.disconnect(logdatetime, clid))
+            if 'invokeruid' in message:
+                re_disconnect_data = re_disconnect_invoker.findall(
+                    message)
+                invokernick, invokeruid = re_disconnect_data[0]
+                parsed_events.append(
+                    events.nick(logdatetime, invokeruid, invokernick)
+                )
+                if 'bantime' in message:
+                    parsed_events.append(
+                        events.ban(logdatetime, invokeruid, clid)
+                    )
+                else:
+                    parsed_events.append(
+                        events.kick(logdatetime, invokeruid, clid)
+                    )
+        return parsed_events
+
+
+def parse_logs(log_glob, ident_map=None, online_dc=True, *args, **kwargs):
+    '''
+    parse logs from `log_glob`
+
+    :param log_glob: path to server-logs (supports globbing)
+    :param ident_map: identmap used for Client-initializations
+
+    :type log_glob: str
+    :type ident_map: dict
+    '''
+    for virtualserver_id, logs in _bundle_logs(glob(log_glob)).items():
+        clients = Clients(ident_map)
+        for log in logs:
+            with open(log.path, encoding='utf-8') as f:
+                # parse logfile line by line and filter lines without events
+                events = filter(None, map(_parse_line, f))
+                # flatten per-line events and apply them to clients
+                clients.apply_events(itertools.chain.from_iterable(events))
+        # assemble Server-obj and yield
+        yield Server(virtualserver_id, clients)