#
# -*- coding: utf-8 -*-
#
import datetime
import json
import logging
import operator
import os
import time

# try importing redis
try:
    import redis
except ImportError:
    print("redis not available, falling back to volatile stats backend")
    redis = None

# try importing pygal
try:
    import pygal
except ImportError:
    print("pygal not available, falling back to text based stats")
    pygal = None

__doc__ = """
|
|
statistics backend optionally using redis
|
|
"""
|
|
|
|
|
|
class RedisDB:
    """
    redis based backend for storing stats
    """
    def __init__(self):
        self._redis = redis.Redis()
        self.exists = self._redis.exists
        self.get = self._redis.get
        self.set = self._redis.set


class DictDB:
    """
    volatile dictionary based database backend for storing stats in memory
    """
    def __init__(self):
        self._d = dict()

    def get(self, k):
        if self.exists(k):
            return self._d[k]

    def set(self, k, v):
        self._d[k] = v

    def exists(self, k):
        return k in self._d


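# A minimal sketch of the interface both backends share; 'some.key' is a
# hypothetical key used only for illustration (note that RedisDB.get returns
# bytes rather than str):
#
#     db = DictDB()            # or RedisDB() when redis is importable
#     db.set('some.key', '1')
#     assert db.exists('some.key')
#     assert db.get('some.key') == '1'

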
class Grapher:
    """
    generic grapher that does nothing
    """

    def collect(self, data_sorted, multiplier, calc_netsize, window_len):
        """
        do the magic calculations

        yields (x, netsize_y, rph_y)
        """
        window = []
        for hour, val in data_sorted:
            # decode the absolute hour counter (see StatsEngine.get_hour)
            # back into year / day-of-year / hour-of-day
            years = hour // (365 * 24)
            days = (hour - years * 365 * 24) // 24
            hour_of_day = hour - ((years * 365 * 24) + (days * 24))
            hour = datetime.datetime.strptime('%0.4d_%0.3d_%0.2d' % (years, days, hour_of_day), '%Y_%j_%H')
            # sliding-window mean over the last window_len samples
            window.append(val)
            while len(window) > window_len:
                window.pop(0)
            mean = sum(window) / len(window)
            netsize = int(calc_netsize(mean, multiplier))
            yield (hour, netsize, val)

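    # A worked sketch, assuming multiplier=1, window_len=2 and a hypothetical
    # identity estimator lambda mean, mult: mean * mult: hit counts [10, 30]
    # yield netsizes 10 then 20 (the running window mean), with the raw count
    # as the third element of each yielded tuple.
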
    def generate(self, data_sorted, multiplier, calc_netsize, window_len):
        """
        :param data_sorted: sorted list of (hour, hitcount) tuples
        :param multiplier: multiplier to use on the graph Y axis
        :param calc_netsize: function that calculates the network size given a mean value and multiplier
        :param window_len: length of the sliding window used for the mean
        :return: (netsize, requests) graph tuple
        """


class SVGText:
    """
    svg that holds text
    """
    def __init__(self, data='undefined'):
        self.data = data

    def render(self):
        return """<?xml version="1.0" standalone="no"?>
<svg viewBox="0 0 80 40" xmlns="http://www.w3.org/2000/svg">
<desc>fallback svg</desc>
<rect x="0" y="0" width="80" height="40" stroke="red" fill="none">
</rect>
<text x="30" y="20">{}</text>
</svg>
""".format(self.data)


class TextGrapher(Grapher):
    """
    generates svg manually that looks like ass
    """

    def generate(self, data_sorted, multiplier, calc_netsize, window_len):
        nsize = 0
        rph = 0
        t = 0
        for hour, netsize, reqs in self.collect(data_sorted, multiplier, calc_netsize, window_len):
            t += 1
            nsize += netsize
            rph += reqs
        if t:
            nsize /= t
            rph /= t
        return SVGText("MEAN NETSIZE: {} routers".format(nsize)), SVGText("MEAN REQUESTS: {} req/hour".format(rph))


class PygalGrapher(Grapher):
    """
    generates svg graphs using pygal
    """

    def generate(self, data_sorted, multiplier, calc_netsize, window_len):
        # pygal.DateY is the pygal 1.x chart type; pygal 2.x replaced it
        # with DateTimeLine
        _netsize_graph = pygal.DateY(show_dots=False, x_label_rotation=20)
        _requests_graph = pygal.DateY(show_dots=False, x_label_rotation=20)

        _netsize_graph.title = 'Est. Network Size (multiplier: %d)' % multiplier
        _requests_graph.title = 'Requests Per Hour'

        netsize_s, req_s = list(), list()
        for hour, netsize, reqs in self.collect(data_sorted, multiplier, calc_netsize, window_len):
            netsize_s.append((hour, netsize))
            req_s.append((hour, reqs))

        _netsize_graph.add('Routers', netsize_s)
        _requests_graph.add('news.xml Requests', req_s)
        return _netsize_graph, _requests_graph


class StatsEngine:
    """
    Stats engine for news.xml
    """

    _log = logging.getLogger('StatsEngine')

    def __init__(self):
        self._cfg_fname = 'settings.json'
        if redis:
            self._db = RedisDB()
            try:
                self._db.exists('nothing')
            except Exception:
                self._log.warning("failed to connect to redis, falling back to volatile stats backend")
                self._db = DictDB()
        else:
            self._db = DictDB()
        if pygal:
            self._graphs = PygalGrapher()
        else:
            self._graphs = TextGrapher()

        self._last_hour = self.get_hour()

    def _config_str(self, name):
        with open(self._cfg_fname) as f:
            return str(json.load(f)[name])

    def _config_int(self, name):
        with open(self._cfg_fname) as f:
            return int(json.load(f)[name])

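    # settings.json is re-read on every lookup, so edits take effect without
    # a restart.  A minimal example file (values illustrative, not defaults):
    #
    #     {"mult": 1, "slice": 168, "winlen": 24}
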
    def multiplier(self):
        return self._config_int('mult')

    def tslice(self):
        return self._config_int('slice')

    def window_len(self):
        return self._config_int('winlen')

    @staticmethod
    def get_hour():
        """
        get the current hour as an int
        """
        dt = datetime.datetime.utcnow()
        return dt.hour + (int(dt.strftime('%j')) * 24) + (dt.year * 24 * 365)

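    # For example, 2024-04-09 05:00 UTC (day-of-year 100) encodes as
    # 5 + 100 * 24 + 2024 * 24 * 365 = 17732645; Grapher.collect reverses
    # this with the matching // (365 * 24) and // 24 divisions.
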
    def calc_netsize(self, per_hour, mult):
        return float(per_hour) * 24 / 1.5 * mult

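    # The 24 / 1.5 factor assumes each router polls news.xml roughly 1.5
    # times per day (an assumption, not documented here), so e.g. a mean of
    # 100 req/hour estimates 100 * 24 / 1.5 = 1600 routers before the
    # configurable multiplier is applied.
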
    @staticmethod
    def _hour_key(hour):
        return 'newsxml.hit.{}'.format(hour)

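    # e.g. _hour_key(17732645) -> 'newsxml.hit.17732645'
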
    def hit(self, lang=None):
        """
        record a request
        """
        hour = self.get_hour()
        keyname = self._hour_key(hour)
        if not self._db.exists(keyname):
            self._db.set(keyname, '0')
        # read-modify-write: not atomic, so concurrent hits can be undercounted
        val = self._db.get(keyname)
        self._db.set(keyname, str(int(val) + 1))

    def _load_data(self, hours):
        """
        load hit data going back `hours` hours from now
        """
        hour = self.get_hour()
        data = list()
        while hours > 0:
            keyname = self._hour_key(hour)
            val = self._db.get(keyname)
            if val:
                data.append((hour, int(val)))
            hour -= 1
            hours -= 1
        return data

    def regen_graphs(self, tslice, window_len, mult):
        data = self._load_data(tslice)
        data_sorted = sorted(data, key=operator.itemgetter(0))
        if len(data_sorted) > tslice:
            data_sorted = data_sorted[-tslice:]
        self._netsize_graph, self._requests_graph = self._graphs.generate(data_sorted, mult, self.calc_netsize, window_len)

    def netsize(self, tslice, window, mult):
        # regenerated on every call; hasattr-based caching is disabled
        # if not hasattr(self, '_netsize_graph'):
        self.regen_graphs(tslice, window, mult)
        return self._netsize_graph.render()

    def requests(self, tslice, window, mult):
        # regenerated on every call; hasattr-based caching is disabled
        # if not hasattr(self, '_requests_graph'):
        self.regen_graphs(tslice, window, mult)
        return self._requests_graph.render()


engine = StatsEngine()
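

if __name__ == '__main__':
    # smoke-test sketch: assumes a settings.json with 'mult', 'slice' and
    # 'winlen' keys in the working directory
    engine.hit()
    print(engine.netsize(engine.tslice(), engine.window_len(), engine.multiplier()))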