# coding: utf-8
"""Plot donations, expenses and account balance of debianforum.de.

Written by TRex, modified by Meillo.

The script downloads the donation and expense tables from the
debianforum.de wiki, sums the amounts per month and writes a PNG chart
with three curves: donations, expenses and the running account balance.

Usage: SCRIPT OUTFILE.png
"""

import sys
from contextlib import closing
from datetime import date, datetime
from io import StringIO
from itertools import groupby

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

first_year = 2003
donations_url = "https://wiki.debianforum.de/Debianforum.de/Spenden"
expenses_url = "https://wiki.debianforum.de/Debianforum.de/Ausgaben"


def build_donation_urls(years):
    """Map every year in *years* to the wiki page listing its donations.

    The last year of the sequence is served by the main donations page,
    every earlier year by the sub page ``<donations_url>/<year>``.
    """
    years = list(years)
    return {
        year: (
            donations_url + '/' + str(year)
            if year < years[-1] else donations_url)
        for year in years}


def fetch(url):
    """Return the raw body (bytes) of *url* and close the connection."""
    # closing() works for both the Python 2 and the Python 3 response object.
    with closing(urlopen(url)) as response:
        return response.read()


def parse_html_to_list(html):
    """
    Parse HTML table to list of tuples, expecting three cells with
    date, note, amount in each row.

    *html* is the UTF-8 encoded page source (bytes). Every ``<tr>`` inside
    a ``<table>`` is inspected; the very first row and rows that do not
    have exactly three cells are ignored, as are rows whose first cell is
    not a date in ``DD.MM.YYYY`` format. The amount cell uses a decimal
    comma.

    Returns a list of ``(datetime, note, amount)`` tuples in document
    order. Raises ValueError if the amount of a dated row is not a number.
    """
    # Imported here so the aggregation helpers of this module stay usable
    # without lxml being installed.
    from lxml import etree

    data = []
    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(html.decode('utf-8')), parser)
    rows = tree.xpath('//table//tr')
    for index, row in enumerate(rows):
        cells = list(row)
        if index == 0 or len(cells) != 3:
            continue
        try:
            # An empty cell has text None; treat it like any other
            # non-date row instead of crashing.
            when = datetime.strptime(
                (cells[0].text or '').strip(), "%d.%m.%Y")
        except ValueError:
            continue
        # Prefer the link text, fall back to the plain cell text.
        note = cells[1].xpath(".//a/text()")
        if not note:
            note = cells[1].xpath(".//text()")
        amount = float(cells[2].text.strip().replace(",", "."))
        data.append((when, note[0].strip() if note else '', amount))
    return data


def _groupy(item):
    """Grouping key of a row: the (year, month) of its date."""
    return item[0].year, item[0].month


def get_date_grouped_list(list_):
    """Sum the amounts of *list_* per calendar month.

    *list_* holds tuples whose first element is a date/datetime and whose
    third element is the amount. The rows may be in any order: they are
    sorted by month first, because groupby() only merges neighbours.

    Returns a chronologically ordered list of ``(date, total)`` tuples
    where ``date`` is the first day of the month.
    """
    return [
        (date(year, month, 1), sum(x[2] for x in items))
        for (year, month), items
        in groupby(sorted(list_, key=_groupy), _groupy)]


def leading_months(first_day):
    """Return zero-amount rows for January up to the month before *first_day*.

    Prepending these rows to a series makes its monthly aggregation start
    in January of that year.
    """
    return [(date(first_day.year, month, 1), '', 0)
            for month in range(1, first_day.month)]


def get_cash_available(agg_donations, agg_expenses):
    """Return the running account balance per month.

    Both arguments are lists of ``(date, amount)`` tuples. Every month
    that occurs in either list yields one ``(date, balance)`` tuple, where
    balance is the sum of all donations minus all expenses up to and
    including that month. The result is ordered chronologically.
    """
    delta = {}
    for month, amount in agg_donations:
        delta[month] = delta.get(month, 0) + amount
    for month, amount in agg_expenses:
        delta[month] = delta.get(month, 0) - amount

    # Walking the union of months in chronological order keeps donations
    # of months without expenses in the balance and makes a hard-coded
    # entry for individual missing months unnecessary.
    cash = 0
    cash_available = []
    for month in sorted(delta):
        cash += delta[month]
        cash_available.append((month, cash))
    return cash_available


def plot(outfile, agg_donations, agg_expenses, cash_available):
    """Draw the three monthly series and save the chart to *outfile*."""
    import matplotlib
    # must be executed before pyplot import
    matplotlib.use('agg')
    import matplotlib.pyplot as plt

    fig = plt.figure(figsize=(15, 7))
    curves = (
        (agg_donations, "green", "Spenden"),
        (agg_expenses, "red", "Ausgaben"),
        (cash_available, "blue", "Spendenkonto"),
    )
    for series, color, label in curves:
        plt.plot([x[0] for x in series], [x[1] for x in series],
                 color=color, label=label)
    plt.xlabel("Datum")
    plt.ylabel("Euro")
    plt.legend()
    plt.savefig(outfile)
    plt.close(fig)


def main(argv=None):
    """Run the script; return the process exit status."""
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        print("Usage: " + argv[0] + " OUTFILE.png")
        return 1
    outfile = argv[1]

    years = range(first_year, datetime.now().year + 1)
    donations = []
    for url in build_donation_urls(years).values():
        donations += parse_html_to_list(fetch(url))
    donations.sort()
    expenses = parse_html_to_list(fetch(expenses_url))

    if not donations:
        sys.stderr.write("No donations found, nothing to plot.\n")
        return 1

    # Pad both series with empty months from January of the first
    # donation year up to the first donation.
    donations_prefix = leading_months(donations[0][0])
    agg_donations = get_date_grouped_list(donations_prefix + donations)
    agg_expenses = get_date_grouped_list(donations_prefix + expenses)

    cash_available = get_cash_available(agg_donations, agg_expenses)
    plot(outfile, agg_donations, agg_expenses, cash_available)
    return 0


if __name__ == "__main__":
    sys.exit(main())