# coding: utf-8

# debianforum.de donation plot
# written by TRex, modified by Meillo

import urllib
from datetime import datetime, date
from io import StringIO
from itertools import groupby
from sys import argv

import matplotlib
from lxml import etree

# must be executed before pyplot import
matplotlib.use('agg')
import matplotlib.pyplot as plt


# Exactly one command line argument is expected: the path of the image file
# to write.
if len(argv) != 2:
    # Raising SystemExit with a string prints it to stderr and terminates
    # with exit status 1. Unlike the exit() helper it does not depend on the
    # site module being loaded.
    raise SystemExit("Usage: " + argv[0] + " OUTFILE.png")
outfile = argv[1]

years = range(2003, datetime.now().year + 1)
donations_url = "https://wiki.debianforum.de/Debianforum.de/Spenden"
expenses_url = "https://wiki.debianforum.de/Debianforum.de/Ausgaben"

# Every year except the last one in `years` is fetched from a "/<year>"
# subpage; the last (current) year is fetched from the donations page itself.
urls = {
    year: (
        donations_url + '/' + str(year) if year < years[-1] else donations_url)
    for year in years}


def _fetch(url):
    """
    Download `url` and return the raw response body.

    The connection is closed even if reading fails.
    """
    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    response = urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


# Raw HTML of the donation pages, keyed by year.
y = {year: _fetch(url) for year, url in urls.items()}

expense_html = _fetch(expenses_url)


def parse_html_to_list(html):
    """
    Parse HTML table rows into a list of (date, note, amount) tuples.

    Every ``<tr>`` inside any ``<table>`` of the document is inspected. A row
    is used only if it is not the very first row found in the document, has
    exactly three cells and its first cell holds a date in ``DD.MM.YYYY``
    format. All other rows (headings, empty rows, ...) are skipped.

    :param html: UTF-8 encoded HTML document as a byte string.
    :return: list of ``(datetime, note, amount)`` tuples in document order.
        ``note`` is the first link text of the second cell, or its first text
        node if it has no link, or ``''`` if the cell has no text at all.
        ``amount`` is the text of the third cell parsed as float, with ``,``
        accepted as decimal separator.
    :raises ValueError: if a row with a valid date has an amount cell that
        cannot be parsed as a number.
    """
    data = []
    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(html.decode('utf-8')), parser)
    rows = tree.xpath('//table//tr')
    for index, row in enumerate(rows):
        cells = row.getchildren()
        if index == 0 or len(cells) != 3:
            continue

        # `.text` is None for a cell without leading text. Treat that like
        # empty text, which fails the date parse and skips the row instead of
        # crashing on None.
        date_text = (cells[0].text or "").strip()
        try:
            day = datetime.strptime(date_text, "%d.%m.%Y")
        except ValueError:
            # Not a data row.
            continue

        # Prefer the link text; fall back to any text inside the cell.
        notes = cells[1].xpath(".//a/text()") or cells[1].xpath(".//text()")
        note = notes[0].strip() if notes else ""

        amount = float((cells[2].text or "").strip().replace(",", "."))

        data.append((day, note, amount))
    return data

# Merge the entries of all yearly donation pages into one sorted list.
donations = sorted(
    entry
    for donate_html in y.values()
    for entry in parse_html_to_list(donate_html))

# The expenses come from a single page and are kept in page order.
expenses = parse_html_to_list(expense_html)


def _groupy(item):
    return item[0].year, item[0].month


def get_date_grouped_list(list_):
    agg_list = []
    for ((year, month), items) in groupby(list_, _groupy):
        agg_list.append((date(year, month, 1), sum([x[2] for x in items])))
    return agg_list


# Zero-amount entries for the months of the first donation year that precede
# the first donation. The same padding is prepended to both the donations and
# the expenses before they are summed per month.
first_donation_day = donations[0][0]
donations_prefix = [
    (date(first_donation_day.year, month, 1), '', 0)
    for month in range(1, first_donation_day.month)]

agg_donations = sorted(get_date_grouped_list(donations_prefix + donations))
agg_expenses = sorted(get_date_grouped_list(donations_prefix + expenses))

fig = plt.figure(figsize=(15, 7))

# One curve per monthly series: donations in green, expenses in red.
for series, colour, caption in ((agg_donations, "green", "Spenden"),
                                (agg_expenses, "red", "Ausgaben")):
    months = [entry[0] for entry in series]
    totals = [entry[1] for entry in series]
    plt.plot(months, totals, color=colour, label=caption)

# spartopf (savings pot): running balance of donations minus expenses
#
# Both series are first summed into per-month dicts. That merges a month that
# is listed more than once and makes it possible to look at every month that
# occurs in either series, so a month without an expense entry no longer has
# to be special-cased and its donations are not lost.
donations_by_month = {}
for month, amount in agg_donations:
    donations_by_month[month] = donations_by_month.get(month, 0) + amount

expenses_by_month = {}
for month, amount in agg_expenses:
    expenses_by_month[month] = expenses_by_month.get(month, 0) + amount

cash_available = []
cash = 0
# The balance is cumulative, so the months must be processed in
# chronological order.
for month in sorted(set(donations_by_month) | set(expenses_by_month)):
    cash += donations_by_month.get(month, 0) - expenses_by_month.get(month, 0)
    cash_available.append((month, cash))

# Running balance of the donation account in blue.
balance_months = [entry[0] for entry in cash_available]
balance_values = [entry[1] for entry in cash_available]
plt.plot(balance_months, balance_values, color="blue", label="Spendenkonto")

plt.xlabel("Datum")
plt.ylabel("Euro")
plt.legend()

# Write the figure to the requested file and release it.
plt.savefig(outfile)
plt.close(fig)

# Terminate with exit status 0 without relying on the exit() helper that is
# only injected by the site module.
raise SystemExit(0)
