#!/usr/bin/env python
# Copyright 2007-2008, Canonical, Ltd.
# Author: Kees Cook <kees@ubuntu.com>
#         Jamie Strandboge <jamie@canonical.com>
# License: GPLv3
#
# Extract certain pieces of information from LP (some day this should
# all vanish into the launchpad API so we don't have to scrape any more).
import sys, time, os, re
import urllib2, cookielib
import libxml2
import optparse
import tempfile, shutil
from launchpadbugs import http_connection

# Make cve_lib importable: it is loaded from the 'scripts' directory under
# the path named by the UCT environment variable.  If UCT is not set, the
# os.environ lookup raises KeyError here, at import time.
sys.path.append(os.path.join(os.environ['UCT'],'scripts'))
import cve_lib

# Load the cve_lib configuration, then build the module-level urllib2 opener
# that xmlurl() uses for every fetch.  Its cookie processor is populated from
# the file named by the "plb_authentication" config entry (presumably saved
# Launchpad login cookies -- confirm against the cve_lib configuration docs).
cve_lib.read_config()
cookie_processor = http_connection.LPCookieProcessor()
cookie_processor.load_file(cve_lib.config["plb_authentication"])
opener = urllib2.build_opener(cookie_processor)

# Silence parse warnings (from launchpad-python-bugs)
def noerr(ctx, str):
    """Do-nothing libxml2 error handler: ignores both arguments.

    NOTE: the 'str' parameter shadows the builtin of the same name; that is
    harmless here because the body never uses it.
    """
    pass
# Install the handler; None is passed as its context argument.
libxml2.registerErrorHandler(noerr, None)

def xmlurl(url):
    """Fetch *url* with the module-level opener and parse the body as HTML.

    Returns the document object produced by libxml2.htmlParseDoc().

    If fetching or parsing fails, the URL is reported on stderr and the
    original exception is re-raised unchanged.
    """
    try:
        # Get the HTML; close the response even when read() fails so the
        # connection is not left to the garbage collector.
        response = opener.open(url)
        try:
            doc = response.read()
        finally:
            response.close()
        # Parse into XML tree
        return libxml2.htmlParseDoc(doc, None)
    except Exception:
        # 'except Exception' (not a bare except) so that Ctrl-C / SystemExit
        # pass straight through without a misleading "Failed" message.
        sys.stderr.write("Failed: %s\n" % (url))
        raise

class LaunchpadPackageInfo(object):
    """Scrape package, build and download details from Launchpad PPA pages.

    Typical flow: get_ppa_xml() fetches a PPA page, get_pkg_details() turns
    it into a nested dict, and get_pkg_builds() follows one build link to
    find the .changes file and build log.

    Attributes:
        base_url: Launchpad root URL used to resolve '/'-relative links.
        debug:    when true, progress messages are written to stderr.
        ppa_url:  URL of the PPA most recently requested via get_ppa_xml()
                  (None until then); used to resolve '+'-relative links.
    """

    def __init__(self, debug=False):
        """Create a scraper; *debug* enables progress output on stderr."""
        self.base_url = 'https://launchpad.net'
        self.debug = debug
        self.ppa_url = None
        # NOTE: __init__ must not return a value; returning self makes
        # every instantiation raise TypeError.

    def _debug(self, message):
        """Write *message* plus a newline to stderr when debugging is on."""
        if self.debug:
            sys.stderr.write(message + "\n")

    def get_ppa_xml(self, group):
        """Fetch and parse the PPA archive page of Launchpad team *group*.

        Records the page URL in self.ppa_url so that normalize_url() can
        later resolve '+'-relative links found on that page.  Returns the
        parsed document from xmlurl(); fetch/parse errors propagate.
        """
        self.ppa_url = '%s/~%s/+archive' % (self.base_url, group)
        self._debug("ppa_url = '%s'" % (self.ppa_url))
        return xmlurl(self.ppa_url)

    def normalize_url(self, url):
        """Turn a link taken from a PPA page into an absolute URL.

        Links starting with '/' are resolved against base_url, links
        starting with '+' against the current PPA URL; anything else is
        returned unchanged.

        Raises ValueError for a '+'-relative link when no PPA URL is known
        (get_ppa_xml() has not been called and ppa_url was not set).
        """
        if url.startswith('/'):
            return self.base_url + url
        if url.startswith('+'):
            if self.ppa_url is None:
                raise ValueError("Cannot resolve '%s': PPA URL is not set "
                                 "(call get_ppa_xml() first)" % (url))
            return self.ppa_url + '/' + url
        return url

    def _load_pkg_details_from_xml(self, pkgs, xml):
        """Merge the details of one package table row into *pkgs*.

        *xml* is the <tr> node of a package row; the row that immediately
        follows it is read as the detail row (builds, diffs, files).
        Existing entries in *pkgs* are never overwritten (setdefault).

        Resulting layout:
            pkgs[pkg][rel]['source'] -> version, changes, and optionally
                                        ancestor-diff, diff, dsc, orig
            pkgs[pkg][rel][arch]     -> build, build_state, binaries

        Raises ValueError for a release not listed in cve_lib.releases or
        for a downloadable file of an unrecognised kind.
        """
        # First cell is "<source package> - <version>", sixth is the release
        name = xml.xpathEval('self::tr/td[1]')[0].content.strip()
        pkg, version = name.split(' - ')
        rel = xml.xpathEval('self::tr/td[6]')[0].content.strip().lower()
        if rel not in cve_lib.releases:
            # Include the row text so the offending entry can be identified
            raise ValueError("Unknown release '%s':\n" % (rel) + xml.content)
        detail = xml.xpathEval('self::tr/following-sibling::tr[1]')[0]

        rel_info = pkgs.setdefault(pkg, dict()).setdefault(rel, dict())
        self._debug("Source(%s): %s %s" % (rel, pkg, version))

        # Source details
        source = rel_info.setdefault('source', dict())
        source.setdefault('version', version)
        src_changes = xml.xpathEval('self::tr//a[contains(@href,"_source.changes")]')[0].prop('href')
        source.setdefault('changes', src_changes)
        self._debug("Source(%s) changes: %s" % (rel, src_changes))

        # Build details
        for build_li in detail.xpathEval('self::tr//h3[.="Builds"]/following-sibling::ul[1]//li'):
            build_a = build_li.xpathEval('self::li/a')[0]
            build_url = self.normalize_url(build_a.prop('href'))
            arch = build_a.content.strip().lower()
            arch_info = rel_info.setdefault(arch, dict())
            arch_info.setdefault('build', build_url)

            # The state is the second '-'-separated token of the status
            # icon's src attribute.
            build_img = build_li.xpathEval('self::li/img')[0]
            state = build_img.prop('src').split('-')[1]
            arch_info.setdefault('build_state', state)

            self._debug("Build(%s,%s) %s URL: %s" % (rel, arch, state, build_url))

        # Diff
        for diff_a in detail.xpathEval('self::tr//h3[.="Available diffs"]/following-sibling::ul[1]//a'):
            diff_url = self.normalize_url(diff_a.prop('href'))
            source.setdefault('ancestor-diff', diff_url)
            self._debug("Diff(%s) URL: %s" % (rel, diff_url))

        # Files
        for file_a in detail.xpathEval('self::tr//h3[.="Download files from Librarian"]/following-sibling::ul//a'):
            file_url = self.normalize_url(file_a.prop('href'))
            if file_url.endswith('deb'):
                # Architecture: text after the last '_' up to the first '.'
                arch = file_url.split('_').pop().split('.')[0]
                # hack for "all": attach to i386
                if arch == 'all':
                    arch = 'i386'
                # NOTE(review): raises KeyError when no build entry was
                # listed for this arch; kept as in the original.
                binaries = rel_info[arch].setdefault('binaries', dict())
                # Binary package name: basename of the third-from-last
                # '_'-separated piece of the URL.
                binary_name = file_url.split('_')[-3].split('/').pop()
                binaries.setdefault(binary_name, file_url)
                self._debug("Binary(%s,%s) URL: %s" % (rel, arch, file_url))
            else:
                if file_url.endswith('.diff.gz'):
                    source.setdefault('diff', file_url)
                elif file_url.endswith('.dsc'):
                    source.setdefault('dsc', file_url)
                elif file_url.endswith('.tar.gz'):
                    source.setdefault('orig', file_url)
                else:
                    raise ValueError("Unknown downloadable file from %s %s '%s'" % (pkg, version, file_url))
                self._debug("Source(%s) URL: %s" % (rel, file_url))

    def get_pkg_details(self, ppa_xml):
        """Return a nested dict describing every package row in *ppa_xml*.

        *ppa_xml* is a parsed PPA page (as returned by get_ppa_xml()).  See
        _load_pkg_details_from_xml() for the layout of the result.  If the
        'packages_list' table cannot be located, the page content is dumped
        to stderr and the original exception is re-raised.
        """
        # pkg -> { release, release -> { version } }
        pkgs = dict()

        try:
            packages_list = ppa_xml.xpathEval("//table[@id='packages_list']")[0]
        except Exception:
            sys.stderr.write("Failed:\n" + ppa_xml.content + "\n")
            raise

        for pkg_tr in packages_list.xpathEval('//tbody/tr[(@class = "archive_package_row") or (@class = "ppa_package_row")]'):
            self._load_pkg_details_from_xml(pkgs, pkg_tr)

        return pkgs

    def get_pkg_builds(self, pkgs, pkg, rel, arch):
        """Look up the .changes and build-log URLs for one successful build.

        *pkgs* is the dict produced by get_pkg_details().  Returns None when
        the recorded build state is not 'success'; otherwise fetches the
        build page and returns {'changes_url': ..., 'log_url': ...}.

        Raises ValueError when either link cannot be found on the build
        page, and KeyError when pkg/rel/arch is not present in *pkgs*.
        """
        arch_info = pkgs[pkg][rel][arch]
        state = arch_info['build_state']
        if state != 'success':
            self._debug("Skipping '%s' build for %s %s %s" % (state, pkg, rel, arch))
            return None
        self._debug("Loading build log for %s %s %s" % (pkg, rel, arch))
        build_url = arch_info['build']
        build_xml = xmlurl(build_url)
        # The source version recorded for this package/release identifies
        # the matching .changes link on the build page.
        version = pkgs[pkg][rel]['source']['version']

        # NOTE(review): os.path.join is used to append the hrefs to the
        # build URL (original behaviour, kept); an absolute http(s) href
        # would not be handled correctly -- confirm what Launchpad emits.
        build = dict()
        try:
            changes_href = build_xml.xpathEval('//th[.="Changes file:"]/../td/a[contains(@href,"/%s_%s_")]' % (pkg, version))[0].prop('href')
            build.setdefault('changes_url', os.path.join(build_url, changes_href))
        except Exception:
            raise ValueError("%s %s %s %s.changes does not exist" % (pkg, rel, version, arch))

        try:
            log_href = build_xml.xpathEval('//th[.="Build log:"]/../td/a[contains(@href,".txt.gz")]')[0].prop('href')
            build.setdefault('log_url', os.path.join(build_url, log_href))
        except Exception:
            raise ValueError("%s %s %s %s build log not found" % (pkg, rel, version, arch))

        return build
