# -*- coding: utf-8 -*-
# Copyright (C) 2010, 2011  Michał Masłowski  <mtjm@mtjm.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


"""
A GetMediumURL client module comparing the data found by plugins with
the data stored in a file.

It should be useful for checking that changes in plugins or sites don't
change the data found for some media.
"""


from collections import defaultdict
from hashlib import sha512
from optparse import OptionParser
import os.path
import warnings
from subprocess import Popen, PIPE
from threading import Thread

from lxml import etree
from urlreader import URLReader
from urlreader.caches import DirectoryCache

from getmediumurl.matcher import Matcher


def assert_equals(key, first, second):
    """Fail if *first* != *second*."""
    # For some reason ConfigParser doesn't use Unicode strings.
    if not isinstance(first, unicode) and isinstance(first, str):
        first = first.decode("utf-8")
    if first != second:
        print "%s: %r != %r" % (key, first, second)


def makesum(reader, sum_video_only):
    """Return the SHA-512 hex digest of the document read from *reader*.

    *reader* is iterated for fragments of data.  If *sum_video_only* is
    false, the digest is computed over these fragments directly.
    Otherwise the fragments are piped through ``ffmpeg`` (which must be
    available on the path) and the digest is computed over the stream
    it writes to its standard output.
    """
    summer = sha512()
    if not sum_video_only:
        for fragment in reader:
            summer.update(fragment)
        return summer.hexdigest()

    # Don't depend on specific server used to download the video: sum
    # the stream extracted by ffmpeg, since the file contains other
    # data which changes often.
    #
    # The diagnostics of ffmpeg are not needed.  They are sent to the
    # null device instead of a pipe whose reading end gets closed, so
    # ffmpeg never writes to a broken pipe.
    with open(os.devnull, "wb") as devnull:
        ffmpeg = Popen(("ffmpeg", "-i", "-", "-vcodec", "copy",
                        "-f", "rawvideo", "-"), stdin=PIPE, stdout=PIPE,
                       stderr=devnull)

    def update():
        """Update summer in another thread."""
        while True:
            fragment = ffmpeg.stdout.read(4096)
            if not fragment:
                break
            summer.update(fragment)

    # The output is consumed in a separate thread while the input is
    # being written, so neither pipe can fill up and block the other.
    updater = Thread(target=update)
    updater.start()
    try:
        for fragment in reader:
            ffmpeg.stdin.write(fragment)
    finally:
        # Closing stdin lets ffmpeg finish, which ends the updater
        # thread; this must happen even if reading or writing failed.
        ffmpeg.stdin.close()
        updater.join()
        ffmpeg.stdout.close()
        # Reap the child process so it does not linger as a zombie.
        ffmpeg.wait()
    return summer.hexdigest()


#: Maps ``format_name`` values, as printed by ``ffprobe``, to the MIME
#: types they are compared with.  Names missing here are reported as
#: unknown formats by `ffprobe_check`.
_CODECS_TO_TYPES = {
    "flv": "video/x-flv",
    "mov,mp4,m4a,3gp,3g2,mj2": "video/mp4",
    "matroska,webm": "video/webm",
    }


def ffprobe_check(false_size, data, mime_type, width, height):
    """Check if ``ffprobe`` reports the same data as given in arguments."""
    ffprobe = Popen(("ffprobe", "-show_format", "-show_streams", "-"),
                    stdin=PIPE, stdout=PIPE, stderr=PIPE)
    for line in ffprobe.communicate(data)[0].split("\n"):
        if line.startswith("format_name="):
            if mime_type != u"":
                fmt = line.split("=")[1]
                try:
                    assert_equals("ff_mime_type", mime_type,
                                  _CODECS_TO_TYPES[fmt])
                except KeyError:
                    print "unknown format", fmt
        if false_size:
            continue
        if line.startswith("width="):
            if width is not None:
                assert_equals("ff_width", width, int(line.split("=")[1]))
        if line.startswith("height="):
            if height is not None:
                assert_equals("ff_height", height, int(line.split("=")[1]))


def check_thumbnail(matcher, page_medium, plugin_medium, checksum):
    """Check thumbnail sum or just cache it."""
    thumbnail_sum = page_medium.xpath("thumbnail/@sha512")
    if not thumbnail_sum:
        assert_equals("thumbnail", plugin_medium.thumbnail, None)
        return
    if not checksum and plugin_medium.thumbnail is not None:
        matcher.urlreader(plugin_medium.thumbnail).prepare_content()
    elif plugin_medium.thumbnail is not None:
        summer = sha512()
        for fragment in matcher.urlreader(plugin_medium.thumbnail):
            summer.update(fragment)
        assert_equals("thumbnail", thumbnail_sum[0], summer.hexdigest())
    else:
        print "thumbnail not found"


def check_file(matcher, plugin_formats, page_format, checksum):
    """Check data about a single file.

    *page_format* describes the expected file through its ``sha512``,
    ``type``, ``width`` and ``height`` attributes.  *plugin_formats*
    maps ``(sum, MIME type, width, height)`` tuples to lists of URLs
    found by the plugin.  A matching URL is removed from the mapping,
    so what remains afterwards was found but not expected.  If no URL
    matches, a message is printed and the mapping is left unchanged.

    When *checksum* is true, the sum takes part in the match and the
    matched file is also verified using `ffprobe_check`.
    """
    pagesum = page_format.get("sha512", "") if checksum else None
    mime_type = page_format.get("type", "")
    width = page_format.get("width", None)
    width = int(width) if width is not None else None
    height = page_format.get("height", None)
    height = int(height) if height is not None else None
    fmt = (pagesum, mime_type, width, height)
    # Use get() instead of subscripting: subscripting a defaultdict
    # would insert an empty list for a missing format, which would then
    # look like a format found by the plugin but not expected.
    urls = plugin_formats.get(fmt)
    if not urls:
        # A single parenthesized value prints the same text as listing
        # the items separately.
        print("format not found %s %s %s %s"
              % (pagesum, mime_type, width, height))
        return
    if checksum:
        # Probe the same URL that is removed below.
        ffprobe_check(page_format.get("false_size", False),
                      matcher.urlreader(urls[-1]).content,
                      mime_type, width, height)
    urls.pop()
    if not urls:
        del plugin_formats[fmt]


#: Names of the ``meta`` elements of a medium.  Each name is also the
#: name of the attribute of a plugin's medium object holding the value.
METAS = ("url", "title", "author_name", "author_url", "website",
         "description", "license", "language")


def check_medium(matcher, page_medium, plugin_medium, checksum):
    """Check if the same data about medium can be obtained."""
    for name in METAS:
        try:
            meta = page_medium.xpath("meta[@name='%s']" % name)[0]
        except IndexError:
            meta_text = None if name != "language" else u""
        else:
            meta_text = u" ".join(meta.text.split())
        assert_equals(name, meta_text, getattr(plugin_medium, name))
    check_thumbnail(matcher, page_medium, plugin_medium, checksum)
    plugin_formats = defaultdict(list)
    sum_video_only = page_medium.get("sum_video_only", False)
    for fmt in plugin_medium:
        if checksum:
            file_sum = makesum(matcher.urlreader(fmt.url),
                           sum_video_only)
        else:
            file_sum = None
            matcher.urlreader(fmt.url).prepare_content()
        plugin_formats[(file_sum, fmt.mime_type,
                        fmt.width, fmt.height)].append(fmt.url)
    for page_format in page_medium.xpath("format"):
        check_file(matcher, plugin_formats, page_format, checksum)
    if plugin_formats:
        print "unspecified formats found "
        print "\n".join(u" ".join((str(elem) for elem in fmt))
                        for fmt in plugin_formats.iterkeys())


def check_page(matcher, url, page, checksum):
    """Check media on a page."""
    print url
    plugin = matcher.match(url)
    if plugin is None:
        print "not matched"
        return
    page_media = list(page)
    plugin_media = list(plugin)
    i = -1
    for i in xrange(0, min(len(page_media), len(plugin_media))):
        check_medium(matcher, page_media[i], plugin_media[i], checksum)
    if i + 1 < len(page_media):
        print "Unmatched media listed:"
        for medium in page_media[i:]:
            print etree.tostring(medium)
    if i + 1 < len(plugin_media):
        print "Unmatched media found:"
        for medium in plugin_media[i:]:
            print medium.title


def xml_format(urlreader, fmt, checksum):
    """Return a `lxml.etree.Element` representing specific format.

    The element gets the ``type``, ``width`` and ``height`` attributes
    for the values known for *fmt*; these are the attribute names read
    back by `check_file`.  No ``sha512`` attribute is written yet.  The
    content at the URL of *fmt* is prepared by the reader returned by
    *urlreader*.
    """
    element = etree.Element("format")
    # TODO somehow compute the sum.
    assert checksum is not None
    urlreader(fmt.url).prepare_content()
    if fmt.mime_type:
        # The stored files use "type" for the MIME type, so writing
        # "mime_type" would make the generated element never match.
        element.set("type", fmt.mime_type)
    if fmt.width is not None:
        element.set("width", str(fmt.width))
    if fmt.height is not None:
        element.set("height", str(fmt.height))
    return element


def xml_thumbnail(document, checksum):
    """Return a ``thumbnail`` `lxml.etree.Element` for *document*.

    When *checksum* is true, the element gets a ``sha512`` attribute
    with the hex digest of the fragments read from *document*.
    Otherwise the content of *document* is only prepared and the
    element has no attributes.
    """
    thumbnail = etree.Element("thumbnail")
    if not checksum:
        document.prepare_content()
        return thumbnail
    digest = sha512()
    for chunk in document:
        digest.update(chunk)
    thumbnail.set("sha512", digest.hexdigest())
    return thumbnail


def xml_medium(medium, checksum):
    """Return a ``medium`` `lxml.etree.Element` describing *medium*.

    The element contains a ``meta`` child for each name in `METAS`
    having a value, followed by a ``format`` child for each format of
    *medium* and a ``thumbnail`` child if a thumbnail is known.
    """
    root = etree.Element("medium")
    for name in METAS:
        text = getattr(medium, name)
        # An empty language is treated like a missing one.
        if name == "language" and text == "":
            text = None
        if text is None:
            continue
        child = etree.Element("meta")
        child.set("name", name)
        child.text = text
        root.append(child)
    for fmt in medium:
        format_element = xml_format(medium.urlreader, fmt, checksum)
        root.append(format_element)
    thumbnail_url = medium.thumbnail
    if thumbnail_url is not None:
        document = medium.urlreader(thumbnail_url)
        root.append(xml_thumbnail(document, checksum))
    return root


def xml_page(matcher, url, checksum):
    """Return a ``page`` `lxml.etree.Element` for the media at *url*.

    The element has its ``url`` attribute set to *url*.  It gets one
    child per medium found by the plugin matching *url*, or no
    children when no plugin matches.
    """
    page = etree.Element("page")
    page.set("url", url)
    plugin = matcher.match(url)
    if plugin is None:
        return page
    for medium in plugin:
        medium_element = xml_medium(medium, checksum)
        page.append(medium_element)
    return page


#: Path of the file with stored data, in the user's home directory; it
#: is used when no file is named on the command line.
DEFAULT_FILE = os.path.join(os.path.expanduser("~"),
                            ".getmediumurl-plugin-test.xml")
#: Default path of the cache directory, in the user's home directory.
DEFAULT_CACHE = os.path.join(os.path.expanduser("~"),
                             ".getmediumurl-plugin-test-cache")


def main():
    """The main function."""

    warnings.simplefilter("error")

    parser = OptionParser(usage="usage: %prog [options] [configuration files]")
    parser.add_option("-f", "--fetch",
                      action="store_true",
                      dest="fetch",
                      default=False,
                      help="fetch data not found in the cache")
    parser.add_option("-d", "--directory",
                      dest="directory",
                      default=DEFAULT_CACHE,
                      help="path to cache directory")
    parser.add_option("-c", "--no-checksum",
                      action="store_false",
                      dest="checksum",
                      default=True,
                      help="don't compute SHA512 checksums")
    parser.add_option("-u", "--url",
                      action="append",
                      dest="urls",
                      default=[],
                      help="check only URLs specified with this option")
    (options, args) = parser.parse_args()
    backends = None if options.fetch else ()
    caches = (DirectoryCache(options.directory),)

    args = args or (DEFAULT_FILE,)
    urls = set(options.urls)
    limit_urls = bool(urls)

    matcher = Matcher(urlreader=URLReader(backends=backends, caches=caches))

    for arg in args:
        print "Using config file", arg
        tree = etree.parse(arg)
        for page in tree.getroot():
            url = page.get("url")
            if url is None:
                continue
            if limit_urls:
                if url not in urls:
                    continue
                urls.remove(url)
            check_page(matcher, url, page, options.checksum)

    first = True
    for url in urls:
        if first:
            print "Not in config"
            first = False
        print etree.tostring(xml_page(matcher, url, options.checksum),
                             pretty_print=True)
