#!/usr/bin/env python
#
# Copyright 2012-2013 SUSE Linux
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import argparse
import datetime
import glob
import os
import re
import sys
import tarfile
import urllib


# Number of leading characters of the upstream commit hash that are embedded
# in the generated package version string (see get_package_version).
COMMIT_HASH_SIZE = 7


def download_tarball(url, filename):
    """Download an upstream tarball

    :url: remote source of the tarball
    :filename: where to save the downloaded tarball

    Exits the program (via sys.exit) if the download fails with an IOError.

    """
    # urlretrieve lives in different modules on Python 2 and Python 3;
    # resolve it here so the function works on both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    try:
        urlretrieve(url, filename)
    except IOError as e:
        sys.exit(e)


def get_changelog_from_tarball(tar_name):
    """Get the ChangeLog file from the tarball as a string.

    :tar_name: path of the tarball to read

    The first archive member whose name ends with 'ChangeLog' is used.
    Exits the program (via sys.exit) if the tarball cannot be read or
    contains no such file.

    """
    try:
        # The context manager closes the archive on every path, including
        # the sys.exit() calls below.
        with tarfile.open(tar_name) as tar:
            changelog_file = next((f for f in tar.getnames()
                                   if f.endswith('ChangeLog')), None)
            if changelog_file is None:
                sys.exit("Could not find a 'ChangeLog' file. ")
            extracted = tar.extractfile(tar.getmember(changelog_file))
            # extractfile() returns None for members that are not regular
            # files or links (e.g. a directory named 'ChangeLog').
            if extracted is None:
                sys.exit("Could not find a 'ChangeLog' file. ")
            changelog = extracted.read()
    except tarfile.ReadError:
        sys.exit("Could not read the file %s" % tar_name)

    # On Python 3 the archive yields bytes; the callers search the ChangeLog
    # with text patterns, so hand back text.  On Python 2 this is a no-op.
    if not isinstance(changelog, str):
        changelog = changelog.decode('utf-8', 'replace')
    return changelog


def get_parent_dir(tar_name):
    """Return the filename of the top-level directory inside the tarball."""
    tar = tarfile.open(tar_name)
    parent_dir = tar.firstmember.name
    tar.close()

    return parent_dir


def parse_version_from_parent_dir(parent_dir, version_regexp):
    """Parse the parent_dir filename and return a version string.

    :parent_dir: name of the top-level directory inside the tarball
    :version_regexp: regular expression matched against the start of
    parent_dir; it must contain exactly one group, which captures the
    version

    Exits the program (via sys.exit) if the expression is invalid, does
    not match, or does not contain exactly one group.

    """
    try:
        match = re.match(version_regexp, parent_dir)
    except re.error as e:
        # Two values are reported, so the message needs two placeholders.
        sys.exit("Could not use '%s' as regular expression to find "
                 "version: %s" % (version_regexp, e))

    if match is None:
        sys.exit("Could not use '%s' as regular expression to find "
                 "version in '%s': no match" % (version_regexp, parent_dir))

    if len(match.groups()) != 1:
        sys.exit("Could not use '%s' as regular expression to find "
                 "version in '%s': more than one match" %
                 (version_regexp, parent_dir))

    return match.group(1)


def get_upstream_commit(changelog):
    """Get the latest commit in the upstream git repository as a string.

    :changelog: string with the contents of the ChangeLog file

    Returns the text following the first 'commit ' line marker.
    Raises ValueError if the changelog contains no such line.

    """
    match = re.search(r'^commit (.*?)$', changelog, re.MULTILINE)
    if match is None:
        # ValueError exists on Python 2 and 3 alike; on Python 2 it is a
        # subclass of StandardError, so handlers catching that still work.
        raise ValueError('Could not parse ChangeLog file.')
    return match.group(1)


def parse_changelog(changelog):
    """Parse a git ChangeLog file

    :changelog: string with the contents of the file

    Returns an iterator of match objects from python's `re`, one per
    commit entry found.

    Each match has the following named groups:
    commit, mergecommit, author, date, message

    `mergecommit` is None for entries without a 'Merge:' line; `message`
    holds only the first line of the commit message.

    """
    # Every fragment is a raw string so that regex escapes such as \s are
    # not treated as (invalid) string escape sequences.
    return re.finditer(r'^commit (?P<commit>.*?)$'
                       r'(?:\nMerge:\s+\S+\s+(?P<mergecommit>.*?)\n|\n)'
                       r'.*?'
                       r'^Author:\s+(?P<author>.*?)$'
                       r'.*?'
                       r'^Date:\s+(?P<date>.*?)$'
                       r'.*?'
                       r'\n\n\s+(?P<message>.*?)$'
                       r'(?:\n\n|\n.*?)',
                       changelog, re.MULTILINE | re.DOTALL)


def get_commit_from_spec(package, plain_version=False):
    """Parse the spec's Version field for a previously set commit version

    :package: name of the package; the file '<package>.spec' is read
    :plain_version: expect upstream version string instead of customized one

    Returns None in case no commit could be read.
    Raises IOError if the spec file cannot be opened.

    """
    if plain_version:
        # Assuming that the version string we're looking for was in
        # the name of the top-level directory that we extracted from
        # the tarball, we can read it here from the %setup field in
        # the specfile.
        pattern = r'^%setup.*-n.*?\.g([0-9a-f]{6,})(\+git.*)?$'
    else:
        pattern = r'^Version:\s+.*\+git\.\d+\.(.*?)(\s+#.*)?$'

    # `with` closes the file on every path and lets a failing open()
    # surface as the real IOError.
    with open(package + '.spec') as spec:
        contents = spec.read()

    match = re.search(pattern, contents, flags=re.MULTILINE)
    if match is None:
        return None
    return match.group(1)


def get_package_version(upstream_version, upstream_commit):
    """Build the package version string for the spec file

    :upstream_version: version string taken from the upstream tarball
    :upstream_commit: git commit hash (string) of the upstream revision

    Returns '<upstream_version>+git.<unix timestamp>.<short commit>', where
    the short commit is the first COMMIT_HASH_SIZE characters of the hash.

    """
    # strftime('%s') is a platform-specific extension that is not available
    # everywhere; time.time() yields the same epoch seconds portably.
    import time

    return '%s+git.%d.%s' % (upstream_version,
                             int(time.time()),
                             upstream_commit[:COMMIT_HASH_SIZE])


def parse_update_spec_file(contents, package_version,
                           tarball_parent_dir, filename):
    """Return the spec file contents with updated package fields

    :contents: string with the current contents of the spec file
    :package_version: new value for the 'Version:' field; the field is
    left untouched if this is empty or None
    :tarball_parent_dir: new value for the '-n' option of '%setup'
    :filename: new value for the 'Source:' / 'Source0:' field

    Only the first occurrence of each field is replaced.

    """
    # The replacements are built by functions instead of template strings,
    # so backslashes in the new values are inserted literally and are not
    # interpreted as escapes or group references.
    if package_version:
        contents = re.sub(
            r'\n((Version:\s+).*)\n',
            lambda m: '\n%s%s\n' % (m.group(2), package_version),
            contents, count=1)
    # only replace the "-n" option, leave everything else intact
    contents = re.sub(
        r'\n(%setup.*?-n\s+)(\S+?)(|[ \t]+.*?)\n',
        lambda m: '\n%s%s%s\n' % (m.group(1), tarball_parent_dir,
                                  m.group(3)),
        contents, count=1)
    contents = re.sub(
        r'\n((Source0?:\s+).*)\n',
        lambda m: '\n%s%s\n' % (m.group(2), filename),
        contents, count=1)
    return contents


def update_spec_files(package_version, tarball_parent_dir, filename):
    """Rewrite every spec file in the current directory with new values

    :package_version: new value for the 'Version:' field
    :tarball_parent_dir: new value for the '-n' option of '%setup'
    :filename: new value for the 'Source:' / 'Source0:' field

    All files matching './*.spec' are updated in place.

    """
    for specfile in glob.glob('./*.spec'):
        # `with` closes the file on every path and lets a failing open()
        # surface as the real error.
        with open(specfile, 'r+') as f:
            # Compute the new contents before truncating, so the file is
            # not left empty if the update step fails.
            contents = parse_update_spec_file(f.read(), package_version,
                                              tarball_parent_dir, filename)
            f.seek(0)
            f.truncate()
            f.write(contents)


def diff_changes(changes_list, package_commit):
    """Collect the changes that come before the packaged commit

    :changes_list: an iterable of match objects from the ChangeLog file
    :package_commit: a git commit hash (or hash prefix) of the current
    version from the spec file

    Entries are consumed in order until one whose 'commit' group starts
    with package_commit is reached; that entry and everything after it
    are left out.  Each collected entry is the match's group dict.

    Returns an empty list if there are no newer commits.

    """
    newer = []
    for match in changes_list:
        entry = match.groupdict()
        if not entry['commit'].startswith(package_commit):
            newer.append(entry)
            continue
        # reached the commit that is already packaged
        break
    return newer


def create_changes(changes_list, package_version, package_commit, email):
    """Return a string with the new changes for the .changes file

    :changes_list: an iterable of match objects from the ChangeLog file
    :package_version: release version string for the .changes file entry
    :package_commit: a git commit hash of the current version from the
    spec file
    :email: email address used for the .changes file entry

    Returns None (after printing a notice) if there are no new changes.

    """
    changes_diff = diff_changes(changes_list, package_commit)
    if not changes_diff:
        print("There are no new changes.")
        return

    merge_prefix = 'Merge "'
    messages = []
    merged = []
    for c in changes_diff:
        message = c['message']
        if c['mergecommit']:
            # Unwrap messages of the form 'Merge "<subject>"'; any other
            # merge message is kept as it is instead of being cut blindly.
            closing_quote = message.rfind('"')
            if (message.startswith(merge_prefix) and
                    closing_quote >= len(merge_prefix)):
                message = message[len(merge_prefix):closing_quote]
            messages.append(message)
            merged.append(c['mergecommit'])
        # do not add a message that was already added via "Merge..." commits;
        # the merged hashes are abbreviated, so compare them as prefixes
        # rather than assuming a fixed abbreviation length
        elif not any(c['commit'].startswith(m) for m in merged):
            messages.append(message)

    # Let's use chronological order
    messages.reverse()

    timestamp = datetime.datetime.utcnow().strftime('%a %b %e %T UTC %Y')
    commits = "  + " + "\n  + ".join(messages)

    change = (
        '-------------------------------------------------------------------'
        '\n%(timestamp)s - %(email)s\n'
        '\n'
        '- Update to version %(package_version)s:\n'
        '%(commits)s\n'
        '\n' % {"timestamp": timestamp, "email": email,
                "package_version": package_version, "commits": commits})

    return change


def update_changes_file(package, changes):
    """Prepend a new entry to the package's .changes file

    :package: name of the package; the file '<package>.changes' is updated
    :changes: string to insert at the top of the file

    Raises IOError if the .changes file cannot be opened.

    """
    # `with` closes the file on every path and lets a failing open()
    # surface as the real IOError.
    with open(package + '.changes', 'r+') as f:
        contents = f.read()
        f.seek(0)
        f.write(changes)
        f.write(contents)


if __name__ == '__main__':
    # Command line entry point: download the upstream tarball, derive the
    # package version from it, update the spec files and prepend the new
    # upstream commits to the .changes file.
    parser = argparse.ArgumentParser(description='Git Tarballs')
    parser.add_argument('--url', required=True,
                        help='upstream tarball URL to download')
    parser.add_argument('--filename',
                        help='where to save the downloaded tarball')
    parser.add_argument('--package',
                        help='the OBS package name')
    parser.add_argument('--email', required=True,
                        help='email of the commit author '
                        '(for the .changes file)')
    parser.add_argument('--version-regexp', default='.*-([^-]+)',
                        help='regular expression for extracting version from '
                        'top-level directory in tarball '
                        '(default: ".*-([^-]+)")')
    parser.add_argument('--outdir',
                        help='osc service parameter that does nothing')
    # NOTE: this option takes a value; any non-empty value enables it.
    parser.add_argument('--plain-version', help='use upstream version string')
    args = parser.parse_args()

    if not args.filename:
        # default to the last path component of the URL
        args.filename = args.url.rsplit("/", 1)[-1]
    if not args.package:
        # default to the name of the current working directory
        args.package = os.path.basename(os.getcwd())

    download_tarball(args.url, args.filename)

    changelog = get_changelog_from_tarball(args.filename)
    try:
        changes_list = parse_changelog(changelog)
        upstream_commit = get_upstream_commit(changelog)
    except Exception:
        # An unparsable ChangeLog is not fatal by itself: the spec files
        # can still be updated, only the .changes entry is skipped.
        changes_list = None
        upstream_commit = None

    tarball_parent_dir = get_parent_dir(args.filename)
    upstream_version = parse_version_from_parent_dir(tarball_parent_dir,
                                                     args.version_regexp)

    if args.plain_version:
        pkg_commit = get_commit_from_spec(args.package, plain_version=True)
        pkg_version = upstream_version
    else:
        # The generated version embeds the upstream commit hash, so it
        # cannot be built without one.
        if upstream_commit is None:
            sys.exit("Could not determine the upstream commit from the "
                     "ChangeLog file.")
        pkg_commit = get_commit_from_spec(args.package)
        pkg_version = get_package_version(upstream_version, upstream_commit)

    if not pkg_commit:
        # on first run, we only set the fields in the spec file;
        # there are no changes
        update_spec_files(pkg_version, tarball_parent_dir, args.filename)
        print("Initialized .spec file with git_tarball.")
        sys.exit()

    if changes_list:
        changes = create_changes(changes_list, pkg_version,
                                 pkg_commit, args.email)
    else:
        changes = None

    update_spec_files(pkg_version,
                      tarball_parent_dir, args.filename)
    if changes:
        update_changes_file(args.package, changes)
