#!/usr/bin/env python
#-----------------------------------------------------------------------------
# Name:        mkplaylist.py
# Purpose:     Create playlists from directory scans.
#
# Author:      Marc 'BlackJack' Rintsch
#
# Created:     2004-11-09
# Copyright:   (c) 2004, 2005
# Licence:     GPL
#-----------------------------------------------------------------------------
"""Make a playlist file.

:var factory: instance of a `PlaylistEntryFactory`.
:var HAVE: dictionary of module names that maps to booleans that tell
    if the module was imported successfully.
:var MODULES: names of modules to read meta data from various media
    file types.
:var TYPES: tuples with information about recognized types.  Each tuple
    consists of the file name extension(s), a descriptive name, and
    a function to read meta data from that file type.
:var WRITERS: dictionary that maps playlist format names to functions
    that write a sequence of `PlaylistEntry` objects in that format
    to a file.

:todo: Check if docstrings and code are still in sync.
:todo: Refactor cache code.  Introduce a Cache class.  Maybe subclassing
    `PlaylistEntryFactory` with a caching version.  Keep in mind that this
    scales, i.e. implementing an SQLite cache or using AmaroK's db should be
    considered too.
:todo: Find a strategically favourable place to minimise the contents of the
    meta data dictionaries to the bare minimum to cut down the cache file size.
    It is not necessary to have the version of the vorbis library in ogg meta
    data for example.
"""
from __future__ import division
import sys
import os
import os.path
import random
import logging
import cPickle as pickle
from itertools import chain

__author__ = "Marc 'BlackJack' Rintsch <marc(at)rintsch(dot)de>"
__version__ = '0.4.5'
__date__ = '$Date: 2006-05-20 15:30:16 +0200 (Sat, 20 May 2006) $'
__revision__ = '$Rev: 855 $'

__docformat__ = 'reStructuredText'

# Disable `pylint` name convention warning for names on module level that
# are not ``def``\ed functions and still have no conventional constant names,
# i.e. only capital letters.
# 
# pylint: disable-msg=C0103

#
# Use logging for output at different *levels*.
#
# Set the level of the root logger to INFO.
logging.getLogger().setLevel(logging.INFO)
# Logger used by all functions in this module.  `main()` changes its level
# if the ``--quiet`` or ``--verbose`` option is given.
log = logging.getLogger("mkplaylist")
# Log messages go to stderr, so they do not get mixed into a playlist that
# is written to stdout.
handler = logging.StreamHandler(sys.stderr)
log.addHandler(handler)


#-----------------------------------------------------------------------------
# Configure stuff.  Checking at runtime which packages are there to read
# meta data from media files.
#
# All packages and modules that are optional will be imported within the
# following code section and there is a global dictionary `HAVE` that
# maps module names to boolean values which are set if a module was
# imported or not.

def try_import(module_names):
    """Tries to import modules.

    Every module that could be imported is made available in the global
    namespace of this module.  For a dotted name like ``'ogg.vorbis'`` the
    top level package (``ogg``) is bound, just like the statement
    ``import ogg.vorbis`` would do it; the submodule is reachable as an
    attribute of that package.

    :param module_names: names of the modules to import.
    :type module_names: sequence of str

    :return: a dictionary with module names mapped to `True` if the
        module was successfully imported, `False` otherwise.
    :rtype: dict of (str, bool)

    :todo: Change `have` from dictionary to set.
    """
    have = dict()
    for module_name in module_names:
        try:
            # For a dotted name `__import__()` returns the top level
            # package, not the submodule.
            module = __import__(module_name)
        except ImportError:
            have[module_name] = False
        else:
            # Bind the package under its own name.  A global with a dot in
            # its name could never be referenced by ordinary code.
            globals()[module_name.split('.')[0]] = module
            have[module_name] = True

    return have

#
# Make sure that static code checkers like PyChecker and PyLint know about
# the module names and then try to import the modules.
#
# Pre-bind the top level module names to `None`.  `try_import()` rebinds the
# names of those modules that can be imported.
mad = id3reader = ID3 = ogg = None
# 'ogg.vorbis' is listed in addition to 'ogg' so its availability is
# recorded separately in `HAVE`.
MODULES = ('mad', 'id3reader', 'ID3', 'ogg', 'ogg.vorbis')
# Maps every name in `MODULES` to `True` (imported) or `False` (missing).
HAVE = try_import(MODULES)

#-----------------------------------------------------------------------------
# Functions to read meta data from media files.
#
# Every format should have a name for the function that reads its meta
# data, consisting of the prefix "read_" and the file name extension.
# This name should be initialized with `dummy_reader`.
#
# Then one or more real functions are defined and finally one of them
# should be bound to the name mentioned above, depending on the
# availability of the modules used in the functions.
#
# The functions return a dictionary with the meta data.  The minimal
# postcondition is an empty dictionary if no information could be
# read from the file.
#
# Keys to use (yes, they are all uppercase!):
#
# ARTIST, TITLE, TIME (playing time in seconds)
#
# All other keys are currently not used by any output format.

# 
# Dummy reader
# 

def dummy_reader(dummy):
    """Fallback meta data reader that provides no information at all.

    :param dummy: path of the media file.  It is not looked at.

    :return: a new, empty dictionary.
    :rtype: dict
    """
    return {}

#
# MP3
#

# Default: no meta data for MP3 files.  The name is rebound further below if
# the modules needed by one of the real reader functions are available.
read_mp3 = dummy_reader

def read_mp3_id3(path):
    """Reads info from MP3 file.  Just ID3 V1 tags.

    The tag is read with the `ID3` module, the playing time is taken from
    `mad`.

    :param path: path of the MP3 file.
    :type path: str

    :return: the tag as dictionary plus the key 'TIME'.
    :rtype: dict
    """
    metadata = ID3.ID3(path).as_dict()
    # Presumably `total_time()` is given in milliseconds -- the value is
    # divided by 1000 to get the seconds stored under 'TIME'.
    total_time = mad.MadFile(path).total_time()
    metadata['TIME'] = str(int(total_time / 1000))
    return metadata


def read_mp3_id3reader(path):
    """Reads info from MP3 file with the `id3reader` and `mad` modules.

    If the tag can not be read, this is logged and only the playing time
    is returned.

    :param path: path of the MP3 file.
    :type path: str

    :return: dictionary with the key 'TIME' and, if present in the tag,
        'ARTIST', 'ALBUM', and 'TITLE'.
    :rtype: dict
    """
    mad_file = mad.MadFile(path)
    info = {'TIME': str(int(mad_file.total_time() / 1000))}

    # Pairs of the name `id3reader` uses and the key used in the meta data
    # dictionary.  Nothing else is needed from the tag.
    wanted = (('performer', 'ARTIST'),
              ('album', 'ALBUM'),
              ('title', 'TITLE'))
    try:
        reader = id3reader.Reader(path)
        for value_name, key in wanted:
            value = reader.getValue(value_name)
            if not value:
                continue
            if isinstance(value, unicode):
                value = value.encode('latin-1', 'replace')
            info[key] = value
    except id3reader.Id3Error:
        log.info("Could not read mp3 file: " + path)

    return info


# Both real readers need `mad` for the playing time.  The `id3reader` based
# function is preferred over the `ID3` based one.  If neither combination
# is available, `read_mp3` stays bound to `dummy_reader`.
if HAVE['mad'] and HAVE['id3reader']:
    read_mp3 = read_mp3_id3reader
elif HAVE['mad'] and HAVE['ID3']:
    read_mp3 = read_mp3_id3


#
# Ogg Vorbis
#

# Default: no meta data for Ogg Vorbis files.  The name is rebound further
# below if the `ogg.vorbis` module is available.
read_ogg = dummy_reader

def read_ogg_ogg(path):
    """Reads info from Ogg Vorbis file.

    Every comment key is mapped to the first of its values, so the result
    is a flat dictionary of strings.  Keys without any value are left out.

    :param path: path of the Ogg Vorbis file.
    :type path: str

    :return: the comments of the file plus the key 'TIME'.
    :rtype: dict of str -> str
    """
    vfile = ogg.vorbis.VorbisFile(path)
    info = dict()
    # NOTE(review): the values of `as_dict()` are assumed to be sequences
    # of strings, one item per occurrence of the key -- confirm against the
    # installed pyvorbis version.
    for (key, values) in vfile.comment().as_dict().items():
        if not values:
            continue
        value = values[0]
        if isinstance(value, unicode):
            value = value.encode('utf-8')
        # Store the first value for byte strings too.  Leaving the whole
        # sequence in place would end up as "['...']" in the playlist.
        info[key] = value
    # -1 tells time_total to return the time of the whole stream
    # in seconds (type float).
    info['TIME'] = str(int(vfile.time_total(-1)))
    return info


# Use the real reader only if `ogg.vorbis` could be imported.  Otherwise
# `read_ogg` stays bound to `dummy_reader`.
if HAVE['ogg.vorbis']:
    read_ogg = read_ogg_ogg

#-----------------------------------------------------------------------------

class PlaylistEntry:
    """A generic playlist entry with a `path` attribute and dictionary
    like behavior for meta data.

    `PlaylistEntry` objects can be converted to strings and are
    comparable with their `path` attribute as key.

    The meta data contains at least the path of the media file under the
    key 'PATH'.

    :ivar path: path of the media file.
    :type path: str
    :ivar time: modification time of the media file at the time the entry
        was created.
    :type time: number
    :ivar metadata: meta data of the media file.
    :type metadata: dict of str -> str

    :invariant: self['PATH'] == self.path

    :note: Instances are pickled into the cache file by
        `PlaylistEntryFactory`.  NOTE(review): changing the kind of this
        class (e.g. deriving it from `object`) may make existing cache
        files unreadable -- check before doing so.
    """
    def __init__(self, path, metadata=None):
        """Creates a playlist entry.

        :param path: the path of the media file.  The file must exist
            because its modification time is read.
        :type path: str
        :param metadata: a dictionary with meta data.  The dictionary is
            not copied, the key 'PATH' is added to the given object.
        :type metadata: dict of str -> str

        :precondition: The meta data must not contain a key 'PATH'.
        :postcondition: The meta data contains the `path` under the key
            'PATH'.

        :raise OSError: if `path` can not be examined with `os.stat()`.
        """
        self.path = path
        self.time = os.stat(path).st_mtime

        if metadata is None:
            metadata = dict()

        metadata['PATH'] = self.path
        self.metadata = metadata

        # TODO: Some black magic to fill the metadata from examining
        #       the file name if the dict is empty.

    def __getitem__(self, key):
        """Returns the meta data for `key` or an empty string if there is
        no such key.
        """
        return self.metadata.get(key, '')

    def __setitem__(self, key, value):
        """Sets the meta data for `key`.  Setting 'PATH' also updates the
        `path` attribute.
        """
        self.metadata[key] = value
        if key == 'PATH':
            # Keep the attribute in sync with the meta data: the *value* is
            # the new path, not the key.
            self.path = value

    def __cmp__(self, other):
        """Compares two entries by their paths."""
        return cmp(self.path, other.path)

    #
    # Rich comparisons that agree with `__cmp__()`.  `__lt__()` is what
    # sorting a list of entries uses.
    #
    def __eq__(self, other):
        return self.path == other.path

    def __ne__(self, other):
        return self.path != other.path

    def __lt__(self, other):
        return self.path < other.path

    def __str__(self):
        return self.path

class PlaylistEntryFactory:
    """A media file factory allows registration of media file types,
    their file name extensions and functions for reading meta data from
    the files.

    This is not a "real" class but more an abuse of a class as a
    namespace.  If the program grows so large that it will become
    unavoidable to split it into several modules, the contents of
    this class may be moved to the top level of a module.

    :ivar types: dictionary that maps a file name extension (lower case,
        with leading dot) to a tuple containing a descriptive name of the
        type and a function to read meta data from this file type.
    :type types: dict
    :ivar cache: dictionary that maps absolute path names to the
        `PlaylistEntry` objects created for them.
    :type cache: dict
    :ivar cachefile: pathname of a file to store the pickled data from
        cached PlaylistEntries.  Initialized with `None` and not used by
        the methods of this class.
    :type cachefile: string
    """
    def __init__(self):
        self.types = dict()
        self.cache = dict()
        self.cachefile = None

    def read_cache(self, filename):
        """Reads cached playlist entries from `filename`.

        A cache file that is missing, unreadable, damaged, or that does not
        contain a dictionary is just logged and ignored.  The current cache
        is left untouched in these cases.

        :param filename: name of the cache file.
        :type filename: str
        """
        #
        # The exception objects are fetched with `sys.exc_info()` because
        # that works unchanged across Python versions.
        #
        try:
            cachefile = open(filename, "rb")
            try:
                #
                # NOTE(security): Unpickling can execute arbitrary code, so
                # the cache file must come from a trusted location.
                #
                cache = pickle.load(cachefile)
            finally:
                cachefile.close()
        except IOError:
            #
            # Error is just logged because it is not critical.
            #
            log.info(sys.exc_info()[1])
            log.info("Can't open cache %r" % filename)
            return
        except (pickle.UnpicklingError, EOFError, AttributeError,
                ImportError, IndexError, KeyError, TypeError, ValueError):
            #
            # A truncated or otherwise damaged cache file.  Not critical
            # either, the meta data is simply read from the media files
            # again.
            #
            log.info(sys.exc_info()[1])
            log.info("Can't read cache %r" % filename)
            return

        if isinstance(cache, dict):
            self.cache = cache
        else:
            log.info("Ignoring cache %r: unexpected content" % filename)

    def write_cache(self, filename):
        """Writes cached playlist entries to `filename`.

        :param filename: name of the cache file.
        :type filename: str
        """
        try:
            cachefile = open(filename, "wb")
            try:
                pickle.dump(self.cache, cachefile)
            finally:
                cachefile.close()
        except IOError:
            #
            # Error is just logged because it is not critical.  Writing the
            # cache is not really necessary for the main purpose of the
            # program.
            #
            log.info(sys.exc_info()[1])
            log.info("Can't write cache %r" % filename)

    def register_type(self, name, extensions, metadata_reader=None):
        """Register a media file type, its name, and a function to
        read meta data from the file.

        :note: Re-registering extensions replaces the existing
            entries.  Acceptable behavior!?

        :param name: descriptive name of the media file type.
        :type name: str
        :param extensions: a single file name extension or a sequence
            of file name extensions of the media file type.  A leading
            dot is optional and the case does not matter.
        :type extensions: str or sequence of str
        :param metadata_reader: function that reads the meta data from
            a given media file.  With `None` no meta data is read for
            this type.
        :type metadata_reader: callable or `None`
        """
        if isinstance(extensions, basestring):
            extensions = [extensions]

        for extension in extensions:
            if not extension.startswith('.'):
                extension = '.' + extension
            self.types[extension.lower()] = (name, metadata_reader)

    def is_media_file(self, path):
        """Check file if it is a known media file.

        The check is based on the filename extension and is case
        insensitive.

        :param path: filename of the file to check.
        :type path: str

        :returns: `True` if known media file, `False` otherwise.
        :rtype: bool
        """
        file_extension = os.path.splitext(path)[1].lower()
        return file_extension in self.types

    def create_entry(self, path):
        """Reads metadata and returns a `PlaylistEntry` object.

        If path is not in the cache or the cache entry is outdated (the files
        mtime is not equal to the cache-entry mtime) the cache entry gets
        updated.

        :param path: path to the media file.
        :type path: str

        :return: playlist entry for the media file.
        :rtype: `PlaylistEntry`

        :raise KeyError: if the file name extension of `path` is not
            registered.
        """
        #
        # Check if path is cached.
        #
        abspath = os.path.abspath(path)
        if abspath in self.cache:
            playlist_entry = self.cache[abspath]
            if os.stat(abspath).st_mtime == playlist_entry.time:
                log.debug("(cached)")
                #
                # The path has to be set because the cached value may
                # contain a relative or an absolute path.  Attribute and
                # meta data are both set to keep them in sync.
                #
                playlist_entry.path = path
                playlist_entry.metadata['PATH'] = path
                return playlist_entry
            log.debug("(outdated)")

        file_extension = os.path.splitext(path)[1].lower()
        metadata_reader = self.types[file_extension][1]
        if metadata_reader is None:
            #
            # Type was registered without a reader function.
            #
            metadata = dict()
        else:
            metadata = metadata_reader(path)
        playlist_entry = PlaylistEntry(path, metadata)
        self.cache[abspath] = playlist_entry
        log.debug("(new)")
        return playlist_entry

#
# Create a PlaylistEntryFactory instance and populate it with the
# known file extensions.
#
factory = PlaylistEntryFactory()

# Each tuple holds the file name extension, a descriptive name of the type,
# and the function that reads the meta data.  Types without a real reader
# function use `dummy_reader`.
TYPES = (
    ("mp3", "MPEG II Layer 3", read_mp3),
    ("ogg", "Ogg Vorbis", read_ogg),
    ("wav", "Wave", dummy_reader),
    ("flac", "Free Lossless Audio Codec", dummy_reader),
    ("ac3", "AC3/A52", dummy_reader),
    ("mod", "Tracker Module", dummy_reader),
    ("s3m", "Scream Tracker 3", dummy_reader),
    ("it", "Impulse Tracker", dummy_reader))

# The loop names start with an underscore because they stay around as module
# level names after the loop.
for (_ext, _name, _metadata_reader) in TYPES:
    factory.register_type(_name, _ext, _metadata_reader)


#-----------------------------------------------------------------------------
# Playlist writers.

def write_m3u(playlist, outfile):
    """Writes the playlist in m3u format.

    That is the string form of every entry on a line of its own.

    :param playlist: the entries to write.
    :type playlist: iterable of `PlaylistEntry`
    :param outfile: file like object to write to.
    """
    for item in playlist:
        outfile.write(str(item) + '\n')


def write_extm3u(playlist, outfile):
    """Writes the playlist in extended m3u format.

    After the header line every entry is written on a line of its own.
    It is preceded by an ``#EXTINF`` line if artist, title, and playing
    time of the entry are all known.

    :param playlist: the entries to write.
    :type playlist: iterable of `PlaylistEntry`
    :param outfile: file like object to write to.
    """
    outfile.write('#EXTM3U' + '\n')
    for item in playlist:
        is_complete = item['ARTIST'] and item['TITLE'] and item['TIME']
        if is_complete:
            extinf = '#EXTINF:%s,%s - %s' % (item['TIME'],
                                             item['ARTIST'],
                                             item['TITLE'])
            outfile.write(extinf + '\n')
        outfile.write(str(item) + '\n')


def write_pls(playlist, outfile):
    """Write the `playlist` in PLS format.

    Entries without a title get the base name of their path as title.
    Entries without a known playing time get the length -1.

    :param playlist: the entries to write.
    :type playlist: iterable of `PlaylistEntry`
    :param outfile: file like object to write to.

    :todo: Guess playlist name from `outfile.name`.
    :todo: Add command line option for playlist title.
    """
    outfile.write('[playlist]\n')
    outfile.write('PlaylistName=Playlist\n')
    # The entries are numbered starting with 1.  After the loop `count`
    # is the number of entries, 0 for an empty playlist.
    count = 0
    for entry in playlist:
        count += 1
        outfile.write('File%d=%s\n' % (count, entry))
        title = entry['TITLE'] or os.path.basename(str(entry))
        outfile.write('Title%d=%s\n' % (count, title))
        # -1 is the conventional value for an unknown length.  An empty
        # value is not a number and may confuse players.
        length = entry['TIME'] or '-1'
        outfile.write('Length%d=%s\n' % (count, length))
    outfile.write('NumberOfEntries=%d\n' % count)
    outfile.write('Version=2\n')


# The keys are the format names that `main()` offers as choices for the
# ``--output-format`` option.
WRITERS = { 'm3u': write_m3u,
            'extm3u': write_extm3u,
            'pls': write_pls }

#-----------------------------------------------------------------------------


def search(path, absolute_paths=True):
    """Iterates over media files in `path` (recursively).

    The directory tree is traversed with `os.walk()`.  Symlinked
    directories are skipped to avoid endless scanning due to possible
    cycles in directory structure.

    Whether a file is a media file is decided by the module level
    `factory`, which also creates the entries.

    :param path: root of the directory tree to search.
    :type path: str
    :param absolute_paths: converts relative path names to absolute ones
        if set to `True`.
    :type absolute_paths: bool

    :returns: iterator over `PlaylistEntry` objects.
    :rtype: iterable of `PlaylistEntry`
    """
    for (directory, dummy, names) in os.walk(path):
        log.info("Scanning %s...", directory)
        # TODO: Check if there are linked directories.
        for name in names:
            if not factory.is_media_file(name):
                continue
            log.debug("found %s", name)
            media_path = os.path.join(directory, name)
            if absolute_paths:
                media_path = os.path.abspath(media_path)
            yield factory.create_entry(media_path)


def main():
    """Main function.

    Parses the command line, scans the given directories for media files,
    sorts or shuffles the entries and writes them in the selected playlist
    format to the output file or to stdout.  If the cache is enabled, it is
    read before scanning and written back after the playlist was written.
    """
    from optparse import OptionParser

    usage = ("usage: %prog [-h|--help|--version]\n"
             "       %prog [options] directory [directory ...]")

    # Sorted, so the formats show up in a stable order in the help text.
    formats = sorted(WRITERS)

    parser = OptionParser(usage=usage, version="%prog " + __version__)
    parser.add_option("-o", "--output", type="string", dest="outfile",
                      default='-',
                      help="name of the output file or '-' for stdout"
                           " (default)")
    parser.add_option("-f", "--output-format", type="choice",
                      dest="output_format", default="extm3u",
                      choices=formats,
                      help="format of the output %r (default: %%default)" %
                            formats)
    parser.add_option("-r", "--relative-paths", action="store_true",
                      dest="relative_paths", default=False,
                      help="write relative paths. (default: absolute paths)")
    parser.add_option("--shuffle", action="store_true", default=False,
                      help="shuffle the playlist before saving it.")
    parser.add_option("-c", "--cache", action="store_true", default=False,
                      dest="use_cache",
                      help="use a cache file. (default: %default)")
    parser.add_option("--cache-file", type="string", dest="cache_file",
                      default='~/.mkplaylist.cache',
                      help="name of the cache file (default: %default)")
    parser.add_option("-v", "--verbose", action="store_true", default=False,
                      dest="verbose", help="be more verbose.")
    parser.add_option("-q", "--quiet", action="store_true", default=False,
                      dest="quiet", help="be really quiet.")

    (options, args) = parser.parse_args()

    if not args:
        parser.error("wrong number of arguments")

    write_playlist = WRITERS[options.output_format]

    if options.quiet:
        log.setLevel(logging.WARNING)

    # Checked last, so --verbose wins if both options are given.
    if options.verbose:
        log.setLevel(logging.DEBUG)

    #
    # Reading cache file.
    #
    if options.use_cache:
        options.cache_file = os.path.expanduser(options.cache_file)
        log.debug("Cachefile is " + options.cache_file)
        factory.read_cache(options.cache_file)

    #
    # Input.
    #
    media_files = list(chain(*[search(path, not options.relative_paths)
                               for path in args]))

    #
    # Processing.
    #
    if options.shuffle:
        random.shuffle(media_files)
    else:
        media_files.sort()

    #
    # Output.
    #
    if options.outfile == '-':
        #
        # stdout is not ours to close, so it is just flushed.
        #
        write_playlist(media_files, sys.stdout)
        sys.stdout.flush()
    else:
        outfile = open(options.outfile, "w")
        try:
            write_playlist(media_files, outfile)
        finally:
            outfile.close()

    log.info("That's %d file(s).", len(media_files))

    #
    # Writing cache file.
    #
    if options.use_cache:
        factory.write_cache(options.cache_file)

# Run `main()` only if this file is executed as a script, not if it is
# imported as a module.
if __name__ == '__main__':
    main()
