# -*- coding: utf-8 -*-

#    This file is part of Gnomolicious.
#
#    Gnomolicious is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    Gnomolicious is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with Gnomolicious; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#    (C) 2005 Nicolas Évrard <nicoe@nutellux.be>

__revision__ = "$Id: delicious.py,v 1.4 2006/01/02 23:15:27 nicoe Exp $"

import re
import urllib
import urllib2
import StringIO
import datetime
import logging
import md5
import lxml.etree

from sets import Set as set

logger = logging.getLogger('gnomolicious')
API_URL = 'http://del.icio.us/api'
NAMESPACES = {'rdf' : 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
              'rss' : 'http://purl.org/rss/1.0/',
              'dc' : 'http://purl.org/dc/elements/1.1/' }
TIME_RE = re.compile('''([0-9]{4,4})-([0-9]{2,2})-([0-9]{2,2})
                        T
                        ([0-9]{2,2}):([0-9]{2,2}):([0-9]{2,2})''', re.VERBOSE)

class Singleton(object):
    """Base class that hands out a single shared instance per class.

    The object created for a class is remembered in ``_classes`` (keyed by
    the class itself) and returned again by every later instantiation.
    Note that Python still calls ``__init__`` on the returned object each
    time the class is instantiated.
    """

    # Maps a class to the one instance created for it.
    _classes = {}

    def __new__(cls):
        try:
            instance = cls._classes[cls]
        except KeyError:
            # First request for this class: create the object and keep it.
            instance = object.__new__(cls)
            cls._classes[cls] = instance
            return instance
        logger.debug('Returning the old class')
        return instance

class RequestHandler(Singleton):
    """Holder for the urllib2 opener used to talk to del.icio.us.

    Singleton returns the same object on every instantiation, and
    ``__init__`` runs again each time, so every ``RequestHandler()`` call
    installs a fresh auth handler and opener; credentials registered through
    an earlier call are dropped.
    """

    def __init__(self):
        handler = urllib2.HTTPBasicAuthHandler()
        self.auth_handler = handler
        self.opener = urllib2.build_opener(handler)

    def addAuthentication(self, username, password):
        """Register HTTP basic-auth credentials for the del.icio.us API."""
        realm = 'del.icio.us API'
        uri = 'http://del.icio.us'
        self.auth_handler.add_password(realm, uri, username, password)

def get_url(url, auth_data=None):
    """
    Download *url* and return its body wrapped in a StringIO object.

    auth_data, when given and non-empty, is a mapping with the keys
    'user' and 'pass'; those credentials are registered for HTTP basic
    authentication before the request is made.

    Errors raised by the urllib2 opener are not caught here.

    >>> isinstance(get_url('http://www.google.com'), StringIO.StringIO)
    True
    >>> isinstance(get_url('http://del.icio.us/api/update', {
    ...    'user' : 'nicoe', 'pass' : 'tototo'}), StringIO.StringIO)
    True
    """
    req = RequestHandler()
    if auth_data:
        req.addAuthentication(auth_data['user'], auth_data['pass'])
    response = req.opener.open(url)
    try:
        # Read everything now so the connection can be released right away.
        return StringIO.StringIO(response.read())
    finally:
        response.close()

class Post(object):
    """One bookmark read from an RSS ``item`` element.

    Attributes:
        user:  the object passed in as the owner of the post.
        href:  a WebPage built from the item's ``rss:link`` text.
        title: text of the item's ``rss:title`` element.
        time:  text of the item's ``dc:date`` element (kept as a string).
        tags:  set of Tag objects built from the whitespace-separated
               ``dc:subject`` text; empty when the item carries no tags.
    """

    def __init__(self, user, xml):
        """Build the post from *xml*, an lxml element for one RSS item.

        Raises IndexError when the item lacks a link, title or date element.
        """
        self.user = user
        # The namespace map is passed by keyword, which is how lxml
        # documents the xpath() argument.
        self.href = WebPage(
            xml.xpath('rss:link', namespaces=NAMESPACES)[0].text)
        self.title = xml.xpath('rss:title', namespaces=NAMESPACES)[0].text
        self.time = xml.xpath('dc:date', namespaces=NAMESPACES)[0].text
        # An untagged item may have no dc:subject element, or an empty one
        # whose text is None; both mean "no tags".
        subjects = xml.xpath('dc:subject', namespaces=NAMESPACES)
        raw_tags = ''
        if subjects and subjects[0].text:
            raw_tags = subjects[0].text
        self.tags = set([Tag(tag) for tag in raw_tags.split()])

    def __hash__(self):
        return hash(self.href)

    def __str__(self):
        """Return the href followed by the tags, time and user fields."""
        header = '%s\n' % self.href
        fields = ['  %s: %s' % (name, getattr(self, name))
                  for name in ('tags', 'time', 'user')]
        return header + '\n  '.join(fields)

class User(object):
    """A del.icio.us account: its public URLs, recent posts and tags.

    The tag list is cached and only downloaded again when the API's
    ``posts/update`` call reports a newer change time than the one seen at
    the previous download.
    """

    def __init__(self, uname, password=''):
        self.username = uname
        self.password = password
        # Change time associated with the cached tag list.
        self.__last_update = datetime.datetime.min
        self.__tags = []

    def _auth_data(self):
        """Return the credentials mapping expected by get_url()."""
        return {'user' : self.username, 'pass' : self.password}

    def _get_url(self):
        return 'http://del.icio.us/%s' % self.username
    url = property(_get_url)

    def _get_rss_url(self):
        return 'http://del.icio.us/rss/%s' % self.username
    rss_url = property(_get_rss_url)

    def __hash__(self):
        return hash(self.username)

    def _get_posts(self):
        """Yield a Post for every item of the user's RSS feed.

        The feed is downloaded again each time the generator is started.
        """
        doctree = lxml.etree.parse(get_url(self.rss_url))
        # The namespace map is passed by keyword, which is how lxml
        # documents the xpath() argument.
        latest_posts = doctree.xpath('/rdf:RDF/rss:item',
                                     namespaces=NAMESPACES)
        for post in latest_posts:
            yield Post(self, post)
    posts = property(_get_posts)

    def _fetch_tags(self):
        """Download the tag list, replacing the cached one.

        Each cached entry is the attribute mapping of one child element of
        the ``<tags>`` document returned by the API.
        """
        self.__last_update = datetime.datetime.now()
        self.__tags = []
        doctree = lxml.etree.parse(get_url('%s/tags/get' % API_URL,
                                           self._auth_data()))
        for tag in doctree.xpath('/tags/*'):
            self.__tags.append(tag.attrib)

    def _get_tags(self):
        """
        Return the user's tags, refreshing the cache when it is outdated.

        Raises ValueError when the change time reported by the API does not
        match TIME_RE.

        >>> nicoe = User('nicoe', 'tuxedo')
        >>> isinstance(nicoe.tags, list) and bool(nicoe.tags)
        True
        """
        doctree = lxml.etree.parse(get_url('%s/posts/update' % API_URL,
                                           self._auth_data()))
        date_str = doctree.xpath('/update[1]')[0].attrib['time']
        match = TIME_RE.match(date_str)
        if match is None:
            raise ValueError('unexpected update time: %r' % (date_str,))
        updated = datetime.datetime(*[int(part) for part in match.groups()])
        if updated > self.__last_update:
            self._fetch_tags()
            # Remember the server's own timestamp instead of the local
            # clock, so the next comparison is server time against server
            # time and does not depend on the local time zone or clock.
            self.__last_update = updated
        return self.__tags
    tags = property(_get_tags)

    def do_post(self, url, desc, ext, tags):
        """Add a bookmark through the API's ``posts/add`` call.

        The response body is printed and also returned.
        """
        data = {'url' : url,
                'description' : desc,
                'extended' : ext,
                'tags' : tags}
        posturl = '%s/posts/add?' % API_URL + urllib.urlencode(data)
        # Print the response text, not the repr of the StringIO wrapper.
        body = get_url(posturl, self._auth_data()).read()
        print(body)
        return body

class Tag(object):
    """A del.icio.us tag, identified by its lower-cased name.

    Two tags compare equal (and hash alike) when their names are equal, so
    a set of tags holds each name only once.
    """

    def __init__(self, tagname):
        self.name = tagname.lower()

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        if not isinstance(other, Tag):
            return NotImplemented
        return self.name == other.name

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def _get_url(self):
        return 'http://del.icio.us/tag/%s' % self.name
    url = property(_get_url)

    def _get_rss_url(self):
        return 'http://del.icio.us/rss/tag/%s' % self.name
    # Exposed as rss_url (as on User) so it no longer overwrites url.
    rss_url = property(_get_rss_url)

# Patterns for the markup that is stripped from a page before parsing.
# HTML tag names are case-insensitive, so <SCRIPT> and <Form> are matched
# as well as the lower-case spelling.
script_re = re.compile('<script[^>]*>.*?</script>', re.DOTALL | re.IGNORECASE)
form_re = re.compile('<form[^>]*>.*?</form>', re.DOTALL | re.IGNORECASE)
# Characters with code points 0-31 and 127-159.
# NOTE(review): values 128-159 can also occur inside multi-byte UTF-8
# sequences; if the input is UTF-8 encoded bytes, removing them may damage
# non-ASCII characters -- confirm against real pages.
wrong_chars = ''.join([chr(x) for x in
                       list(range(0, 32)) + list(range(127, 160))])
wrong_chars_re = re.compile('[%s]' % wrong_chars)

def remove_x(matcher):
    """Build a function that deletes every match of *matcher* from a string.

    matcher is a compiled regular expression; the returned function takes
    the text to clean and returns it with all matches removed.
    """
    def method(html):
        """
        Return html with every match of the bound pattern removed.

        >>> e = 'a<script>1</script>b<script type="text/javascript">6</script>c'
        >>> remove_scripts(e)
        'abc'
        
        >>> e = 'a<form>1</form>b<form action="toto">6</form>c'
        >>> remove_forms(e)
        'abc'

        >>> e = 'abcdeé'
        >>> remove_strangechars(e)
        'abcde\\xc3\\xa9'
        """
        cleaned = matcher.sub('', html)
        return cleaned
    return method

# Ready-made cleaners, one per pattern defined above.
remove_scripts = remove_x(script_re)
remove_forms = remove_x(form_re)
remove_strangechars = remove_x(wrong_chars_re)

class WebPage(object):
    """A bookmarked URL and the tags found on its del.icio.us listing page.

    Tag counts are scraped from ``http://del.icio.us/url/<md5 of the URL>``
    and cached; the ``tags`` property downloads them again once the cached
    copy is at least ten minutes old.
    """

    # One tag link on the listing page; group 2 holds the tag name.
    _TAG_LINK_RE = re.compile('<a class="tag"( .*?=.*?)*?>([^<]*?)</a>')
    # Age at which the cached tag counts are considered stale.
    _CACHE_LIFETIME = datetime.timedelta(seconds=600)

    def __init__(self, url):
        self.url = url
        self.__last_fetch = datetime.datetime.min
        self.__tags = {}

    def _get_listing_html(self):
        """Download and return the raw HTML of the page's listing."""
        url_hash = md5.new(self.url).hexdigest()
        return get_url('http://del.icio.us/url/%s' % url_hash).read()

    def _get_html_tree(self):
        """
        Return the listing page parsed by lxml, after removing scripts,
        forms and control characters from the HTML.

        >>> linuxfr = WebPage('http://linuxfr.org/my/')
        >>> linuxfr._get_html_tree() is not None
        True
        """
        html_file = self._get_listing_html()
        html_file = remove_scripts(html_file)
        html_file = remove_forms(html_file)
        html_file = remove_strangechars(html_file)
        return lxml.etree.parse(StringIO.StringIO(html_file))

    def _fetch_tags(self):
        """Refresh the tag counts by parsing the listing page with lxml."""
        doctree = self._get_html_tree()
        if doctree is None:
            return
        counts = {}
        for a in doctree.xpath("//a[@class='tag']"):
            counts[a.text] = counts.get(a.text, 0) + 1
        self.__tags = counts
        # Stamp the cache only after a successful download, so a failed
        # attempt is retried on the next access.
        self.__last_fetch = datetime.datetime.now()

    def _regexp_fetch_tags(self):
        """Refresh the tag counts by scanning the raw HTML with a regexp."""
        html = self._get_listing_html()
        counts = {}
        for match in self._TAG_LINK_RE.finditer(html):
            tag = match.group(2)
            counts[tag] = counts.get(tag, 0) + 1
        self.__tags = counts
        # Without this stamp the cache never ages and every access to
        # ``tags`` would download the page again.
        self.__last_fetch = datetime.datetime.now()

    def _get_tags(self):
        """
        Return a dict mapping each tag name to the number of times it
        appears on the listing page, refreshing the cache when stale.

        >>> google = WebPage('http://www.google.com/')
        >>> google_tags = google.tags
        >>> len(google_tags) > 0
        True
        """
        age = datetime.datetime.now() - self.__last_fetch
        if age >= self._CACHE_LIFETIME:
            # The regexp scraper is the one in use; _fetch_tags() is the
            # lxml-based alternative.
            self._regexp_fetch_tags()
        return self.__tags
    tags = property(_get_tags)

def _test():
    """Run the doctests embedded in this module's docstrings."""
    from doctest import testmod
    testmod()

# Running the file as a script executes its doctests.
if __name__ == '__main__':
    _test()
