import re, string, htmlentitydefs, urllib, urlparse, locale, time, calendar
import error

# Pre-compiled patterns shared by the helpers in this module.  All of them are
# raw strings so the regex escapes reach the re module untouched.

# Text that starts with a "scheme://" prefix.
url_re = re.compile(r'^\w+://')
# An ampersand, one arbitrary character, optional word characters and a
# terminating semicolon (e.g. "&amp;", "&#233;").
entity = re.compile(r'\&.\w*?\;')
# Shortest "<...>" run on a single line.
html_tags = re.compile(r'\<.*?\>')

def is_url(text):
    """Match text against url_re.

    Returns the match object when text starts with a ``scheme://`` prefix,
    otherwise None, so the result can be used directly as a truth value.
    """
    match = url_re.match(text)
    return match

# A complete character reference starting at "&": named, decimal or hex.
_ENTITY_REF = re.compile(r'&(?:#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z][A-Za-z0-9]*);')

def read_text(fragment, chars):
    """Read up to chars characters of text from the html fragment fragment.

    Markup between "<" and ">" is skipped; a ">" inside a double-quoted
    attribute value does not close the tag.  A well-formed character
    reference ("&amp;", "&#233;", "&#xe9;") is copied verbatim and counts as
    a single character.  An "&" that does not start such a reference is
    ordinary text and counts as one character, so a stray ampersand can no
    longer switch off tag stripping or the length limit for the rest of the
    fragment.

    Leading whitespace is dropped.  Newlines and carriage returns become a
    space, and such a converted space is dropped when the previous character
    kept was already a space.

    Returns "" when fragment is None.
    """
    if fragment is None:
        return ""

    res = []
    res_len = 0          # characters counted so far (a reference counts once)
    in_tag = False
    in_attr = False
    previous = None      # last plain character appended, "" after a reference
    frag_len = len(fragment)
    i = 0
    while res_len < chars and i < frag_len:
        current = fragment[i]
        i += 1
        if in_tag:
            # Inside markup: only track quoting and the closing bracket.
            if current == '"':
                in_attr = not in_attr
            elif current == '>' and not in_attr:
                in_tag = False
        elif current == "<":
            in_tag = True
        elif current == "&":
            ref = _ENTITY_REF.match(fragment, i - 1)
            if ref is not None:
                # Copy the whole reference at once so it is never cut in half.
                res.append(ref.group(0))
                i = ref.end()
                previous = ""
            else:
                res.append(current)
                previous = current
            res_len += 1
        # avoid beginning whitespace, newlines and returns, and repeated
        # spaces produced by line breaks
        elif res or current not in string.whitespace:
            if current == '\n' or current == '\r':
                current = ' '
                if previous == ' ':
                    continue
            res.append(current)
            previous = current
            res_len += 1
    return "".join(res)

def convert_entities(text):
    """Replace HTML character references in text with unicode characters.

    Every span matched by the module-level ``entity`` pattern is handled:

    * numeric references ("&#233;", "&#xe9;", "&#XE9;") are converted with
      unichr();
    * named references are looked up in htmlentitydefs.name2codepoint, so the
      result does not depend on the process locale;
    * anything that cannot be converted (unknown name, malformed or
      out-of-range number) is left in the text unchanged instead of being
      dropped or raising.

    Returns text itself when it contains no match at all.
    """
    def conv(match):
        ref = match.group(0)
        body = ref[1:-1]

        if body.startswith('#'):
            digits = body[1:]
            base = 10
            if digits[:1] in ('x', 'X'):
                digits = digits[1:]
                base = 16
            try:
                return unichr(int(digits, base))
            except (ValueError, OverflowError):
                # empty/garbled digits, or a code point unichr() rejects
                return ref

        codepoint = htmlentitydefs.name2codepoint.get(body)
        if codepoint is None:
            return ref
        try:
            return unichr(codepoint)
        except ValueError:
            return ref

    if entity.search(text) is None:
        return text
    return entity.sub(conv, text)

def is_html(text):
    """Return 1 if html_tags finds a "<...>" run in text, otherwise 0."""
    if html_tags.search(text) is None:
        return 0
    return 1

def complete_url(url, feed_location):
    url = urllib.quote(url, safe=string.punctuation)
    if urlparse.urlparse(url)[0] == '':
        return urlparse.urljoin(feed_location, url)
    else:
        return url

def get_url_location(url):
    url = urllib.quote(url, safe=string.punctuation)
    parsed_url = urlparse.urlsplit(url)
    return urlparse.urlunsplit((parsed_url[0], parsed_url[1], '','',''))

def get_locale(lang=False, enc=False):
    """Return the LC_MESSAGES locale of the process.

    lang -- if true, return only the language (normalized with
            locale.normalize, or None when no language is set).
    enc  -- if true (and lang is false), return only the encoding, falling
            back to 'ISO8859-1' when none is set.

    With neither flag a ``(language, encoding)`` tuple is returned.

    When the locale cannot be determined at all (locale.getlocale raises, or
    the platform has no LC_MESSAGES) the defaults 'en_US' / 'ISO8859-1' are
    used, and the flags are still honoured so callers always get the shape
    they asked for.
    """
    default_language, default_encoding = 'en_US', 'ISO8859-1'

    try:
        language, encoding = locale.getlocale(locale.LC_MESSAGES)
    except Exception:
        # Best effort: any failure here means "use the defaults".
        language, encoding = default_language, default_encoding
    else:
        if language is not None:
            # only setlocale uses this atm. extend when needed.
            language = locale.normalize(language)

    if lang:
        return language
    if enc:
        if encoding:
            return encoding
        return default_encoding
    return (language, encoding)


weekday = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

def format_date(date, format, encoding):
    try:
        timestr = time.strftime(format, time.localtime(calendar.timegm(date)))
    except Exception, ex:
        #time is time.struct_time
        timestr = "%s, %02d %s %02d:%02d" % (weekday[date.tm_wday], date.tm_mday,
                                             months[date.tm_mon - 1],
                                             date.tm_hour, date.tm_min)

    return unicode(timestr, encoding)

# Maximum number of text characters kept when a title is derived or stripped.
MAX_DISPLAY_CHAR = 60
def convert_title(title, description = ''):
    """Return a displayable title with character references converted.

    When title is None or empty, the first MAX_DISPLAY_CHAR text characters
    of description followed by '...' are used instead.  A title that contains
    markup is reduced to its first MAX_DISPLAY_CHAR text characters.

    Returns None when there is neither a title nor a usable description
    (description is empty, or has no length, which is also logged).
    """
    if title is None or title == '':
        try:
            if len(description):
                title = read_text(description, MAX_DISPLAY_CHAR) + '...'
        except TypeError:
            error.log("description is unsized")
            # no title, no description, leave it.
            return None
        if title is None:
            # Empty description: nothing to build a title from.  Without
            # this guard None would reach is_html() and raise TypeError.
            return None
    if is_html(title):
        title = read_text(title, MAX_DISPLAY_CHAR)
    return convert_entities(title)
