import re, string, htmlentitydefs, urllib, urlparse, locale

# Matches a leading "scheme://" prefix: one or more word characters at the
# start of the string followed by "://".  Raw string so that "\w" reaches the
# regex engine instead of being an invalid string escape.
url_re = re.compile(r'^\w+://')
# Matches '&', any single character, the shortest possible run of word
# characters, then ';'.
entity = re.compile(r'\&.\w*?\;')
# Matches the shortest '<' ... '>' span that does not cross a newline.
html_tags = re.compile(r'\<.*?\>')

def is_url(text):
    """Tell whether text begins with a ``scheme://`` prefix.

    Returns the match object produced by ``url_re`` (truthy) when the prefix
    is present, and None otherwise.
    """
    prefix_match = url_re.match(text)
    return prefix_match

# Matches one well-formed character reference ("&name;", "&#123;", "&#x1f;")
# so that read_text can copy it as a single unit.
_ENTITY_AT = re.compile(r'&#?\w+;')


def read_text(fragment, chars):
    """Read up to chars characters of text from the HTML fragment fragment.

    Everything from a '<' up to the closing '>' is skipped; a '>' inside a
    double-quoted attribute value does not close the tag.  A well-formed
    character reference such as ``&amp;`` is copied verbatim and counts as a
    single character.  An '&' that does not start such a reference is treated
    as an ordinary character, so it can neither defeat the chars limit nor
    let later markup leak into the result.

    Whitespace before the first emitted character is dropped.  A carriage
    return or newline is emitted as one space, and is dropped when the
    previously emitted character is already a space.

    Returns "" when fragment is None.  The result is a ``str``; on Python 2 a
    unicode fragment is returned UTF-8 encoded.
    """
    if fragment is None:
        return ""

    pieces = []
    taken = 0          # characters counted against the chars limit
    in_tag = False
    in_attr = False    # inside a double-quoted attribute value of a tag
    previous = None    # last emitted character ("" right after an entity)
    pos = 0
    end = len(fragment)

    while taken < chars and pos < end:
        current = fragment[pos]
        pos += 1

        if in_tag:
            if current == '"':
                in_attr = not in_attr
            elif current == '>' and not in_attr:
                in_tag = False
            continue

        if current == '<':
            in_tag = True
            continue

        if current == '&':
            reference = _ENTITY_AT.match(fragment, pos - 1)
            if reference is not None:
                # Copy the whole reference at once; it counts as one char.
                pieces.append(reference.group(0))
                pos = reference.end()
                previous = ""
                taken += 1
                continue
            # Not a reference: fall through and treat '&' as plain text.

        # Skip whitespace until something has been emitted.
        if not pieces and current in string.whitespace:
            continue

        if current == '\n' or current == '\r':
            current = ' '
            if previous == ' ':
                continue

        pieces.append(current)
        previous = current
        taken += 1

    text = "".join(pieces)
    if not isinstance(text, str):
        # Only reachable on Python 2 with unicode input: hand back UTF-8
        # bytes rather than a unicode object.
        text = text.encode('utf-8')
    return text

# Matches a decimal ("&#233;"), hexadecimal ("&#xE9;" / "&#XE9;") or named
# ("&eacute;") character reference; group 1 is the part between '&' and ';'.
_CHAR_REF = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|\w+);')


def convert_entities(text):
    """Replace HTML character references in text with the characters they name.

    Named references are looked up in the standard library's name-to-codepoint
    table, so names outside Latin-1 (``&mdash;``, ``&hellip;`` ...) become real
    characters.  Decimal and hexadecimal numeric references are supported,
    with either ``x`` or ``X`` as the hex prefix.

    Anything that cannot be converted (an unknown name, a malformed numeric
    reference, or a code point the interpreter cannot represent) is left in
    the text unchanged.  Conversion is a single pass: ``&amp;lt;`` becomes
    ``&lt;``, not ``<``.
    """
    # Resolved locally so the function works with both the Python 2 and the
    # Python 3 standard-library layouts.
    try:
        from htmlentitydefs import name2codepoint
        to_char = unichr
    except ImportError:
        from html.entities import name2codepoint
        to_char = chr

    def conv(match):
        body = match.group(1)
        if body[0] == '#':
            # The pattern guarantees at least one valid digit after the
            # optional hex prefix, so int() cannot fail here.
            if body[1] in 'xX':
                codepoint = int(body[2:], 16)
            else:
                codepoint = int(body[1:], 10)
        else:
            codepoint = name2codepoint.get(body)
            if codepoint is None:
                return match.group(0)
        try:
            return to_char(codepoint)
        except (ValueError, OverflowError):
            # Code point outside the range this interpreter supports.
            return match.group(0)

    return _CHAR_REF.sub(conv, text)

def is_html(text):
    """Return 1 if text contains a ``<...>`` span matched by html_tags, else 0."""
    if html_tags.search(text) is None:
        return 0
    return 1

def complete_url(url, feed_location):
    """Percent-quote url and make it absolute relative to feed_location.

    Every character outside letters, digits and ASCII punctuation is
    percent-encoded (existing ``%XX`` escapes are kept because '%' is
    punctuation).  If the quoted URL has no scheme it is joined onto
    feed_location; otherwise it is returned as is.

    Unicode input containing non-ASCII characters is encoded as UTF-8 before
    quoting, because Python 2's ``urllib.quote`` raises KeyError on it.
    """
    # Resolved locally so the function works with both the Python 2 and the
    # Python 3 standard-library layouts.
    try:
        from urllib import quote
        from urlparse import urljoin, urlparse as parse_url
    except ImportError:
        from urllib.parse import quote, urljoin, urlparse as parse_url

    # True only for a Python 2 unicode object (on Python 3 text is already
    # ``str`` and quote() does the UTF-8 encoding itself).
    if not isinstance(url, str) and isinstance(url, type(u'')):
        url = url.encode('utf-8')

    url = quote(url, safe=string.punctuation)
    if parse_url(url)[0] == '':
        return urljoin(feed_location, url)
    return url

def get_encoding():
    """Return the encoding of the default locale, or 'ISO8859-1' as a fallback.

    The fallback is used when the locale reports no encoding (None or an
    empty string) or when querying the locale raises ValueError.
    """
    fallback = 'ISO8859-1'
    try:
        _lang, detected = locale.getdefaultlocale()
    except ValueError:
        return fallback
    if detected is None or detected == '':
        return fallback
    return detected
