# -*- coding: utf-8 -*-
# Copyright (C) 2010 Osama Khalid osamak[at]gnu.org
# Copyright (C) 2010  Michał Masłowski  <mtjm@mtjm.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


# Based on youogg's utilities.py file from SVN revision 50.


"""
Implementations of `getmediumurl.reader.URLReader` using different
backends.
"""


from urllib2 import build_opener, URLError, HTTPError
import socket

from getmediumurl.cache import cachedproperty
from getmediumurl.reader import URLReader, ReaderError, NotFoundError
from getmediumurl.readercache import dict_cache
from getmediumurl.utils import LOGGER


__all__ = ("UrllibURLReader", "DEFAULT_URLREADER")


socket.setdefaulttimeout(30)
#: HTTP ``User-Agent`` field value used by `_OPENER`.
_USER_AGENT = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.6) " \
    "Gecko/20100709 Trisquel/4.0 (taranis) Firefox/3.6.6"

#: ``urllib2`` opener used by `UrllibURLReader`.
_OPENER = build_opener()
_OPENER.addheaders = [("User-agent", _USER_AGENT)]


class UrllibURLReader(URLReader):

    """Find documents at URLs using ``urllib2``."""

    def __init__(self, url):
        """Read an `url`."""
        super(UrllibURLReader, self).__init__(url)
        self._document = None
        try:
            self._document = _OPENER.open(self._url)
        except HTTPError, ex:
            if ex.code == 404:
                raise NotFoundError(ex)
            else:
                raise ReaderError(ex)
        except URLError, ex:
            raise ReaderError(ex)
        self._target = self._document.url
        self._type = self._document.info().get("Content-Type", u"")

    def __del__(self):
        """Close."""
        if self._document is not None:
            self._document.close()

    @cachedproperty
    def content(self):
        """The document content as a string."""
        LOGGER.info("Reading %s", self._url)
        try:
            content = self._document.read()
        except URLError, ex:
            raise ReaderError(ex)
        self._document.close()
        self._document = None
        return content

    @property
    def content_type(self):
        """The document Content-Type header."""
        return self._type

    @property
    def url(self):
        """The document URL after redirections."""
        return self._target


# NOTE(review): `dict_cache` presumably reuses readers for URLs that were
# already requested -- confirm against `getmediumurl.readercache`.
#: A callable returning an appropriate `URLReader` instance: the
#: `UrllibURLReader` class wrapped by `dict_cache`.
DEFAULT_URLREADER = dict_cache(UrllibURLReader)
