# Written by John Hoffman
# Modified by Cameron Dale
# see LICENSE.txt for license information
#
# $Id: HTTPDownloader.py 400 2008-07-19 23:52:10Z camrdale-guest $

"""Manage downloading pieces over HTTP.

@type logger: C{logging.Logger}
@var logger: the logger to send all log messages to for this module
@type VERSION: C{string}
@var VERSION: the UserAgent identifier sent to all sites
@type haveall: L{haveComplete}
@var haveall: instance of the seed's bitfield

"""

from DebTorrent.CurrentRateMeasure import Measure
from random import randint
from urlparse import urlparse
from httplib import HTTPConnection, BadStatusLine
from urllib import quote
from threading import Thread
import logging
from DebTorrent.__init__ import product_name,version_short

# Module-level logger; every log message from this module goes through it.
logger = logging.getLogger('DebTorrent.BT1.HTTPDownloader')

# Value of the User-Agent header sent with requests: "<product name>/<short version>".
VERSION = product_name+'/'+version_short

class haveComplete:
    """Stand-in for L{DebTorrent.bitfield.Bitfield} describing an HTTP seed.

    An HTTP seed is treated as having every piece, so every question asked
    of this object is answered with C{True}.

    """

    def __getitem__(self, x):
        """Report the piece as available, whatever index is asked for.

        @param x: the piece index (ignored)
        @rtype: C{boolean}
        @return: always True

        """
        return True

    def complete(self):
        """Report the bitfield as complete.

        @rtype: C{boolean}
        @return: always True

        """
        return True


# Shared instance passed to the piece picker on behalf of every HTTP seed.
haveall = haveComplete()

class SingleDownload:
    """Control HTTP downloads from a single site.
    
    @type downloader: L{HTTPDownloader}
    @ivar downloader: the collection of all HTTP downloads
    @type baseurl: C{string}
    @ivar baseurl: the complete URL to append download info to
    @type netloc: C{string}
    @ivar netloc: the webserver address and port to connect to
        (from the L{baseurl})
    @type connection: C{HTTPConnection}
    @ivar connection: the connection to the HTTP server
    @type seedurl: C{string}
    @ivar seedurl: the path component from the L{baseurl}
    @type params: C{string}
    @ivar params: the parameters component from the L{baseurl}
    @type query: C{string}
    @ivar query: the query component from the L{baseurl}
    @type headers: C{dictionary}
    @ivar headers: the HTTP headers to send in the request
    @type measure: L{DebTorrent.CurrentRateMeasure.Measure}
    @ivar measure: tracks the download rate from the site
    @type index: C{int}
    @ivar index: the piece index currently being downloaded
    @type url: C{string}
    @ivar url: the URL to request from the site
    @type requests: C{list} of requests
    @ivar requests: a list of the requests for a piece's ranges
    @type request_size: C{int}
    @ivar request_size: the total size of all requests
    @type endflag: C{boolean}
    @ivar endflag: whether the download might be in end-game mode
    @type error: C{string}
    @ivar error: the error received from the server
    @type retry_period: C{int}
    @ivar retry_period: the time to wait before making another request
    @type _retry_period: C{int}
    @ivar _retry_period: the server-specified time to wait before making
        another request
    @type errorcount: C{int}
    @ivar errorcount: the number of download errors that have occurred since
        the last successful download from the site
    @type goodseed: C{boolean}
    @ivar goodseed: whether there has been a successful download from the seed
    @type active: C{boolean}
    @ivar active: whether there is a download underway
    @type cancelled: C{boolean}
    @ivar cancelled: whether the download has been cancelled
    @type received_data: C{string}
    @ivar received_data: the data returned from the most recent request
    @type connection_status: C{int}
    @ivar connection_status: the status code returned by the server for the 
        most recent request
    
    """
    
    def __init__(self, downloader, url):
        """Initialize the instance.

        The URL is split into its components, a connection object for the
        site is created, and the first download attempt is scheduled after a
        random delay of 2 to 10 seconds.

        If the URL cannot be parsed, is not an C{http} URL, or the connection
        object cannot be created, the problem is logged and reported through
        the downloader's C{errorfunc}, and no download is ever scheduled. The
        state attributes are still initialized in that case, so code that
        inspects the instance afterwards (for example by reading C{active})
        does not fail on a missing attribute.

        @type downloader: L{HTTPDownloader}
        @param downloader: the collection of all HTTP downloads
        @type url: C{string}
        @param url: the base URL to add download info to

        """

        self.downloader = downloader
        self.baseurl = url

        # Set up every piece of state before validating the URL, so that an
        # instance whose URL is rejected below is inert rather than half-built.
        self.netloc = None
        self.connection = None
        self.seedurl = ''
        self.params = ''
        self.query = ''
        self.headers = {'User-Agent': VERSION}
        self.measure = Measure(downloader.max_rate_period)
        self.index = None
        self.url = ''
        self.requests = []
        self.request_size = 0
        self.endflag = False
        self.error = None
        self.retry_period = 2
        self._retry_period = None
        self.errorcount = 0
        self.goodseed = False
        self.active = False
        self.cancelled = False
        self.received_data = None
        self.connection_status = None

        try:
            (scheme, self.netloc, path, params, query, fragment) = urlparse(url)
        except Exception:
            logger.exception('cannot parse http seed address: '+url)
            self.downloader.errorfunc('cannot parse http seed address: '+url)
            return
        if scheme != 'http':
            logger.error('http seed url not http: '+url)
            self.downloader.errorfunc('http seed url not http: '+url)
            return
        try:
            self.connection = HTTPConnection(self.netloc)
        except Exception:
            logger.exception('cannot connect to http seed: '+url)
            self.downloader.errorfunc('cannot connect to http seed: '+url)
            return

        # The file name is appended directly to the path, so make sure the
        # path ends with a separator.
        self.seedurl = path
        if path[-1:] != '/':
            self.seedurl += '/'
        if params:
            self.params = ';'+params
        if query:
            self.query = '?'+query+'&'

        self.resched(randint(2,10))

    def resched(self, len = None):
        """(Re)Schedule a download from the HTTP seed.
        
        @type len: C{int}
        @param len: the amount of time to wait before doing the download (seconds)
        
        """
        
        if len is None:
            len = self.retry_period
        if self.errorcount > 3:
            len = len * (self.errorcount - 2)
        self.downloader.rawserver.add_task(self.download, len)

    def _want(self, index):
        """Determine whether the piece is needed.
        
        @type index: C{int}
        @param index: the piece index
        @rtype: C{boolean}
        @return: whether the piece is needed
        
        """

        # Only use HTTP to download pieces not found in peers
        #if self.downloader.picker.numhaves[index] > 0:
        #    return False

        if self.endflag:
            return self.downloader.storage.do_I_have_requests(index)
        else:
            return self.downloader.storage.is_unstarted(index)

    def download(self):
        """Start a request for a piece.

        Asks the picker for a piece to download. If none is found, and the
        peer downloader has no downloaders, end-game mode is switched on and
        the picker is asked again. When there is still nothing to fetch the
        download is rescheduled; otherwise the request URL and headers are
        built and the request is started in a new thread.

        If the picker reports the download as complete, this instance removes
        itself from the downloader's list instead.

        """

        self.cancelled = False
        downloader = self.downloader
        picker = downloader.picker
        if picker.am_I_complete():
            downloader.downloads.remove(self)
            return

        self.index = picker.next(haveall, self._want)
        if self.index is None and not self.endflag:
            if not downloader.peerdownloader.has_downloaders():
                # Nothing unstarted is left and no peers are downloading, so
                # retry with the end-game selection rules.
                self.endflag = True
                self.index = picker.next(haveall, self._want)
        if self.index is None:
            self.endflag = True
            self.resched()
            return

        logger.info('downloading piece '+str(self.index))
        (range_start, range_end, total_length, path) = \
            downloader.storage.storage.get_file_range(self.index)
        prefix = downloader.filenamefunc()
        if len(prefix) > 0 and path.startswith(prefix):
            # Drop the save name and the separator that follows it
            path = path[1+len(prefix):]
        self.url = self.seedurl + path + self.params + self.query

        self._get_requests()
        # Never reuse the Range header left over from the previous request
        self.headers.pop('Range', None)
        if self.request_size < total_length:
            ranges = self._request_ranges(range_start, range_end)
            self.headers['Range'] = 'bytes=' + ranges

        worker = Thread(target = self._request,
                        name = 'HTTPDownloader.SingleDownload._request')
        worker.setDaemon(False)
        worker.start()
        self.active = True

    def _request(self):
        """Do the request.
        
        Send the request to the server and wait for the response. Then 
        process the response and save the result.

        This is the target of the thread started by L{download}. On success
        the status code and body are stored in C{connection_status} and
        C{received_data}. On any failure C{error} is set, C{received_data}
        stays C{None} and the connection object is replaced. In both cases
        L{request_finished} is queued on the downloader's rawserver at the
        end.
        
        """
        # NOTE(review): presumably imported here so these codecs are already
        # loaded when the request below needs them in this thread -- confirm
        import encodings.ascii
        import encodings.punycode
        import encodings.idna
        
        # Clear the results of the previous request
        self.error = None
        self.received_data = None
        try:
            logger.debug('sending request: GET '+self.url+' '+str(self.headers))
            self.connection.request('GET',self.url, None, self.headers)
            
            # Check for closed persistent connection due to server timeout
            try:
                r = self.connection.getresponse()
            except BadStatusLine:
                # Reopen the connection to get a new socket
                self.connection.close()
                self.connection.connect()
                # Resend the same request once on the new socket
                self.connection.request('GET',self.url, None, self.headers)
                r = self.connection.getresponse()
                
            logger.debug('got response: '+str(r.status)+', '+r.reason+', '+str(r.getheaders()))
            self.connection_status = r.status
            self.received_data = r.read()
        except Exception, e:
            logger.exception('error accessing http seed: '+str(e))
            self.error = 'error accessing http seed: '+str(e)
            # Best effort: discard the failed connection, ignoring any error
            # raised while closing it
            try:
                self.connection.close()
            except:
                pass
            # Replace it with a fresh connection object for the next attempt
            try:
                self.connection = HTTPConnection(self.netloc)
            except:
                self.connection = None  # will cause an exception and retry next cycle
        # Hand the result over to request_finished via the rawserver's task queue
        self.downloader.rawserver.add_task(self.request_finished)

    def request_finished(self):
        """Process the completed request and schedule another."""
        self.active = False
        if self.error is not None:
            if self.goodseed:
                logger.warning(self.error)
                self.downloader.errorfunc(self.error)
            self.errorcount += 1
        if self.received_data:
            self.errorcount = 0
            if not self._got_data():
                self.received_data = None
        if not self.received_data:
            self._release_requests()
            self.downloader.peerdownloader.piece_flunked(self.index)
        if self._retry_period:
            self.resched(self._retry_period)
            self._retry_period = None
            return
        self.resched()

    def _got_data(self):
        """Process the returned data from the request.
        
        Update the rate measures, pass the data to the storage, mark the piece
        as complete.
        
        @rtype: C{boolean}
        @return: whether the data was good
        
        """
        
        if self.connection_status not in [200, 206]:
            logger.warning('bad status from http seed: '+str(self.connection_status))
            self.errorcount += 1
            return False
        self._retry_period = 1
        if len(self.received_data) != self.request_size:
            if self.goodseed:
                logger.warning('corrupt data from http seed')
                self.downloader.errorfunc('corrupt data from http seed - redownloading')
            return False
        self.measure.update_rate(len(self.received_data))
        self.downloader.measurefunc(len(self.received_data))
        if self.cancelled:
            return False
        if not self._fulfill_requests():
            return False
        if not self.goodseed:
            self.goodseed = True
            self.downloader.seedsfound += 1
        if self.downloader.storage.do_I_have(self.index):
            self.downloader.picker.complete(self.index)
            self.downloader.peerdownloader.check_complete(self.index)
            self.downloader.gotpiecefunc(self.index)
        return True
    
    def _get_requests(self):
        """Get the requests for a piece."""
        self.requests = []
        self.request_size = 0L
        while self.downloader.storage.do_I_have_requests(self.index):
            r = self.downloader.storage.new_request(self.index)
            self.requests.append(r)
            self.request_size += r[1]
        self.requests.sort()

    def _fulfill_requests(self):
        """Pass the downloaded data to the storage.
        
        @rtype: C{boolean}
        @return: whether the piece was successfully received (hash checked)
        
        """
        
        start = 0L
        success = True
        while self.requests:
            begin, length = self.requests.pop(0)
            if not self.downloader.storage.piece_came_in(self.index, begin,
                            self.received_data[start:start+length]):
                logger.warning('piece failed hash check')
                success = False
                break
            start += length
        return success

    def _release_requests(self):
        """Release any pending requests for piece ranges."""
        for begin, length in self.requests:
            self.downloader.storage.request_lost(self.index, begin, length)
        self.requests = []

    def _request_ranges(self, offset, end):
        """Build a list of ranges to request from the site.

        @type offset: C{long}
        @param offset: the offset within the file that the piece starts at
        @type end: C{long}
        @param end: the offset within the file that the piece ends at
        @rtype: C{string}
        @return: the comma separated ranges to request
        
        """
        
        s = ''
        begin, length = self.requests[0]
        for begin1, length1 in self.requests[1:]:
            if begin + length == begin1:
                length += length1
                continue
            else:
                if s:
                    s += ','
                assert offset+begin+length <= end
                s += str(offset + begin)+'-'+str(offset+begin+length-1)
                begin, length = begin1, length1
        if s:
            s += ','
        assert offset+begin+length <= end
        s += str(offset+begin)+'-'+str(offset+begin+length-1)
        return s
        
    
class HTTPDownloader:
    """Collection of all the HTTP downloads.
    
    @type storage: L{StorageWrapper.StorageWrapper}
    @ivar storage: the piece storage instance
    @type picker: L{PiecePicker.PiecePicker}
    @ivar picker: the piece choosing instance
    @type rawserver: L{DebTorrent.RawServer.RawServer}
    @ivar rawserver: the server
    @type finflag: C{threading.Event}
    @ivar finflag: the flag indicating when the download is complete
    @type errorfunc: C{method}
    @ivar errorfunc: the method to call when an error occurs
    @type peerdownloader: L{Downloader.Downloader}
    @ivar peerdownloader: the instance of the collection of normal downloaders
    @type infohash: C{string}
    @ivar infohash: the info hash
    @type max_rate_period: C{float}
    @ivar max_rate_period: maximum amount of time to guess the current 
            rate estimate represents
    @type gotpiecefunc: C{method}
    @ivar gotpiecefunc: the method to call when a piece comes in
    @type measurefunc: C{method}
    @ivar measurefunc: the method to call to add downloaded data to the total
        download rate measurement
    @type filenamefunc: C{method}
    @ivar filenamefunc: the method to call to determine the file name that
        the download is being saved under
    @type downloads: C{list} of L{SingleDownload}
    @ivar downloads: the list of all current download connections to sites
    @type seedsfound: C{int}
    @ivar seedsfound: the number of seeds successfully downloaded from
    
    """
    
    def __init__(self, storage, picker, rawserver,
                 finflag, errorfunc, peerdownloader,
                 max_rate_period, infohash, measurefunc, gotpiecefunc,
                 filenamefunc):
        """Initialize the instance.
        
        @type storage: L{StorageWrapper.StorageWrapper}
        @param storage: the piece storage instance
        @type picker: L{PiecePicker.PiecePicker}
        @param picker: the piece choosing instance
        @type rawserver: L{Debtorrent.RawServer.RawServer}
        @param rawserver: the server
        @type finflag: C{threading.Event}
        @param finflag: the flag indicating when the download is complete
        @type errorfunc: C{method}
        @param errorfunc: the method to call when an error occurs
        @type peerdownloader: L{Downloader.Downloader}
        @param peerdownloader: the instance of the collection of normal downloaders
        @type max_rate_period: C{float}
        @param max_rate_period: maximum amount of time to guess the current 
            rate estimate represents
        @type infohash: C{string}
        @param infohash: the info hash
        @type measurefunc: C{method}
        @param measurefunc: the method to call to add downloaded data to the total
            download rate measurement
        @type gotpiecefunc: C{method}
        @param gotpiecefunc: the method to call when a piece comes in
        @type filenamefunc: C{method}
        @param filenamefunc: the method to call to determine the save location
        
        """
        
        self.storage = storage
        self.picker = picker
        self.rawserver = rawserver
        self.finflag = finflag
        self.errorfunc = errorfunc
        self.peerdownloader = peerdownloader
        self.infohash = infohash
        self.max_rate_period = max_rate_period
        self.gotpiecefunc = gotpiecefunc
        self.measurefunc = measurefunc
        self.downloads = []
        self.seedsfound = 0
        self.filenamefunc = filenamefunc

    def make_download(self, url):
        """Create a new download from a site.

        The new L{SingleDownload} is added to the end of the list of
        downloads.

        @type url: C{string}
        @param url: the base URL to use for downloading from that site
        @rtype: L{SingleDownload}
        @return: the SingleDownload instance created

        """

        logger.info('Starting a deb_mirror downloader for: '+url)
        single = SingleDownload(self, url)
        self.downloads.append(single)
        return single

    def get_downloads(self):
        """Get the list of all current downloads.
        
        @rtype: C{list} of L{SingleDownload}
        @return: all current downloads from sites
        
        """
        
        if self.finflag.isSet():
            return []
        return self.downloads

    def cancel_piece_download(self, pieces):
        """Cancel any active downloads for the pieces.

        Every download that is active and is fetching one of the given
        pieces has its C{cancelled} flag set.

        @type pieces: C{list} of C{int}
        @param pieces: the list of pieces to cancel downloads of

        """

        logger.info('Cancelling all HTTP downloads for pieces: '+str(pieces))
        for single in self.downloads:
            if not single.active:
                continue
            if single.index in pieces:
                single.cancelled = True
