""" Date/Time string parsing module.

    Note about the Y2K problems:

       The parser can only handle years with at least 2 digits. 2
       digit year values get expanded by adding the century using
       DateTime.add_century(), while years with 3 or more digits are
       converted literally. To have 2 digit years also be interpreted
       literally, add leading zeros, e.g. year 99 must be written as
       099 or 0099.

    Copyright (c) 1998-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    Copyright (c) 2000-2001, eGenix.com Software GmbH; mailto:info@egenix.com
    See the documentation for further information on copyrights,
    or contact the author. All Rights Reserved.

"""
import types,re,string
import DateTime,ISO,ARPA,Timezone

# Enable to produce debugging output: when true, the parsers in this
# module print the matched RE groups and the values they parsed.
# NOTE(review): later Python versions reject assignments to the name
# __debug__ at compile time -- confirm which interpreter versions this
# module has to support before touching this flag.
__debug__ = 0

# REs for matching date and time parts in a string; These REs
# parse a superset of ARPA, ISO, American and European style dates.
# Timezones are supported via the Timezone submodule.
#
# The definitions in this section are plain pattern strings. They are
# concatenated into complete patterns further down, so the order of the
# named groups in a combined pattern follows the order of concatenation.

# Year: optional minus sign and at least two digits, not followed by ':'
# (presumably so that the hour of a time value is not taken for a year).
_year = '(?P<year>-?\d+\d(?!:))'
# Year optionally followed by an epoch marker made up of the letters
# A-E and dots, with optional leading spaces
_year_epoch = '(?:' + _year + '(?P<epoch> *[ABCDE\.]+)?)'
# Relative year: optionally signed integer enclosed in parentheses
_relyear = '(?:\((?P<relyear>[-+]?\d+)\))'

# Month: one or two digits, not followed by ':'
_month = '(?P<month>\d?\d(?!:))'
# Month written with exactly two digits
_fullmonth = '(?P<month>\d\d(?!:))'
# Literal month: three letter English abbreviation; any trailing
# letters, commas, dots or semicolons are consumed but not captured
_litmonth = ('(?P<litmonth>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)'
             '[a-z,\.;]*')
# Maps literal month names to month numbers (taken from the ARPA module)
litmonthtable = ARPA.litmonthtable
# Relative month: optionally signed integer enclosed in parentheses
_relmonth = '(?:\((?P<relmonth>[-+]?\d+)\))'

# Day: one or two digits, not followed by ':'
_day = '(?P<day>\d?\d(?!:))'
# Day optionally followed by an ordinal suffix or a punctuation mark
_usday = '(?P<day>\d?\d(?!:))(?:st|nd|rd|th|[,\.;])?'
# Day written with exactly two digits
_fullday = '(?P<day>\d\d(?!:))'
# Literal weekday: three letter English abbreviation plus trailing letters
_litday = '(?P<litday>Mon|Tue|Wed|Thu|Fri|Sat|Sun)[a-z]*'
# Literal weekday table taken from the ARPA module
litdaytable = ARPA.litdaytable
# Relative day: optionally signed integer enclosed in parentheses
_relday = '(?:\((?P<relday>[-+]?\d+)\))'

# Time of day parts: the hour has one or two digits (first digit 0-2),
# minute and second have two digits (first digit 0-6); the second may
# carry a fractional part
_hour = '(?P<hour>[012]?\d)'
_minute = '(?P<minute>[0-6]\d)'
_second = '(?P<second>[0-6]\d(?:\.\d+)?)'

# Delta parts: one or more digits with an optional fractional part
_days = '(?P<days>\d*\d(?:\.\d+)?)'
_hours = '(?P<hours>\d*\d(?:\.\d+)?)'
_minutes = '(?P<minutes>\d*\d(?:\.\d+)?)'
_seconds = '(?P<seconds>\d*\d(?:\.\d+)?)'

# Relative parts: optionally signed number (fraction allowed) enclosed
# in parentheses
_reldays = '(?:\((?P<reldays>[-+]?\d+(?:\.\d+)?)\))'
_relhours = '(?:\((?P<relhours>[-+]?\d+(?:\.\d+)?)\))'
_relminutes = '(?:\((?P<relminutes>[-+]?\d+(?:\.\d+)?)\))'
_relseconds = '(?:\((?P<relseconds>[-+]?\d+(?:\.\d+)?)\))'

# Sign followed by optional spaces
_sign = '(?:(?P<sign>[-+]) *)'
# ISO week: 'W' followed by one or two digits
_week = 'W(?P<week>\d?\d)'
# Time zone pattern as provided by the Timezone module
_zone = Timezone.zone

# Complete patterns built from the parts defined above.

# Standard time: hour:minute[:second], optionally followed by a zone
_time = _hour + ':' + _minute + '(?::' + _second + ')? *' + _zone + '?'
# ISO time: like the standard time, but the colons, the minute and the
# second are all optional
_isotime = _hour + ':?' + _minute + '?:?' + _second + '? *' + _zone + '?'

# ISO week date: year, optionally followed by a week and a day.
# NOTE(review): no compiled RE for this pattern is visible in this file.
_weekdate = _year + '-?(?:' + _week + '-?' + _day + '?)?'
# ISO date: year, two digit month and two digit day; the '-'
# separators are optional
_isodate = _year + '-?' + _fullmonth + '-?' + _fullday + '(?!:)'
# European date: day.month.year
_eurodate = _day + '\.' + _month + '\.' + _year_epoch
# US date: month/day/year
_usdate = _month + '/' + _day + '/' + _year_epoch
# Literal date: optional weekday name, day, literal or numeric month
# and an optional year; parts are separated by blanks or hyphens
_litdate = ('(?:'+ _litday + ',? )? *' + 
            _usday + ' *' + 
            '[- ] *(?:' + _litmonth + '|'+ _month +') *[- ]' +
            _year_epoch + '?')
# Alternative literal date: optional weekday name, literal month, day
# and an optional year
_altlitdate = ('(?:'+ _litday + ',? )? *' + 
               _litmonth + '[ ,.a-z]+' + 
               _usday + '[ a-z]+' +
               _year_epoch + '?')

# Placeholder for a part of a relative date/time that is left unset
_relany = '[*%?a-zA-Z]+'

# Relative ISO date: each of the three '-' separated parts is either a
# placeholder, an absolute value or a relative value in parentheses
_relisodate = ('(?:(?:' + _relany + '|' + _year + '|' + _relyear + ')-' +
               '(?:' + _relany + '|' + _month + '|' + _relmonth + ')-' +
               '(?:' + _relany + '|' + _day + '|' + _relday + '))')

# Like _litdate, but with a run of digits, colons and blanks permitted
# between the month and the year.
# NOTE(review): this pattern is compiled below, but none of the parsers
# visible in this file appear to use the compiled RE.
_asctime = ('(?:'+ _litday + ',? )? *' + 
                _usday + ' *' + 
                '[- ] *(?:' + _litmonth + '|'+ _month +') *[- ]' +
                '(?:[0-9: ]+)' + 
                _year_epoch + '?')

# Relative ISO time: hour:minute[:second], where each part is either a
# placeholder, an absolute value or a relative value in parentheses
_relisotime = ('(?:(?:' + _relany + '|' + _hour + '|' + _relhours + '):' +
               '(?:' + _relany + '|' + _minute + '|' + _relminutes + ')' +
               '(?::(?:' + _relany + '|' + _second + '|' + _relseconds + '))?)')

# A single numeric component of an ISO style delta: one or more digits
# with an optional fractional part. It is captured by an *unnamed*
# group on purpose: _isodelta repeats the component in three
# alternatives, and the re module requires every group name to be
# defined only once per pattern. Building _isodelta from the named
# _days/_hours/_minutes/_seconds parts defines 'hours', 'minutes' and
# 'seconds' several times, which makes re.compile() fail wherever that
# rule is enforced.
_deltavalue = r'(\d*\d(?:\.\d+)?)'

# ISO style delta: optional sign followed by D:H:M:S, H:M:S or H:M.
# The group positions are the same as with the named parts, so code
# indexing match.groups() is unaffected:
#   index 0     sign
#   index 1-4   days, hours, minutes, seconds   (D:H:M:S)
#   index 5-7   hours, minutes, seconds         (H:M:S)
#   index 8-9   hours, minutes                  (H:M)
_isodelta = (_sign + '?(?:' + 
             _deltavalue + ':' + _deltavalue + ':' + 
             _deltavalue + ':' + _deltavalue + 
             '|' +
             _deltavalue + ':' + _deltavalue + ':' + _deltavalue + 
             '|' +
             _deltavalue + ':' + _deltavalue + ')')

# Literal delta: optional sign followed by optional day, hour, minute
# and second values, each marked by a unit word starting with d, h, m
# or s. Every part is optional, so this pattern also matches the empty
# string. Each group name occurs only once here, so the named parts
# can be used as they are.
_litdelta = (_sign + '?' +
             '(?:' + _days + ' *d[a-z]*[,; ]*)?' + 
             '(?:' + _hours + ' *h[a-z]*[,; ]*)?' + 
             '(?:' + _minutes + ' *m[a-z]*[,; ]*)?' +
             '(?:' + _seconds + ' *s[a-z]*[,; ]*)?')

# Compiled versions of the patterns.

# Time formats (matched case-insensitively)
_timeRE = re.compile(_time, re.IGNORECASE)
_isotimeRE = re.compile(_isotime, re.IGNORECASE)
_relisotimeRE = re.compile(_relisotime, re.IGNORECASE)

# Date formats (matched case-insensitively)
_isodateRE = re.compile(_isodate, re.IGNORECASE)
_eurodateRE = re.compile(_eurodate, re.IGNORECASE)
_usdateRE = re.compile(_usdate, re.IGNORECASE)
_litdateRE = re.compile(_litdate, re.IGNORECASE)
_altlitdateRE = re.compile(_altlitdate, re.IGNORECASE)
_relisodateRE = re.compile(_relisodate, re.IGNORECASE)
_asctimeRE = re.compile(_asctime, re.IGNORECASE)

# Delta formats; unlike the REs above these are compiled without any
# flags, i.e. they match case-sensitively
_isodeltaRE = re.compile(_isodelta)
_litdeltaRE = re.compile(_litdelta)

def _parse_date(text,formats=('euro','us','iso','lit','altlit',
                              'unknown'),

                atoi=string.atoi,atof=string.atof,
                add_century=DateTime.add_century,
                now=DateTime.now):

    """ Parses the date part given in text and returns a tuple
        (text,day,month,year,style) with the following
        meanings:

        * text gives the original text without the date part
        * day,month,year give the parsed date (defaults to today)
        * style gives information about which parser was successful:
          'euro' - the European date parser
          'us' - the US date parser
          'lit' - the US literal date parser
          'altlit' - the alternative US literal date parser
          'unknown' - no date part was found, defaults were used

        formats may be set to a tuple specifying the parsers to use:
          'euro' - the European date parser
          'us' - the US date parser
          'lit' - the US literal date parser
          'altlit' - the alternative US literal date parser
          'unknown' - default to todays date

        If 'unknown' is not given in formats and the date cannot be
        parsed, a ValueError is raised.

    """
    match = None
    
    if 'euro' in formats:
        # European style date
        match = _eurodateRE.search(text)
        if match is not None:
            day,month,year,epoch = match.groups()
            if len(year) == 2:
                # Y2K problem:
                year = add_century(atoi(year))
            else:
                year = atoi(year)
            if epoch and 'B' in epoch:
                year = -year + 1
            month = atoi(month)
            day = atoi(day)
            style = 'euro'

    if match is None and \
       'us' in formats:
        # US style date
        match = _usdateRE.search(text)
        if match is not None:
            month,day,year,epoch = match.groups()
            if len(year) == 2:
                # Y2K problem:
                year = add_century(atoi(year))
            else:
                year = atoi(year)
            if epoch and 'B' in epoch:
                year = -year + 1
            month = atoi(month)
            day = atoi(day)
            style = 'us'

    if match is None and \
       'iso' in formats:
        # ISO style date
        match = _isodateRE.search(text)
        if match is not None:
            year,month,day = match.groups()
            if len(year) == 2:
                # Y2K problem:
                year = add_century(atoi(year))
            else:
                year = atoi(year)
            # default to January, 1
            if not month:
                month = 1
            else:
                month = atoi(month)
            if not day:
                day = 1
            else:
                day = atoi(day)
            style = 'iso'

    if match is None and \
       ('lit' in formats or
        'altlit' in formats):
        if 'lit' in formats:
            # US style literal date
            match = _litdateRE.search(text)
            if match is not None:
                litday,day,litmonth,month,year,epoch = match.groups()
                style = 'lit'

        if match is None and \
           'altlit' in formats:
            # Alternative US style literal date
            match = _altlitdateRE.search(text)
            if match is not None: 
                litday,litmonth,day,year,epoch = match.groups()
                month = '<missing>'
                style = 'altlit'

        if match is not None:
            if __debug__: print match.groups()
            if not year:
                year = now().year
            else:
                if len(year) == 2:
                    # Y2K problem:
                    year = add_century(atoi(year))
                else:
                    year = atoi(year)
            if epoch and 'B' in epoch:
                year = -year + 1
            if litmonth:
                try:
                    month = litmonthtable[litmonth]
                except KeyError:
                    raise ValueError,\
                          'wrong month name: "%s"' % litmonth
            else:
                month = atoi(month)
            day = atoi(day)

    if match is None and \
       'unknown' in formats:
        # No date part: default to today
        date = now()
        year = date.year
        month = date.month
        day = date.day
        style = 'unknown'

    if match is not None:
        # Remove date from text
        left,right = match.span()
        if __debug__:
            print 'parsed date:',repr(text[left:right]),\
                  'giving:',year,month,day
        text = text[:left] + text[right:]
        
    elif 'unknown' not in formats:
        # If no default handling should be applied, raise an error
        raise ValueError, 'unknown date format: "%s"' % text

    return text,day,month,year,style

def _parse_time(text,formats=('iso','unknown'),

                atoi=string.atoi,atof=string.atof):

    """ Parses a time part given in text and returns a tuple
        (text,hour,minute,second,offset,style) with the following
        meanings:

        * text gives the original text without the time part
        * hour,minute,second give the parsed time
        * offset gives the time zone UTC offset
        * style gives information about which parser was successful:
          'standard' - the standard parser
          'iso' - the ISO time format parser
          'unknown' - no time part was found

        formats may be set to a tuple specifying the parsers to use:
          'standard' - standard time format with ':' delimiter
          'iso' - ISO time format (superset of 'standard')
          'unknown' - default to 0:00:00, 0 zone offset

        If 'unknown' is not given in formats and the time cannot be
        parsed, a ValueError is raised.

    """
    # Time: default to midnight, localtime
    hour,minute,second,offset = 0,0,0.0,0
    match = None

    # Standard format
    if 'standard' in formats:
        match = _timeRE.search(text)
        if match is not None:
            style = 'standard'

    # ISO format
    if match is None and \
       'iso' in formats:
        match =  _isotimeRE.search(text)
        if match is not None:
            style = 'iso'

    if match is not None:
        hour,minute,second,zone = match.groups()
        if zone:
            # Convert to UTC offset
            offset = Timezone.utc_offset(zone)
        hour = atoi(hour)
        if minute:
            minute = atoi(minute)
        else:
            minute = 0
        if not second:
            second = 0.0
        else:
            second = atof(second)

        # Remove time from text
        left,right = match.span()
        if __debug__: 
            print 'parsed time:',repr(text[left:right]),\
                  'giving:',hour,minute,second,offset
        text = text[:left] + text[right:]

    elif 'unknown' not in formats:
        # If no default handling should be applied, raise an error
        raise ValueError, 'unknown time format: "%s"' % text

    else:
        style = 'unknown'

    return text,hour,minute,second,offset,style

###

def DateTimeFromString(text,

                       DateTime=DateTime):

    """ DateTimeFromString(text)
    
        Returns a DateTime instance reflecting the date and time given
        in text. In case a timezone is given, the returned instance
        will point to the corresponding UTC time value. Otherwise, the
        value is set as given in the string.

        Inserts default values for missing parts. Default is today for
        the date part and 0:00:00 for the time part.

    """
    origtext = text

    # First try standard formats
    text,hour,minute,second,offset,timestyle = _parse_time(
        text, ('standard','unknown'))
    text,day,month,year,datestyle = _parse_date(text)

    # ISO uses implicit ordering
    if timestyle == 'unknown' and \
       datestyle in ('iso', 'unknown'):
        text,day,month,year,datestyle = _parse_date(
            origtext, ('iso','unknown'))
        text,hour,minute,second,offset,timestyle = _parse_time(
            text, ('iso','unknown'))

    try:
        return DateTime.DateTime(year,month,day,hour,minute,second) - offset
    except DateTime.RangeError, why:
        raise DateTime.RangeError,\
              'Failed to parse "%s": %s' % (origtext, why)

def DateFromString(text,

                   DateTime=DateTime):

    """ DateFromString(text)
    
        Returns a DateTime instance reflecting the date given in
        text. A possibly included time part is ignored.

        Inserts default values for missing parts. Default is today for
        the date part and 0:00:00 for the time part.

    """
    _text,day,month,year,datestyle = _parse_date(text)
    try:
        return DateTime.DateTime(year,month,day)
    except DateTime.RangeError, why:
        raise DateTime.RangeError,\
              'Failed to parse "%s": %s' % (text, why)

def DateTimeDeltaFromString(text,

                            atoi=string.atoi,atof=string.atof,
                            DateTime=DateTime):

    """ DateTimeDeltaFromString(text)
    
        Returns a DateTimeDelta instance reflecting the delta given in
        text. Defaults to 0:00:00:00.00 for parts that are not
        included in the textual representation or cannot be parsed.

    """
    match = _isodeltaRE.search(text)
    if match is not None:
        groups = match.groups()
        if __debug__: print groups
        sign = groups[0]
        days = groups[1]
        if days:
            days = atof(days)
        else:
            days = 0.0
        hours = atof(groups[2] or groups[5] or groups[8])
        minutes = atof(groups[3] or groups[6] or groups[9])
        seconds = groups[4] or groups[7]
        if seconds:
            seconds = atof(seconds)
        else:
            seconds = 0.0
        if sign != '-':
            sign = 1
        else:
            sign = -1

    else:
        match = _litdeltaRE.search(text)
        if match is not None:
            sign,days,hours,minutes,seconds = match.groups()
            if days:
                days = atof(days)
            else:
                days = 0.0
            if hours:
                hours = atof(hours)
            else:
                hours = 0.0
            if minutes:
                minutes = atof(minutes)
            else:
                minutes = 0.0
            if seconds:
                seconds = atof(seconds)
            else:
                seconds = 0.0
            if sign != '-':
                sign = 1
            else:
                sign = -1

        else:
            # Not matched:
            return DateTime.DateTimeDelta(0.0)

    try:
        dtd = DateTime.DateTimeDelta(days,hours,minutes,seconds)
    except DateTime.RangeError, why:
        raise DateTime.RangeError,\
              'Failed to parse "%s": %s' % (text, why)
    else:
        if sign < 0:
            return -dtd
        else:
            return dtd

# Aliases: both names refer to the delta parser itself
TimeFromString = TimeDeltaFromString = DateTimeDeltaFromString

###

def _parse_reldate(text,

                   atoi=string.atoi,atof=string.atof):

    match = _relisodateRE.search(text)
    if match is not None:
        groups = match.groups()
        if __debug__: print groups
        year,years,month,months,day,days = groups
        if year:
            year = atoi(year)
        if years:
            years = atof(years)
        else:
            years = 0
        if month:
            month = atoi(month)
        if months:
            months = atof(months)
        else:
            months = 0
        if day:
            day = atoi(day)
        if days:
            days = atof(days)
        else:
            days = 0
        return year,years,month,months,day,days
    else:
        return None,0,None,0,None,0

def _parse_reltime(text,

                   atoi=string.atoi,atof=string.atof):

    match = _relisotimeRE.search(text)
    if match is not None:
        groups = match.groups()
        if __debug__: print groups
        hour,hours,minute,minutes,second,seconds = groups
        if hour:
            hour = atoi(hour)
        if hours:
            hours = atof(hours)
        else:
            hours = 0
        if minute:
            minute = atoi(minute)
        if minutes:
            minutes = atof(minutes)
        else:
            minutes = 0
        if second:
            second = atoi(second)
        if seconds:
            seconds = atof(seconds)
        else:
            seconds = 0
        return hour,hours,minute,minutes,second,seconds
    else:
        return None,0,None,0,None,0

def RelativeDateTimeFromString(text,

                               RelativeDateTime=DateTime.RelativeDateTime):

    """ RelativeDateTimeFromString(text)

        Parses the relative date and time given in text and returns
        them as RelativeDateTime instance.

        The text must use this syntax:

            [YYYY-MM-DD] [HH:MM[:SS]]

        The date part and the time part are each optional as a whole,
        and so are the seconds of the time part. All other parts,
        including the hyphens and colons, are mandatory.

        Each part may be written in one of three ways:

        * as plain number, which sets an absolute value
        * as number in parenthesis, which sets a relative value; it
          should include a sign, e.g. '(+0001)' for the year part
        * as '*', '%', '?' or any combination of letters, e.g.
          'YYYY', which leaves the part unaltered

        Parts which are not given or cannot be parsed are passed to
        RelativeDateTime as None (absolute values) or 0 (relative
        values).

    """
    date_fields = _parse_reldate(text)
    time_fields = _parse_reltime(text)
    # Both tuples alternate absolute and relative settings
    return RelativeDateTime(year=date_fields[0],years=date_fields[1],
                            month=date_fields[2],months=date_fields[3],
                            day=date_fields[4],days=date_fields[5],
                            hour=time_fields[0],hours=time_fields[1],
                            minute=time_fields[2],minutes=time_fields[3],
                            second=time_fields[4],seconds=time_fields[5])

def RelativeDateFromString(text,

                           RelativeDateTime=DateTime.RelativeDateTime):

    """ RelativeDateFromString(text)

        Works like RelativeDateTimeFromString(text), but only parses
        the date part of text; no time settings are passed to
        RelativeDateTime.

    """
    fields = _parse_reldate(text)
    # fields alternates absolute and relative settings
    return RelativeDateTime(year=fields[0],years=fields[1],
                            month=fields[2],months=fields[3],
                            day=fields[4],days=fields[5])

def RelativeTimeFromString(text,

                           RelativeDateTime=DateTime.RelativeDateTime):

    """ RelativeTimeFromString(text)

        Works like RelativeDateTimeFromString(text), but only parses
        the time part of text; no date settings are passed to
        RelativeDateTime.

    """
    fields = _parse_reltime(text)
    # fields alternates absolute and relative settings
    return RelativeDateTime(hour=fields[0],hours=fields[1],
                            minute=fields[2],minutes=fields[3],
                            second=fields[4],seconds=fields[5])

### Tests

if __name__ == '__main__':

    l = [
        ('Sun Nov  6 08:49:37 1994', '1994-11-06 08:49:37.00'),
        ('Sunday, 06-Nov-94 08:49:37 GMT', '1994-11-06 08:49:37.00'),
        ('Sun, 06 Nov 1994 08:49:37 GMT', '1994-11-06 08:49:37.00'),
        ('1994-11-06 08:49:37', '1994-11-06 08:49:37.00'),
        ('06/11/94 08:49:37', '1994-06-11 08:49:37.00'),
        ('11/06/94 08:49:37', '1994-11-06 08:49:37.00'),
        ('06-Nov-94 08:49:37', '1994-11-06 08:49:37.00'),
        ('06-Nov-94', '1994-11-06 00:00:00.00'),
        ('01:03', '01:03:00.00'),
        ('01:03:11', '01:03:11.00'),
        ('010203', '2001-02-03 00:00:00.00'),
        ('20000824/23', '2000-08-24 23:00:00.00'),
        ('20000824/0102', '2000-08-24 01:02:00.00'),
        ('20000824', '2000-08-24 00:00:00.00'),
        ('20000824/020301', '2000-08-24 02:03:01.00'),
        ('20000824 020301', '2000-08-24 02:03:01.00'),
        ('-20000824 020301', '-2000-08-24 02:03:01.00'),
        ]

    for text, reference in l:
        value = DateTimeFromString(text)
        if value != DateTimeFromString(reference):
            print 'Failed to parse "%s"' % text
            print '  expected: %s' % reference
            print '  parsed:   %s' % value
    
