# misc utility functions

from types import *
from string import split,join,find,lower,rfind,atoi,strip
import os, re, sys, traceback
from zLOG import LOG,ERROR,INFO,WARNING,BLATHER,DEBUG
try:
    from StructuredText.DocumentClass import StructuredTextTable
except ImportError:
    #older zope, won't be needing it
    pass

# Work out the running Zope version as a (major, minor, micro) tuple by
# parsing version.txt under SOFTWARE_HOME.
# NOTE(review): SOFTWARE_HOME is not defined in this module; presumably Zope
# provides it as a global/builtin -- confirm.
try:
    f = open(os.path.join(SOFTWARE_HOME,'version.txt'))
    try:
        v = f.read()
    finally:
        # close explicitly rather than relying on garbage collection
        f.close()
    m = re.match(r'(?i)zope\s*([0-9]+)\.([0-9]+)\.([0-9]+)',v)
    ZOPEVERSION = (int(m.group(1)),int(m.group(2)),int(m.group(3)))
except Exception:
    # if we can't read or parse version.txt (missing SOFTWARE_HOME, missing
    # file, unexpected contents), assume we're dealing with a
    # highly advanced specimen.. (cvs)
    ZOPEVERSION = (9,9,9)


#DEBUGMODE = os.environ.get('Z_DEBUG_MODE',0) or \
#            os.environ.get('ZWIKI_DEBUG',0)
# just require STUPID_LOG_SEVERITY <= -100 to see these
def DLOG(*args):
    """
    Log all arguments, converted with str() and joined by single spaces,
    to the 'ZWiki' subsystem at BLATHER severity.
    """
    message = ' '.join(map(str, args))
    LOG('ZWiki', BLATHER, message)

# rendering helpers
def thunk_substituter(func, text, allowed):
    """Build a one-argument callback around the passed-in func.

    The returned function takes a single arg and calls
    func(arg, allowed, state, text), where 'allowed' and 'text' are the
    values given here and 'state' is a dictionary created once per call
    to thunk_substituter:
    {'lastend': 0, 'inpre': 0, 'incode': 0, 'intag': 0, 'inanchor': 0}.
    The same state dict is handed to func on every invocation.

    This is for use in a re.sub situation, to get the 'allowed' parameter
    and the state dict into the callback.

    (The technical term really is "thunk".  Honest.-)"""
    state = {}
    for key in ('lastend', 'inpre', 'incode', 'intag', 'inanchor'):
        state[key] = 0

    # values are bound as defaults, as in the lambda this replaces
    def thunk(arg, func=func, allowed=allowed, text=text, state=state):
        return func(arg, allowed, state, text)

    return thunk

def within_literal(upto, after, state, text,
                   rfind=rfind, lower=lower):
    """
    Report whether position 'upto' of text lies in a literal context,
    judging from the text between state['lastend'] and upto plus the
    flags remembered in state from the previous call:

    - within an enclosing '<pre>' preformatted region '</pre>'
    - within an enclosing '<code>' code fragment '</code>'
    - within a tag '<' body '>'
    - within an '<a href...>' tag's contents '</a>'

    The state dict is updated: each of 'inpre', 'incode', 'intag' and
    'inanchor' is set to 1 or 0, and 'lastend' is set to 'after'.
    Returns 1 if any of the four contexts applies, otherwise 0.
    Matching is case-insensitive (the text is lowercased first).
    """
    # XXX This breaks on badly nested angle brackets and <pre></pre>, etc.
    # XXX the '<a href' check should be made more robust
    lastend = state['lastend']
    previous = {
        'inpre':    state['inpre'],
        'incode':   state['incode'],
        'intag':    state['intag'],
        'inanchor': state['inanchor'],
        }
    lowered = lower(text)

    # (state key, opening marker, closing marker, offset added to the
    #  opening marker's position before looking for the closing marker)
    contexts = (
        ('inpre',    '<pre>',   '</pre>',  4),
        ('incode',   '<code>',  '</code>', 5),
        ('intag',    '<',       '>',       1),
        ('inanchor', '<a href', '</a>',    5),
        )

    literal = 0
    for key, opener, closer, skip in contexts:
        flag = 0
        found = rfind(lowered, opener, lastend, upto)
        if found != -1 or previous[key]:
            if found != -1:
                # a fresh opener in this segment: look for a closer after it
                scanfrom = found + skip
            else:
                # still open from before: look from the segment start
                scanfrom = lastend
            if rfind(lowered, closer, scanfrom, upto) == -1:
                flag = 1
        state[key] = flag
        literal = literal or flag

    state['lastend'] = after
    return literal

def withinSgmlOrDtml(span,text):
    """
    Return 1 if span (a (start, end) pair) is entirely contained in one
    of the sgml or dtml tag spans found in text, otherwise 0.
    """
    enclosing = [tag for tag in sgmlAndDtmlSpansIn(text)
                 if tag[0] <= span[0] and span[1] <= tag[1]]
    if enclosing:
        return 1
    return 0

def sgmlAndDtmlSpansIn(text):
    """
    Collect the (start, end) span of every match of dtmlorsgmlexpr in
    text, in order, each search resuming where the previous match ended.
    """
    search = re.compile(dtmlorsgmlexpr).search
    found = []
    match = search(text, 0)
    while match:
        found.append(match.span())
        # continue scanning right after this match
        match = search(text, match.end())
    return found

#from DocumentTemplate.DT_Util import html_quote
from cgi import escape
def html_quote(s):
    """Convert s to a string and pass it through the imported escape()."""
    text = str(s)
    return escape(text)

def html_unquote(s,
                 character_entities=(
                       (('&amp;'),    '&'),
                       (('&lt;'),    '<' ),
                       (('&gt;'),    '>' ),
                       (('&lt;'), '\213' ),
                       (('&gt;'), '\233' ),
                       (('&quot;'),    '"'))): #"
    """
    Return str(s) with the given character entities replaced by their
    characters.

    character_entities is a sequence of (entity, character) pairs. When
    an entity appears more than once the first pair wins (so the '\\213'
    and '\\233' entries of the default table never apply). Empty entity
    strings are ignored.

    All entities are replaced in a single pass over the text, so the
    output of one replacement is never unquoted a second time:
    '&amp;lt;' becomes '&lt;', not '<'.
    """
    text = str(s)
    replacements = {}
    alternatives = []
    for entity, char in character_entities:
        if entity and entity not in replacements:
            replacements[entity] = char
            alternatives.append(re.escape(entity))
    if not alternatives:
        return text
    pattern = re.compile('|'.join(alternatives))
    # a function replacement is inserted literally (no backslash processing)
    return pattern.sub(
        lambda m, replacements=replacements: replacements[m.group(0)],
        text)

# structured text customizations
from StructuredText.DocumentWithImages import DocumentWithImages
import StructuredText
from Regexps import dtmlorsgmlexpr

# override a couple of things in STX's DocumentClass:
class myDocumentWithImages(DocumentWithImages):
    """
    DocumentWithImages subclass overriding a couple of things in STX's
    DocumentClass:

    1. doc_sgml recognizes DTML as well as SGML tags (via dtmlorsgmlexpr)
       and text_types lists it first, so such tags are left alone;
    2. doc_table accepts '+' as well as '|' at table corners.
    """

    # 1. leave dtml alone
    def doc_sgml(self,s,expr=re.compile(dtmlorsgmlexpr).search):
        """
        Find the first SGML or DTML tag in s.

        Returns a (StructuredTextSGML(tag text), start, end) tuple for the
        first match of dtmlorsgmlexpr, or None when nothing matches.
        """
        r = expr(s)
        if r:
            start,end = r.span()
            text = s[start:end]
            return (StructuredText.DocumentClass.StructuredTextSGML(text),
                    start,
                    end)
        return None

    # we need SGML/DTML expressions to be first priority
    # doesn't look like ZopeIssue:432 will change
    # so we must hard-code STX element types and priorities here..
    # should mimic STXNG behaviour as closely as possible here
    # NOTE(review): 'doc_sgml' is listed twice (first and second to last);
    # presumably the later entry is a leftover of the stock ordering --
    # confirm before removing it.
    text_types = [
        'doc_sgml',
        'doc_literal',
        'doc_img',
        'doc_inner_link',
        'doc_named_link',
        'doc_href1',
        'doc_href2',
        'doc_strong',
        'doc_emphasize',
        'doc_underline',
        'doc_sgml',
        'doc_xref',
        ]

    # 2. allow + at table corners; makes emacs picture-mode editing easier
    def doc_table(self, paragraph,
                  expr = re.compile(r'\s*[+|][-+]+[+|]').match):
        """
        Parse an ascii-art table paragraph into a StructuredTextTable.

        The paragraph's first colorizable text must start with a divider
        line whose corners may be '+' or '|'; otherwise None is returned.
        Rows are separated by divider lines: a '-' divider closes a block
        of data ("td") cells, an '=' divider closes a block of header
        ("th") cells. Each resulting cell is a tuple
        (text, column span, align, valign, "td" or "th").
        """
        text    = paragraph.getColorizableTexts()[0]
        m       = expr(text)

        subs = paragraph.getSubparagraphs()

        if not (m):
            return None
        rows = []

        spans   = []
        ROWS    = []
        COLS    = []
        indexes = []
        ignore  = []

        TDdivider   = re.compile(r'[-+]+').match
        THdivider   = re.compile(r'[=+]+').match
        col         = re.compile(r'\|').search
        innertable  = re.compile(r'(?![-=])[+|]([-+]+|[=+]+)[+|](?![-=])').search

        text = strip(text)
        rows = split(text,'\n')
        foo  = ""

        for row in range(len(rows)):
            rows[row] = strip(rows[row])

        # have indexes store if a row is a divider
        # or a cell part
        for index in range(len(rows)):
            tmpstr = rows[index][1:len(rows[index])-1]
            if TDdivider(tmpstr):
                indexes.append("TDdivider")
            elif THdivider(tmpstr):
                indexes.append("THdivider")
            else:
                indexes.append("cell")

        for index in range(len(indexes)):
            # compare by value: identity ('is') on string literals only
            # works when the interpreter happens to share the objects
            if indexes[index] == "TDdivider" or indexes[index] == "THdivider":
                ignore = [] # reset ignore
                #continue    # skip dividers

            tmp     = strip(rows[index])    # clean the row up
            tmp     = tmp[1:len(tmp)-1]     # remove leading + trailing |
            offset  = 0

            # find the start and end of inner
            # tables. ignore everything between
            if innertable(tmp):
                tmpstr = strip(tmp)
                while innertable(tmpstr):
                    start,end   = innertable(tmpstr).span()
                    if not (start,end-1) in ignore:
                        ignore.append((start,end-1))
                    tmpstr = " " + tmpstr[end:]

            # find the location of column dividers
            # NOTE: |'s in inner tables do not count
            #   as column dividers
            if col(tmp):
                while col(tmp):
                    bar         = 1   # true if start is not in ignore
                    start,end   = col(tmp).span()

                    if not start+offset in spans:
                        for s,e in ignore:
                            # NOTE(review): with 'or' this test is true for
                            # every position whenever s <= e, so any
                            # non-empty ignore list suppresses the bar;
                            # 'and' may have been intended. Left unchanged
                            # because it affects how tables render --
                            # confirm before changing.
                            if start+offset >= s or start+offset <= e:
                                bar = None
                                break
                        if bar:   # start is clean
                            spans.append(start+offset)
                    if not bar:
                        foo = foo + tmp[:end]
                        tmp = tmp[end:]
                        offset = offset + end
                    else:
                        COLS.append((foo + tmp[0:start],start+offset))
                        foo = ""
                        tmp = " " + tmp[end:]
                        offset = offset + start
            if not offset+len(tmp) in spans:
                spans.append(offset+len(tmp))
            COLS.append((foo + tmp,offset+len(tmp)))
            foo = ""
            ROWS.append(COLS)
            COLS = []

        spans.sort()
        # drop the first row (the opening divider line matched by expr)
        ROWS = ROWS[1:len(ROWS)]

        # find each column span
        cols    = []
        tmp     = []

        for row in ROWS:
            for c in row:
                tmp.append(c[1])
            cols.append(tmp)
            tmp = []

        cur = 1
        tmp = []
        C   = []
        for col in cols:
            for span in spans:
                if not span in col:
                    cur = cur + 1
                else:
                    tmp.append(cur)
                    cur = 1
            C.append(tmp)
            tmp = []

        for index in range(len(C)):
            for i in range(len(C[index])):
                ROWS[index][i] = (ROWS[index][i][0],C[index][i])
        rows = ROWS

        # label things as either TableData or
        # Table header
        TD  = []
        TH  = []
        all = []
        for index in range(len(indexes)):
            if indexes[index] == "TDdivider":
                TD.append(index)
                all.append(index)
            if indexes[index] == "THdivider":
                TH.append(index)
                all.append(index)
        TD = TD[1:]
        dividers = all[1:]
        #print "TD  => ", TD
        #print "TH  => ", TH
        #print "all => ", all, "\n"

        for div in dividers:
            if div in TD:
                index = all.index(div)
                for rowindex in range(all[index-1],all[index]):
                    for i in range(len(rows[rowindex])):
                        rows[rowindex][i] = (rows[rowindex][i][0],
                                             rows[rowindex][i][1],
                                             "td")
            else:
                index = all.index(div)
                for rowindex in range(all[index-1],all[index]):
                    for i in range(len(rows[rowindex])):
                        rows[rowindex][i] = (rows[rowindex][i][0],
                                             rows[rowindex][i][1],
                                             "th")

        # now munge the multi-line cells together
        # as paragraphs
        ROWS    = []
        COLS    = []
        for row in rows:
            for index in range(len(row)):
                if not COLS:
                    COLS = range(len(row))
                    for i in range(len(COLS)):
                        COLS[i] = ["",1,""]
                if TDdivider(row[index][0]) or THdivider(row[index][0]):
                    ROWS.append(COLS)
                    COLS = []
                else:
                    COLS[index][0] = COLS[index][0] + (row[index][0]) + "\n"
                    COLS[index][1] = row[index][1]
                    COLS[index][2] = row[index][2]

        # now that each cell has been munged together,
        # determine the cell's alignment.
        # Default is to center. Also determine the cell's
        # vertical alignment, top, middle, bottom. Default is
        # to middle
        rows = []
        cols = []
        # paragraphs inside a cell are separated by blank lines
        # (raw string: '\s' is a regex escape, not a string escape)
        parsplit = re.compile(r"\n\s*\n").split
        for row in ROWS:
            for index in range(len(row)):
                topindent       = 0
                bottomindent    = 0
                leftindent      = 0
                rightindent     = 0
                left            = []
                right           = []
                text            = row[index][0]
                text            = split(text,'\n')
                text            = text[:len(text)-1]
                align           = ""
                valign          = ""
                # count blank lines at the top of the cell...
                for t in text:
                    t = strip(t)
                    if not t:
                        topindent = topindent + 1
                    else:
                        break
                # ...and at the bottom
                text.reverse()
                for t in text:
                    t = strip(t)
                    if not t:
                        bottomindent = bottomindent + 1
                    else:
                        break
                text.reverse()
                tmp   = join(text[topindent:len(text)-bottomindent],"\n")
                pars  = parsplit(tmp)
                for par in pars:
                    if index > 0:
                        par = par[1:]
                    # splitting on single spaces yields one empty string
                    # per leading/trailing space, which is what is counted
                    par = split(par, ' ')
                    for p in par:
                        if not p:
                            leftindent = leftindent+1
                        else:
                            break
                    left.append(leftindent)
                    leftindent = 0
                    par.reverse()
                    for p in par:
                        if not p:
                            rightindent = rightindent + 1
                        else:
                            break
                    right.append(rightindent)
                    rightindent = 0
                # after sorting, [0] is the smallest indent of any paragraph
                left.sort()
                right.sort()

                if topindent == bottomindent:
                    valign="middle"
                elif topindent < 1:
                    valign="top"
                elif bottomindent < 1:
                    valign="bottom"
                else:
                    valign="middle"

                if left[0] < 1:
                    align = "left"
                elif right[0] < 1:
                    align = "right"
                elif left[0] > 1 and right[0] > 1:
                    align="center"
                else:
                    align="left"

                cols.append((row[index][0],row[index][1],align,valign,row[index][2]))
            rows.append(cols)
            cols = []
        return StructuredTextTable(rows,text,subs,indent=paragraph.indent)
            

# Rebind the name to a single instance of the class: from here on
# 'myDocumentWithImages' refers to that instance, and the class itself is
# no longer reachable under this name.
myDocumentWithImages = myDocumentWithImages()

#def flatten(seq):
#  """
#  Translate a nested sequence into a flat list of string-terminals.
#  We omit duplicates terminals in the process.
#  """
#  got = []
#  pending = [seq]
#  while pending:
#    cur = pending.pop(0)
#    if type(cur) == StringType:
#      if cur not in got:
#        got.append(cur)
#    else:
#      pending.extend(cur)
#  return got

def flatten(recursiveList):
    """
    Flatten a recursive list/tuple structure.

    Returns a new flat list holding, in order, every element of
    recursiveList that is not itself a list or tuple; nested lists and
    tuples (including instances of their subclasses) are expanded
    recursively. Other containers, such as strings and dicts, are kept
    as single items.
    """
    flatList = []
    for item in recursiveList:
        if isinstance(item, (list, tuple)):
            flatList.extend(flatten(item))
        else:
            flatList.append(item)
    return flatList

def _flatten2_step(L, F):
    # anything that is not exactly a list is a terminal
    if type(L) != type([]):
        return [L]
    # concatenate the flattened form of each element, left to right
    return reduce(lambda a, b, F=F: a + F(b, F), L, [])

def flatten2(l, f=_flatten2_step):
    """
    Flatten nested lists (only exact lists, not tuples) into one flat
    list; a non-list argument comes back wrapped in a one-item list.
    f is the recursive step and is called as f(item, f).
    """
    return f(l, f)

## flatten from WFN
#def flatten3(seq):
#  """Translate a nested sequence into a flat list of string-terminals.
#  We omit duplicates terminals in the process."""
#  got = []
#  pending = [seq]
#  while pending:
#    cur = pending.pop(0)
#    if type(cur) == StringType:
#      if cur not in got:
#        got.append(cur)
#    else:
#      pending.extend(cur)
#  return got

def flattenDtmlParse(i):
    """
    Roughly flatten a DTML parse structure, for estimating its size.

    Lists and tuples are expanded element by element; an object with a
    'section' attribute is replaced by the flattening of that attribute;
    an object with an 'im_self' attribute (a Python 2 bound method) is
    replaced by the flattening of the object it is bound to; anything
    else is kept as a single item. Returns a new flat list.

    Sequences are walked with a loop, so recursion depth follows the
    nesting depth of the structure rather than the length of its lists.
    """
    flatList = []
    if isinstance(i, (list, tuple)):
        for item in i:
            flatList.extend(flattenDtmlParse(item))
    elif hasattr(i,'section'):
        flatList.extend(flattenDtmlParse(i.section))
    elif hasattr(i,'im_self'):
        flatList.extend(flattenDtmlParse(i.im_self))
    else:
        flatList.append(i)
    return flatList

# Boldly taken from tres seaver's PTK code.
# Then lifted from ken manheimer's WFN code.
def parseHeadersBody( body, headers=None ):
    """
    Parse any leading 'RFC-822'-ish headers from an uploaded
    document, returning a tuple containing the headers in a dictionary
    and the stripped body.

    E.g.::

        Title: Some title
        Creator: Tres Seaver
        Format: text/plain
        X-Text-Format: structured

        Overview

        This document .....

    would be returned as::

        { 'Title' : ' Some title'
        , 'Creator' : ' Tres Seaver'
        , 'Format' : ' text/plain'
        , 'X-Text-Format': ' structured'
        }

    as the headers, plus the body, starting with 'Overview' as
    the first line (the intervening blank line is a separator).

    Details:

    - A header line is split at its first ':'; the name is used as-is
      and the value is everything after that colon, unmodified (so the
      space following the colon is kept).
    - A line without ':' after at least one header is a continuation:
      it is appended, left-stripped, to the previous header's value
      after a newline.
    - If the very first line is non-empty and has no ':', there are no
      headers: the headers passed in and the original body are returned
      unchanged.
    - One trailing carriage return is removed from every line, so CRLF
      input yields a body with plain newlines.

    Allow passing initial dictionary as headers; it is copied, not
    modified.
    """
    lines = []
    for raw in body.split( '\n' ):
        if raw[-1:] == '\r':
            raw = raw[:-1]
        lines.append( raw )

    if headers is None:
        headers = {}
    else:
        headers = headers.copy()

    i = 0           # number of header/continuation lines consumed
    hdrlist = []
    for line in lines:
        if not line:
            break   # blank separator line ends the headers
        tokens = line.split( ':' )
        if len( tokens ) > 1:
            hdrlist.append( ( tokens[0], ':'.join( tokens[1:] ) ) )
        elif i == 0:
            return headers, body     # no headers, just return those passed in.
        else:    # continuation
            last = hdrlist.pop()
            hdrlist.append( ( last[0], last[1] + '\n' + line.lstrip() ) )
        i = i + 1

    for name, value in hdrlist:
        headers[ name ] = value

    # skip the separator line at index i
    return headers, '\n'.join( lines[ i+1: ] )

#from python FAQ:
import tempfile
import os
class Popen3:
    """
    This is a deadlock-safe version of popen, that returns
    an object with errorlevel, out (a string) and err (a string).
    (capturestderr may not work under windows.)
    Example: print Popen3('grep spam','here spam').out

    The command is run through the system shell via os.system, with its
    standard output (and optionally standard input and standard error)
    redirected to temporary files, so it must come from a trusted source.

    Attributes set by the constructor:

    - errorlevel: os.system's result shifted right by 8 bits
    - out: everything the command wrote to standard output
    - err: everything written to standard error when capturestderr is
      true, otherwise the empty string
    """
    def __init__(self,command,input='',capturestderr=0):
        """
        Run command, feeding it input on stdin when input is non-empty,
        and capturing stderr when capturestderr is true.
        """
        self.out = ''
        self.err = ''
        created = []    # temp files to delete, whatever happens
        try:
            outfile = self._tempfile(created)
            command = '( %s ) > "%s"' % (command, outfile)
            if input:
                infile = self._tempfile(created, input)
                command = command + ' <"%s"' % infile
            if capturestderr:
                errfile = self._tempfile(created)
                command = command + ' 2>"%s"' % errfile
            self.errorlevel = os.system(command) >> 8
            self.out = self._contents(outfile)
            if capturestderr:
                self.err = self._contents(errfile)
        finally:
            for path in created:
                try:
                    os.remove(path)
                except OSError:
                    pass

    def _tempfile(self, created, data=None):
        """Securely create a temp file, optionally holding data, and
        record its path in created; returns the path."""
        # mkstemp creates the file atomically, unlike the racy mktemp
        fd, path = tempfile.mkstemp()
        created.append(path)
        if data is None:
            os.close(fd)
        else:
            f = os.fdopen(fd, 'w')
            try:
                f.write(data)
            finally:
                f.close()
        return path

    def _contents(self, path):
        """Return the whole contents of the file at path."""
        f = open(path, 'r')
        try:
            return f.read()
        finally:
            f.close()

# avoid IssueNo0536 memory leaks
def formattedTraceback():
    """
    Return the exception currently being handled, formatted by
    traceback.format_exception, as a single string.
    """
    exc_type, exc_value, tb = sys.exc_info()
    try:
        return ''.join(traceback.format_exception(exc_type, exc_value, tb))
    finally:
        del tb  # Clean up circular reference

