{-# OPTIONS -fffi -cpp #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  FastPackedString
-- Copyright   :  (c) The University of Glasgow 2001,
--                    David Roundy 2003-2005
-- License : GPL (I'm happy to also license this file BSD style but don't
--           want to bother distributing two license files with darcs.
--
-- Maintainer  :  droundy@abridgegame.org
-- Stability   :  experimental
-- Portability :  portable
--
-- An efficient implementation of strings.
--
-----------------------------------------------------------------------------

-- Original GHC implementation by Bryan O\'Sullivan,
-- rewritten to use UArray by Simon Marlow.
-- rewritten to support slices and use ForeignPtr by David Roundy

module FastPackedString (
        -- * The @PackedString@ type
        PackedString,    -- abstract, instances: Eq, Ord, Show, Typeable
        unsafeWithInternals, -- :: PackedString -> (Ptr Word8 -> Int -> IO a) -> IO a
        constructPS,

         -- * Converting to and from @PackedString@s
    generatePS,  -- :: Int -> (Ptr Word8 -> Int -> IO Int) -> IO PackedString
        packString,  -- :: String -> PackedString
        mallocedCString2PS, -- :: CString -> IO PackedString
        withCStringPS, -- :: PackedString -> (CString -> IO a) -> IO a
        packWords,   -- :: [Word8] -> PackedString
        unpackPS,    -- :: PackedString -> String
        unpackWords, -- :: PackedString -> [Word8]
        unpackPSfromUTF8, -- :: PackedString -> String

        -- * I\/O with @PackedString@s
        hPutPS,      -- :: Handle -> PackedString -> IO ()
        hGetPS,      -- :: Handle -> Int -> IO PackedString
    hGetContentsPS, -- :: Handle -> IO PackedString
        readFilePS,  -- :: FilePath -> IO PackedString
        writeFilePS, -- :: FilePath -> PackedString -> IO ()
        gzReadFilePS,-- :: FilePath -> IO PackedString
        gzReadFileLazily,  -- :: FilePath -> IO LazyFile
        LazyFile(..),
        mmapFilePS,  -- :: FilePath -> IO PackedString
        gzWriteFilePS,  -- :: FilePath -> PackedString -> IO ()
        gzWriteFilePSs, -- :: FilePath -> [PackedString] -> IO ()

        -- * List-like manipulation functions
        nilPS,       -- :: PackedString
        consPS,      -- :: Char -> PackedString -> PackedString
        headPS,      -- :: PackedString -> Char
        tailPS,      -- :: PackedString -> PackedString
        initPS,      -- :: PackedString -> PackedString
        lastPS,      -- :: PackedString -> Char
        nullPS,      -- :: PackedString -> Bool
        appendPS,    -- :: PackedString -> PackedString -> PackedString
        lengthPS,    -- :: PackedString -> Int
        indexPS,     -- :: PackedString -> Int -> Char
        indexPSW,    -- :: PackedString -> Int -> Word8
        mapPS,       -- :: (Char -> Char) -> PackedString -> PackedString
        --filterPS,    -- :: (Char -> Bool) -> PackedString -> PackedString
        reversePS,   -- :: PackedString -> PackedString
        concatPS,    -- :: [PackedString] -> PackedString
        concatLenPS, -- :: Int -> [PackedString] -> PackedString
        elemPS,      -- :: Char -> PackedString -> Bool
        takePS,      -- :: Int -> PackedString -> PackedString
        dropPS,      -- :: Int -> PackedString -> PackedString
        splitAtPS,   -- :: Int -> PackedString -> (PackedString, PackedString)

        foldlPS,     -- :: (a -> Char -> a) -> a -> PackedString -> a
        foldrPS,     -- :: (Char -> a -> a) -> a -> PackedString -> a
        anyPS,
        takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
        dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
        dropWhitePS, -- :: PackedString -> PackedString
        breakWhitePS,-- :: PackedString -> Maybe (PackedString,PackedString)
        spanPS,      -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
        spanEndPS,   -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
        breakPS,     -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
        breakOnPS,   -- :: Char -> PackedString -> (PackedString, PackedString)
        linesPS,     -- :: PackedString -> [PackedString]
        unlinesPS,     -- :: [PackedString] -> PackedString
        findPS,
        hashPS,

        wordsPS,     -- :: PackedString -> [PackedString]
        splitPS,     -- :: Char -> PackedString -> [PackedString]
        splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString]

--      joinPS,      -- :: PackedString -> [PackedString] -> PackedString

        breakFirstPS,-- :: Char -> PackedString -> Maybe (PackedString,PackedString)
        breakLastPS, -- :: Char -> PackedString -> Maybe (PackedString,PackedString)
        breakFirstPairPS,
        substrPS,    -- :: PackedString -> PackedString -> Maybe Int
        readIntPS,   -- :: PackedString -> Maybe (Int, PackedString)
        is_funky,    -- :: PackedString -> Bool
        fromHex2PS,  -- :: PackedString -> PackedString
        fromPS2Hex,  -- :: PackedString -> PackedString
        betweenLinesPS,--  :: PackedString -> PackedString -> PackedString -> Maybe (PackedString)
        break_after_nth_newline,
        break_before_nth_newline,
    ) where

import IO ( Handle, hClose, hFileSize, IOMode(ReadMode,WriteMode),
            hSeek, SeekMode(SeekFromEnd), hGetChar )
import Control.Exception( bracket )

import Autoconf ( use_mmap )

import System.Mem ( performGC )
import Foreign.Storable ( peekElemOff, peek, poke )
import Ptr ( nullPtr, plusPtr, minusPtr, Ptr )
import Foreign.Marshal.Alloc ( free )
import Foreign.Marshal.Array ( pokeArray, mallocArray, reallocArray,
                               peekArray, advancePtr )
import Foreign.Marshal.Utils ( with )
import Foreign.C.String
import Foreign.C.Types ( CLong, CInt, CSize, )
import Data.Bits ( rotateL )
import Data.Char
import Data.Word
import Int ( Int32 )
import Monad ( liftM, when )

import System.IO.Unsafe ( unsafePerformIO, unsafeInterleaveIO )
import System.IO ( hPutBuf, hGetBuf )

import System.IO ( openBinaryFile, )

import Foreign.Ptr ( castPtr )
import Foreign.ForeignPtr( ForeignPtr, withForeignPtr, mallocForeignPtrArray,
                           newForeignPtr )
#if defined(__GLASGOW_HASKELL__)
import qualified Foreign.Concurrent as FC ( newForeignPtr )
import System.Posix ( handleToFd )
#endif

#ifdef DEBUG_PS
import Foreign.ForeignPtr ( addForeignPtrFinalizer )
#endif
import Foreign.Ptr ( FunPtr )

debugForeignPtr :: ForeignPtr a -> String -> IO ()
#ifdef DEBUG_PS
foreign import ccall unsafe "static fpstring.h debug_alloc" debug_alloc
    :: Ptr a -> CString -> IO ()
foreign import ccall unsafe "static fpstring.h & debug_free" debug_free
    :: FunPtr (Ptr a -> IO ())
debugForeignPtr fp n =
    withCString n $ \cname-> withForeignPtr fp $ \p->
    do debug_alloc p cname
       addForeignPtrFinalizer debug_free fp
#else
debugForeignPtr _ _ = return ()
#endif

foreign import ccall unsafe "static stdio.h &free" c_free
    :: FunPtr (Ptr Word8 -> IO ())
mallocForeignPtr :: Int -> IO (ForeignPtr Word8)
mallocForeignPtr l
    = do when (l > 1000000) performGC
         mallocForeignPtrArray l

----------------------------------------------------------------------------
--A way of creating ForeignPtrs outside the IO monad (althogh it still
--isn't entirely "safe", but at least it's convenient.

createPS :: Int -> (Ptr Word8 -> IO ()) -> PackedString
createPS l write_ptr =
    unsafePerformIO $ do fp <- mallocForeignPtr l
                         debugForeignPtr fp "createPS"
                         withForeignPtr fp $ \p -> write_ptr p
                         return $ PS fp 0 l

-- -----------------------------------------------------------------------------
-- PackedString type declaration

-- | A space-efficient representation of a 'String', which supports various
-- efficient operations.  A 'PackedString' contains full Unicode 'Char's.
data PackedString = PS !(ForeignPtr Word8) !Int !Int

-- -----------------------------------------------------------------------------
-- unsafeWithInternals

-- | Do something with the internals of a PackedString. Beware of
-- altering the contents!
unsafeWithInternals :: PackedString -> (Ptr Word8 -> Int -> IO a) -> IO a
unsafeWithInternals (PS fp s l) f
 = withForeignPtr fp $ \p -> f (p `plusPtr` s) l

constructPS
  :: (Ptr Word8) -> Int -> IO () -> IO PackedString
constructPS p l f = do fp <- FC.newForeignPtr p f
                       return $ PS fp 0 l

{-# INLINE (!) #-}
(!) :: PackedString -> Int -> Word8
(PS x s _l) ! i
    = unsafePerformIO $ withForeignPtr x $ \p -> peekElemOff p (s+i)
  -- | i < 0 = error "Can't access negative element in PackedString."
  -- | i >= l = error "Out of range element in PackedString."
  -- | otherwise = unsafePerformIO $ withForeignPtr x $ \p -> peekElemOff p (s+i)

-- -----------------------------------------------------------------------------
-- generatePS

-- | Given the maximum size needed and a function to make the contents
-- of a PackedString, generatePS makes the PackedString. The generating
-- function is required to return the actual size (<= the maximum size).

generatePS :: Int -> (Ptr Word8 -> IO Int) -> IO PackedString
generatePS i f
 = do p <- mallocArray i
      i' <- f p
      p' <- reallocArray p i'
      fp <- newForeignPtr c_free p'
      return $ PS fp 0 i'

instance Eq PackedString where
   (==) = eqPS

foreign import ccall unsafe "static string.h memcmp" c_memcmp
    :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int

{-# INLINE eqPS #-}
eqPS :: PackedString -> PackedString -> Bool
eqPS a b = (comparePS a b) == EQ

instance Ord PackedString where
    compare = comparePS

-- | 'comparePS' provides an 'Ordering' for 'PackedStrings' supporting slices.
comparePS :: PackedString -> PackedString -> Ordering
comparePS (PS _ _ 0) (PS _ _ 0) = EQ    -- short cut for empty strings
comparePS (PS x1 s1 l1) (PS x2 s2 l2) = unsafePerformIO $ 
    withForeignPtr x1 $ \p1 -> 
        withForeignPtr x2 $ \p2 -> do 
            i <- c_memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
            return $ case i `compare` 0 of
                EQ  -> l1 `compare` l2
                x   -> x

--instance Read PackedString: ToDo

instance Show PackedString where
    showsPrec p ps r = showsPrec p (unpackPS ps) r

-- -----------------------------------------------------------------------------
-- Constructor functions

nilPS :: PackedString
nilPS = unsafePerformIO $ do fp <- mallocForeignPtr 1
                             debugForeignPtr fp "nilPS"
                             return $ PS fp 0 0

consPS :: Char -> PackedString -> PackedString
consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better

-- | Convert a 'String' into a 'PackedString'
packString :: String -> PackedString
packString str = createPS (length str) $ \p -> pokeArray p $ map c2w str
packWords :: [Word8] -> PackedString
packWords s = createPS (length s) $ \p -> pokeArray p s

{-# INLINE w2c #-}
w2c :: Word8 -> Char
w2c = chr . fromIntegral
{-# INLINE c2w #-}
c2w :: Char -> Word8
c2w = fromIntegral . ord

foreign import ccall unsafe "static string.h strlen" c_strlen
    :: CString -> IO CInt
foreign import ccall unsafe "static stdlib.h malloc" c_malloc
    :: CInt -> IO (Ptr Word8)
foreign import ccall unsafe "static stdlib.h free" free_cstring
    :: CString -> IO ()


mallocedCString2PS :: CString -> IO PackedString
mallocedCString2PS cs = do fp <- newForeignPtr c_free (castPtr cs)
                           l <- c_strlen cs
                           return $ PS fp 0 (fromIntegral l)

withCStringPS :: PackedString -> (CString -> IO a) -> IO a
withCStringPS (PS ps s l) = bracket alloc free_cstring
    where alloc = withForeignPtr ps $ \p ->
                  do buf <- c_malloc (fromIntegral l+1)
                     c_memcpy (castPtr buf) (castPtr p `plusPtr` s)
                                  (fromIntegral l)
                     poke (buf `plusPtr` l) (0::Word8)
                     return $ castPtr buf

-- -----------------------------------------------------------------------------
-- Destructor functions (taking PackedStrings apart)

-- | Convert a 'PackedString' into a 'String'
unpackPS :: PackedString -> String
unpackPS (PS ps s l)
 = map w2c $ unsafePerformIO
           $ withForeignPtr ps $ \p -> peekArray l (p `plusPtr` s)
{-
unpackPS :: PackedString -> String
unpackPS theps@(PS ps s l)
 | l >= 1024 = map w2c (unsafePerformIO (withForeignPtr ps $
                                        \p -> peekArray 1024 (p `plusPtr` s)))
            ++ unpackPS (PS ps (s + 1024) (l - 1024))
 | l >= 128 = map w2c (unsafePerformIO (withForeignPtr ps $
                                       \p -> peekArray 128 (p `plusPtr` s)))
           ++ unpackPS (PS ps (s + 128) (l - 128))
 | l > 0 = unsafeHeadPS theps : unpackPS (unsafeTailPS theps)
 | otherwise = ""
-}
{-
unpackPS :: PackedString -> String
unpackPS theps = if nullPS theps then []
                 else unsafeHeadPS theps : unpackPS (unsafeTailPS theps)
-}
unpackWords :: PackedString -> [Word8]
unpackWords ps@(PS x s _) =
    if nullPS ps then []
    else (unsafePerformIO $ withForeignPtr x $ \p -> peekElemOff p s)
             : unpackWords (unsafeTailPS ps)

unpackPSfromUTF8 :: PackedString -> String
unpackPSfromUTF8 (PS _ _ 0) = ""
unpackPSfromUTF8 (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p->
    do outbuf <- mallocArray l
       lout <- utf8_to_ints outbuf (p `plusPtr` s) l
       when (lout < 0) $ error "Bad UTF8!"
       str <- (map chr) `liftM` peekArray lout outbuf
       free outbuf
       return str

foreign import ccall unsafe "static fpstring.h utf8_to_ints" utf8_to_ints
    :: Ptr Int -> Ptr Word8 -> Int -> IO Int

-- -----------------------------------------------------------------------------
-- List-mimicking functions for PackedStrings

{-# INLINE lengthPS #-}
lengthPS :: PackedString -> Int
lengthPS (PS _ _ l) = l

{-# INLINE indexPSW #-}
indexPSW :: PackedString -> Int -> Word8
indexPSW theps i | i < 0 = error "Negative index in indexPS"
                 | i >= lengthPS theps = error "Out of bounds in indexPS"
                 | otherwise = theps ! i

{-# INLINE indexPS #-}
indexPS :: PackedString -> Int -> Char
indexPS theps i | i < 0 = error "Negative index in indexPS"
                | i >= lengthPS theps = error "Out of bounds in indexPS"
                | otherwise = w2c $ theps ! i

{-# INLINE lastPS #-}
lastPS :: PackedString -> Char
lastPS ps@(PS x s l) -- ps ! 0 is inlined manually to eliminate a (+0)
  | nullPS ps = error "FastPackedString.lastPS: last []"
  | otherwise  = w2c $ unsafePerformIO $ withForeignPtr x $
                 \p -> peekElemOff p (s+l-1)

{-# INLINE headPS #-}
headPS :: PackedString -> Char
headPS ps@(PS x s _) -- ps ! 0 is inlined manually to eliminate a (+0)
  | nullPS ps = error "FastPackedString.headPS: head []"
  | otherwise  = w2c $ unsafePerformIO $ withForeignPtr x $ \p -> peekElemOff p s

{-# INLINE unsafeHeadPS #-}
unsafeHeadPS :: PackedString -> Char
unsafeHeadPS (PS x s _) -- ps ! 0 is inlined manually to eliminate a (+0)
  = w2c $ unsafePerformIO $ withForeignPtr x $ \p -> peekElemOff p s

-- | Extract the elements after the head of a packed string, which must be non-empty.
{-# INLINE tailPS #-}
tailPS :: PackedString -> PackedString
tailPS (PS p s l) 
    | l <= 0    = error ("FastPackedString.tailPS: empty list")
    | l == 1    = nilPS                                                                    
    | otherwise = PS p (s+1) (l-1)

-- | Return all the elements of a packed string except the last one.
-- The string must be finite and non-empty.
{-# INLINE initPS #-}
initPS :: PackedString -> PackedString
initPS (PS p s l) 
    | l <= 0    = error ("FastPackedString.initPS: empty list")
    | l == 1    = nilPS                                                                    
    | otherwise = PS p s (l-1)                                                          

{-# INLINE unsafeTailPS #-}
unsafeTailPS :: PackedString -> PackedString
unsafeTailPS (PS ps s l)
  | l == 1 = nilPS
  | otherwise  = PS ps (s+1) (l-1)

{-# INLINE nullPS #-}
nullPS :: PackedString -> Bool
nullPS (PS _ _ l) = l == 0

appendPS :: PackedString -> PackedString -> PackedString
appendPS xs ys
  | nullPS xs = ys
  | nullPS ys = xs
  | otherwise  = concatPS [xs,ys]

mapPS :: (Char -> Char) -> PackedString -> PackedString
mapPS func (PS ps s l) = createPS l $ \p-> withForeignPtr ps $
                         \f-> mint (f `plusPtr` s) p l
    where mint :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
          mint _ _ 0 = return ()
          mint f t len = do val <- peek f
                            poke t $ c2w $ func $ w2c val
                            mint (f `plusPtr` 1) (t `plusPtr` 1) (len - 1)

--filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
--filterPS pred ps = packString (filter pred (unpackPS ps))

foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
foldlPS f b ps = foldl f b (unpackPS ps)

foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
foldrPS f v ps = foldr f v (unpackPS ps)

{-# INLINE takePS #-}
takePS :: Int -> PackedString -> PackedString
takePS n ps@(PS x s _) = if n >= lengthPS ps then ps
                         else PS x s n -- substrPS ps 0 (n - 1)

{-# INLINE dropPS #-}
dropPS  :: Int -> PackedString -> PackedString
dropPS n ps@(PS x s l)
    | n >= lengthPS ps = nilPS
    | otherwise = PS x (s+n) (l-n) -- substrPS ps n (lengthPS ps - 1)

{-# INLINE splitAtPS #-}
splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
splitAtPS  n ps  = (takePS n ps, dropPS n ps)

-- This must be fast, it's used heavily in Printer. -- jch
anyPS :: (Char -> Bool) -> PackedString -> Bool
anyPS f (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \ptr ->
        lookat (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
    where lookat :: Ptr Word8 -> Ptr Word8 -> IO Bool
          lookat p st | p == st = return False
                      | otherwise = do w <- peek p
                                       if f $ w2c w
                                          then return True
                                          else lookat (p `plusPtr` 1) st

findWhenPS :: (Char -> Bool) -> PackedString -> Int
findWhenPS f ps = seq f $
    if nullPS ps then 0
    else if f $ unsafeHeadPS ps then 0
         else 1 + findWhenPS f (unsafeTailPS ps)

findFromEndUntilPS :: (Char -> Bool) -> PackedString -> Int
findFromEndUntilPS f ps@(PS x s l) = seq f $
    if nullPS ps then 0
    else if f $ lastPS ps then l
         else findFromEndUntilPS f (PS x s (l-1))

{-# INLINE takeWhilePS #-}
takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
takeWhilePS f ps = seq f $ takePS (findWhenPS (not . f) ps) ps

{-# INLINE dropWhilePS #-}
dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
dropWhilePS f ps = seq f $ dropPS (findWhenPS (not . f) ps) ps

{-# INLINE dropWhitePS #-}
dropWhitePS :: PackedString -> PackedString
dropWhitePS (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p->
    do i <- first_nonwhite (p `plusPtr` s) l
       return $ if i == l then nilPS
                else PS x (s+i) (l-i)

foreign import ccall unsafe "fpstring.h first_nonwhite" first_nonwhite
    :: Ptr Word8 -> Int -> IO Int
foreign import ccall unsafe "fpstring.h first_white" first_white
    :: Ptr Word8 -> Int -> IO Int

{-# INLINE is_funky #-}
is_funky :: PackedString -> Bool
is_funky (PS x s l) = unsafePerformIO $ withForeignPtr x $ \p->
                      (/=0) `liftM` has_funky_char (p `plusPtr` s) l

foreign import ccall unsafe "fpstring.h has_funky_char" has_funky_char
    :: Ptr Word8 -> Int -> IO Int


elemPS :: Char -> PackedString -> Bool
elemPS c ps = c `elem` unpackPS ps

spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
spanPS  p ps = breakPS (not . p) ps

spanEndPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
spanEndPS  p ps = splitAtPS (findFromEndUntilPS (not.p) ps) ps

breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
breakPS p ps = case findWhenPS p ps of
               n -> (takePS n ps, dropPS n ps)

{-# INLINE breakOnPS #-}
breakOnPS :: Char -> PackedString -> (PackedString, PackedString)
breakOnPS c p = case findPS c p of
                Nothing -> (p,nilPS)
                Just n -> (takePS n p, dropPS n p)

{-# INLINE hashPS #-}
hashPS :: PackedString -> Int32
hashPS (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p->
    do hash (p `plusPtr` s) l

hash :: Ptr Word8 -> Int -> IO Int32
hash ptr len = f (0 :: Int32) ptr len
 where f h _ 0 = return h
       f h p n = do x <- peek p
                    let h' =  (fromIntegral x) + (rotateL h 8)
                    f h' (p `advancePtr` 1) (n-1)

{-# INLINE substrPS #-}
substrPS :: PackedString -> PackedString -> Maybe Int
substrPS tok str
    | nullPS tok = Just 0
    | lengthPS tok > lengthPS str = Nothing
    | otherwise = do n <- findPS (headPS tok) str
                     let ttok = tailPS tok
                         reststr = dropPS (n+1) str
                     if ttok == takePS (lengthPS ttok) reststr
                        then Just n
                        else ((n+1)+) `fmap` substrPS tok reststr

{-# INLINE breakWhitePS #-}
breakWhitePS :: PackedString -> (PackedString,PackedString)
breakWhitePS (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p->
    do i <- first_white (p `plusPtr` s) l
       if i == 0 then return (nilPS, PS x s l)
                 else if i == l
                      then return (PS x s l, nilPS)
                      else return (PS x s i, PS x (s+i) (l-i))

{-# INLINE breakFirstPS #-}
breakFirstPS :: Char -> PackedString -> Maybe (PackedString,PackedString)
breakFirstPS c p = case findPS c p of
                   Nothing -> Nothing
                   Just n -> Just (takePS n p, dropPS (n+1) p)

{-# INLINE breakLastPS #-}
breakLastPS :: Char -> PackedString -> Maybe (PackedString,PackedString)
breakLastPS c p = case findLastPS c p of
                  Nothing -> Nothing
                  Just n -> Just (takePS n p, dropPS (n+1) p)

breakFirstPairPS ::
    Char -> Char -> PackedString -> Maybe (PackedString, PackedString)
breakFirstPairPS c1 c2 ps =
    let w1 = c2w c1
        w2 = c2w c2
    in case [ m | m <- [0..(lengthPS ps) - 2],
                  (ps ! m) == w1, (ps ! (m + 1)) == w2 ] of
       [] -> Nothing
       (n:_) -> Just (takePS (n+1) ps, dropPS (n+1) ps)

{-# INLINE linesPS #-}
linesPS :: PackedString -> [PackedString]
linesPS ps = case wfindPS (c2w '\n') ps of
             Nothing -> [ps]
             Just n -> takePS n ps : linesPS (dropPS (n+1) ps)

unlinesPS :: [PackedString] -> PackedString
unlinesPS ss = concatPS $ intersperse_newlines ss
    where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s)
          intersperse_newlines s = s
          newline = packString "\n"

wordsPS :: PackedString -> [PackedString]
wordsPS ps = splitWithPS isSpace ps

reversePS :: PackedString -> PackedString
reversePS ps = packString (reverse (unpackPS ps))

foreign import ccall unsafe "static string.h memcpy" c_memcpy
    :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()

concatPS :: [PackedString] -> PackedString
concatPS [] = nilPS
concatPS [ps] = ps
concatPS xs
 = unsafePerformIO $
   do let start_size = 1024
      p <- mallocArray start_size
      f p 0 1024 xs
    where f ptr len _ [] = do ptr' <- reallocArray ptr len
                              fp <- newForeignPtr c_free ptr'
                              return $ PS fp 0 len
          f ptr len to_go pss@(PS p s l:pss')
           | l <= to_go = do withForeignPtr p $ \pf ->
                                 c_memcpy (ptr `advancePtr` len)
                                          (pf `advancePtr` s) l
                             f ptr (len + l) (to_go - l) pss'
           | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
                            ptr' <- reallocArray ptr new_total
                            f ptr' len (new_total - len) pss

-- -----------------------------------------------------------------------------
-- concatLenPS

-- | Camse as concatPS only you tell it how big the result will be.
-- If you lie thenBad Things will happen.

concatLenPS :: Int -> [PackedString] -> PackedString
concatLenPS n [] = n `seq` nilPS
concatLenPS _ [ps] = ps
concatLenPS total_length pss = createPS total_length $ \p-> cpPSs p pss
    where cpPSs :: Ptr Word8 -> [PackedString] -> IO ()
          cpPSs p (PS x s l:rest) = do withForeignPtr x $ \pf ->
                                          c_memcpy p (pf `plusPtr` s) l
                                       cpPSs (p `plusPtr` l) rest
          cpPSs _ [] = return ()

{-# INLINE findPS #-}
findPS :: Char -> PackedString -> Maybe Int
findPS c ps = wfindPS (c2w c) ps

{-# INLINE wfindPS #-}
wfindPS :: Word8 -> PackedString -> Maybe Int
wfindPS c (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p->
    let p' = p `plusPtr` s
        q = memchr p' (fromIntegral c) (fromIntegral l)
    in return $ if q == nullPtr then Nothing
                                else Just (q `minusPtr` p')

foreign import ccall unsafe "string.h memchr" memchr
    :: Ptr Word8 -> CInt -> CSize -> Ptr Word8

{-# INLINE findLastPS #-}
findLastPS :: Char -> PackedString -> Maybe Int
findLastPS c ps = wfindLastPS (c2w c) ps

{-# INLINE wfindLastPS #-}
wfindLastPS :: Word8 -> PackedString -> Maybe Int
wfindLastPS c (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p->
                    findit (-1) (p `plusPtr` s) 0
    where findit h p i = if i >= l
                         then if h < 0
                              then return Nothing
                              else return $ Just h
                         else do here <- peekElemOff p i
                                 if c == here
                                    then findit i p (i+1)
                                    else findit h p (i+1)

------------------------------------------------------------

{-# INLINE splitPS #-}
splitPS :: Char -> PackedString -> [PackedString]
splitPS c = wsplitPS (c2w c)
{-# INLINE wsplitPS #-}
wsplitPS :: Word8 -> PackedString -> [PackedString]
wsplitPS c ps = case wfindPS c ps of
                Nothing -> if nullPS ps then [] else [ps]
                Just n -> takePS n ps : wsplitPS c (dropPS (n+1) ps)

splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
splitWithPS f ps =
    case [ m | m <- [0..lengthPS ps-1], f (w2c (ps ! m)) ] of
    [] -> if nullPS ps then [] else [ps]
    (n:_) -> takePS n ps : splitWithPS f (dropPS (n+1) ps)

-- -----------------------------------------------------------------------------
-- Local utility functions

{-
-- The definition of @_substrPS@ is essentially:
-- @take (end - begin + 1) (drop begin str)@.

substrPS :: PackedString -> Int -> Int -> PackedString
substrPS (PS ps s _) begin end = PS ps (s+begin) (1+end-begin)
--substrPS (PS ps s l) begin end
--    | end <= l && begin <= end && begin >= 0 = PS ps (s+begin) (1+end-begin)
--    | otherwise = bug "substrPS out of bounds"
-}

-- -----------------------------------------------------------------------------
-- hPutPS

-- | Outputs a 'PackedString' to the specified 'Handle'.
--
-- NOTE: the representation of the 'PackedString' in the file is assumed to
-- be in the ISO-8859-1 encoding.  In other words, only the least signficant
-- byte is taken from each character in the 'PackedString'.
hPutPS :: Handle -> PackedString -> IO ()
hPutPS _ (PS _ _ 0) = return ()
hPutPS h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
hPutPS h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l

-- -----------------------------------------------------------------------------
-- hGetPS

-- | Read a 'PackedString' directly from the specified 'Handle'.  This
-- is far more efficient than reading the characters into a 'String'
-- and then using 'packString'.
--
-- NOTE: as with 'hPutPS', the string representation in the file is
-- assumed to be ISO-8859-1.
hGetPS :: Handle -> Int -> IO PackedString
hGetPS _ 0 = return nilPS
hGetPS h i = do fp <- mallocForeignPtr i
                debugForeignPtr fp $ "hGetPS "++show h
                l <- withForeignPtr fp $ \p-> hGetBuf h p i
                return $ PS fp 0 l

-- -----------------------------------------------------------------------------
-- hGetContentsPS

-- | Read entire handle contents into a 'PackedString'.
--
-- NOTE: as with 'hGetPS', the string representation in the file is
-- assumed to be ISO-8859-1.

hGetContentsPS :: Handle -> IO PackedString
hGetContentsPS h
 = do let start_size = 1024
      p <- mallocArray start_size
      i <- hGetBuf h p start_size
      if i < start_size
       then do p' <- reallocArray p i
               fp <- newForeignPtr c_free p'
               return $ PS fp 0 i
       else f p start_size
    where f p s = do let s' = 2 * s
                     p' <- reallocArray p s'
                     i <- hGetBuf h (p' `plusPtr` s) s
                     if i < s then do let i' = s + i
                                      p'' <- reallocArray p' i'
                                      fp <- newForeignPtr c_free p''
                                      return $ PS fp 0 i'
                              else f p' s'

-- -----------------------------------------------------------------------------
-- readFilePS

-- | Read an entire file directly into a 'PackedString'.  This is far more
-- efficient than reading the characters into a 'String' and then using
-- 'packString'.  It also may be more efficient than opening the file and
-- reading it using hGetPS.
--
-- NOTE: as with 'hGetPS', the string representation in the file is
-- assumed to be ISO-8859-1.

readFilePS :: FilePath -> IO PackedString
readFilePS f = do h <- openBinaryFile f ReadMode
                  l <- hFileSize h
                  s <- hGetPS h $ fromIntegral l
                  hClose h
                  return s

-- -----------------------------------------------------------------------------
-- writeFilePS

-- | Write a 'PackedString' to a file.

writeFilePS :: FilePath -> PackedString -> IO ()
writeFilePS f ps = do h <- openBinaryFile f WriteMode
                      hPutPS h ps
                      hClose h

-- -----------------------------------------------------------------------------
-- gzReadFilePS

-- | Read an entire file, which may or may not be gzip compressed, directly
-- into a 'PackedString'.

foreign import ccall unsafe "static zlib.h gzopen" c_gzopen
    :: CString -> CString -> IO (Ptr ())
foreign import ccall unsafe "static zlib.h gzclose" c_gzclose
    :: Ptr () -> IO ()
foreign import ccall unsafe "static zlib.h gzread" c_gzread
    :: Ptr () -> Ptr Word8 -> Int -> IO Int
foreign import ccall unsafe "static zlib.h gzwrite" c_gzwrite
    :: Ptr () -> Ptr Word8 -> Int -> IO Int

gzReadFilePS :: FilePath -> IO PackedString
gzReadFilePS f = do
    h <- openBinaryFile f ReadMode
    header <- hGetPS h 2
    if header /= packString "\31\139"
       then do hClose h
               mmapFilePS f
       else do hSeek h SeekFromEnd (-4)
               len <- hGetLittleEndInt h
               hClose h
               withCString f $ \fstr-> withCString "rb" $ \rb-> do
                 gzf <- c_gzopen fstr rb
                 when (gzf == nullPtr) $ fail $ "problem opening file "++f
                 fp <- mallocForeignPtr len
                 debugForeignPtr fp $ "gzReadFilePS "++f
                 lread <- withForeignPtr fp $ \p -> c_gzread gzf p len
                 c_gzclose gzf
                 when (lread /= len) $ fail $ "problem gzreading file "++f
                 return $ PS fp 0 len

data LazyFile = LazyString String
              | MMappedPackedString PackedString
              | LazyPackedStrings [PackedString]

gzReadFileLazily :: FilePath -> IO LazyFile
gzReadFileLazily f = do
    h <- openBinaryFile f ReadMode
    header <- hGetPS h 2
    if header == packString "\31\139" then
        do hClose h
           withCString f $ \fstr-> withCString "rb" $ \rb-> do
               gzf <- c_gzopen fstr rb
               when (gzf == nullPtr) $ fail $ "problem opening file "++f
               let read_rest = do
                       -- We might be making too big a fp here
                       fp <- mallocForeignPtr blocksize
                       debugForeignPtr fp $ "gzReadFileLazily "++f
                       lread <- withForeignPtr fp
                              $ \p -> c_gzread gzf p blocksize
                       case lread of
                           0 -> do c_gzclose gzf
                                   return []
                           -1 -> fail $ "problem gzreading file "++f
                           l | l < blocksize -> do c_gzclose gzf
                                                   return [PS fp 0 l]
                           l -> do rest <- unsafeInterleaveIO read_rest
                                   return (PS fp 0 l:rest)
               liftM LazyPackedStrings read_rest
#if defined(__GLASGOW_HASKELL__)
        else if use_mmap then
            do hClose h
               liftM MMappedPackedString (mmapFilePS f)
#endif
        else do let read_rest = do
                        -- We might be making too big a fp here
                        fp <- mallocForeignPtr blocksize
                        debugForeignPtr fp $ "gzReadFileLazily "++f
                        lread <- withForeignPtr fp
                               $ \p -> hGetBuf h p blocksize
                        case lread of
                            0 -> return []
                            l -> do rest <- unsafeInterleaveIO read_rest
                                    return (PS fp 0 l:rest)
                rest <- unsafeInterleaveIO read_rest
                return $ LazyPackedStrings (header:rest)
    where blocksize = 1024

hGetLittleEndInt :: Handle -> IO Int
hGetLittleEndInt h = do
    b1 <- ord `liftM` hGetChar h
    b2 <- ord `liftM` hGetChar h
    b3 <- ord `liftM` hGetChar h
    b4 <- ord `liftM` hGetChar h
    return $ b1 + 256*b2 + 65536*b3 + 16777216*b4

gzWriteFilePS :: FilePath -> PackedString -> IO ()
gzWriteFilePS f ps = gzWriteFilePSs f [ps]

gzWriteFilePSs :: FilePath -> [PackedString] -> IO ()
gzWriteFilePSs f pss  =
    withCString f $ \fstr -> withCString "wb" $ \wb -> do
    gzf <- c_gzopen fstr wb
    when (gzf == nullPtr) $ fail $ "problem gzopening file for write: "++f
    mapM_ (gzWriteToGzf gzf) pss `catch`
              \_ -> fail $ "problem gzwriting file: "++f
    c_gzclose gzf

gzWriteToGzf :: Ptr () -> PackedString -> IO ()
gzWriteToGzf gzf (PS x s l) = do
    lw <- withForeignPtr x $ \p -> c_gzwrite gzf (p `plusPtr` s) l
    when (lw /= l) $ fail $ "problem in gzWriteToGzf"

-- -----------------------------------------------------------------------------
-- mmapFilePS

-- | Like readFilePS, this reads an entire file directly into a
-- 'PackedString', but it is even more efficient.  It involves directly
-- mapping the file to memory.  This has the advantage that the contents of
-- the file never need to be copied.  Also, under memory pressure the page
-- may simply be discarded, wile in the case of readFilePS it would need to
-- be written to swap.  If you read many small files, mmapFilePS will be
-- less memory-efficient than readFilePS, since each mmapFilePS takes up a
-- separate page of memory.  Also, you can run into bus errors if the file
-- is modified.  NOTE: as with 'readFilePS', the string representation in
-- the file is assumed to be ISO-8859-1.

mmapFilePS :: FilePath -> IO PackedString
mmapFilePS f = if use_mmap
               then do (fp,l) <- mmap f
                       return $ PS fp 0 l
               else readFilePS f

#if defined(__GLASGOW_HASKELL__)
foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
    :: Int -> Int -> IO (Ptr Word8)
foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
    :: Ptr Word8 -> Int -> IO Int
foreign import ccall unsafe "static unistd.h close" c_close
    :: Int -> IO Int
#endif

mmap :: FilePath -> IO (ForeignPtr Word8, Int)
mmap f = do
    h <- openBinaryFile f ReadMode
    l <- fromIntegral `liftM` hFileSize h
    -- Don't bother mmaping small files because each mmapped file takes up
    -- at least one full VM block.
    if l < mmap_limit
       then do thefp <- mallocForeignPtr l
               debugForeignPtr thefp $ "mmap short file "++f
               withForeignPtr thefp $ \p-> hGetBuf h p l
               hClose h
               return (thefp, l)
       else do
#if defined(__GLASGOW_HASKELL__)
               fd <- fromIntegral `liftM` handleToFd h
               p <- my_mmap l fd
               fp <- if p == nullPtr
                     then
#else
               fp <-
#endif
                          do thefp <- mallocForeignPtr l
                             debugForeignPtr thefp $ "mmap short file "++f
                             withForeignPtr thefp $ \p' -> hGetBuf h p' l
                             return thefp
#if defined(__GLASGOW_HASKELL__)
                     else do
                             fp <- FC.newForeignPtr p (do {c_munmap p l; return (); })
                             debugForeignPtr fp $ "mmap "++f
                             return fp
               c_close fd
#endif
               hClose h
               return (fp, l)
    where mmap_limit = 16*1024


-- -------------------------------------------------------------------------
-- readIntPS

-- | readIntPS skips any whitespace at the beginning of its argument, and
-- reads an Int from the beginning of the PackedString.  If there is no
-- integer at the beginning of the string, it returns Nothing, otherwise it
-- just returns the int read, along with a PackedString containing the
-- remainder of its input.  The actual parsing is done by the standard C
-- library function strtol.

foreign import ccall unsafe "static stdlib.h strtol" c_strtol
    :: Ptr Word8 -> Ptr (Ptr Word8) -> Int -> IO CLong

readIntPS :: PackedString -> Maybe (Int, PackedString)
readIntPS (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p-> with p $ \endpp ->
    do val <- c_strtol (p `plusPtr` s) endpp 0
       skipped <- (`minusPtr` (p `plusPtr` s)) `liftM` peek endpp
       if skipped == 0
          then return Nothing
          else return $ Just (fromIntegral val,
                              PS x (s+skipped) (l-skipped))

-- -------------------------------------------------------------------------
-- fromPS2Hex

foreign import ccall unsafe "static fpstring.h conv_to_hex" conv_to_hex
    :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()

fromPS2Hex :: PackedString -> PackedString
fromPS2Hex (PS x s l) = createPS (2*l) $ \p -> withForeignPtr x $ \f ->
           conv_to_hex p (f `plusPtr` s) l

-- -------------------------------------------------------------------------
-- fromHex2PS

foreign import ccall unsafe "static fpstring.h conv_from_hex" conv_from_hex
    :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()

fromHex2PS :: PackedString -> PackedString
fromHex2PS (PS x s l) = createPS (l `div` 2) $ \p -> withForeignPtr x $ \f ->
           conv_from_hex p (f `plusPtr` s) (l `div` 2)

-- -------------------------------------------------------------------------
-- betweenLinesPS

-- | betweenLinesPS returns the PackedString between the two lines given,
-- or Nothing if they do not appear.

betweenLinesPS :: PackedString -> PackedString -> PackedString
               -> Maybe (PackedString)
betweenLinesPS start end ps
 = case break (start ==) (linesPS ps) of
       (_, _:rest@(PS ps1 s1 _:_)) ->
           case break (end ==) rest of
               (_, PS _ s2 _:_) -> Just $ PS ps1 s1 (s2 - s1)
               _ -> Nothing
       _ -> Nothing

-- -------------------------------------------------------------------------
-- break_after_nth_newline

break_after_nth_newline :: Int -> PackedString
                        -> Maybe (PackedString, PackedString)
break_after_nth_newline 0 the_ps | nullPS the_ps = Just (nilPS, nilPS)
break_after_nth_newline n the_ps@(PS fp the_s l)
 = unsafePerformIO $ withForeignPtr fp $ \p ->
   do let findit 0 s | s == end = return $ Just (the_ps, nilPS)
          findit _ s | s == end = return Nothing
          findit 0 s = let left_l = s - the_s
                       in return $ Just (PS fp the_s left_l,
                                         PS fp s (l - left_l))
          findit i s = do w <- peekElemOff p s
                          if w == nl then findit (i-1) (s+1)
                                     else findit i (s+1)
          nl = c2w '\n'
          end = the_s + l
      findit n the_s

-- -------------------------------------------------------------------------
-- break_before_nth_newline

break_before_nth_newline :: Int -> PackedString -> (PackedString, PackedString)
break_before_nth_newline 0 the_ps
 | nullPS the_ps = (nilPS, nilPS)
break_before_nth_newline n the_ps@(PS fp the_s l)
 = unsafePerformIO $ withForeignPtr fp $ \p ->
   do let findit _ s | s == end = return (the_ps, nilPS)
          findit i s = do w <- peekElemOff p s
                          if w == nl
                            then if i == 0
                                 then let left_l = s - the_s
                                      in return (PS fp the_s left_l,
                                                 PS fp s (l - left_l))
                                 else findit (i-1) (s+1)
                            else findit i (s+1)
          nl = c2w '\n'
          end = the_s + l
      findit n the_s

