{-# OPTIONS_GHC -fffi -cpp #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  FastPackedString
-- Copyright   :  (c) The University of Glasgow 2001,
--                    David Roundy 2003-2005
-- License : GPL (I'm happy to also license this file BSD style but don't
--           want to bother distributing two license files with darcs.
--
-- Maintainer  :  droundy@abridgegame.org
-- Stability   :  experimental
-- Portability :  portable
--
-- An efficient implementation of strings.
--
-----------------------------------------------------------------------------

-- Original GHC implementation by Bryan O\'Sullivan,
-- rewritten to use UArray by Simon Marlow.
-- rewritten to support slices and use ForeignPtr by David Roundy

module FastPackedString (
        -- * The @PackedString@ type
        PackedString,    -- abstract, instances: Eq, Ord, Show, Typeable
        unsafeWithInternals, -- :: PackedString -> (Ptr Word8 -> Int -> IO a) -> IO a

         -- * Converting to and from @PackedString@s
    generatePS,  -- :: Int -> (Ptr Word8 -> Int -> IO Int) -> IO PackedString
        packString,  -- :: String -> PackedString
        withCStringPS, -- :: PackedString -> (CString -> IO a) -> IO a
        unsafeWithCStringLenPS, -- :: PackedString -> ((CString,Int) -> IO a) -> IO a
        packWords,   -- :: [Word8] -> PackedString
        unpackPS,    -- :: PackedString -> String
        unpackPSfromUTF8, -- :: PackedString -> String

        -- * I\/O with @PackedString@s
        hPutPS,      -- :: Handle -> PackedString -> IO ()
        hGetPS,      -- :: Handle -> Int -> IO PackedString
    hGetContentsPS, -- :: Handle -> IO PackedString
        readFilePS,  -- :: FilePath -> IO PackedString
        writeFilePS, -- :: FilePath -> PackedString -> IO ()
        gzReadFilePS,-- :: FilePath -> IO PackedString
        mmapFilePS,  -- :: FilePath -> IO PackedString
        gzWriteFilePS,  -- :: FilePath -> PackedString -> IO ()
        gzWriteFilePSs, -- :: FilePath -> [PackedString] -> IO ()

        -- * List-like manipulation functions
        nilPS,       -- :: PackedString
        ifHeadThenTail, -- :: Word8 -> PackedString -> Maybe PackedString
        headPS,      -- :: PackedString -> Char
        tailPS,      -- :: PackedString -> PackedString
        initPS,      -- :: PackedString -> PackedString
        lastPS,      -- :: PackedString -> Char
        nullPS,      -- :: PackedString -> Bool
        appendPS,    -- :: PackedString -> PackedString -> PackedString
        lengthPS,    -- :: PackedString -> Int
        indexPS,     -- :: PackedString -> Int -> Char
        indexPSW,    -- :: PackedString -> Int -> Word8
        reversePS,   -- :: PackedString -> PackedString
        concatPS,    -- :: [PackedString] -> PackedString
        takePS,      -- :: Int -> PackedString -> PackedString
        dropPS,      -- :: Int -> PackedString -> PackedString
        splitAtPS,   -- :: Int -> PackedString -> (PackedString, PackedString)

        anyPS,
        takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
        dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
        dropWhitePS, -- :: PackedString -> PackedString
        breakWhitePS,-- :: PackedString -> Maybe (PackedString,PackedString)
        spanEndPS,   -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
        breakPS,     -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
        breakOnPS,   -- :: Char -> PackedString -> (PackedString, PackedString)
        linesPS,     -- :: PackedString -> [PackedString]
        unlinesPS,     -- :: [PackedString] -> PackedString
        findPS,
        hashPS,

        splitPS,     -- :: Char -> PackedString -> [PackedString]

        breakFirstPS,-- :: Char -> PackedString -> Maybe (PackedString,PackedString)
        breakLastPS, -- :: Char -> PackedString -> Maybe (PackedString,PackedString)
        substrPS,    -- :: PackedString -> PackedString -> Maybe Int
        readIntPS,   -- :: PackedString -> Maybe (Int, PackedString)
        is_funky,    -- :: PackedString -> Bool
        fromHex2PS,  -- :: PackedString -> PackedString
        fromPS2Hex,  -- :: PackedString -> PackedString
        betweenLinesPS,--  :: PackedString -> PackedString -> PackedString -> Maybe (PackedString)
        break_after_nth_newline,
        break_before_nth_newline,
    ) where

#ifndef HAVE_BYTESTRING
import OldFastPackedString
#else
import System.IO ( Handle, hClose, hFileSize, IOMode(ReadMode),
            hSeek, SeekMode(SeekFromEnd), hGetChar )
import Autoconf ( use_mmap )

import Foreign.Storable ( peekElemOff, peek )
import Foreign.Marshal.Alloc ( free )
import Foreign.Marshal.Array ( pokeArray, mallocArray,
                               peekArray, advancePtr )
import Foreign.C.String
import Foreign.C.Types ( CInt, CSize )
import Data.Bits ( rotateL )
import Data.Char
import Data.Word
import Data.Int ( Int32 )
import qualified Data.List as L
import Control.Monad ( liftM, when )

import System.IO.Unsafe ( unsafePerformIO )
import System.IO ( hGetBuf )

import System.IO ( openBinaryFile, )

import Foreign.Ptr ( nullPtr, plusPtr, Ptr )
import Foreign.ForeignPtr( ForeignPtr, withForeignPtr )
#if defined(__GLASGOW_HASKELL__)
import qualified Foreign.Concurrent as FC ( newForeignPtr )
import System.Posix ( handleToFd )
#endif

#ifdef DEBUG_PS
import Foreign.ForeignPtr ( addForeignPtrFinalizer )
import Foreign.Ptr ( FunPtr )
#endif
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Unsafe as BU
#if HAVE_HASKELL_ZLIB
import qualified Data.ByteString.Lazy as BL
import qualified Codec.Compression.GZip as GZ
#endif

debugForeignPtr :: ForeignPtr a -> String -> IO ()
#ifdef DEBUG_PS
foreign import ccall unsafe "static fpstring.h debug_alloc" debug_alloc
    :: Ptr a -> CString -> IO ()
foreign import ccall unsafe "static fpstring.h & debug_free" debug_free
    :: FunPtr (Ptr a -> IO ())
debugForeignPtr fp n =
    withCString n $ \cname-> withForeignPtr fp $ \p->
    do debug_alloc p cname
       addForeignPtrFinalizer debug_free fp
#else
debugForeignPtr _ _ = return ()
#endif

mallocForeignPtr :: Int -> IO (ForeignPtr Word8)
mallocForeignPtr = BI.mallocByteString

----------------------------------------------------------------------------
--A way of creating ForeignPtrs outside the IO monad (althogh it still
--isn't entirely "safe", but at least it's convenient.

createPS :: Int -> (Ptr Word8 -> IO ()) -> PackedString
createPS = BI.unsafeCreate

-- -----------------------------------------------------------------------------
-- PackedString type declaration

-- | A space-efficient representation of a 'String', which supports various
-- efficient operations.  A 'PackedString' contains full Unicode 'Char's.
type PackedString = BC.ByteString

-- -----------------------------------------------------------------------------
-- unsafeWithInternals

-- | Do something with the internals of a PackedString. Beware of
-- altering the contents!
unsafeWithInternals :: PackedString -> (Ptr Word8 -> Int -> IO a) -> IO a
unsafeWithInternals ps f
 = case BI.toForeignPtr ps of
   (fp,s,l) -> withForeignPtr fp $ \p -> f (p `plusPtr` s) l

-- -----------------------------------------------------------------------------
-- generatePS

-- | Given the maximum size needed and a function to make the contents
-- of a PackedString, generatePS makes the PackedString. The generating
-- function is required to return the actual size (<= the maximum size).

generatePS :: Int -> (Ptr Word8 -> IO Int) -> IO PackedString
generatePS = BI.createAndTrim

-- -----------------------------------------------------------------------------
-- Constructor functions

nilPS :: PackedString
nilPS = BC.empty

-- | Convert a 'String' into a 'PackedString'
packString :: String -> PackedString
packString = BC.pack

packWords :: [Word8] -> PackedString
packWords s = createPS (length s) $ \p -> pokeArray p s

withCStringPS :: PackedString -> (CString -> IO a) -> IO a
withCStringPS = BC.useAsCString

unsafeWithCStringLenPS :: PackedString -> ((CString, Int) -> IO a) -> IO a
unsafeWithCStringLenPS = BU.unsafeUseAsCStringLen

-- -----------------------------------------------------------------------------
-- Destructor functions (taking PackedStrings apart)

-- | Convert a 'PackedString' into a 'String'
unpackPS :: PackedString -> String
unpackPS = BC.unpack

unpackPSfromUTF8 :: PackedString -> String
unpackPSfromUTF8 ps =
 case BI.toForeignPtr ps of
   (_,_, 0) -> ""
   (x,s,l)  ->
    unsafePerformIO $ withForeignPtr x $ \p->
    do outbuf <- mallocArray l
       lout <- fromIntegral `liftM`
               utf8_to_ints outbuf (p `plusPtr` s) (fromIntegral l)
       when (lout < 0) $ error "Bad UTF8!"
       str <- (map (chr . fromIntegral)) `liftM` peekArray lout outbuf
       free outbuf
       return str

foreign import ccall unsafe "static fpstring.h utf8_to_ints" utf8_to_ints
    :: Ptr Int -> Ptr Word8 -> CInt -> IO CInt

-- -----------------------------------------------------------------------------
-- List-mimicking functions for PackedStrings

{-# INLINE lengthPS #-}
lengthPS :: PackedString -> Int
lengthPS = BC.length

{-# INLINE indexPSW #-}
indexPSW :: PackedString -> Int -> Word8
indexPSW ps i = BI.c2w $ BC.index ps i

{-# INLINE indexPS #-}
indexPS :: PackedString -> Int -> Char
indexPS p i = BC.index p i

{-# INLINE lastPS #-}
lastPS :: PackedString -> Char
lastPS = BC.last

{-# INLINE ifHeadThenTail #-}
ifHeadThenTail :: Word8 -> PackedString -> Maybe PackedString
ifHeadThenTail w ps =
   case BI.toForeignPtr ps of
   (x,s,l) ->
    if l > 0 && w == unsafePerformIO (withForeignPtr x $ \p -> peekElemOff p s)
    then Just $ BI.fromForeignPtr x (s+1) (l-1)
    else Nothing

{-# INLINE headPS #-}
headPS :: PackedString -> Char
headPS = BC.head

-- | Extract the elements after the head of a packed string, which must be non-empty.
{-# INLINE tailPS #-}
tailPS :: PackedString -> PackedString
tailPS = BC.tail

-- | Return all the elements of a packed string except the last one.
-- The string must be finite and non-empty.
{-# INLINE initPS #-}
initPS :: PackedString -> PackedString
initPS = BC.init

{-# INLINE nullPS #-}
nullPS :: PackedString -> Bool
nullPS = BC.null

appendPS :: PackedString -> PackedString -> PackedString
appendPS = BC.append

{-# INLINE takePS #-}
takePS :: Int -> PackedString -> PackedString
takePS = BC.take

{-# INLINE dropPS #-}
dropPS  :: Int -> PackedString -> PackedString
dropPS = BC.drop

{-# INLINE splitAtPS #-}
splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
splitAtPS  = BC.splitAt

-- This must be fast, it's used heavily in Printer. -- jch
anyPS :: (Char -> Bool) -> PackedString -> Bool
anyPS = BC.any

{-# INLINE takeWhilePS #-}
takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
takeWhilePS = BC.takeWhile

{-# INLINE dropWhilePS #-}
dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
dropWhilePS = BC.dropWhile

{-# INLINE dropWhitePS #-}
dropWhitePS :: PackedString -> PackedString
dropWhitePS ps =
   case BI.toForeignPtr ps of
   (x,s,l) ->
    unsafePerformIO $ withForeignPtr x $ \p->
    do i <- fromIntegral `liftM`
            first_nonwhite (p `plusPtr` s) (fromIntegral l)
       return $ if i == l then nilPS
                else BI.fromForeignPtr x (s+i) (l-i)

foreign import ccall unsafe "fpstring.h first_nonwhite" first_nonwhite
    :: Ptr Word8 -> CInt -> IO CInt
foreign import ccall unsafe "fpstring.h first_white" first_white
    :: Ptr Word8 -> CInt -> IO CInt

{-# INLINE is_funky #-}
is_funky :: PackedString -> Bool
is_funky ps = case BI.toForeignPtr ps of
   (x,s,l) ->
    unsafePerformIO $ withForeignPtr x $ \p->
    (/=0) `liftM` has_funky_char (p `plusPtr` s) (fromIntegral l)

foreign import ccall unsafe "fpstring.h has_funky_char" has_funky_char
    :: Ptr Word8 -> CInt -> IO CInt

spanEndPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
spanEndPS  = BC.spanEnd

breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
breakPS = BC.break

{-# INLINE breakOnPS #-}
breakOnPS :: Char -> PackedString -> (PackedString, PackedString)
breakOnPS c p = case findPS c p of
                Nothing -> (p,nilPS)
                Just n -> (takePS n p, dropPS n p)

{-# INLINE hashPS #-}
hashPS :: PackedString -> Int32
hashPS ps =
   case BI.toForeignPtr ps of
   (x,s,l) ->
    unsafePerformIO $ withForeignPtr x $ \p->
    do hash (p `plusPtr` s) l

hash :: Ptr Word8 -> Int -> IO Int32
hash ptr len = f (0 :: Int32) ptr len
 where f h _ 0 = return h
       f h p n = do x <- peek p
                    let h' =  (fromIntegral x) + (rotateL h 8)
                    f h' (p `advancePtr` 1) (n-1)

{-# INLINE substrPS #-}
substrPS :: PackedString -> PackedString -> Maybe Int
substrPS tok str
    | nullPS tok = Just 0
    | lengthPS tok > lengthPS str = Nothing
    | otherwise = do n <- findPS (headPS tok) str
                     let ttok = tailPS tok
                         reststr = dropPS (n+1) str
                     if ttok == takePS (lengthPS ttok) reststr
                        then Just n
                        else ((n+1)+) `fmap` substrPS tok reststr

{-# INLINE breakWhitePS #-}
breakWhitePS :: PackedString -> (PackedString,PackedString)
breakWhitePS ps =
   case BI.toForeignPtr ps of
   (x,s,l) ->
    unsafePerformIO $ withForeignPtr x $ \p->
    do i <- fromIntegral `liftM` first_white (p `plusPtr` s) (fromIntegral l)
       if i == 0 then return (nilPS, BI.fromForeignPtr x s l)
                 else if i == l
                      then return (BI.fromForeignPtr x s l, nilPS)
                      else return (BI.fromForeignPtr x s i, BI.fromForeignPtr x (s+i) (l-i))

-- TODO: replace breakFirstPS and breakLastPS with definitions based on
-- ByteString's break/breakEnd
{-# INLINE breakFirstPS #-}
breakFirstPS :: Char -> PackedString -> Maybe (PackedString,PackedString)
breakFirstPS c p = case findPS c p of
                   Nothing -> Nothing
                   Just n -> Just (takePS n p, dropPS (n+1) p)

{-# INLINE breakLastPS #-}
breakLastPS :: Char -> PackedString -> Maybe (PackedString,PackedString)
breakLastPS c p = case findLastPS c p of
                  Nothing -> Nothing
                  Just n -> Just (takePS n p, dropPS (n+1) p)

-- TODO: rename
{-# INLINE linesPS #-}
linesPS :: PackedString -> [PackedString]
linesPS ps
         | ps == BC.empty = [BC.pack ""]
         | otherwise = BC.split '\n' ps
{- QuickCheck property:

import Test.QuickCheck
import qualified Data.ByteString.Char8 as BC
import Data.Char
instance Arbitrary BC.ByteString where
    arbitrary = fmap BC.pack arbitrary
instance Arbitrary Char where
  arbitrary = chr `fmap` choose (32,127)
deepCheck = check (defaultConfig { configMaxTest = 10000})
testLines =  deepCheck (\x -> (linesPS x == linesPSOld x))
linesPSOld ps = case  BC.elemIndex '\n' ps of
             Nothing -> [ps]
             Just n -> takePS n ps : linesPS (dropPS (n+1) ps) -}

{-| This function acts exactly like the "Prelude" unlines function, or like
"Data.ByteString.Char8" 'unlines', but with one important difference: it will
produce a string which may not end with a newline! That is:

> unlinesPS ["foo", "bar"]

evaluates to \"foo\nbar\", not \"foo\nbar\n"! This point should hold true for
'linesPS' as well.

TODO: rename this function. -}
{-# INLINE unlinesPS #-}
unlinesPS :: [PackedString] -> PackedString
unlinesPS [] = BC.empty
unlinesPS x  = BC.init $ BC.unlines x
{- QuickCheck property:

testUnlines = deepCheck (\x -> (unlinesPS x == unlinesPSOld x))
unlinesPSOld ss = BC.concat $ intersperse_newlines ss
    where intersperse_newlines (a:b:s) = a : newline : intersperse_newlines (b:s)
          intersperse_newlines s = s
          newline = BC.pack "\n" -}

reversePS :: PackedString -> PackedString
reversePS = BC.reverse

concatPS :: [PackedString] -> PackedString
concatPS = BC.concat

{-# INLINE findPS #-}
findPS :: Char -> PackedString -> Maybe Int
findPS = BC.elemIndex

{-# INLINE findLastPS #-}
findLastPS :: Char -> PackedString -> Maybe Int
findLastPS = BC.elemIndexEnd

------------------------------------------------------------

{-# INLINE splitPS #-}
splitPS :: Char -> PackedString -> [PackedString]
splitPS = BC.split

-- -----------------------------------------------------------------------------
-- hPutPS

-- | Outputs a 'PackedString' to the specified 'Handle'.
--
-- NOTE: the representation of the 'PackedString' in the file is assumed to
-- be in the ISO-8859-1 encoding.  In other words, only the least signficant
-- byte is taken from each character in the 'PackedString'.
hPutPS :: Handle -> PackedString -> IO ()
hPutPS = B.hPut

-- -----------------------------------------------------------------------------
-- hGetPS

-- | Read a 'PackedString' directly from the specified 'Handle'.  This
-- is far more efficient than reading the characters into a 'String'
-- and then using 'packString'.
--
-- NOTE: as with 'hPutPS', the string representation in the file is
-- assumed to be ISO-8859-1.
hGetPS :: Handle -> Int -> IO PackedString
hGetPS = B.hGet

-- -----------------------------------------------------------------------------
-- hGetContentsPS

-- | Read entire handle contents into a 'PackedString'. Note that since we use strict
-- ByteString, hGetContentsPS is not lazy either (unlike most 'getContents' functions).
--
-- NOTE: as with 'hGetPS', the string representation in the file is
-- assumed to be ISO-8859-1.
hGetContentsPS :: Handle -> IO PackedString
hGetContentsPS = B.hGetContents -- ratify hGetContents: just a Data.ByteString import

-- -----------------------------------------------------------------------------
-- readFilePS

-- | Read an entire file directly into a 'PackedString'.  This is far more
-- efficient than reading the characters into a 'String' and then using
-- 'packString'.  It also may be more efficient than opening the file and
-- reading it using hGetPS.
--
-- NOTE: as with 'hGetPS', the string representation in the file is
-- assumed to be ISO-8859-1.

readFilePS :: FilePath -> IO PackedString
readFilePS = B.readFile -- ratify readFile: just a Data.ByteString import

-- -----------------------------------------------------------------------------
-- writeFilePS

-- | Write a 'PackedString' to a file.

writeFilePS :: FilePath -> PackedString -> IO ()
writeFilePS = B.writeFile

-- -----------------------------------------------------------------------------
-- gzReadFilePS

-- | Read an entire file, which may or may not be gzip compressed, directly
-- into a 'PackedString'.

#ifndef HAVE_HASKELL_ZLIB
foreign import ccall unsafe "static zlib.h gzopen" c_gzopen
    :: CString -> CString -> IO (Ptr ())
foreign import ccall unsafe "static zlib.h gzclose" c_gzclose
    :: Ptr () -> IO ()
foreign import ccall unsafe "static zlib.h gzread" c_gzread
    :: Ptr () -> Ptr Word8 -> CInt -> IO CInt
foreign import ccall unsafe "static zlib.h gzwrite" c_gzwrite
    :: Ptr () -> Ptr Word8 -> CInt -> IO CInt
#endif

gzReadFilePS :: FilePath -> IO PackedString
gzReadFilePS f = do
    h <- openBinaryFile f ReadMode
    header <- hGetPS h 2
    if header /= packString "\31\139"
       then do hClose h
               mmapFilePS f
       else do hSeek h SeekFromEnd (-4)
               len <- hGetLittleEndInt h
               hClose h
#ifdef HAVE_HASKELL_ZLIB
               -- for now we ignore the length, but zlib will be modified
               -- to add an API to say what length the result will be so
               -- that BL.toChunks only produces one chunk, which in turn
               -- means that B.concat won't need to copy data
               -- the dummy use of len is to avoid a compiler warning
               liftM (B.concat . BL.toChunks . const GZ.decompress len) $ BL.readFile f -- ratify readFile: immediately consumed by the conversion to a strict bytestring
#else
               withCString f $ \fstr-> withCString "rb" $ \rb-> do
                 gzf <- c_gzopen fstr rb
                 when (gzf == nullPtr) $ fail $ "problem opening file "++f
                 fp <- mallocForeignPtr len
                 debugForeignPtr fp $ "gzReadFilePS "++f
                 lread <- withForeignPtr fp $ \p ->
                          c_gzread gzf p (fromIntegral len)
                 c_gzclose gzf
                 when (fromIntegral lread /= len) $
                      fail $ "problem gzreading file "++f
                 return $ BI.fromForeignPtr fp 0 len
#endif

hGetLittleEndInt :: Handle -> IO Int
hGetLittleEndInt h = do
    b1 <- ord `liftM` hGetChar h
    b2 <- ord `liftM` hGetChar h
    b3 <- ord `liftM` hGetChar h
    b4 <- ord `liftM` hGetChar h
    return $ b1 + 256*b2 + 65536*b3 + 16777216*b4

gzWriteFilePS :: FilePath -> PackedString -> IO ()
gzWriteFilePS f ps = gzWriteFilePSs f [ps]

gzWriteFilePSs :: FilePath -> [PackedString] -> IO ()
gzWriteFilePSs f pss  =
#ifdef HAVE_HASKELL_ZLIB
    BL.writeFile f $ GZ.compress $ BL.fromChunks pss
#else
    withCString f $ \fstr -> withCString "wb" $ \wb -> do
    gzf <- c_gzopen fstr wb
    when (gzf == nullPtr) $ fail $ "problem gzopening file for write: "++f
    mapM_ (gzWriteToGzf gzf) pss `catch`
              \_ -> fail $ "problem gzwriting file: "++f
    c_gzclose gzf

gzWriteToGzf :: Ptr () -> PackedString -> IO ()
gzWriteToGzf gzf ps = case BI.toForeignPtr ps of
 (x,s,l) -> do
    lw <- withForeignPtr x $ \p -> c_gzwrite gzf (p `plusPtr` s)
                                                 (fromIntegral l)
    when (fromIntegral lw /= l) $ fail $ "problem in gzWriteToGzf"
#endif

-- -----------------------------------------------------------------------------
-- mmapFilePS

-- | Like readFilePS, this reads an entire file directly into a
-- 'PackedString', but it is even more efficient.  It involves directly
-- mapping the file to memory.  This has the advantage that the contents of
-- the file never need to be copied.  Also, under memory pressure the page
-- may simply be discarded, wile in the case of readFilePS it would need to
-- be written to swap.  If you read many small files, mmapFilePS will be
-- less memory-efficient than readFilePS, since each mmapFilePS takes up a
-- separate page of memory.  Also, you can run into bus errors if the file
-- is modified.  NOTE: as with 'readFilePS', the string representation in
-- the file is assumed to be ISO-8859-1.

mmapFilePS :: FilePath -> IO PackedString
mmapFilePS f = if use_mmap
               then do (fp,l) <- mmap f
                       return $ BI.fromForeignPtr fp 0 l
               else readFilePS f

#if defined(__GLASGOW_HASKELL__)
foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
    :: CSize -> CInt -> IO (Ptr Word8)
foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
    :: Ptr Word8 -> CSize -> IO CInt
foreign import ccall unsafe "static unistd.h close" c_close
    :: CInt -> IO CInt
#endif

mmap :: FilePath -> IO (ForeignPtr Word8, Int)
mmap f = do
    h <- openBinaryFile f ReadMode
    l <- fromIntegral `liftM` hFileSize h
    -- Don't bother mmaping small files because each mmapped file takes up
    -- at least one full VM block.
    if l < mmap_limit
       then do thefp <- mallocForeignPtr l
               debugForeignPtr thefp $ "mmap short file "++f
               withForeignPtr thefp $ \p-> hGetBuf h p l
               hClose h
               return (thefp, l)
       else do
#if defined(__GLASGOW_HASKELL__)
               fd <- fromIntegral `liftM` handleToFd h
               p <- my_mmap (fromIntegral l) fd
               fp <- if p == nullPtr
                     then
#else
               fp <-
#endif
                          do thefp <- mallocForeignPtr l
                             debugForeignPtr thefp $ "mmap short file "++f
                             withForeignPtr thefp $ \p' -> hGetBuf h p' l
                             return thefp
#if defined(__GLASGOW_HASKELL__)
                     else do
                             fp <- FC.newForeignPtr p
                                   (do {c_munmap p $ fromIntegral l;
                                        return (); })
                             debugForeignPtr fp $ "mmap "++f
                             return fp
               c_close fd
#endif
               hClose h
               return (fp, l)
    where mmap_limit = 16*1024


-- -------------------------------------------------------------------------
-- readIntPS

-- | readIntPS skips any whitespace at the beginning of its argument, and
-- reads an Int from the beginning of the PackedString.  If there is no
-- integer at the beginning of the string, it returns Nothing, otherwise it
-- just returns the int read, along with a PackedString containing the
-- remainder of its input.

readIntPS :: PackedString -> Maybe (Int, PackedString)
readIntPS = BC.readInt . BC.dropWhile isSpace

-- -------------------------------------------------------------------------
-- fromPS2Hex

foreign import ccall unsafe "static fpstring.h conv_to_hex" conv_to_hex
    :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()

fromPS2Hex :: PackedString -> PackedString
fromPS2Hex ps = case BI.toForeignPtr ps of
          (x,s,l) ->
           createPS (2*l) $ \p -> withForeignPtr x $ \f ->
           conv_to_hex p (f `plusPtr` s) $ fromIntegral l

-- -------------------------------------------------------------------------
-- fromHex2PS

foreign import ccall unsafe "static fpstring.h conv_from_hex" conv_from_hex
    :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()

fromHex2PS :: PackedString -> PackedString
fromHex2PS ps = case BI.toForeignPtr ps of
          (x,s,l) ->
           createPS (l `div` 2) $ \p -> withForeignPtr x $ \f ->
           conv_from_hex p (f `plusPtr` s) (fromIntegral $ l `div` 2)

-- -------------------------------------------------------------------------
-- betweenLinesPS

-- | betweenLinesPS returns the PackedString between the two lines given,
-- or Nothing if they do not appear.

betweenLinesPS :: PackedString -> PackedString -> PackedString
               -> Maybe (PackedString)
betweenLinesPS start end ps
 = case break (start ==) (linesPS ps) of
       (_, _:rest@(bs1:_)) ->
           case BI.toForeignPtr bs1 of
            (ps1,s1,_) ->
             case break (end ==) rest of
               (_, bs2:_) -> case BI.toForeignPtr bs2 of (_,s2,_) -> Just $ BI.fromForeignPtr ps1 s1 (s2 - s1)
               _ -> Nothing
       _ -> Nothing

-- -------------------------------------------------------------------------
-- break_after_nth_newline

break_after_nth_newline :: Int -> PackedString
                        -> Maybe (PackedString, PackedString)
break_after_nth_newline 0 the_ps | nullPS the_ps = Just (nilPS, nilPS)
break_after_nth_newline n the_ps =
  case BI.toForeignPtr the_ps of
  (fp,the_s,l) ->
   unsafePerformIO $ withForeignPtr fp $ \p ->
   do let findit 0 s | s == end = return $ Just (the_ps, nilPS)
          findit _ s | s == end = return Nothing
          findit 0 s = let left_l = s - the_s
                       in return $ Just (BI.fromForeignPtr fp the_s left_l,
                                         BI.fromForeignPtr fp s (l - left_l))
          findit i s = do w <- peekElemOff p s
                          if w == nl then findit (i-1) (s+1)
                                     else findit i (s+1)
          nl = BI.c2w '\n'
          end = the_s + l
      findit n the_s

-- -------------------------------------------------------------------------
-- break_before_nth_newline

break_before_nth_newline :: Int -> PackedString -> (PackedString, PackedString)
break_before_nth_newline 0 the_ps
 | nullPS the_ps = (nilPS, nilPS)
break_before_nth_newline n the_ps =
 case BI.toForeignPtr the_ps of
 (fp,the_s,l) ->
   unsafePerformIO $ withForeignPtr fp $ \p ->
   do let findit _ s | s == end = return (the_ps, nilPS)
          findit i s = do w <- peekElemOff p s
                          if w == nl
                            then if i == 0
                                 then let left_l = s - the_s
                                      in return (BI.fromForeignPtr fp the_s left_l,
                                                 BI.fromForeignPtr fp s (l - left_l))
                                 else findit (i-1) (s+1)
                            else findit i (s+1)
          nl = BI.c2w '\n'
          end = the_s + l
      findit n the_s
#endif
