{-# LANGUAGE DeriveDataTypeable #-}

-- | Abstract MOO string type
module MOO.String (
    MOOString

  -- * Creation and elimination
  , fromText
  , fromBinary
  , fromString
  , toText
  , toCaseFold
  , toBinary
  , toString
  , toBuilder
  , toRegexp
  , singleton
  , empty

  -- * Basic interface
  , append
  , tail
  , null
  , length
  , compareLength
  , storageBytes
  , equal

  -- * Transformations
  , intercalate

  -- * Folds
  , foldr
  -- ** Special folds
  , concat
  , concatMap

  -- * Substrings
  -- ** Breaking strings
  , take
  , drop
  , splitAt
  , breakOn
  , breakOnEnd
  , break
  -- ** Breaking into many substrings
  , splitOn
  -- ** Breaking into lines and words
  , words
  , unwords

  -- * Predicates
  , validChar
  , isPrefixOf

  -- * Indexing
  , index
  ) where

import Control.Applicative ((<$>))
import Data.ByteString (ByteString)
import Data.Char (isAscii, isPrint, isHexDigit, digitToInt, intToDigit)
import Data.Function (on)
import Data.Hashable (Hashable(hashWithSalt))
import Data.Monoid (Monoid(mempty, mappend, mconcat))
import Data.String (IsString(fromString))
import Data.Text (Text)
import Data.Text.Encoding (encodeUtf8, decodeUtf8)
import Data.Text.Foreign (lengthWord16)
import Data.Text.Lazy.Builder (Builder)
import Data.Typeable (Typeable)
import Data.Word (Word8, Word16)
import Database.VCache (VCacheable(put, get))
import Foreign.Storable (sizeOf)
import Prelude hiding (tail, null, length, foldr, concat, concatMap, take, drop,
                       splitAt, break, words, unwords)

import qualified Data.ByteString as BS
import qualified Data.Text as T
import qualified Data.Text.Lazy.Builder as TLB

import MOO.Builtins.Match (Regexp, newRegexp)

type CompiledRegexp = Either String Regexp

data CachedReps = CachedReps {
    binaryData            :: Maybe ByteString
  , caseInsensitiveRegexp :: CompiledRegexp
  , caseSensitiveRegexp   :: CompiledRegexp
  }

mkCachedReps :: Text -> CachedReps
mkCachedReps text = CachedReps {
    binaryData            = decodeBinary text
  , caseInsensitiveRegexp = newRegexp text False
  , caseSensitiveRegexp   = newRegexp text True
  }

data MOOString = MOOString {
    toText     :: Text
  , toCaseFold :: Text
  , length     :: Int
  , cachedReps :: CachedReps
  } deriving Typeable

instance IsString MOOString where
  fromString = fromText . T.pack

instance Eq MOOString where
  (==) = (==) `on` toCaseFold

instance Ord MOOString where
  compare = compare `on` toCaseFold

instance Hashable MOOString where
  hashWithSalt salt = hashWithSalt salt . toCaseFold

instance Monoid MOOString where
  mempty  = empty
  mappend = append
  mconcat = concat

instance Show MOOString where
  show = show . toText

instance VCacheable MOOString where
  put = let left = Left :: a -> Either a ByteString
        in put . left . encodeUtf8 . toText
  get = either (fromText . decodeUtf8) fromBinary <$> get

fromText :: Text -> MOOString
fromText text = MOOString {
    toText     = text
  , toCaseFold = caseFold text
  , length     = T.length text
  , cachedReps = mkCachedReps text
  }

fromBinary :: ByteString -> MOOString
fromBinary bytes =
  let str = fromText (encodeBinary bytes)
  in str { cachedReps = (cachedReps str) { binaryData = Just bytes } }

toString :: MOOString -> String
toString = T.unpack . toText

toBuilder :: MOOString -> Builder
toBuilder = TLB.fromText . toText

toBinary :: MOOString -> Maybe ByteString
toBinary = binaryData . cachedReps

toRegexp :: Bool  -- ^ case-matters
         -> MOOString -> CompiledRegexp
toRegexp False = caseInsensitiveRegexp . cachedReps
toRegexp True  = caseSensitiveRegexp   . cachedReps

-- | Case-fold the argument, returning the same argument if the result is
-- unchanged to avoid wasting memory.
caseFold :: Text -> Text
caseFold text
  | text == folded = text
  | otherwise      = folded
  where folded = T.toCaseFold text

-- | Encode a MOO /binary string/.
encodeBinary :: ByteString -> Text
encodeBinary = T.pack . BS.foldr encode []
  where encode :: Word8 -> String -> String
        encode b
          | isAscii c && isPrint c && c /= '~' = (c :)
          | otherwise                          = ('~' :) . (hex q :) . (hex r :)
          where n      = fromIntegral b :: Int
                c      = toEnum n       :: Char
                (q, r) = n `divMod` 16  :: (Int, Int)
                hex    = intToDigit     :: Int -> Char

-- | Decode a MOO /binary string/ or return 'Nothing' if the string is
-- improperly formatted.
decodeBinary :: Text -> Maybe ByteString
decodeBinary = fmap BS.pack . decode . T.unpack
  where decode :: String -> Maybe [Word8]
        decode ('~':q:r:rest) = do
          q' <- fromHex q
          r' <- fromHex r
          let b = 16 * q' + r'
          (b :) <$> decode rest
        decode ('~':_) = Nothing
        decode (c:rest)
          | isAscii c && isPrint c = (b :) <$> decode rest
          | otherwise              = Nothing
          where b = fromIntegral (fromEnum c)
        decode [] = Just []

        fromHex :: Char -> Maybe Word8
        fromHex c
          | isHexDigit c = Just b
          | otherwise    = Nothing
          where b = fromIntegral (digitToInt c)

-- | May the given character appear in a MOO string?
validChar :: Char -> Bool
validChar c = isAscii c && (isPrint c || c == '\t')

singleton :: Char -> MOOString
singleton = fromText . T.singleton

storageBytes :: MOOString -> Int
storageBytes str = sizeOf (undefined :: Word16) * lengthWord16 (toText str) +
                   sizeOf (undefined :: Int) * 4

-- | Test two strings for indistinguishable (case-sensitive) equality.
equal :: MOOString -> MOOString -> Bool
equal = (==) `on` toText

empty :: MOOString
empty = fromText T.empty

tail :: MOOString -> MOOString
tail = fromText . T.tail . toText

append :: MOOString -> MOOString -> MOOString
append str1 str2 = fromText $ toText str1 `T.append` toText str2

null :: MOOString -> Bool
null = T.null . toText

compareLength :: MOOString -> Int -> Ordering
compareLength str = T.compareLength (toText str)

intercalate :: MOOString -> [MOOString] -> MOOString
intercalate sep = fromText . T.intercalate (toText sep) . map toText

foldr :: (Char -> a -> a) -> a -> MOOString -> a
foldr f z = T.foldr f z . toText

concat :: [MOOString] -> MOOString
concat = fromText . T.concat . map toText

concatMap :: (Char -> MOOString) -> MOOString -> MOOString
concatMap f = fromText . T.concatMap (toText . f) . toText

take :: Int -> MOOString -> MOOString
take len = fromText . T.take len . toText

drop :: Int -> MOOString -> MOOString
drop len = fromText . T.drop len . toText

splitAt :: Int -> MOOString -> (MOOString, MOOString)
splitAt n str = (fromText prefix, fromText remainder)
  where (prefix, remainder) = T.splitAt n (toText str)

-- XXX Need caseless versions...

breakOn :: MOOString -> MOOString -> (MOOString, MOOString)
breakOn sep str = (fromText before, fromText match)
  where (before, match) = T.breakOn (toText sep) (toText str)

breakOnEnd :: MOOString -> MOOString -> (MOOString, MOOString)
breakOnEnd sep str = (fromText before, fromText match)
  where (before, match) = T.breakOnEnd (toText sep) (toText str)

break :: (Char -> Bool) -> MOOString -> (MOOString, MOOString)
break p str = (fromText prefix, fromText remainder)
  where (prefix, remainder) = T.break p (toText str)

splitOn :: MOOString -> MOOString -> [MOOString]
splitOn sep = map fromText . T.splitOn (toText sep) . toText

--

words :: MOOString -> [MOOString]
words = map fromText . T.words . toText

unwords :: [MOOString] -> MOOString
unwords = fromText . T.unwords . map toText

isPrefixOf :: MOOString -> MOOString -> Bool
isPrefixOf = T.isPrefixOf `on` toCaseFold

index :: MOOString -> Int -> Char
index str =  T.index (toText str)