module MOO.String (
MOOString
, fromText
, fromBinary
, fromString
, toText
, toCaseFold
, toBinary
, toString
, toBuilder
, toRegexp
, singleton
, empty
, append
, tail
, null
, length
, compareLength
, storageBytes
, equal
, intercalate
, foldr
, concat
, concatMap
, take
, drop
, splitAt
, breakOn
, breakOnEnd
, break
, splitOn
, words
, unwords
, validChar
, isPrefixOf
, index
) where
import Control.Applicative ((<$>))
import Data.ByteString (ByteString)
import Data.Char (isAscii, isPrint, isHexDigit, digitToInt, intToDigit)
import Data.Function (on)
import Data.Hashable (Hashable(hashWithSalt))
import Data.Monoid (Monoid(mempty, mappend, mconcat))
import Data.String (IsString(fromString))
import Data.Text (Text)
import Data.Text.Encoding (encodeUtf8, decodeUtf8)
import Data.Text.Foreign (lengthWord16)
import Data.Text.Lazy.Builder (Builder)
import Data.Typeable (Typeable)
import Data.Word (Word8, Word16)
import Database.VCache (VCacheable(put, get))
import Foreign.Storable (sizeOf)
import Prelude hiding (tail, null, length, foldr, concat, concatMap, take, drop,
splitAt, break, words, unwords)
import qualified Data.ByteString as BS
import qualified Data.Text as T
import qualified Data.Text.Lazy.Builder as TLB
import MOO.Builtins.Match (Regexp, newRegexp)
type CompiledRegexp = Either String Regexp
data CachedReps = CachedReps {
binaryData :: Maybe ByteString
, caseInsensitiveRegexp :: CompiledRegexp
, caseSensitiveRegexp :: CompiledRegexp
}
mkCachedReps :: Text -> CachedReps
mkCachedReps text = CachedReps {
binaryData = decodeBinary text
, caseInsensitiveRegexp = newRegexp text False
, caseSensitiveRegexp = newRegexp text True
}
data MOOString = MOOString {
toText :: Text
, toCaseFold :: Text
, length :: Int
, cachedReps :: CachedReps
} deriving Typeable
instance IsString MOOString where
fromString = fromText . T.pack
instance Eq MOOString where
(==) = (==) `on` toCaseFold
instance Ord MOOString where
compare = compare `on` toCaseFold
instance Hashable MOOString where
hashWithSalt salt = hashWithSalt salt . toCaseFold
instance Monoid MOOString where
mempty = empty
mappend = append
mconcat = concat
instance Show MOOString where
show = show . toText
instance VCacheable MOOString where
put = let left = Left :: a -> Either a ByteString
in put . left . encodeUtf8 . toText
get = either (fromText . decodeUtf8) fromBinary <$> get
fromText :: Text -> MOOString
fromText text = MOOString {
toText = text
, toCaseFold = caseFold text
, length = T.length text
, cachedReps = mkCachedReps text
}
fromBinary :: ByteString -> MOOString
fromBinary bytes =
let str = fromText (encodeBinary bytes)
in str { cachedReps = (cachedReps str) { binaryData = Just bytes } }
toString :: MOOString -> String
toString = T.unpack . toText
toBuilder :: MOOString -> Builder
toBuilder = TLB.fromText . toText
toBinary :: MOOString -> Maybe ByteString
toBinary = binaryData . cachedReps
toRegexp :: Bool
-> MOOString -> CompiledRegexp
toRegexp False = caseInsensitiveRegexp . cachedReps
toRegexp True = caseSensitiveRegexp . cachedReps
caseFold :: Text -> Text
caseFold text
| text == folded = text
| otherwise = folded
where folded = T.toCaseFold text
encodeBinary :: ByteString -> Text
encodeBinary = T.pack . BS.foldr encode []
where encode :: Word8 -> String -> String
encode b
| isAscii c && isPrint c && c /= '~' = (c :)
| otherwise = ('~' :) . (hex q :) . (hex r :)
where n = fromIntegral b :: Int
c = toEnum n :: Char
(q, r) = n `divMod` 16 :: (Int, Int)
hex = intToDigit :: Int -> Char
decodeBinary :: Text -> Maybe ByteString
decodeBinary = fmap BS.pack . decode . T.unpack
where decode :: String -> Maybe [Word8]
decode ('~':q:r:rest) = do
q' <- fromHex q
r' <- fromHex r
let b = 16 * q' + r'
(b :) <$> decode rest
decode ('~':_) = Nothing
decode (c:rest)
| isAscii c && isPrint c = (b :) <$> decode rest
| otherwise = Nothing
where b = fromIntegral (fromEnum c)
decode [] = Just []
fromHex :: Char -> Maybe Word8
fromHex c
| isHexDigit c = Just b
| otherwise = Nothing
where b = fromIntegral (digitToInt c)
validChar :: Char -> Bool
validChar c = isAscii c && (isPrint c || c == '\t')
singleton :: Char -> MOOString
singleton = fromText . T.singleton
storageBytes :: MOOString -> Int
storageBytes str = sizeOf (undefined :: Word16) * lengthWord16 (toText str) +
sizeOf (undefined :: Int) * 4
equal :: MOOString -> MOOString -> Bool
equal = (==) `on` toText
empty :: MOOString
empty = fromText T.empty
tail :: MOOString -> MOOString
tail = fromText . T.tail . toText
append :: MOOString -> MOOString -> MOOString
append str1 str2 = fromText $ toText str1 `T.append` toText str2
null :: MOOString -> Bool
null = T.null . toText
compareLength :: MOOString -> Int -> Ordering
compareLength str = T.compareLength (toText str)
intercalate :: MOOString -> [MOOString] -> MOOString
intercalate sep = fromText . T.intercalate (toText sep) . map toText
foldr :: (Char -> a -> a) -> a -> MOOString -> a
foldr f z = T.foldr f z . toText
concat :: [MOOString] -> MOOString
concat = fromText . T.concat . map toText
concatMap :: (Char -> MOOString) -> MOOString -> MOOString
concatMap f = fromText . T.concatMap (toText . f) . toText
take :: Int -> MOOString -> MOOString
take len = fromText . T.take len . toText
drop :: Int -> MOOString -> MOOString
drop len = fromText . T.drop len . toText
splitAt :: Int -> MOOString -> (MOOString, MOOString)
splitAt n str = (fromText prefix, fromText remainder)
where (prefix, remainder) = T.splitAt n (toText str)
breakOn :: MOOString -> MOOString -> (MOOString, MOOString)
breakOn sep str = (fromText before, fromText match)
where (before, match) = T.breakOn (toText sep) (toText str)
breakOnEnd :: MOOString -> MOOString -> (MOOString, MOOString)
breakOnEnd sep str = (fromText before, fromText match)
where (before, match) = T.breakOnEnd (toText sep) (toText str)
break :: (Char -> Bool) -> MOOString -> (MOOString, MOOString)
break p str = (fromText prefix, fromText remainder)
where (prefix, remainder) = T.break p (toText str)
splitOn :: MOOString -> MOOString -> [MOOString]
splitOn sep = map fromText . T.splitOn (toText sep) . toText
words :: MOOString -> [MOOString]
words = map fromText . T.words . toText
unwords :: [MOOString] -> MOOString
unwords = fromText . T.unwords . map toText
isPrefixOf :: MOOString -> MOOString -> Bool
isPrefixOf = T.isPrefixOf `on` toCaseFold
index :: MOOString -> Int -> Char
index str = T.index (toText str)