BSON datastructure and encode/decode is initially in place
This commit is contained in:
parent
777fab6d5e
commit
b3b6ca4d02
1 changed files with 224 additions and 0 deletions
224
BSON.hs
Normal file
224
BSON.hs
Normal file
|
@ -0,0 +1,224 @@
|
||||||
|
|
||||||
|
import Control.Monad
|
||||||
|
import Data.Binary
|
||||||
|
import Data.Binary.Get
|
||||||
|
import Data.Binary.IEEE754
|
||||||
|
import Data.Binary.Put
|
||||||
|
import Data.Bits
|
||||||
|
import Data.ByteString.Char8
|
||||||
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
import qualified Data.ByteString.Lazy.Char8 as LC
|
||||||
|
import qualified Data.ByteString.Lazy.UTF8 as L8
|
||||||
|
import Data.Char (chr, ord)
|
||||||
|
import Data.Int
|
||||||
|
import qualified Data.List as List
|
||||||
|
import Data.Time.Clock.POSIX
|
||||||
|
|
||||||
|
data BSValue
|
||||||
|
= BSDouble Double
|
||||||
|
| BSString L8.ByteString
|
||||||
|
| BSObject BSONObject
|
||||||
|
| BSArray [BSValue]
|
||||||
|
| BSUndefined
|
||||||
|
| BSBinary BinarySubType L.ByteString
|
||||||
|
| BSObjectId L.ByteString
|
||||||
|
| BSBool !Bool
|
||||||
|
| BSDate POSIXTime
|
||||||
|
| BSNull
|
||||||
|
| BSRegex L8.ByteString String
|
||||||
|
| BSSymbol L8.ByteString
|
||||||
|
| BSInt32 Int32
|
||||||
|
| BSInt64 Int64
|
||||||
|
| BSMinKey
|
||||||
|
| BSMaxKey
|
||||||
|
deriving (Show, Eq, Ord)
|
||||||
|
|
||||||
|
newtype BSONObject = BSONObject { fromBSONObject :: [(L8.ByteString, BSValue)] }
|
||||||
|
deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
toBSONObject :: [(L8.ByteString, BSValue)] -> BSONObject
|
||||||
|
toBSONObject = BSONObject
|
||||||
|
|
||||||
|
data DataType =
|
||||||
|
Data_min_key | -- -1
|
||||||
|
Data_number | -- 1
|
||||||
|
Data_string | -- 2
|
||||||
|
Data_object | -- 3
|
||||||
|
Data_array | -- 4
|
||||||
|
Data_binary | -- 5
|
||||||
|
Data_undefined | -- 6
|
||||||
|
Data_oid | -- 7
|
||||||
|
Data_boolean | -- 8
|
||||||
|
Data_date | -- 9
|
||||||
|
Data_null | -- 10
|
||||||
|
Data_regex | -- 11
|
||||||
|
Data_ref | -- 12
|
||||||
|
Data_code | -- 13
|
||||||
|
Data_symbol | -- 14
|
||||||
|
Data_code_w_scope | -- 15
|
||||||
|
Data_int | -- 16
|
||||||
|
Data_timestamp | -- 17
|
||||||
|
Data_long | -- 18
|
||||||
|
Data_max_key -- 127
|
||||||
|
deriving (Show, Read, Enum, Eq, Ord)
|
||||||
|
|
||||||
|
toDataType :: Int -> DataType
|
||||||
|
toDataType (-1) = Data_min_key
|
||||||
|
toDataType 127 = Data_max_key
|
||||||
|
toDataType d = toEnum d
|
||||||
|
|
||||||
|
fromDataType :: DataType -> Int
|
||||||
|
fromDataType Data_min_key = (-1)
|
||||||
|
fromDataType Data_max_key = 127
|
||||||
|
fromDataType d = fromEnum d
|
||||||
|
|
||||||
|
|
||||||
|
data BinarySubType =
|
||||||
|
BSTUNDEFINED_1 |
|
||||||
|
BSTFunction | -- 1
|
||||||
|
BSTByteArray | -- 2
|
||||||
|
BSTUUID | -- 3
|
||||||
|
BSTUNDEFINED_2 |
|
||||||
|
BSTMD5 | -- 5
|
||||||
|
BSTUserDefined
|
||||||
|
deriving (Show, Read, Enum, Eq, Ord)
|
||||||
|
|
||||||
|
toBinarySubType :: Int -> BinarySubType
|
||||||
|
toBinarySubType 0x80 = BSTUserDefined
|
||||||
|
toBinarySubType d = toEnum d
|
||||||
|
|
||||||
|
fromBinarySubType :: BinarySubType -> Int
|
||||||
|
fromBinarySubType BSTUserDefined = 0x80
|
||||||
|
fromBinarySubType d = fromEnum d
|
||||||
|
|
||||||
|
instance Binary BSONObject where
|
||||||
|
get = liftM snd getObj
|
||||||
|
put = putObj
|
||||||
|
|
||||||
|
getVal :: DataType -> Get (Int, BSValue)
|
||||||
|
getVal Data_number = getFloat64le >>= return . (,) 8 . BSDouble
|
||||||
|
getVal Data_string = do
|
||||||
|
sLen1 <- getI32
|
||||||
|
(sLen2, s) <- getS
|
||||||
|
return (fromIntegral $ 4 + sLen1, BSString s)
|
||||||
|
getVal Data_object = getObj >>= \(len, obj) -> return (len, BSObject obj)
|
||||||
|
getVal Data_array = do
|
||||||
|
(len, arr) <- getRawObj
|
||||||
|
let arr2 = List.foldl (\acc (k,v) -> v : acc) [] arr -- reverse and remove key
|
||||||
|
return (len, BSArray arr2)
|
||||||
|
getVal Data_binary = do
|
||||||
|
skip 4
|
||||||
|
st <- getI8
|
||||||
|
len2 <- getI32
|
||||||
|
bs <- getLazyByteString $ fromIntegral len2
|
||||||
|
return (4 + 1 + 4 + len2, BSBinary (toBinarySubType st) bs)
|
||||||
|
getVal Data_undefined = return (1, BSUndefined)
|
||||||
|
getVal Data_oid = getLazyByteString 12 >>= return . (,) 12 . BSObjectId
|
||||||
|
getVal Data_boolean = getI8 >>= return . (,) 1 . BSBool . (/= 0)
|
||||||
|
getVal Data_date =
|
||||||
|
getI64 >>= return . (,) 8 . BSDate . flip (/) 1000 . realToFrac
|
||||||
|
getVal Data_null = return (1, BSNull)
|
||||||
|
getVal Data_int = getI32 >>= return . (,) 4 . BSInt32 . fromIntegral
|
||||||
|
getVal Data_long = getI64 >>= return . (,) 8 . BSInt64
|
||||||
|
|
||||||
|
getInnerObj 1 obj = obj
|
||||||
|
getInnerObj bytesLeft obj = do
|
||||||
|
typ <- getDataType
|
||||||
|
(keySz, key) <- getS
|
||||||
|
(valSz, val) <- getVal typ
|
||||||
|
getInnerObj (bytesLeft - 1 - keySz - valSz) $ liftM ((key, val) :) obj
|
||||||
|
|
||||||
|
getRawObj = do
|
||||||
|
bytes <- getI32
|
||||||
|
obj <- getInnerObj (bytes - 4) $ return []
|
||||||
|
getNull
|
||||||
|
return (bytes, obj)
|
||||||
|
|
||||||
|
getObj = getRawObj >>= \(len, obj) -> return (len, toBSONObject obj)
|
||||||
|
|
||||||
|
getS :: Get (Int, L8.ByteString)
|
||||||
|
getS = getLazyByteStringNul >>= \s -> return (fromIntegral $ L.length s + 1, s)
|
||||||
|
|
||||||
|
getNull = do {'\0' <- getC; return ()}
|
||||||
|
|
||||||
|
getC = liftM chr getI8
|
||||||
|
getI8 = liftM fromIntegral getWord8
|
||||||
|
getI32 = liftM fromIntegral getWord32le
|
||||||
|
getI64 = liftM fromIntegral getWord64le
|
||||||
|
getDataType = liftM toDataType getI8
|
||||||
|
|
||||||
|
|
||||||
|
putType BSDouble{} = putDataType Data_number
|
||||||
|
putType BSString{} = putDataType Data_string
|
||||||
|
putType BSObject{} = putDataType Data_object
|
||||||
|
putType BSArray{} = putDataType Data_array
|
||||||
|
putType BSBinary{} = putDataType Data_binary
|
||||||
|
putType BSUndefined = putDataType Data_undefined
|
||||||
|
putType BSObjectId{} = putDataType Data_oid
|
||||||
|
putType BSBool{} = putDataType Data_boolean
|
||||||
|
putType BSDate{} = putDataType Data_date
|
||||||
|
putType BSNull = putDataType Data_null
|
||||||
|
putType BSRegex{} = putDataType Data_regex
|
||||||
|
-- putType = putDataType Data_ref
|
||||||
|
-- putType = putDataType Data_code
|
||||||
|
putType BSSymbol{} = putDataType Data_symbol
|
||||||
|
-- putType = putDataType Data_code_w_scope
|
||||||
|
putType (BSInt32 i) = putDataType Data_int
|
||||||
|
putType (BSInt64 i) = putDataType Data_long
|
||||||
|
-- putType = putDataType Data_timestamp
|
||||||
|
putType BSMinKey = putDataType Data_min_key
|
||||||
|
putType BSMaxKey = putDataType Data_max_key
|
||||||
|
|
||||||
|
|
||||||
|
putVal (BSDouble d) = putFloat64le d
|
||||||
|
putVal (BSString s) = putI32 (1 + L8.length s) >> putS s
|
||||||
|
putVal (BSObject o) = putObj o
|
||||||
|
putVal (BSArray es) = putOutterObj bs
|
||||||
|
where bs = runPut $ forM_ (List.zip [0..] es) $ \(i, e) ->
|
||||||
|
putType e >> (putS $ L8.fromString $ show i) >> putVal e
|
||||||
|
putVal (BSBinary t bs)= do putI32 $ 4 + L.length bs
|
||||||
|
putI8 $ fromBinarySubType t
|
||||||
|
putI32 $ L.length bs
|
||||||
|
putLazyByteString bs
|
||||||
|
putVal BSUndefined = flush
|
||||||
|
putVal (BSObjectId o) = putLazyByteString o
|
||||||
|
putVal (BSBool False) = putI8 0
|
||||||
|
putVal (BSBool True) = putI8 1
|
||||||
|
putVal (BSDate pt) = putI64 $ round $ 1000 * realToFrac pt
|
||||||
|
putVal BSNull = flush
|
||||||
|
putVal (BSRegex r opt)= do putS r
|
||||||
|
putByteString $ pack $ List.sort opt
|
||||||
|
putNull
|
||||||
|
putVal (BSSymbol s) = putI32 (1 + L8.length s) >> putS s
|
||||||
|
putVal (BSInt32 i) = putI32 i
|
||||||
|
putVal (BSInt64 i) = putI64 i
|
||||||
|
putVal BSMinKey = flush
|
||||||
|
putVal BSMaxKey = flush
|
||||||
|
|
||||||
|
putObj obj = putOutterObj bs
|
||||||
|
where bs = runPut $ forM_ (fromBSONObject obj) $ \(k, v) ->
|
||||||
|
putType v >> putS k >> putVal v
|
||||||
|
|
||||||
|
putOutterObj bytes = do
|
||||||
|
-- the length prefix and null term are included in the length
|
||||||
|
putI32 $ 4 + 1 + L.length bytes
|
||||||
|
putLazyByteString bytes
|
||||||
|
putNull
|
||||||
|
|
||||||
|
putNull = putI8 0
|
||||||
|
putI8 :: (Integral i) => i -> Put
|
||||||
|
putI8 = putWord8 . fromIntegral
|
||||||
|
|
||||||
|
putI32 :: (Integral i) => i -> Put
|
||||||
|
putI32 = putWord32le . fromIntegral
|
||||||
|
|
||||||
|
putI64 :: (Integral i) => i -> Put
|
||||||
|
putI64 = putWord64le . fromIntegral
|
||||||
|
|
||||||
|
putS :: L8.ByteString -> Put
|
||||||
|
putS s = putLazyByteString s >> putNull
|
||||||
|
|
||||||
|
putDataType = putI8 . fromDataType
|
||||||
|
|
||||||
|
int32_min = -2147483648
|
||||||
|
int32_max = -int32_min - 1
|
Loading…
Reference in a new issue