diff --git a/Database/MongoDB/Query.hs b/Database/MongoDB/Query.hs index cdfc8a1..f258b84 100644 --- a/Database/MongoDB/Query.hs +++ b/Database/MongoDB/Query.hs @@ -538,7 +538,7 @@ data MapReduce = MapReduce { rSelect :: Selector, -- ^ Operate on only those documents selected. Default is [] meaning all documents. rSort :: Order, -- ^ Default is [] meaning no sort rLimit :: Limit, -- ^ Default is 0 meaning no limit - rOut :: MROut, -- ^ Output to a collection with a certain merge policy. Default is no collection (Inline). Note, you don't want this default if your result set is large. + rOut :: MROut, -- ^ Output to a collection with a certain merge policy. Default is no collection ('Inline'). Note, you don't want this default if your result set is large. rFinalize :: Maybe FinalizeFun, -- ^ Function to apply to all the results when finished. Default is Nothing. rScope :: Document, -- ^ Variables (environment) that can be accessed from map/reduce/finalize. Default is []. rVerbose :: Bool -- ^ Provide statistics on job execution time. Default is False. diff --git a/Var/Pool.hs b/System/IO/Pool.hs similarity index 98% rename from Var/Pool.hs rename to System/IO/Pool.hs index 1bf0d73..6ca0bef 100644 --- a/Var/Pool.hs +++ b/System/IO/Pool.hs @@ -2,7 +2,7 @@ {-# LANGUAGE RecordWildCards, NamedFieldPuns, FlexibleContexts #-} -module Var.Pool where +module System.IO.Pool where import Control.Applicative ((<$>)) import Control.Monad.MVar diff --git a/doc/TODO b/doc/TODO index b67460b..8cd0aa8 100644 --- a/doc/TODO +++ b/doc/TODO @@ -11,7 +11,6 @@ Bson MongoDB ------- + Support the new query flag: https://jira.mongodb.org/browse/SERVER-2020 -+ Support MapReduce 1.8 version + When one connection in a pool fails, close all other since they will likely fail too + on insert/update: reject keys that start with "$" or "." 
+ dereference dbref @@ -43,8 +42,6 @@ Tests - none currently Misc ---- + javascript DSL -+ update tutorial to match new python one -+ custom types (see python examples) Questions: - In Mongo shell, db.foo.totalSize fetches storageSize of each index but does not use it @@ -52,4 +49,3 @@ Questions: Notes: - Remember that in the new version of MongoDB (>= 1.6), "ok" field can be a number (0 or 1) or boolean (False or True). Use 'true1' function defined in Database.MongoDB.Util - A cursor will die on the server if not accessed (by any connection) within past 10 minutes (unless NoCursorTimeout option set). Accessing a dead (or non-existent) cursor raises a CursorNotFoundFailure. -- Unsafe to shrink pool and close connections because map/reduce temp tables that were created on the connection will get deleted. Note, other connections can access a map/reduce temp table as long as the original connection is still alive. Also, other connections can access cursors created on other connections, even if those die. Cursors will be deleted on server only if idle for more than 10 minutes. Accessing a deleted cursor returns an error. diff --git a/doc/map-reduce-example.md b/doc/map-reduce-example.md index 9a3f0d9..cf63819 100644 --- a/doc/map-reduce-example.md +++ b/doc/map-reduce-example.md @@ -1,81 +1,45 @@ -Map/Reduce Example ------------------- +## Map/Reduce Example -This is an example of how to use the mapReduce function to perform -map/reduce style aggregation on your data. +This is an example of how to use the mapReduce function to perform map/reduce style aggregation on your data. -This document has been shamelessly ported from the similar -[pymongo Map/Reduce Example](http://api.mongodb.org/python/1.4%2B/examples/map_reduce.html). +### Setup -Setup ------ - -To start, we'll insert some example data which we can perform -map/reduce queries on: +To start, we'll insert some example data which we can perform map/reduce queries on: $ ghci ... 
Prelude> :set prompt "> " > :set -XOverloadedStrings > import Database.MongoDB - > import Data.CompactString () - > conn <- newConnPool 1 (host "127.0.0.1") - > let run act = access safe Master conn $ use (Database "test") act - > :{ - run $ insertMany "mr1" [ - ["x" =: 1, "tags" =: ["dog", "cat"]], - ["x" =: 2, "tags" =: ["cat"]], - ["x" =: 3, "tags" =: ["mouse", "cat", "dog"]], - ["x" =: 4, "tags" =: ([] :: [String])] - ] - :} + > import Data.CompactString () -- only needed when using ghci + > pipe <- runIOE $ connect $ host "127.0.0.1" + > let run act = access pipe master "test" act + > let docs = [ ["x" =: 1, "tags" =: ["dog", "cat"]], ["x" =: 2, "tags" =: ["cat"]], ["x" =: 3, "tags" =: ["mouse", "cat", "dog"]] ] + > run $ insertMany "mr1" docs -Basic Map/Reduce ----------------- +### Basic Map/Reduce -Now we'll define our map and reduce functions. In this case we're -performing the same operation as in the MongoDB Map/Reduce -documentation - counting the number of occurrences for each tag in the -tags array, across the entire collection. +Now we'll define our map and reduce functions to count the number of occurrences for each tag in the tags array, across the entire collection. 
-Our map function just emits a single (key, 1) pair for each tag in the -array: +Our map function just emits a single (key, 1) pair for each tag in the array: - > :{ - let mapFn = Javascript [] " - function() {\n - this.tags.forEach(function(z) {\n - emit(z, 1);\n - });\n - }" - :} + > let mapFn = Javascript [] "function() {this.tags.forEach (function(z) {emit(z, 1);});}" -The reduce function sums over all of the emitted values for a given -key: +The reduce function sums over all of the emitted values for a given key: - > :{ - let reduceFn = Javascript [] " - function (key, values) {\n - var total = 0;\n - for (var i = 0; i < values.length; i++) {\n - total += values[i];\n - }\n - return total;\n - }" - :} + > let reduceFn = Javascript [] "function (key, values) {var total = 0; for (var i = 0; i < values.length; i++) {total += values[i];} return total;}" -Note: We can't just return values.length as the reduce function might -be called iteratively on the results of other reduce steps. +Note: We can't just return values.length as the reduce function might be called iteratively on the results of other reduce steps. Finally, we run mapReduce, results by default will be return in an array in the result document (inlined): > run $ runMR' (mapReduce "mr1" mapFn reduceFn) Right [ results: [[ _id: "cat", value: 3.0],[ _id: "dog", value: 2.0],[ _id: "mouse", value: 1.0]], timeMillis: 379, counts: [ input: 4, emit: 6, reduce: 2, output: 3], ok: 1.0] -Inlining only works if result set < 16MB. An alternative to inlining is outputing to a collection. But what to do if there is data already in the collection from a previous run of the same MapReduce? You have three alternatives in the MRMerge data type: Replace, Merge, and Reduce. See its documentation for details. To output to a collection, set the mOut field in MapReduce. +Inlining only works if result set < 16MB. An alternative to inlining is outputting to a collection.
But what to do if there is data already in the collection from a previous run of the same MapReduce? You have three alternatives in the MRMerge data type: Replace, Merge, and Reduce. See its documentation for details. To output to a collection, set the `rOut` field in `MapReduce`. > run $ runMR' (mapReduce "mr1" mapFn reduceFn) {rOut = Output Replace "mr1out" Nothing} - Right [ result: "mr1out", timeMillis: 379, counts: [ input: 4, emit: 6, reduce: 2, output: 3], ok: 1.0] + Right [ result: "mr1out", timeMillis: 379, counts: [ input: 3, emit: 6, reduce: 2, output: 3], ok: 1.0] You can now query the mr1out collection to see the result, or run another MapReduce on it! A shortcut for running the map-reduce then querying the result collection right away is `runMR`. diff --git a/doc/tutorial.md b/doc/tutorial.md index 0a8dc72..f0cdc5b 100644 --- a/doc/tutorial.md +++ b/doc/tutorial.md @@ -10,11 +10,13 @@ Start a local MongoDB server in a separate terminal window: Start a haskell session: $ ghci + > :set prompt "> " Import the MongoDB driver library, and set OverloadedStrings so literal strings are converted to UTF-8 automatically. > :set -XOverloadedStrings > import Database.MongoDB + > import Data.CompactString () -- only needed when using ghci ### Connecting @@ -30,7 +32,7 @@ A `Pipe` is a thread-safe, pipelined (a' la [HTTP pipelining](http://en.wikipedi ### Action monad -A DB read or write operation is called a DB `Action`. A DB Action is a monad so you can sequence them together. To run an Action supply it to the `access` function with the Pipe to read/write to, the `AccessMode` for read/write operations, and the `Database` to access. For example, to list all collections in the "test" database: +A DB read or write operation is called a DB `Action`. A DB Action is a monad so you can sequence them together. To run an Action supply it to the `access` function with the Pipe to use, the `AccessMode` for read/write operations, and the `Database` to access. 
For example, to list all collections in the "test" database: > access pipe master "test" allCollections @@ -40,7 +42,7 @@ A DB read or write operation is called a DB `Action`. A DB Action is a monad so Since we are working in ghci, which requires us to start from the IO monad every time, we'll define a convenient *run* function that takes an action and executes it against our "test" database on the server we just connected to, with master access mode: - > let run = access pipe master "test" + > let run act = access pipe master "test" act ### Databases and Collections @@ -118,13 +120,13 @@ You can count how many documents are in an entire collection: Or count how many documents match a query: - > run $ count $ select ["title" =: ["$exits" =: True]] "posts" + > run $ count $ select ["title" =: ["$exists" =: True]] "posts" ### Sorting `sort` takes the fields to sort by and whether ascending (1) or descending (-1) - > run $ find (select [] "posts") {sort = ["author" =: 1, "title" =: 1]} >> rest + > run $ find (select [] "posts") {sort = ["author" =: 1, "text" =: 1]} >>= rest If you don't sort, documents are returned in *natural* order, which is the order found on disk. Natural order is not particularly useful because, although the order is often close to insertion order, it is not guaranteed. @@ -132,7 +134,7 @@ If you don't sort, documents are returned in *natural* order, which is the order `project` returns partial documents containing only the fields you include (1). However, *_id* is always included unless you exclude it (0). 
- > run $ find (select [] "posts") {project = ["author" =: 1, "_id" =: 0]} + > run $ find (select [] "posts") {project = ["author" =: 1, "_id" =: 0]} >>= rest ### Updating @@ -142,9 +144,9 @@ If you don't sort, documents are returned in *natural* order, which is the order or inserts a new document if its *_id* is new or missing - > run $ save "posts" ["author" =: "Tony", "text" =: "Haskell rocks"] + > run $ save "posts" ["author" =: "Tony", "text" =: "hello world"] -`modify` updates every document matching selection according to supplied modifier. For example: +`modify` updates every document matching the selection using the supplied modifier. For example: > run $ modify (select [] "posts") ["$push" =: ["tags" =: "new"]] @@ -156,7 +158,7 @@ or inserts a new document if its *_id* is new or missing ### Documentation -Documentation on the Mongo query language, i.e. the selector document, modifier document, etc., can be found at the [MongoDB Developer Zone](http://www.mongodb.org/display/DOCS/Developer+Zone). +Documentation on the Mongo query language (i.e. the selector document, modifier document, etc.) can be found at the [MongoDB Developer Zone](http://www.mongodb.org/display/DOCS/Developer+Zone). -Haddock generated documentation on this Haskell driver can be found on [Hackage](http://hackage.haskell.org/package/mongoDB). +Haddock-generated documentation of this Haskell driver can be found on [Hackage](http://hackage.haskell.org/package/mongoDB). diff --git a/mongoDB.cabal b/mongoDB.cabal index 0e1d359..94c2b16 100644 --- a/mongoDB.cabal +++ b/mongoDB.cabal @@ -40,7 +40,7 @@ exposed-modules: Database.MongoDB.Internal.Util Database.MongoDB.Query System.IO.Pipeline - Var.Pool + System.IO.Pool exposed: True buildable: True build-tools: