move Pool to System.IO. update docs
This commit is contained in:
parent
23fdaee494
commit
8672652395
6 changed files with 32 additions and 70 deletions
|
@ -538,7 +538,7 @@ data MapReduce = MapReduce {
|
|||
rSelect :: Selector, -- ^ Operate on only those documents selected. Default is [] meaning all documents.
|
||||
rSort :: Order, -- ^ Default is [] meaning no sort
|
||||
rLimit :: Limit, -- ^ Default is 0 meaning no limit
|
||||
rOut :: MROut, -- ^ Output to a collection with a certain merge policy. Default is no collection (Inline). Note, you don't want this default if your result set is large.
|
||||
rOut :: MROut, -- ^ Output to a collection with a certain merge policy. Default is no collection ('Inline'). Note, you don't want this default if your result set is large.
|
||||
rFinalize :: Maybe FinalizeFun, -- ^ Function to apply to all the results when finished. Default is Nothing.
|
||||
rScope :: Document, -- ^ Variables (environment) that can be accessed from map/reduce/finalize. Default is [].
|
||||
rVerbose :: Bool -- ^ Provide statistics on job execution time. Default is False.
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
{-# LANGUAGE RecordWildCards, NamedFieldPuns, FlexibleContexts #-}
|
||||
|
||||
module Var.Pool where
|
||||
module System.IO.Pool where
|
||||
|
||||
import Control.Applicative ((<$>))
|
||||
import Control.Monad.MVar
|
4
doc/TODO
4
doc/TODO
|
@ -11,7 +11,6 @@ Bson
|
|||
MongoDB
|
||||
-------
|
||||
+ Support the new query flag: https://jira.mongodb.org/browse/SERVER-2020
|
||||
+ Support MapReduce 1.8 version
|
||||
+ When one connection in a pool fails, close all the others since they will likely fail too
|
||||
+ on insert/update: reject keys that start with "$" or "."
|
||||
+ dereference dbref
|
||||
|
@ -43,8 +42,6 @@ Tests - none currently
|
|||
Misc
|
||||
----
|
||||
+ javascript DSL
|
||||
+ update tutorial to match new python one
|
||||
+ custom types (see python examples)
|
||||
|
||||
Questions:
|
||||
- In Mongo shell, db.foo.totalSize fetches storageSize of each index but does not use it
|
||||
|
@ -52,4 +49,3 @@ Questions:
|
|||
Notes:
|
||||
- Remember that in the new version of MongoDB (>= 1.6), "ok" field can be a number (0 or 1) or boolean (False or True). Use 'true1' function defined in Database.MongoDB.Util
|
||||
- A cursor will die on the server if not accessed (by any connection) within past 10 minutes (unless NoCursorTimeout option set). Accessing a dead (or non-existent) cursor raises a CursorNotFoundFailure.
|
||||
- Unsafe to shrink pool and close connections because map/reduce temp tables that were created on the connection will get deleted. Note, other connections can access a map/reduce temp table as long as the original connection is still alive. Also, other connections can access cursors created on other connections, even if those die. Cursors will be deleted on server only if idle for more than 10 minutes. Accessing a deleted cursor returns an error.
|
||||
|
|
|
@ -1,81 +1,45 @@
|
|||
Map/Reduce Example
|
||||
------------------
|
||||
## Map/Reduce Example
|
||||
|
||||
This is an example of how to use the mapReduce function to perform
|
||||
map/reduce style aggregation on your data.
|
||||
This is an example of how to use the mapReduce function to perform map/reduce style aggregation on your data.
|
||||
|
||||
This document has been shamelessly ported from the similar
|
||||
[pymongo Map/Reduce Example](http://api.mongodb.org/python/1.4%2B/examples/map_reduce.html).
|
||||
### Setup
|
||||
|
||||
Setup
|
||||
-----
|
||||
|
||||
To start, we'll insert some example data which we can perform
|
||||
map/reduce queries on:
|
||||
To start, we'll insert some example data which we can perform map/reduce queries on:
|
||||
|
||||
$ ghci
|
||||
...
|
||||
Prelude> :set prompt "> "
|
||||
> :set -XOverloadedStrings
|
||||
> import Database.MongoDB
|
||||
> import Data.CompactString ()
|
||||
> conn <- newConnPool 1 (host "127.0.0.1")
|
||||
> let run act = access safe Master conn $ use (Database "test") act
|
||||
> :{
|
||||
run $ insertMany "mr1" [
|
||||
["x" =: 1, "tags" =: ["dog", "cat"]],
|
||||
["x" =: 2, "tags" =: ["cat"]],
|
||||
["x" =: 3, "tags" =: ["mouse", "cat", "dog"]],
|
||||
["x" =: 4, "tags" =: ([] :: [String])]
|
||||
]
|
||||
:}
|
||||
> import Data.CompactString () -- only needed when using ghci
|
||||
> pipe <- runIOE $ connect $ host "127.0.0.1"
|
||||
> let run act = access pipe master "test" act
|
||||
> let docs = [ ["x" =: 1, "tags" =: ["dog", "cat"]], ["x" =: 2, "tags" =: ["cat"]], ["x" =: 3, "tags" =: ["mouse", "cat", "dog"]] ]
|
||||
> run $ insertMany "mr1" docs
|
||||
|
||||
Basic Map/Reduce
|
||||
----------------
|
||||
### Basic Map/Reduce
|
||||
|
||||
Now we'll define our map and reduce functions. In this case we're
|
||||
performing the same operation as in the MongoDB Map/Reduce
|
||||
documentation - counting the number of occurrences for each tag in the
|
||||
tags array, across the entire collection.
|
||||
Now we'll define our map and reduce functions to count the number of occurrences for each tag in the tags array, across the entire collection.
|
||||
|
||||
Our map function just emits a single (key, 1) pair for each tag in the
|
||||
array:
|
||||
Our map function just emits a single (key, 1) pair for each tag in the array:
|
||||
|
||||
> :{
|
||||
let mapFn = Javascript [] "
|
||||
function() {\n
|
||||
this.tags.forEach(function(z) {\n
|
||||
emit(z, 1);\n
|
||||
});\n
|
||||
}"
|
||||
:}
|
||||
> let mapFn = Javascript [] "function() {this.tags.forEach (function(z) {emit(z, 1);});}"
|
||||
|
||||
The reduce function sums over all of the emitted values for a given
|
||||
key:
|
||||
The reduce function sums over all of the emitted values for a given key:
|
||||
|
||||
> :{
|
||||
let reduceFn = Javascript [] "
|
||||
function (key, values) {\n
|
||||
var total = 0;\n
|
||||
for (var i = 0; i < values.length; i++) {\n
|
||||
total += values[i];\n
|
||||
}\n
|
||||
return total;\n
|
||||
}"
|
||||
:}
|
||||
> let reduceFn = Javascript [] "function (key, values) {var total = 0; for (var i = 0; i < values.length; i++) {total += values[i];} return total;}"
|
||||
|
||||
Note: We can't just return values.length as the reduce function might
|
||||
be called iteratively on the results of other reduce steps.
|
||||
Note: We can't just return values.length as the reduce function might be called iteratively on the results of other reduce steps.
|
||||
|
||||
Finally, we run mapReduce. By default, the results are returned in an array in the result document (inlined):
|
||||
|
||||
> run $ runMR' (mapReduce "mr1" mapFn reduceFn)
|
||||
Right [ results: [[ _id: "cat", value: 3.0],[ _id: "dog", value: 2.0],[ _id: "mouse", value: 1.0]], timeMillis: 379, counts: [ input: 3, emit: 6, reduce: 2, output: 3], ok: 1.0]
|
||||
|
||||
Inlining only works if result set < 16MB. An alternative to inlining is outputing to a collection. But what to do if there is data already in the collection from a previous run of the same MapReduce? You have three alternatives in the MRMerge data type: Replace, Merge, and Reduce. See its documentation for details. To output to a collection, set the mOut field in MapReduce.
|
||||
Inlining only works if the result set is smaller than 16MB. An alternative to inlining is outputting to a collection. But what to do if there is data already in the collection from a previous run of the same MapReduce? You have three alternatives in the MRMerge data type: Replace, Merge, and Reduce. See its documentation for details. To output to a collection, set the `rOut` field in `MapReduce`.
|
||||
|
||||
> run $ runMR' (mapReduce "mr1" mapFn reduceFn) {rOut = Output Replace "mr1out" Nothing}
|
||||
Right [ result: "mr1out", timeMillis: 379, counts: [ input: 4, emit: 6, reduce: 2, output: 3], ok: 1.0]
|
||||
Right [ result: "mr1out", timeMillis: 379, counts: [ input: 3, emit: 6, reduce: 2, output: 3], ok: 1.0]
|
||||
|
||||
You can now query the mr1out collection to see the result, or run another MapReduce on it! A shortcut for running the map-reduce then querying the result collection right away is `runMR`.
|
||||
|
||||
|
|
|
@ -10,11 +10,13 @@ Start a local MongoDB server in a separate terminal window:
|
|||
Start a haskell session:
|
||||
|
||||
$ ghci
|
||||
> :set prompt "> "
|
||||
|
||||
Import the MongoDB driver library, and set OverloadedStrings so literal strings are converted to UTF-8 automatically.
|
||||
|
||||
> :set -XOverloadedStrings
|
||||
> import Database.MongoDB
|
||||
> import Data.CompactString () -- only needed when using ghci
|
||||
|
||||
### Connecting
|
||||
|
||||
|
@ -30,7 +32,7 @@ A `Pipe` is a thread-safe, pipelined (a' la [HTTP pipelining](http://en.wikipedi
|
|||
|
||||
### Action monad
|
||||
|
||||
A DB read or write operation is called a DB `Action`. A DB Action is a monad so you can sequence them together. To run an Action supply it to the `access` function with the Pipe to read/write to, the `AccessMode` for read/write operations, and the `Database` to access. For example, to list all collections in the "test" database:
|
||||
A DB read or write operation is called a DB `Action`. A DB Action is a monad so you can sequence them together. To run an Action supply it to the `access` function with the Pipe to use, the `AccessMode` for read/write operations, and the `Database` to access. For example, to list all collections in the "test" database:
|
||||
|
||||
> access pipe master "test" allCollections
|
||||
|
||||
|
@ -40,7 +42,7 @@ A DB read or write operation is called a DB `Action`. A DB Action is a monad so
|
|||
|
||||
Since we are working in ghci, which requires us to start from the IO monad every time, we'll define a convenient *run* function that takes an action and executes it against our "test" database on the server we just connected to, with master access mode:
|
||||
|
||||
> let run = access pipe master "test"
|
||||
> let run act = access pipe master "test" act
|
||||
|
||||
### Databases and Collections
|
||||
|
||||
|
@ -118,13 +120,13 @@ You can count how many documents are in an entire collection:
|
|||
|
||||
Or count how many documents match a query:
|
||||
|
||||
> run $ count $ select ["title" =: ["$exits" =: True]] "posts"
|
||||
> run $ count $ select ["title" =: ["$exists" =: True]] "posts"
|
||||
|
||||
### Sorting
|
||||
|
||||
`sort` takes the fields to sort by and whether ascending (1) or descending (-1)
|
||||
|
||||
> run $ find (select [] "posts") {sort = ["author" =: 1, "title" =: 1]} >> rest
|
||||
> run $ find (select [] "posts") {sort = ["author" =: 1, "text" =: 1]} >>= rest
|
||||
|
||||
If you don't sort, documents are returned in *natural* order, which is the order found on disk. Natural order is not particularly useful because, although the order is often close to insertion order, it is not guaranteed.
|
||||
|
||||
|
@ -132,7 +134,7 @@ If you don't sort, documents are returned in *natural* order, which is the order
|
|||
|
||||
`project` returns partial documents containing only the fields you include (1). However, *_id* is always included unless you exclude it (0).
|
||||
|
||||
> run $ find (select [] "posts") {project = ["author" =: 1, "_id" =: 0]}
|
||||
> run $ find (select [] "posts") {project = ["author" =: 1, "_id" =: 0]} >>= rest
|
||||
|
||||
### Updating
|
||||
|
||||
|
@ -142,9 +144,9 @@ If you don't sort, documents are returned in *natural* order, which is the order
|
|||
|
||||
or inserts a new document if its *_id* is new or missing
|
||||
|
||||
> run $ save "posts" ["author" =: "Tony", "text" =: "Haskell rocks"]
|
||||
> run $ save "posts" ["author" =: "Tony", "text" =: "hello world"]
|
||||
|
||||
`modify` updates every document matching selection according to supplied modifier. For example:
|
||||
`modify` updates every document matching the selection using the supplied modifier. For example:
|
||||
|
||||
> run $ modify (select [] "posts") ["$push" =: ["tags" =: "new"]]
|
||||
|
||||
|
@ -156,7 +158,7 @@ or inserts a new document if its *_id* is new or missing
|
|||
|
||||
### Documentation
|
||||
|
||||
Documentation on the Mongo query language, i.e. the selector document, modifier document, etc., can be found at the [MongoDB Developer Zone](http://www.mongodb.org/display/DOCS/Developer+Zone).
|
||||
Documentation on the Mongo query language (i.e. the selector document, modifier document, etc.) can be found at the [MongoDB Developer Zone](http://www.mongodb.org/display/DOCS/Developer+Zone).
|
||||
|
||||
Haddock generated documentation on this Haskell driver can be found on [Hackage](http://hackage.haskell.org/package/mongoDB).
|
||||
Haddock-generated documentation of this Haskell driver can be found on [Hackage](http://hackage.haskell.org/package/mongoDB).
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ exposed-modules:
|
|||
Database.MongoDB.Internal.Util
|
||||
Database.MongoDB.Query
|
||||
System.IO.Pipeline
|
||||
Var.Pool
|
||||
System.IO.Pool
|
||||
exposed: True
|
||||
buildable: True
|
||||
build-tools:
|
||||
|
|
Loading…
Reference in a new issue