From b52854a45708c699260e62f7fbfba8af5c56a67d Mon Sep 17 00:00:00 2001 From: Tony Hannan Date: Fri, 8 Jul 2011 22:17:44 -0400 Subject: [PATCH] moved docs to new docs folder. updated tutorial for version 1.0.0 --- README.md | 22 +- docs/Example.hs | 33 +++ TODO => docs/TODO | 0 V0.6-Redesign.md => docs/V0.6-Redesign.md | 0 .../map-reduce-example.md | 0 docs/tutorial.md | 162 ++++++++++++ mongoDB.cabal | 4 +- tutorial.md | 245 ------------------ 8 files changed, 205 insertions(+), 261 deletions(-) create mode 100644 docs/Example.hs rename TODO => docs/TODO (100%) rename V0.6-Redesign.md => docs/V0.6-Redesign.md (100%) rename map-reduce-example.md => docs/map-reduce-example.md (100%) create mode 100644 docs/tutorial.md delete mode 100644 tutorial.md diff --git a/README.md b/README.md index 9b88acc..224b353 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,9 @@ -mongoDB -------- +This is the Haskell MongoDB driver (client). [MongoDB](http://www.mongodb.org) is a free, scalable, fast, document database management system. This driver lets you connect to a MongoDB server, and update and query its data. It also lets you do administrative tasks, like create an index or look at performance statistics. -About ------ - -MongoDB driver for Haskell, which lets you connect to a MongoDB database management system and do inserts, queries, updates, etc. 
- -Links ------ - -* [MongoDB](http://www.mongodb.org) -* [mongoDB API reference](http://hackage.haskell.org/package/mongoDB) -* [tutorial](http://github.com/TonyGen/mongoDB-haskell/blob/master/tutorial.md) -* [map/reduce example](http://github.com/TonyGen/mongoDB-haskell/blob/master/map-reduce-example.md) +### Documentation +* [Quick example](http://github.com/TonyGen/mongoDB-haskell/blob/master/docs/Example.hs) +* [Tutorial](http://github.com/TonyGen/mongoDB-haskell/blob/master/docs/tutorial.md) +* [Driver API](http://hackage.haskell.org/package/mongoDB) +* [MapReduce example](http://github.com/TonyGen/mongoDB-haskell/blob/master/docs/map-reduce-example.md) +* [Driver design](http://github.com/TonyGen/mongoDB-haskell/blob/master/docs/V0.6-Redesign.md) +* [MongoDB DBMS](http://www.mongodb.org) diff --git a/docs/Example.hs b/docs/Example.hs new file mode 100644 index 0000000..d7fa9e6 --- /dev/null +++ b/docs/Example.hs @@ -0,0 +1,33 @@ +{-# LANGUAGE OverloadedStrings #-} + +import Database.MongoDB +import Control.Monad.Trans (liftIO) + +main = do + pipe <- runIOE $ connect (host "127.0.0.1") + e <- access pipe master "baseball" run + close pipe + print e + +run = do + clearTeams + insertTeams + printDocs "All Teams" =<< allTeams + printDocs "National League Teams" =<< nationalLeagueTeams + printDocs "New York Teams" =<< newYorkTeams + +clearTeams = delete (select [] "team") + +insertTeams = insertMany "team" [ + ["name" =: u"Yankees", "home" =: ["city" =: u"New York", "state" =: u"NY"], "league" =: u"American"], + ["name" =: u"Mets", "home" =: ["city" =: u"New York", "state" =: u"NY"], "league" =: u"National"], + ["name" =: u"Phillies", "home" =: ["city" =: u"Philadelphia", "state" =: u"PA"], "league" =: u"National"], + ["name" =: u"Red Sox", "home" =: ["city" =: u"Boston", "state" =: u"MA"], "league" =: u"American"] ] + +allTeams = rest =<< find (select [] "team") {sort = ["home.city" =: (1 :: Int)]} + +nationalLeagueTeams = rest =<< find (select ["league" =: u"National"] 
"team") + +newYorkTeams = rest =<< find (select ["home.state" =: u"NY"] "team") {project = ["name" =: (1 :: Int), "league" =: (1 :: Int)]} + +printDocs title docs = liftIO $ putStrLn title >> mapM_ (print . exclude ["_id"]) docs diff --git a/TODO b/docs/TODO similarity index 100% rename from TODO rename to docs/TODO diff --git a/V0.6-Redesign.md b/docs/V0.6-Redesign.md similarity index 100% rename from V0.6-Redesign.md rename to docs/V0.6-Redesign.md diff --git a/map-reduce-example.md b/docs/map-reduce-example.md similarity index 100% rename from map-reduce-example.md rename to docs/map-reduce-example.md diff --git a/docs/tutorial.md b/docs/tutorial.md new file mode 100644 index 0000000..0a8dc72 --- /dev/null +++ b/docs/tutorial.md @@ -0,0 +1,162 @@ +This tutorial takes you through inserting, updating, and querying documents. + +### Getting Ready + +Start a local MongoDB server in a separate terminal window: + + $ mkdir mongoFiles + $ mongod --dbpath mongoFiles + +Start a haskell session: + + $ ghci + +Import the MongoDB driver library, and set OverloadedStrings so literal strings are converted to UTF-8 automatically. + + > :set -XOverloadedStrings + > import Database.MongoDB + +### Connecting + +Establish a connection to your local Mongo server on the standard port (27017): + + > pipe <- runIOE $ connect $ host "127.0.0.1" + +A host with non-standard port would look like `Host "127.0.0.1" (PortNumber 27001)`. + +`connect h` has type `ErrorT IOError IO Pipe`. `runIOE` brings this type back down to `IO Pipe` and throws the IOError in IO if present. One design principle of this driver was to make DB errors explicit, hence the need for `runIOE`. + +A `Pipe` is a thread-safe, pipelined (a' la [HTTP pipelining](http://en.wikipedia.org/wiki/HTTP_pipelining)) TCP connection to a MongoDB server. Multiple threads can use the pipe at the same time. The pipelining feature is used by cursors and not exposed to the user. 
+ +### Action monad + +A DB read or write operation is called a DB `Action`. A DB Action is a monad so you can sequence them together. To run an Action, supply it to the `access` function with the Pipe to read/write to, the `AccessMode` for read/write operations, and the `Database` to access. For example, to list all collections in the "test" database: + + > access pipe master "test" allCollections + +`access` returns either Left `Failure` or Right result. Failure means there was a connection failure, or a read/write failure like cursor expired or duplicate key insert. + +`master` is an `AccessMode`. Access mode indicates how reads and writes will be performed. Its three modes are: `ReadStaleOk`, `UnconfirmedWrites`, and `ConfirmWrites GetLastErrorParams`. `master` is just shorthand for `ConfirmWrites []`. The first mode may be used against a slave or a master server, the last two must be used against a master server. + +Since we are working in ghci, which requires us to start from the IO monad every time, we'll define a convenient *run* function that takes an action and executes it against our "test" database on the server we just connected to, with master access mode: + + > let run = access pipe master "test" + +### Databases and Collections + +To see all the databases available on the server: + + > run allDatabases + +The "test" database in context is ignored in this case because `allDatabases` is not a query on a specific database but on the server as a whole. + +Databases and collections do not need to be created; just start using them and MongoDB will automatically create them for you. In the examples below we'll be using the "test" database (captured in *run* above) and the "posts" collection. + +### Documents + +Data in MongoDB is represented (and stored) using JSON-style documents, called BSON documents. A `Document` is simply a list of `Field`s, where each field is a label-value pair. 
A `Value` is a basic type like Bool, Int, Float, String, Time; a special BSON value like Binary, Javascript, ObjectId; a (embedded) Document; or a list of Values. Here's an example document which could represent a blog post: + + > let post = ["author" =: "Mike", "text" =: "My first blog post!", "tags" =: ["mongoDB", "Haskell"]] + +### Inserting One + +To insert a document into a collection we can use the `insert` function. + + > run $ insert "posts" post + +When a document is inserted and it does not contain an *_id* field then it is added with a globally unique value of type `ObjectId`. The *_id* value can be any type but must be unique across the collection. `insert` returns the *_id* value of the inserted document. + +After inserting the first document, the "posts" collection has actually been created on the server. We can verify this by listing all of the collections in our database again: + + > run allCollections + +Note, the "system.indexes" collection is a special internal collection that was created automatically. + +### Reading One + +The most basic type of query that can be performed in MongoDB is `findOne`. This function returns a single document matching the selection, or `Nothing` if there are no matches. It is useful when you know there is only one matching document, or are only interested in the first match. Here we use `findOne` to get the first document from the posts +collection: + + > run $ findOne $ select [] "posts" + +The result is a document matching the one that we inserted previously. Note, the returned document contains the _id field, which was automatically added on insert. + +`findOne` also supports querying on specific elements that the resulting document must match. 
For example, to limit our results to a document with author "Mike" we do: + + > run $ findOne $ select ["author" =: "Mike"] "posts" + +If we try with a different author, like "Eliot", we'll get no result: + + > run $ findOne $ select ["author" =: "Eliot"] "posts" + +`fetch` is the same as `findOne` except it fails if no document matches. + +### Inserting Many + +In order to make querying a little more interesting, let's insert a few more documents. In addition to inserting a single document, we can also perform bulk insert operations, by using the `insertMany` function which accepts a list of documents to be inserted. It sends only a single write operation to the server. + + > let post1 = ["author" =: "Mike", "text" =: "Another post!", "tags" =: ["bulk", "insert"]] + > let post2 = ["author" =: "Eliot", "title" =: "MongoDB is fun", "text" =: "and pretty easy too!"] + > run $ insertMany "posts" [post1, post2] + +Note that *post2* has a different shape than the other posts; it has no "tags" field and a new "title" field. Documents in the same collection can have different schemas. + +### Reading Many + +To retrieve more than a single document we use the `find` function. `find` returns a `Cursor`, which allows us to +iterate over the matching documents. There are a few ways in which we can iterate: `next` gets the documents one at a time, and `rest` gets all (remaining) documents in the query result. + + > run $ find (select ["author" =: "Mike"] "posts") >>= rest + +`next` automatically closes the cursor when the last document has been read out of it, and so does `rest`. If you don't exhaust a cursor, you should close it yourself with `closeCursor`. 
+ +### Counting + +You can count how many documents are in an entire collection: + + > run $ count $ select [] "posts" + +Or count how many documents match a query: + + > run $ count $ select ["title" =: ["$exists" =: True]] "posts" + +### Sorting + +`sort` takes the fields to sort by and whether ascending (1) or descending (-1): + + > run $ find (select [] "posts") {sort = ["author" =: 1, "title" =: 1]} >>= rest + +If you don't sort, documents are returned in *natural* order, which is the order found on disk. Natural order is not particularly useful because, although the order is often close to insertion order, it is not guaranteed. + +### Projecting + +`project` returns partial documents containing only the fields you include (1). However, *_id* is always included unless you exclude it (0). + + > run $ find (select [] "posts") {project = ["author" =: 1, "_id" =: 0]} >>= rest + +### Updating + +`save` updates an existing document + + > run $ fetch (select ["author" =: "Eliot"] "posts") >>= save "posts" . merge ["tags" =: ["hello"]] + +or inserts a new document if its *_id* is new or missing: + + > run $ save "posts" ["author" =: "Tony", "text" =: "Haskell rocks"] + +`modify` updates every document matching the selection according to the supplied modifier. For example: + + > run $ modify (select [] "posts") ["$push" =: ["tags" =: "new"]] + +### Deleting + +`delete` deletes all documents matching the selection. `deleteOne` deletes one document matching the selection (the first one in *natural* order), if any: + + > run $ delete $ select ["author" =: "Homer"] "posts" -- none deleted in this case + +### Documentation + +Documentation on the Mongo query language, i.e. the selector document, modifier document, etc., can be found at the [MongoDB Developer Zone](http://www.mongodb.org/display/DOCS/Developer+Zone). + +Haddock generated documentation on this Haskell driver can be found on [Hackage](http://hackage.haskell.org/package/mongoDB). 
+ diff --git a/mongoDB.cabal b/mongoDB.cabal index 76c3499..0e1d359 100644 --- a/mongoDB.cabal +++ b/mongoDB.cabal @@ -22,8 +22,8 @@ stability: alpha homepage: http://github.com/TonyGen/mongoDB-haskell package-url: bug-reports: -synopsis: MongoDB driver -description: This module lets you connect to MongoDB (www.mongodb.org) and do inserts, queries, updates, etc. Please see the example in Database.MongoDB and the tutorial from the homepage. +synopsis: Driver (client) for MongoDB, a free, scalable, fast, document database management system +description: This package lets you connect to MongoDB servers and update/query their data. Please see the example in Database.MongoDB and the tutorial from the homepage. For information about MongoDB itself, see www.mongodb.org. category: Database author: Tony Hannan & Scott Parish tested-with: diff --git a/tutorial.md b/tutorial.md deleted file mode 100644 index 596c0b1..0000000 --- a/tutorial.md +++ /dev/null @@ -1,245 +0,0 @@ -MongoDB Haskell Mini Tutorial ------------------------------ - - __Updated:__ Oct 2010 - -This is a mini tutorial to get you up and going with the basics -of the Haskell mongoDB drivers. You will need the mongoDB driver -installed as well as mongo itself. Prompts used in this tutorial are: - - $ = command line prompt - > = ghci repl prompt - - -Installing Haskell Bindings ---------------------------- - -From Hackage using cabal: - - $ cabal install mongoDB - -From Source: - - $ git clone git://github.com/TonyGen/mongoDB-haskell.git mongoDB - $ cd mongoDB - $ runhaskell Setup.hs configure - $ runhaskell Setup.hs build - $ runhaskell Setup.hs install - -Getting Ready -------------- - -Start a MongoDB instance for us to play with in a separate terminal window: - - $ mongod --dbpath - -Start up a haskell repl: - - $ ghci - -Import the MongoDB driver library, and set -OverloadedStrings so literal strings are converted to UTF-8 automatically. 
- - > import Database.MongoDB - > import Data.CompactString () - > :set -XOverloadedStrings - -Making A Connection -------------------- -Create a connection pool for your mongo server, using the standard port (27017): - - > pool <- newConnPool 1 $ host "127.0.0.1" - -or for a non-standard port - - > pool <- newConnPool 1 $ Host "127.0.0.1" (PortNumber 30000) - -*newConnPool* takes the connection pool size, and the host to connect to. It returns -a *ConnPool*, which is a potential pool of TCP connections. They are not created until first -access, so it is not possible to get a connection error here. - -Note, plain IO code in this driver never raises an exception unless it invokes third party IO -code that does. Driver code that may throw an exception says so in its Monad type, -for example, *ErrorT IOError IO a*. - -Access monad -------------------- - -A query/update executes in an *Access* monad, which has access to a -*Pipe*, *WriteMode*, and read-mode (*MasterSlaveOk*), and may throw a *Failure*. -A Pipe is a single TCP connection. - -To run an Access action (monad), supply WriteMode, MasterOrSlaveOk, Connection, -and action to *access*. For example, to get a list of all the database on the server: - - > access safe Master pool allDatabases - -*access* return either Left Failure or Right result. Failure means there was a connection failure -or a read or write exception like cursor expired or duplicate key insert. - -Since we are working in ghci, which requires us to start from the -IO monad every time, we'll define a convenient *run* function that takes an -action and executes it against our "test" database on the server we -just connected to, with typical write and read mode: - - > let run action = access safe Master pool $ use (Database "test") action - -*use* adds a *Database* to the action context, so query/update operations know which -database to operate on. 
- -Databases and Collections ------------------------------ - -MongoDB can store multiple databases -- separate namespaces -under which collections reside. - -As before, you can obtain the list of databases available on a connection: - - > run allDatabases - -The "test" database in context is ignored in this case because *allDatabases* -is not a query on a specific database but on the server as a whole. - -Databases and collections do not need to be created, just start using -them and MongoDB will automatically create them for you. In the below examples -we'll be using the database "test" (captured in *run* above) and the colllection "posts". - -You can obtain a list of all collections in the "test" database: - - > run allCollections - -Documents ---------- - -Data in MongoDB is represented (and stored) using JSON-style -documents, called BSON documents. A *Document" is simply a list of *Field*s, -where each field is a named value. A *Value" is a basic type like Bool, Int, Float, String, Time; -a special BSON value like Binary, Javascript, ObjectId; a (embedded) -Document; or a list of values. Here's an example document which could -represent a blog post: - - > import Data.Time - > now <- getCurrentTime - > :{ - let post = ["author" =: "Mike", - "text" =: "My first blog post!", - "tags" =: ["mongoDB", "Haskell"], - "date" =: now] - :} - -Inserting a Document -------------------- - -To insert a document into a collection we can use the *insert* function: - - > run $ insert "posts" post - -When a document is inserted a special field, *_id*, is automatically -added if the document doesn't already contain that field. The value -of *_id* must be unique across the collection. *insert* returns the -value of *_id* for the inserted document. For more information, see -the [documentation on _id](http://www.mongodb.org/display/DOCS/Object+IDs). - -After inserting the first document, the posts collection has actually -been created on the server. 
We can verify this by listing all of the -collections in our database: - - > run allCollections - -Note, the system.indexes collection is a special internal collection -that was created automatically. - -Getting a single document with findOne -------------------------------------- - -The most basic type of query that can be performed in MongoDB is -*findOne*. This method returns a single document matching a query (or -*Nothing* if there are no matches). It is useful when you know there is -only one matching document, or are only interested in the first -match. Here we use *findOne* to get the first document from the posts -collection: - - > run $ findOne (select [] "posts") - -The result is a document matching the one that we inserted previously. -Note, the returned document contains the *_id* field, which was automatically -added on insert. - -*findOne* also supports querying on specific elements that the -resulting document must match. To limit our results to a document with -author "Mike" we do: - - > run $ findOne (select ["author" =: "Mike"] "posts") - -If we try with a different author, like "Eliot", we'll get no result: - - > run $ findOne (select ["author" =: "Eliot"] "posts") - -Bulk Inserts ------------- - -In order to make querying a little more interesting, let's insert a -few more documents. In addition to inserting a single document, we can -also perform bulk insert operations, by using the *insertMany* function -which accepts a list of documents to be inserted. 
It send only a single -command to the server: - - > now <- getCurrentTime - > :{ - let post1 = ["author" =: "Mike", - "text" =: "Another post!", - "tags" =: ["bulk", "insert"], - "date" =: now] - :} - > :{ - let post2 = ["author" =: "Eliot", - "title" =: "MongoDB is fun", - "text" =: "and pretty easy too!", - "date" =: now] - :} - > run $ insertMany "posts" [post1, post2] - -* Note that post2 has a different shape than the other posts - there -is no "tags" field and we've added a new field, "title". This is what we -mean when we say that MongoDB is schema-free. - -Querying for More Than One Document ------------------------------------- - -To get more than a single document as the result of a query we use the -*find* method. *find* returns a *Cursor*, which allows us to -iterate over all matching documents. There are several ways in which -we can iterate: we can call *next* to get documents one at a time -or we can get all the results by applying the cursor to *rest*: - - > Right cursor <- run $ find (select ["author" =: "Mike"] "posts") - > run $ rest cursor - -Of course you can use bind (*>>=*) to combine these into one line: - - > run $ find (select ["author" =: "Mike"] "posts") >>= rest - -Note, *next* automatically closes the cursor when the last -document has been read out of it. Similarly, *rest* automatically -closes the cursor after returning all the results. - -Counting --------- - -We can count how many documents are in an entire collection: - - > run $ count (select [] "posts") - -Or count how many documents match a query: - - > run $ count (select ["author" =: "Mike"] "posts") - -Advanced Queries -------------- - -To do - -Indexing --------- - -To do