Wednesday, November 25, 2015

Remove Primary Replica Node From 
MongoDB Sharded Cluster


First, connect to the replica set from which you need to remove the node.

In my case, the primary node of the replica set went down, so I had to connect to the secondary member.

My sharded cluster looks like the following:


 shards:
        {  "_id" : "shard0000",  "host" : "rs0/10.20.176.93:30001,10.20.176.93:31001" }
        {  "_id" : "shard0001",  "host" : "rs1/10.20.176.93:30002,10.20.176.93:31002" }

        {  "_id" : "shard0002",  "host" : "rs2/10.20.176.93:30003,10.20.176.93:31003" }


[root@lpdosput00249 ~]# mongo --host lpdosput00249 --port 31001
MongoDB shell version: 3.0.7
connecting to: lpdosput00249:31001/test

-- Store the configuration in a temporary object

rs0:SECONDARY> cfg = rs.conf()

rs0:SECONDARY> cfg

{
        "_id" : "rs0",
        "version" : 2,
        "members" : [
                {
                        "_id" : 0,
                        "host" : "10.20.176.93:30001",
                        "arbiterOnly" : false,
                        "buildIndexes" : true,
                        "hidden" : false,
                        "priority" : 1,
                        "tags" : {

                        },

                        "slaveDelay" : 0,
                        "votes" : 1
                },
                {
                        "_id" : 1,
                        "host" : "10.20.176.93:31001",
                        "arbiterOnly" : false,
                        "buildIndexes" : true,
                        "hidden" : false,
                        "priority" : 1,
                        "tags" : {

                        },

                        "slaveDelay" : 0,
                        "votes" : 1
                }
        ],
        "settings" : {
                "chainingAllowed" : true,
                "heartbeatTimeoutSecs" : 10,
                "getLastErrorModes" : {

                },

                "getLastErrorDefaults" : {
                        "w" : 1,
                        "wtimeout" : 0
                }
        }
}

In my scenario, I had to promote my secondary to primary because the primary node went offline and could not be reached.

So I connected to the secondary node. One way to promote a secondary to primary is to give it a higher priority than the other members, so I first tried the following to see whether that would work.

rs0:SECONDARY> cfg.members[0].priority = 0.5
0.5

rs0:SECONDARY> cfg.members[1].priority = 1

1
rs0:SECONDARY> cfg
{
        "_id" : "rs0",
        "version" : 2,
        "members" : [
                {
                        "_id" : 0,
                        "host" : "10.20.176.93:30001",
                        "arbiterOnly" : false,
                        "buildIndexes" : true,
                        "hidden" : false,
                        "priority" : 0.5, <-- Primary node has now Lower priority
                        "tags" : {

                        },

                        "slaveDelay" : 0,
                        "votes" : 1
                },
                {
                        "_id" : 1,
                        "host" : "10.20.176.93:31001",
                        "arbiterOnly" : false,
                        "buildIndexes" : true,
                        "hidden" : false,
                        "priority" : 1, <-- Secondary node has now higher priority
                        "tags" : {

                        },

                        "slaveDelay" : 0,
                        "votes" : 1
                }
        ],
        "settings" : {
                "chainingAllowed" : true,
                "heartbeatTimeoutSecs" : 10,
                "getLastErrorModes" : {

                },

                "getLastErrorDefaults" : {
                        "w" : 1,
                        "wtimeout" : 0
                }
        }
}
rs0:SECONDARY>

Reconfigure the replica set with the new configuration.

rs0:SECONDARY> rs.reconfig(cfg)
{
        "ok" : 0,
        "errmsg" : "replSetReconfig should only be run on PRIMARY, but my state is SECONDARY; use the \"force\" argument to override",
        "code" : 10107
}

rs0:SECONDARY> rs.reconfig(cfg, {force: 1} )

{ "ok" : 1 }


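However, even after the reconfiguration, the secondary was still not promoted to primary. The reason is that the primary node was offline: a member can only be elected primary when a majority of the voting members is reachable, and in a two-member replica set the single surviving member is not a majority, no matter what its priority is. Hence the only option I was left with was to forcefully remove the unreachable node from the configuration, as follows.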

rs0:SECONDARY> cfg.members.splice(0,1)
[
        {
                "_id" : 0,
                "host" : "10.20.176.93:30001",
                "arbiterOnly" : false,
                "buildIndexes" : true,
                "hidden" : false,
                "priority" : 0.5,
                "tags" : {

                },
                "slaveDelay" : 0,
                "votes" : 1
        }
]
rs0:SECONDARY>
rs0:SECONDARY>
rs0:SECONDARY> cfg
{
        "_id" : "rs0",
        "version" : 112582,
        "members" : [
                {
                        "_id" : 1,
                        "host" : "10.20.176.93:31001", <-- you see only one node now.
                        "arbiterOnly" : false,
                        "buildIndexes" : true,
                        "hidden" : false,
                        "priority" : 1,
                        "tags" : {

                        },
                        "slaveDelay" : 0,
                        "votes" : 1
                }
        ],
        "settings" : {
                "chainingAllowed" : true,
                "heartbeatTimeoutSecs" : 10,
                "getLastErrorModes" : {

                },
                "getLastErrorDefaults" : {
                        "w" : 1,
                        "wtimeout" : 0
                }
        }
}


rs0:SECONDARY> rs.reconfig(cfg)
{
        "ok" : 0,
        "errmsg" : "replSetReconfig should only be run on PRIMARY, but my state is SECONDARY; use the \"force\" argument to override",
        "code" : 10107
}

rs0:SECONDARY> rs.reconfig(cfg, {force: true} )
{ "ok" : 1 }
rs0:PRIMARY>

Since this node was part of a sharded cluster, after the removal of the replica set member the status of the sharded cluster looks like the following.

 shards:
        {  "_id" : "shard0000",  "host" : "rs0/10.20.176.93:31001"}
        {  "_id" : "shard0001",  "host" : "rs1/10.20.176.93:30002,10.20.176.93:31002" }
        {  "_id" : "shard0002",  "host" : "rs2/10.20.176.93:30003,10.20.176.93:31003" }


Thursday, November 19, 2015


Convert Stand Alone MongoDB Sharded Cluster to 
Replicated Sharded Cluster



In this post I will describe the process of converting a non-replicated sharded cluster into a replicated sharded cluster.

My current configuration: all my processes are running on a single cloud host, and I am using different ports to distinguish the different mongo process instances.

1  Config server
1  Mongos Query Router
3  Shard servers for data.

Processes are as follows.

root      9518     1  0 Nov13 ?        00:14:17 mongod --configsvr --dbpath /mongodb/mongo-metadata1 --logpath /var/log/mongodb/mongod-configsvr1.log --port 27001 --fork

root      9534     1  0 Nov13 ?        00:09:25 mongod --configsvr --dbpath /mongodb/mongo-metadata2 --logpath /var/log/mongodb/mongod-configsvr2.log --port 27002 --fork

root     10008     1  0 Nov13 ?        00:04:47 mongos --configdb lpdosput00:27001 --port 20001 --fork --logpath /var/log/mongodb/mongos1.log

root     10087     1  0 Nov13 ?        00:10:09 mongod -f /etc/mongodb1.conf
root     10213     1  0 Nov13 ?        00:08:15 mongod -f /etc/mongodb2.conf
root     10228     1  0 Nov13 ?        00:08:17 mongod -f /etc/mongodb3.conf

My mongo config files are as follows.

# mongod.conf

# for documentation of all options, see:
#   http://docs.mongodb.org/manual/reference/configuration-options/

# where to write logging data.
systemLog:
  destination: file
  logAppend: true
  path: /var/log/mongodb/mongodb1.log

# Where and how to store data.
storage:
  dbPath: /mongodb/data1
  journal:
    enabled: true
#  engine:
#  mmapv1:
#  wiredTiger:

# how the process runs
processManagement:
  fork: true  # fork and run in background
  pidFilePath: /var/run/mongodb/mongod.pid  # location of pidfile

# network interfaces
net:
  port: 30001
  bindIp: 10.20.176.93  # Listen to local interface only, comment to listen on all interfaces.

#security:

#operationProfiling:

replication:
 oplogSizeMB: 30
 replSetName: rs0

#sharding:

## Enterprise-Only Options

#auditLog:

#snmp:

-- Now stop the first instance as follows, since we need to convert this standalone shard server into a replica set.

-- Stop the instance and start it again with the modified configuration

[root@lpdosput00 ~]# kill 10087
[root@lpdosput00 ~]# mongod -f /etc/mongodb1.conf
mongos> sh.status()
--- Sharding Status ---
  sharding version: {
        "_id" : 1,
        "minCompatibleVersion" : 5,
        "currentVersion" : 6,
        "clusterId" : ObjectId("564659a6ecf5ebdaa8b6fec9")
}
  shards:
        {  "_id" : "shard0000",  "host" : "lpdosput00:30001" }
        {  "_id" : "shard0001",  "host" : "lpdosput00:30002" }
        {  "_id" : "shard0002",  "host" : "lpdosput00:30003" }
  balancer:
        Currently enabled:  yes
        Currently running:  no
        Failed balancer rounds in last 5 attempts:  0
        Migration Results for the last 24 hours:
                No recent migrations
  databases:
        {  "_id" : "admin",  "partitioned" : false,  "primary" : "config" }
        {  "_id" : "testdb",  "partitioned" : true,  "primary" : "shard0000" }
        {  "_id" : "test",  "partitioned" : false,  "primary" : "shard0000" }

mongos> rs.status()
{
        "info" : "mongos",
        "ok" : 0,
        "errmsg" : "replSetGetStatus is not supported through mongos"
}
mongos> exit

-- To create the replica set, you need to connect directly to the individual mongod instance.

[root@lpdosput00 data1]# mongo --host lpdosput00 --port 30001

> rs.status()
{
        "info" : "run rs.initiate(...) if not yet done for the set",
        "ok" : 0,
        "errmsg" : "no replset config has been received",
        "code" : 94,
        "$gleStats" : {
                "lastOpTime" : Timestamp(0, 0),
                "electionId" : ObjectId("000000000000000000000000")
        }
}

-- Initialize the replica set as follows.

> rs.initiate()
{
        "info2" : "no configuration explicitly specified -- making one",
        "me" : "10.20.176.93:30001",
        "ok" : 1,
        "$gleStats" : {
                "lastOpTime" : Timestamp(1447882950, 1),
                "electionId" : ObjectId("000000000000000000000000")
        }
}
rs0:OTHER> rs.status()
{
        "set" : "rs0",
        "date" : ISODate("2015-11-18T21:42:37.538Z"),
        "myState" : 1,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "10.20.176.93:30001",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 214,
                        "optime" : Timestamp(1447882957, 1),
                        "optimeDate" : ISODate("2015-11-18T21:42:37Z"),
                        "electionTime" : Timestamp(1447882950, 2),
                        "electionDate" : ISODate("2015-11-18T21:42:30Z"),
                        "configVersion" : 1,
                        "self" : true
                }
        ],
        "ok" : 1,
        "$gleStats" : {
                "lastOpTime" : Timestamp(1447882950, 1),
                "electionId" : ObjectId("564cf0c6591018076b991102")
        }
}

-- and prompt will change to ..
rs0:PRIMARY>

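-- Modify the configuration of the second and third instances in the same way (presumably with replSetName rs1 and rs2 respectively), restart them, and initiate each replica set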

rs1:PRIMARY> rs.status()
{
        "set" : "rs1",
        "date" : ISODate("2015-11-18T21:49:43.967Z"),
        "myState" : 1,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "10.20.176.93:30002",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 36,
                        "optime" : Timestamp(1447883374, 1),
                        "optimeDate" : ISODate("2015-11-18T21:49:34Z"),
                        "electionTime" : Timestamp(1447883374, 2),
                        "electionDate" : ISODate("2015-11-18T21:49:34Z"),
                        "configVersion" : 1,
                        "self" : true
                }
        ],
        "ok" : 1
}

-- Replica set rs2 status..
rs2:PRIMARY> rs.status()
{
        "set" : "rs2",
        "date" : ISODate("2015-11-18T21:54:33.369Z"),
        "myState" : 1,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "10.20.176.93:30003",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 115,
                        "optime" : Timestamp(1447883568, 1),
                        "optimeDate" : ISODate("2015-11-18T21:52:48Z"),
                        "electionTime" : Timestamp(1447883568, 2),
                        "electionDate" : ISODate("2015-11-18T21:52:48Z"),
                        "configVersion" : 1,
                        "self" : true
                }
        ],
        "ok" : 1
}

Now all three of the previously standalone shards are configured as replica sets, i.e. rs0, rs1 and rs2.
Next, you need to add a secondary member to each shard's replica set.


Spin up a new mongod instance for each shard with the modified parameters, and make sure you use the proper replSetName in each config file.

# mongod -f /etc/mongodb1_2.conf - on port 31001
# mongod -f /etc/mongodb2_2.conf - on port 31002
# mongod -f /etc/mongodb3_2.conf - on port 31003

-- Add secondary node to replica set rs0
rs0:PRIMARY> rs.add("10.20.176.93:31001")
{
        "ok" : 1,
        "$gleStats" : {
                "lastOpTime" : Timestamp(1447884829, 3),
                "electionId" : ObjectId("564cf0c6591018076b991102")
        }
}

rs0:PRIMARY> rs.conf()
{
        "_id" : "rs0",
        "version" : 2,
        "members" : [
                {
                        "_id" : 0,
                        "host" : "10.20.176.93:30001",
                        "arbiterOnly" : false,
                        "buildIndexes" : true,
                        "hidden" : false,
                        "priority" : 1,
                        "tags" : {

                        },
                        "slaveDelay" : 0,
                        "votes" : 1
                },
                {
                        "_id" : 1,
                        "host" : "10.20.176.93:31001",
                        "arbiterOnly" : false,
                        "buildIndexes" : true,
                        "hidden" : false,
                        "priority" : 1,
                        "tags" : {

                        },
                        "slaveDelay" : 0,
                        "votes" : 1
                }
}

-- Connect to secondary instance of rs0

[root@lpdosput00 mongodb]# mongo --host lpdosput00 --port 31001
MongoDB shell version: 3.0.7
connecting to: lpdosput00:31001/test
rs0:SECONDARY> 
rs0:SECONDARY> rs.status()
{
        "set" : "rs0",
        "date" : ISODate("2015-11-18T22:14:53.218Z"),
        "myState" : 2,
        "syncingTo" : "10.20.176.93:30001",
        "members" : [
                {
                        "_id" : 0,
                        "name" : "10.20.176.93:30001",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 63,
                        "optime" : Timestamp(1447884890, 4),
                        "optimeDate" : ISODate("2015-11-18T22:14:50Z"),
                        "lastHeartbeat" : ISODate("2015-11-18T22:14:52.037Z"),
                        "lastHeartbeatRecv" : ISODate("2015-11-18T22:14:51.940Z"),
                        "pingMs" : 0,
                        "electionTime" : Timestamp(1447882950, 2),
                        "electionDate" : ISODate("2015-11-18T21:42:30Z"),
                        "configVersion" : 2
                },
                {
                        "_id" : 1,
                        "name" : "10.20.176.93:31001",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 335,
                        "optime" : Timestamp(1447884890, 4),
                        "optimeDate" : ISODate("2015-11-18T22:14:50Z"),
                        "syncingTo" : "10.20.176.93:30001",
                        "configVersion" : 2,
                        "self" : true
                }
        ],
        "ok" : 1
}

So now you have the first replica set configured and running. But if you check the cluster status, the changes are not reflected yet.

mongos> sh.status()
--- Sharding Status ---
  sharding version: {
        "_id" : 1,
        "minCompatibleVersion" : 5,
        "currentVersion" : 6,
        "clusterId" : ObjectId("564659a6ecf5ebdaa8b6fec9")
}
  shards:
        {  "_id" : "shard0000",  "host" : "lpdosput00:30001" }
        {  "_id" : "shard0001",  "host" : "lpdosput00:30002" }
        {  "_id" : "shard0002",  "host" : "lpdosput00:30003" }


The reason is that the sharded cluster's configuration still refers to the standalone host; you now have to update it to reference the replica set.
Since the new replica set is part of a sharded cluster, change the shard host information in the config database by doing the following:
Connect to one of the sharded cluster's mongos instances and issue a command in the following form:
mongos> db.getSiblingDB("config").shards.save( {_id: "shard0000", host: "rs0/lpdosput00:30001,lpdosput00:31001" } )
WriteResult({ "nMatched" : 1, "nUpserted" : 0, "nModified" : 1 })

mongos> sh.status()
--- Sharding Status ---
  sharding version: {
        "_id" : 1,
        "minCompatibleVersion" : 5,
        "currentVersion" : 6,
        "clusterId" : ObjectId("564659a6ecf5ebdaa8b6fec9")
}
  shards:
       {  "_id" : "shard0000",  "host" : "rs0/lpdosput00:30001,lpdosput00:31001" }
        {  "_id" : "shard0001",  "host" : "lpdosput00:30002" }
        {  "_id" : "shard0002",  "host" : "lpdosput00:30003" }

As you can see, the first replica set is now part of the cluster.
Repeat the process for the second and third replica sets, rs1 and rs2.

[root@lpdosput00 mongodb]# mongod  -f /etc/mongodb2_2.conf
2015-11-18T15:34:06.253-0700 W CONTROL  Option: storage.smallFiles is deprecated. Please use storage.mmapv1.smallFiles instead.
about to fork child process, waiting until server is ready for connections.
forked process: 20440
child process started successfully, parent exiting

[root@lpdosput00 mongodb]# mongod  -f /etc/mongodb3_2.conf
2015-11-18T15:34:32.954-0700 W CONTROL  Option: storage.smallFiles is deprecated. Please use storage.mmapv1.smallFiles instead.
about to fork child process, waiting until server is ready for connections.
forked process: 20484
child process started successfully, parent exiting

-- Now connect directly to the primary of the rs1 replica set and add the second replica node.

rs1:PRIMARY> rs.add("10.20.176.93:31002")
{
        "ok" : 1,
        "$gleStats" : {
                "lastOpTime" : Timestamp(1447886228, 1),
                "electionId" : ObjectId("564cf26eb1f7baeb8735f2b3")
        }
}
rs1:PRIMARY> rs.status()
{
        "set" : "rs1",
        "date" : ISODate("2015-11-18T22:39:03.897Z"),
        "myState" : 1,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "10.20.176.93:30002",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 2996,
                        "optime" : Timestamp(1447886228, 1),
                        "optimeDate" : ISODate("2015-11-18T22:37:08Z"),
                        "electionTime" : Timestamp(1447883374, 2),
                        "electionDate" : ISODate("2015-11-18T21:49:34Z"),
                        "configVersion" : 2,
                        "self" : true
                },
                {
                        "_id" : 1,
                        "name" : "10.20.176.93:31002",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 115,
                        "optime" : Timestamp(1447886228, 1),
                        "optimeDate" : ISODate("2015-11-18T22:37:08Z"),
                        "lastHeartbeat" : ISODate("2015-11-18T22:39:02.274Z"),
                        "lastHeartbeatRecv" : ISODate("2015-11-18T22:39:02.334Z"),
                        "pingMs" : 0,
                        "configVersion" : 2
                }
        ],
        "ok" : 1,

-- Repeat the process for the rs2 replica set and add second replica node...

[root@lpdosput00 mongodb]# mongo --host lpdosput00 --port 30003
MongoDB shell version: 3.0.7
connecting to: lpdosput00:30003/test

rs2:PRIMARY> rs.add("10.20.176.93:31003")
{ "ok" : 1 }

rs2:PRIMARY>
rs2:PRIMARY> rs.status()
{
        "set" : "rs2",
        "date" : ISODate("2015-11-18T22:40:32.225Z"),
        "myState" : 1,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "10.20.176.93:30003",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 2874,
                        "optime" : Timestamp(1447886424, 1),
                        "optimeDate" : ISODate("2015-11-18T22:40:24Z"),
                        "electionTime" : Timestamp(1447883568, 2),
                        "electionDate" : ISODate("2015-11-18T21:52:48Z"),
                        "configVersion" : 2,
                        "self" : true
                },
                {
                        "_id" : 1,
                        "name" : "10.20.176.93:31003",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 7,
                        "optime" : Timestamp(1447886424, 1),
                        "optimeDate" : ISODate("2015-11-18T22:40:24Z"),
                        "lastHeartbeat" : ISODate("2015-11-18T22:40:30.644Z"),
                        "lastHeartbeatRecv" : ISODate("2015-11-18T22:40:30.707Z"),
                        "pingMs" : 0,
                        "configVersion" : 2
                }
        ],
        "ok" : 1

Now connect to the mongos instance and modify the sharded cluster configuration for replica sets rs1 and rs2.

[root@lpdosput00 mongodb]# mongo --host lpdosput00 --port 20001
MongoDB shell version: 3.0.7
connecting to: lpdosput00:20001/test

mongos> db.getSiblingDB("config").shards.save( {_id: "shard0001", host: "rs1/lpdosput00:30002,lpdosput00:31002" } )
WriteResult({ "nMatched" : 1, "nUpserted" : 0, "nModified" : 1 })

-- Update shard0002 in the same way, then check the status of the cluster
mongos>
mongos> db.getSiblingDB("config").shards.save( {_id: "shard0002", host: "rs2/lpdosput00:30003,lpdosput00:31003" } )
WriteResult({ "nMatched" : 1, "nUpserted" : 0, "nModified" : 1 })

mongos>
mongos> sh.status()
--- Sharding Status ---
  sharding version: {
        "_id" : 1,
        "minCompatibleVersion" : 5,
        "currentVersion" : 6,
        "clusterId" : ObjectId("564659a6ecf5ebdaa8b6fec9")
}
  shards:
        {  "_id" : "shard0000",  "host" : "rs0/10.20.176.93:30001,10.20.176.93:31001" }
        {  "_id" : "shard0001",  "host" : "rs1/10.20.176.93:30002,10.20.176.93:31002" }
        {  "_id" : "shard0002",  "host" : "rs2/10.20.176.93:30003,10.20.176.93:31003" }

Once the configuration is done, please restart the mongos instance to make sure it picks up the correct configuration from the config server. If possible, restart all components of the sharded cluster (i.e., all mongos and all shard mongod instances).

[root@lpdosput00 mongodb]# ps -ef | grep mongos

root     10008     1  0 Nov13 ?        00:04:51 mongos --configdb lpdosput00:27001 --port 20001 --fork --logpath /var/log/mongodb/mongos1.log

[root@lpdosput00  mongodb]# kill 10008

[root@lpdosput00  mongodb]#  mongos --configdb lpdosput00:27001 --port 20001 --fork --logpath /var/log/mongodb/mongos1.log
2015-11-18T15:45:16.933-0700 W SHARDING running with 1 config server should be done only for testing purposes and is not recommended for production
about to fork child process, waiting until server is ready for connections.
forked process: 21690
child process started successfully, parent exiting

[root@lpdosput00  mongodb]# ps -ef | grep mongos
root     21690     1  0 15:45 ?        00:00:00 mongos --configdb lpdosput00:27001 --port 20001 --fork --logpath /var/log/mongodb/mongos1.log

-- Create a sharded collection...

mongos> use testdb
switched to db testdb
mongos> show collections
system.indexes
test_collection
mongos> sh.shardCollection( "testdb.test_collection", { x: "hashed" } )

I hope this helps you convert your non-replicated sharded cluster into a replicated one.