Wednesday, November 25, 2015

Remove Primary Replica Node From 
MongoDB Sharded Cluster


First, connect to the replica set where you need to perform the removal of the node. 

In my case, my primary replica node went down, hence I had to connect to a secondary member. 

My sharded cluster looks like the following:


 shards:
        {  "_id" : "shard0000",  "host" : "rs0/10.20.176.93:30001,10.20.176.93:31001" }
        {  "_id" : "shard0001",  "host" : "rs1/10.20.176.93:30002,10.20.176.93:31002" }

        {  "_id" : "shard0002",  "host" : "rs2/10.20.176.93:30003,10.20.176.93:31003" }


[root@lpdosput00249 ~]# mongo --host lpdosput00249 --port 31001
MongoDB shell version: 3.0.7
connecting to: lpdosput00249:31001/test

-- Store the configuration in temporary object 

rs0:SECONDARY> cfg = rs.conf()

rs0:SECONDARY> cfg

{
        "_id" : "rs0",
        "version" : 2,
        "members" : [
                {
                        "_id" : 0,
                        "host" : "10.20.176.93:30001",
                        "arbiterOnly" : false,
                        "buildIndexes" : true,
                        "hidden" : false,
                        "priority" : 1,
                        "tags" : {

                        },

                        "slaveDelay" : 0,
                        "votes" : 1
                },
                {
                        "_id" : 1,
                        "host" : "10.20.176.93:31001",
                        "arbiterOnly" : false,
                        "buildIndexes" : true,
                        "hidden" : false,
                        "priority" : 1,
                        "tags" : {

                        },

                        "slaveDelay" : 0,
                        "votes" : 1
                }
        ],
        "settings" : {
                "chainingAllowed" : true,
                "heartbeatTimeoutSecs" : 10,
                "getLastErrorModes" : {

                },

                "getLastErrorDefaults" : {
                        "w" : 1,
                        "wtimeout" : 0
                }
        }
}

In my scenario, I had to promote my secondary to primary because my primary node went offline and could not be reached. 

So I connected to the secondary node. One way to promote the secondary to primary was by increasing the priority of the node. So I tried the following to see if that would work.

rs0:SECONDARY> cfg.members[0].priority = 0.5
0.5

rs0:SECONDARY> cfg.members[1].priority = 1

1
rs0:SECONDARY> cfg
{
        "_id" : "rs0",
        "version" : 2,
        "members" : [
                {
                        "_id" : 0,
                        "host" : "10.20.176.93:30001",
                        "arbiterOnly" : false,
                        "buildIndexes" : true,
                        "hidden" : false,
                        "priority" : 0.5, <-- Primary node has now Lower priority
                        "tags" : {

                        },

                        "slaveDelay" : 0,
                        "votes" : 1
                },
                {
                        "_id" : 1,
                        "host" : "10.20.176.93:31001",
                        "arbiterOnly" : false,
                        "buildIndexes" : true,
                        "hidden" : false,
                        "priority" : 1, <-- Secondary node has now higher priority
                        "tags" : {

                        },

                        "slaveDelay" : 0,
                        "votes" : 1
                }
        ],
        "settings" : {
                "chainingAllowed" : true,
                "heartbeatTimeoutSecs" : 10,
                "getLastErrorModes" : {

                },

                "getLastErrorDefaults" : {
                        "w" : 1,
                        "wtimeout" : 0
                }
        }
}
rs0:SECONDARY>

Reconfigure the cluster with the new configuration.

rs0:SECONDARY> rs.reconfig(cfg)
{
        "ok" : 0,
        "errmsg" : "replSetReconfig should only be run on PRIMARY, but my state is SECONDARY; use the \"force\" argument to override",
        "code" : 10107
}

rs0:SECONDARY> rs.reconfig(cfg, {force: 1} )

{ "ok" : 1 }


However, even after the reconfiguration, the secondary was still not promoted to primary. The reason is that the primary node had gone offline. Hence, the only option I was left with was to remove the node forcefully, as follows.

rs0:SECONDARY> cfg.members.splice(0,1)
[
        {
                "_id" : 0,
                "host" : "10.20.176.93:30001",
                "arbiterOnly" : false,
                "buildIndexes" : true,
                "hidden" : false,
                "priority" : 0.5,
                "tags" : {

                },
                "slaveDelay" : 0,
                "votes" : 1
        }
]
rs0:SECONDARY>
rs0:SECONDARY>
rs0:SECONDARY> cfg
{
        "_id" : "rs0",
        "version" : 112582,
        "members" : [
                {
                        "_id" : 1,
                        "host" : "10.20.176.93:31001", <-- you see only one node now.
                        "arbiterOnly" : false,
                        "buildIndexes" : true,
                        "hidden" : false,
                        "priority" : 1,
                        "tags" : {

                        },
                        "slaveDelay" : 0,
                        "votes" : 1
                }
        ],
        "settings" : {
                "chainingAllowed" : true,
                "heartbeatTimeoutSecs" : 10,
                "getLastErrorModes" : {

                },
                "getLastErrorDefaults" : {
                        "w" : 1,
                        "wtimeout" : 0
                }
        }
}


rs0:SECONDARY> rs.reconfig(cfg)
{
        "ok" : 0,
        "errmsg" : "replSetReconfig should only be run on PRIMARY, but my state is SECONDARY; use the \"force\" argument to override",
        "code" : 10107
}

rs0:SECONDARY> rs.reconfig(cfg, {force: true} )
{ "ok" : 1 }
rs0:PRIMARY>

Since this node was part of a sharded cluster, after the removal of the replica member the status of the sharded cluster looks like the following. 

 shards:
        {  "_id" : "shard0000",  "host" : "rs0/10.20.176.93:31001"}
        {  "_id" : "shard0001",  "host" : "rs1/10.20.176.93:30002,10.20.176.93:31002" }
        {  "_id" : "shard0002",  "host" : "rs2/10.20.176.93:30003,10.20.176.93:31003" }