
Add a new replication node in MongoDB

0 votes
1 answer
211 views
I have run into some trouble here, and I don't know why. I have a replica set with 3 members (1 primary, 2 secondaries), and I want to add another secondary to sync the data. The data set is 367 GB. When the initial sync finishes, the new secondary shuts itself down; when I start the node again, it deletes its data and starts syncing from scratch. I don't know why. Please help me.

1. MongoDB version: 3.3.10
2. The oplog on the primary is 9 GB
3. The disk has more than 654 GB (38% used)
4. I ran this command in the mongo shell:

    rs.add({host:'172.16.30.123:27017',priority:0,votes:0})

Here is the new member in the rs.status() output:

    {
        "_id": 3,
        "name": "172.16.30.123:27017",
        "health": 1,
        "state": 5,
        "stateStr": "STARTUP2",
        "uptime": 10029,
        "optime": { "ts": Timestamp(0, 0), "t": NumberLong("-1") },
        "optimeDurable": { "ts": Timestamp(0, 0), "t": NumberLong("-1") },
        "optimeDate": ISODate("1970-01-01T00:00:00Z"),
        "optimeDurableDate": ISODate("1970-01-01T00:00:00Z"),
        "lastHeartbeat": ISODate("2018-05-10T12:28:40.256Z"),
        "lastHeartbeatRecv": ISODate("2018-05-10T12:28:40.722Z"),
        "pingMs": NumberLong("0"),
        "syncingTo": "172.16.30.225:27017",
        "configVersion": 9
    }

As you can see, the node's sync source ("syncingTo") is 172.16.30.225.
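For reference, a minimal mongo shell sketch for watching the new member from the primary while the initial sync runs (the host string is the one passed to rs.add() above):

    // Poll the new member's entry in rs.status() from the primary.
    var host = "172.16.30.123:27017";
    var m = rs.status().members.filter(function (x) { return x.name === host; })[0];
    printjson({ state: m.stateStr, optimeDate: m.optimeDate, syncingTo: m.syncingTo });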
Here is the node's log:

    2018-05-10T17:01:14.247+0800 I REPL [InitialSyncInserters-0] starting to run synchronous task on runner.
    2018-05-10T17:01:14.373+0800 I REPL [InitialSyncInserters-0] done running the synchronous task.
    2018-05-10T17:01:14.458+0800 I REPL [InitialSyncInserters-0] starting to run synchronous task on runner.
    2018-05-10T17:01:14.575+0800 I REPL [InitialSyncInserters-0] done running the synchronous task.
    2018-05-10T17:01:14.575+0800 I REPL [InitialSyncInserters-0] starting to run synchronous task on runner.
    2018-05-10T17:01:14.989+0800 I REPL [InitialSyncInserters-0] done running the synchronous task.
    2018-05-10T17:01:14.989+0800 I REPL [replication-82] data clone finished, status: OK
    2018-05-10T17:01:14.991+0800 F EXECUTOR [replication-82] Exception escaped task in thread pool replication
    2018-05-10T17:01:14.991+0800 F - [replication-82] terminate() called. An exception is active; attempting to gather more information
    2018-05-10T17:01:15.008+0800 F - [replication-82] DBException::toString(): 2 source in remote command request cannot be empty
    Actual exception type: mongo::UserException
    0x55d8112ca811 0x55d8112ca0d5 0x55d811d102a6 0x55d811d102f1 0x55d811249418 0x55d811249dd0 0x55d81124a979 0x55d811d2b040 0x7f04a287ce25 0x7f04a25aa34d
    ----- BEGIN BACKTRACE -----
    {"backtrace":[
      {"b":"55D80FE7F000","o":"144B811","s":"_ZN5mongo15printStackTraceERSo"},
      {"b":"55D80FE7F000","o":"144B0D5"},
      {"b":"55D80FE7F000","o":"1E912A6","s":"_ZN10__cxxabiv111__terminateEPFvvE"},
      {"b":"55D80FE7F000","o":"1E912F1"},
      {"b":"55D80FE7F000","o":"13CA418","s":"_ZN5mongo10ThreadPool10_doOneTaskEPSt11unique_lockISt5mutexE"},
      {"b":"55D80FE7F000","o":"13CADD0","s":"_ZN5mongo10ThreadPool13_consumeTasksEv"},
      {"b":"55D80FE7F000","o":"13CB979","s":"_ZN5mongo10ThreadPool17_workerThreadBodyEPS0_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE"},
      {"b":"55D80FE7F000","o":"1EAC040","s":"execute_native_thread_routine"},
      {"b":"7F04A2875000","o":"7E25"},
      {"b":"7F04A24B2000","o":"F834D","s":"clone"}
    ],
    "processInfo":{
      "mongodbVersion" : "3.3.10",
      "gitVersion" : "4d826acb5648a78d0af0fefac5abe6fbbe7c854a",
      "compiledModules" : [],
      "uname" : { "sysname" : "Linux", "release" : "3.10.0-693.17.1.el7.x86_64", "version" : "#1 SMP Thu Jan 25 20:13:58 UTC 2018", "machine" : "x86_64" },
      "somap" : [
        { "b" : "55D80FE7F000", "elfType" : 3, "buildId" : "76E66D90C81BC61AF236A1AF6A6F753332397346" },
        { "b" : "7FFE6A6DB000", "elfType" : 3, "buildId" : "47E1DE363A68C3E5970550C87DAFA3CCF9713953" },
        { "b" : "7F04A3816000", "path" : "/lib64/libssl.so.10", "elfType" : 3, "buildId" : "ED0AC7DEB91A242C194B3DEF27A215F41CE43116" },
        { "b" : "7F04A33B5000", "path" : "/lib64/libcrypto.so.10", "elfType" : 3, "buildId" : "BC0AE9CA0705BEC1F0C0375AAD839843BB219CB1" },
        { "b" : "7F04A31AD000", "path" : "/lib64/librt.so.1", "elfType" : 3, "buildId" : "6D322588B36D2617C03C0F3B93677E62FCFFDA81" },
        { "b" : "7F04A2FA9000", "path" : "/lib64/libdl.so.2", "elfType" : 3, "buildId" : "1E42EBFB272D37B726F457D6FE3C33D2B094BB69" },
        { "b" : "7F04A2CA7000", "path" : "/lib64/libm.so.6", "elfType" : 3, "buildId" : "808BD35686C193F218A5AAAC6194C49214CFF379" },
        { "b" : "7F04A2A91000", "path" : "/lib64/libgcc_s.so.1", "elfType" : 3, "buildId" : "3E85E6D20D2CE9CDAD535084BEA56620BAAD687C" },
        { "b" : "7F04A2875000", "path" : "/lib64/libpthread.so.0", "elfType" : 3, "buildId" : "A48D21B2578A8381FBD8857802EAA660504248DC" },
        { "b" : "7F04A24B2000", "path" : "/lib64/libc.so.6", "elfType" : 3, "buildId" : "95FF02A4BEBABC573C7827A66D447F7BABDDAA44" },
        { "b" : "7F04A3A88000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "22FA66DA7D14C88BF36C69454A357E5F1DEFAE4E" },
        { "b" : "7F04A2265000", "path" : "/lib64/libgssapi_krb5.so.2", "elfType" : 3, "buildId" : "DA322D74F55A0C4293085371A8D0E94B5962F5E7" },
        { "b" : "7F04A1F7D000", "path" : "/lib64/libkrb5.so.3", "elfType" : 3, "buildId" : "B69E63024D408E400401EEA6815317BDA38FB7C2" },
        { "b" : "7F04A1D79000", "path" : "/lib64/libcom_err.so.2", "elfType" : 3, "buildId" : "A3832734347DCA522438308C9F08F45524C65C9B" },
        { "b" : "7F04A1B46000", "path" : "/lib64/libk5crypto.so.3", "elfType" : 3, "buildId" : "A48639BF901DB554479BFAD114CB354CF63D7D6E" },
        { "b" : "7F04A1930000", "path" : "/lib64/libz.so.1", "elfType" : 3, "buildId" : "EA8E45DC8E395CC5E26890470112D97A1F1E0B65" },
        { "b" : "7F04A1722000", "path" : "/lib64/libkrb5support.so.0", "elfType" : 3, "buildId" : "6FDF5B013FD2739D304CFB9D723DCBC149EE03C9" },
        { "b" : "7F04A151E000", "path" : "/lib64/libkeyutils.so.1", "elfType" : 3, "buildId" : "2E01D5AC08C1280D013AAB96B292AC58BC30A263" },
        { "b" : "7F04A1304000", "path" : "/lib64/libresolv.so.2", "elfType" : 3, "buildId" : "FF4E72F4E574E143330FB3C66DB51613B0EC65EA" },
        { "b" : "7F04A10DD000", "path" : "/lib64/libselinux.so.1", "elfType" : 3, "buildId" : "A88379F56A51950A33198890D37F5F8AEE71F8B4" },
        { "b" : "7F04A0E7B000", "path" : "/lib64/libpcre.so.1", "elfType" : 3, "buildId" : "9CA3D11F018BEEB719CDB34BE800BF1641350D0A" }
      ]
    }}
    mongod(_ZN5mongo15printStackTraceERSo+0x41) [0x55d8112ca811]
    mongod(+0x144B0D5) [0x55d8112ca0d5]
    mongod(_ZN10__cxxabiv111__terminateEPFvvE+0x6) [0x55d811d102a6]
    mongod(+0x1E912F1) [0x55d811d102f1]
    mongod(_ZN5mongo10ThreadPool10_doOneTaskEPSt11unique_lockISt5mutexE+0x3C8) [0x55d811249418]
    mongod(_ZN5mongo10ThreadPool13_consumeTasksEv+0xC0) [0x55d811249dd0]
    mongod(_ZN5mongo10ThreadPool17_workerThreadBodyEPS0_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x149) [0x55d81124a979]
    mongod(execute_native_thread_routine+0x20) [0x55d811d2b040]
    libpthread.so.0(+0x7E25) [0x7f04a287ce25]
    libc.so.6(clone+0x6D) [0x7f04a25aa34d]
    ----- END BACKTRACE -----
    Actual exception type: mongo::UserException

In the end, the mongod process shut down. When I start it again, it deletes the data and starts the initial sync over.
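A minimal sketch of forcing a clean retry from the primary side by removing and re-adding the member (host string as above; this assumes the member's data directory is cleared before it rejoins):

    // Drop the failing member from the config, then add it back with
    // priority 0 and no vote so it can sync without affecting elections.
    rs.remove("172.16.30.123:27017")
    rs.add({ host: "172.16.30.123:27017", priority: 0, votes: 0 })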
}, { "b" : "7F04A151E000", "path" : "/lib64/libkeyutils.so.1", "elfType" : 3, "buildId" : "2E01D5AC08C1280D013AAB96B292AC58BC30A263" }, { "b" : "7F04A1304000", "path" : "/lib64/libresolv.so.2", "elfType" : 3, "buildId" : "FF4E72F4E574E143330FB3C66DB51613B0EC65EA" }, { "b" : "7F04A10DD000", "path" : "/lib64/libselinux.so.1", "elfType" : 3, "buildId" : "A88379F56A51950A33198890D37F5F8AEE71F8B4" }, { "b" : "7F04A0E7B000", "path" : "/lib64/libpcre.so.1", "elfType" : 3, "buildId" : "9CA3D11F018BEEB719CDB34BE800BF1641350D0A" } ] }} mongod(_ZN5mongo15printStackTraceERSo+0x41) [0x55d8112ca811] mongod(+0x144B0D5) [0x55d8112ca0d5] mongod(_ZN10__cxxabiv111__terminateEPFvvE+0x6) [0x55d811d102a6] mongod(+0x1E912F1) [0x55d811d102f1] mongod(_ZN5mongo10ThreadPool10_doOneTaskEPSt11unique_lockISt5mutexE+0x3C8) [0x55d811249418] mongod(_ZN5mongo10ThreadPool13_consumeTasksEv+0xC0) [0x55d811249dd0] mongod(_ZN5mongo10ThreadPool17_workerThreadBodyEPS0_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x149) [0x55d81124a979] mongod(execute_native_thread_routine+0x20) [0x55d811d2b040] libpthread.so.0(+0x7E25) [0x7f04a287ce25] libc.so.6(clone+0x6D) [0x7f04a25aa34d] ----- END BACKTRACE ----- Actual exception type: mongo::UserException at last the mongo process has been shutdown,when i start it again,it's will delete the data and start sync again here is the full output of rs.status() { "set": "rs0", "date": ISODate("2018-05-11T00:53:19.313Z"), "myState": 1, "term": NumberLong("3"), "heartbeatIntervalMillis": NumberLong("2000"), "optimes": { "lastCommittedOpTime": { "ts": Timestamp(1525999999, 6), "t": NumberLong("3") }, "appliedOpTime": { "ts": Timestamp(1525999999, 6), "t": NumberLong("3") }, "durableOpTime": { "ts": Timestamp(1525999999, 5), "t": NumberLong("3") } }, "members": [ { "_id": 0, "name": "172.16.30.223:27017", "health": 1, "state": 1, "stateStr": "PRIMARY", "uptime": 20032944, "optime": { "ts": Timestamp(1525999999, 6), "t": NumberLong("3") }, "optimeDate": ISODate("2018-05-11T00:53:19Z"), "electionTime": Timestamp(1505967067, 1), "electionDate": ISODate("2017-09-21T04:11:07Z"), "configVersion": 9, "self": true }, { "_id": 1, "name": "172.16.30.224:27017", "health": 1, "state": 2, "stateStr": "SECONDARY", "uptime": 9748305, "optime": { "ts": Timestamp(1525999998, 23), "t": NumberLong("3") }, "optimeDurable": { "ts": Timestamp(1525999998, 23), "t": NumberLong("3") }, "optimeDate": ISODate("2018-05-11T00:53:18Z"), "optimeDurableDate": ISODate("2018-05-11T00:53:18Z"), "lastHeartbeat": ISODate("2018-05-11T00:53:18.751Z"), "lastHeartbeatRecv": ISODate("2018-05-11T00:53:18.697Z"), "pingMs": NumberLong("0"), "syncingTo": "172.16.30.223:27017", "configVersion": 9 }, { "_id": 2, "name": "172.16.30.225:27017", "health": 1, "state": 2, "stateStr": "SECONDARY", "uptime": 20032938, "optime": { "ts": Timestamp(1525999998, 21), "t": NumberLong("3") }, "optimeDurable": { "ts": Timestamp(1525999998, 21), "t": NumberLong("3") }, "optimeDate": ISODate("2018-05-11T00:53:18Z"), "optimeDurableDate": ISODate("2018-05-11T00:53:18Z"), "lastHeartbeat": ISODate("2018-05-11T00:53:18.751Z"), "lastHeartbeatRecv": ISODate("2018-05-11T00:53:19.029Z"), "pingMs": NumberLong("0"), "syncingTo": "172.16.30.223:27017", "configVersion": 9 }, { "_id": 3, "name": "172.16.30.123:27017", "health": 0, "state": 8, "stateStr": "(not reachable/healthy)", "uptime": 0, "optime": { "ts": Timestamp(0, 0), "t": NumberLong("-1") }, "optimeDurable": { "ts": Timestamp(0, 0), "t": NumberLong("-1") }, "optimeDate": ISODate("1970-01-01T00:00:00Z"), 
"optimeDurableDate": ISODate("1970-01-01T00:00:00Z"), "lastHeartbeat": ISODate("2018-05-11T00:53:17.635Z"), "lastHeartbeatRecv": ISODate("2018-05-10T15:24:23.323Z"), "pingMs": NumberLong("0"), "lastHeartbeatMessage": "Connection refused", "configVersion": -1 }, { "_id": 4, "name": "172.16.30.127:27017", "health": 0, "state": 8, "stateStr": "(not reachable/healthy)", "uptime": 0, "optime": { "ts": Timestamp(0, 0), "t": NumberLong("-1") }, "optimeDurable": { "ts": Timestamp(0, 0), "t": NumberLong("-1") }, "optimeDate": ISODate("1970-01-01T00:00:00Z"), "optimeDurableDate": ISODate("1970-01-01T00:00:00Z"), "lastHeartbeat": ISODate("2018-05-11T00:53:17.454Z"), "lastHeartbeatRecv": ISODate("2018-05-10T09:01:26.996Z"), "pingMs": NumberLong("0"), "lastHeartbeatMessage": "Connection refused", "configVersion": -1 } ], "ok": 1 and rs.conf(), the status was two node finished sync , and two node's mongodb process was been done, the primary's status show the informations like this.Normaly the status should STARTUP2 became SECONDARY. { "_id": "rs0", "version": 9, "protocolVersion": NumberLong("1"), "members": [ { "_id": 0, "host": "172.16.30.223:27017", "arbiterOnly": false, "buildIndexes": true, "hidden": false, "priority": 1, "tags": { }, "slaveDelay": NumberLong("0"), "votes": 1 }, { "_id": 1, "host": "172.16.30.224:27017", "arbiterOnly": false, "buildIndexes": true, "hidden": false, "priority": 1, "tags": { }, "slaveDelay": NumberLong("0"), "votes": 1 }, { "_id": 2, "host": "172.16.30.225:27017", "arbiterOnly": false, "buildIndexes": true, "hidden": false, "priority": 1, "tags": { }, "slaveDelay": NumberLong("0"), "votes": 1 }, { "_id": 3, "host": "172.16.30.123:27017", "arbiterOnly": false, "buildIndexes": true, "hidden": false, "priority": 0, "tags": { }, "slaveDelay": NumberLong("0"), "votes": 0 }, { "_id": 4, "host": "172.16.30.127:27017", "arbiterOnly": false, "buildIndexes": true, "hidden": false, "priority": 0, "tags": { }, "slaveDelay": NumberLong("0"), "votes": 0 } ], "settings": { "chainingAllowed": true, "heartbeatIntervalMillis": 2000, "heartbeatTimeoutSecs": 10, "electionTimeoutMillis": 10000, "getLastErrorModes": { }, "getLastErrorDefaults": { "w": 1, "wtimeout": 0 }, "replicaSetId": ObjectId("5994eb51712e4cd82e549341") } } here is the rs.printSlaveReplicationInfo() localhost(mongod-3.3.10)[PRIMARY:rs0] admin> rs.printSlaveReplicationInfo() source: 172.16.30.224:27017 syncedTo: Fri May 11 2018 09:37:13 GMT+0800 (CST) 1 secs (0 hrs) behind the primary source: 172.16.30.225:27017 syncedTo: Fri May 11 2018 09:37:13 GMT+0800 (CST) 1 secs (0 hrs) behind the primary source: 172.16.30.123:27017 syncedTo: Thu Jan 01 1970 08:00:00 GMT+0800 (CST) 1526002634 secs (423889.62 hrs) behind the primary source: 172.16.30.127:27017 syncedTo: Thu Jan 01 1970 08:00:00 GMT+0800 (CST) 1526002634 secs (423889.62 hrs) behind the primary localhost(mongod-3.3.10)[PRIMARY:rs0] admin> the oplog information localhost(mongod-3.3.10)[PRIMARY:rs0] local> show tables me → 0.000MB / 0.016MB oplog.rs → 9319.549MB / 3418.199MB replset.election → 0.000MB / 0.035MB startup_log → 0.009MB / 0.035MB system.replset → 0.001MB / 0.035MB localhost(mongod-3.3.10)[PRIMARY:rs0] local>
Asked by Tony Damon (1 rep)
May 10, 2018, 01:02 PM
Last activity: Jun 13, 2025, 11:05 AM