1. Version Selection
The latest release available at the time, v6.5.2, was chosen.
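To see which releases are currently published before pinning a version, tiup can list them; a minimal check, assuming tiup is already installed on the control machine:

# List the TiDB versions available in the tiup mirror; v6.5.2 should appear among them.
tiup list tidb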
2. Architecture Overview
DR Auto-Sync is a single-cluster scheme deployed across two data centers in the same city (network latency < 1.5 ms, bandwidth > 10 Gbps): the two centers host one TiDB cluster, and data replication between them is handled by the cluster's own Raft mechanism. Both centers can serve reads and writes at the same time, and a failure of either center does not compromise data consistency.
A dual-active DR Auto-Sync deployment (an example topology is given in section 5) relies on two special TiDB features: Placement Rules, which assign a role to the replicas on each TiKV node (instantiated in rule.json in section 6), and the dr-auto-sync replication mode (configured in section 7). The Placement Rules roles are:
Voter - replicas on this TiKV can vote and can be elected leader
Follower - replicas on this TiKV can vote but cannot be elected leader
Learner - replicas on this TiKV only receive the Raft log asynchronously and do not take part in voting
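Placement Rules are enabled by default in recent TiDB releases, but it does no harm to confirm this once the cluster is running; a minimal sketch, assuming pd-ctl is invoked through tiup ctl against the tidb-pd-01 address from section 4:

# enable-placement-rules should report true; location-labels should list az, rack, host.
tiup ctl:v6.5.2 pd -u http://10.2.83.138:2379 config show replication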
3. TiKV Label Design
Labels follow the hierarchy az / rack / host, declared through replication.location-labels on PD and server.labels on each TiKV node in the topology file (section 5):
No. | TiKV node | az | rack | host |
1 | tikv-01 (10.2.83.150) | flexium | flexium-1 | 150 |
2 | tikv-02 (10.2.83.146) | flexium | flexium-2 | 146 |
3 | tikv-03 (10.2.83.142) | junkun | junkun-1 | 142 |
4 | tikv-04 (10.2.83.151) | junkun | junkun-2 | 151 |
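Once the nodes are deployed (section 5), the labels PD has actually recorded for each TiKV store can be checked with pd-ctl; a minimal sketch, again assuming pd-ctl is run through tiup ctl against tidb-pd-01:

# Each store entry in the output carries a "labels" field that should match the table above.
tiup ctl:v6.5.2 pd -u http://10.2.83.138:2379 store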
4. IP Planning
No. | Role | IP |
1 | tidb_server01 | 10.2.83.133 |
2 | tidb_server02 | 10.2.83.135 |
3 | tidb-pd-01 | 10.2.83.138 |
4 | tidb-pd-02 | 10.2.83.136 |
5 | tidb-pd-03 | 10.2.83.145 |
6 | tiflash-01 | 10.2.83.148 |
7 | tiflash-02 | 10.2.83.149 |
8 | tikv-01 | 10.2.83.150 |
9 | tikv-02 | 10.2.83.146 |
10 | tikv-03 | 10.2.83.142 |
11 | tikv-04 | 10.2.83.151 |
12 | monitor | 10.2.83.127 |
5. Cluster Installation
Prepare the deployment topology file:
global:
  user: "tidb"
  ssh_port: 22
  deploy_dir: "/data/tidb_cluster/tidb-deploy"
  data_dir: "/data/tidb_cluster/tidb-data"

monitored:
  node_exporter_port: 9100
  blackbox_exporter_port: 9115
  deploy_dir: "/data/tidb/monitored/monitored-9100"
  data_dir: "/data/tidb/monitored/monitored-9100/data"
  log_dir: "/data/tidb/monitored/monitored-9100/log"

server_configs:
  pd:
    replication.location-labels: ["az","rack","host"]

pd_servers:
  - host: 10.2.83.138
    name: "tidb-pd-01"
    ssh_port: 22
    client_port: 2379
    peer_port: 2380
    deploy_dir: "/data/tidb/deploy/pd-2379"
    data_dir: "/data/tidb/data/pd-2379"
    log_dir: "/data/tidb/deploy/pd-2379/log"
  - host: 10.2.83.136
    name: "tidb-pd-02"
    ssh_port: 22
    client_port: 2379
    peer_port: 2380
    deploy_dir: "/data/tidb/deploy/pd-2379"
    data_dir: "/data/tidb/data/pd-2379"
    log_dir: "/data/tidb/deploy/pd-2379/log"
  - host: 10.2.83.145
    name: "tidb-pd-03"
    ssh_port: 22
    client_port: 2379
    peer_port: 2380
    deploy_dir: "/data/tidb/deploy/pd-2379"
    data_dir: "/data/tidb/data/pd-2379"
    log_dir: "/data/tidb/deploy/pd-2379/log"

tidb_servers:
  - host: 10.2.83.133
    ssh_port: 22
    port: 4000
    status_port: 10080
    deploy_dir: "/data/tidb/deploy/tidb-4000"
    log_dir: "/data/tidb/deploy/tidb-4000/log"
  - host: 10.2.83.135
    ssh_port: 22
    port: 4000
    status_port: 10080
    deploy_dir: "/data/tidb/deploy/tidb-4000"
    log_dir: "/data/tidb/deploy/tidb-4000/log"

tikv_servers:
  - host: 10.2.83.150
    ssh_port: 22
    port: 20160
    status_port: 20180
    deploy_dir: "/data/tidb/deploy/tikv-20160"
    data_dir: "/data/tidb/data/tikv-20160"
    log_dir: "/data/tidb/deploy/tikv-20160/log"
    config:
      server.labels: { az: "flexium", rack: "flexium-1", host: "150" }
  - host: 10.2.83.146
    ssh_port: 22
    port: 20160
    status_port: 20180
    deploy_dir: "/data/tidb/deploy/tikv-20160"
    data_dir: "/data/tidb/data/tikv-20160"
    log_dir: "/data/tidb/deploy/tikv-20160/log"
    config:
      server.labels: { az: "flexium", rack: "flexium-2", host: "146" }
  - host: 10.2.83.142
    ssh_port: 22
    port: 20160
    status_port: 20180
    deploy_dir: "/data/tidb/deploy/tikv-20160"
    data_dir: "/data/tidb/data/tikv-20160"
    log_dir: "/data/tidb/deploy/tikv-20160/log"
    config:
      server.labels: { az: "junkun", rack: "junkun-1", host: "142" }
  - host: 10.2.83.151
    ssh_port: 22
    port: 20160
    status_port: 20180
    deploy_dir: "/data/tidb/deploy/tikv-20160"
    data_dir: "/data/tidb/data/tikv-20160"
    log_dir: "/data/tidb/deploy/tikv-20160/log"
    config:
      server.labels: { az: "junkun", rack: "junkun-2", host: "151" }

tiflash_servers:
  - host: 10.2.83.148
    ssh_port: 22
    tcp_port: 9000
    http_port: 8123
    flash_service_port: 3930
    flash_proxy_port: 20170
    flash_proxy_status_port: 20292
    metrics_port: 8234
    deploy_dir: "/data/tidb/deploy/tiflash-9000"
    data_dir: "/data/tidb/tiflash-9000"
    log_dir: "/data/tidb/deploy/tiflash-9000/log"
  - host: 10.2.83.149
    ssh_port: 22
    tcp_port: 9000
    http_port: 8123
    flash_service_port: 3930
    flash_proxy_port: 20170
    flash_proxy_status_port: 20292
    metrics_port: 8234
    deploy_dir: "/data/tidb/deploy/tiflash-9000"
    data_dir: "/data/tidb/tiflash-9000"
    log_dir: "/data/tidb/deploy/tiflash-9000/log"

monitoring_servers:
  - host: 10.2.83.127
    ssh_port: 22
    port: 9090
    deploy_dir: "/data/tidb/deploy/prometheus-8249"
    data_dir: "/data/tidb/data/prometheus-8249"
    log_dir: "/data/tidb/deploy/prometheus-8249/log"

grafana_servers:
  - host: 10.2.83.127
    port: 3000
    deploy_dir: /data/tidb/deploy/grafana-3000

alertmanager_servers:
  - host: 10.2.83.127
    ssh_port: 22
    web_port: 9093
    cluster_port: 9094
    deploy_dir: "/data/tidb/deploy/alertmanager-9093"
    data_dir: "/data/tidb/data/alertmanager-9093"
    log_dir: "/data/tidb/deploy/alertmanager-9093/log"
6. Placement Rules Planning
cat rule.json
[
  {
    "group_id": "pd",
    "group_index": 0,
    "group_override": false,
    "rules": [
      {
        "group_id": "pd",
        "id": "az-flexium",
        "start_key": "",
        "end_key": "",
        "role": "voter",
        "count": 2,
        "location_labels": ["az","rack","host"],
        "label_constraints": [{"key": "az", "op": "in", "values": ["flexium"]}]
      },
      {
        "group_id": "pd",
        "id": "rack-junkun-1",
        "start_key": "",
        "end_key": "",
        "role": "voter",
        "count": 1,
        "location_labels": ["az","rack","host"],
        "label_constraints": [{"key": "rack", "op": "in", "values": ["junkun-1"]}]
      },
      {
        "group_id": "pd",
        "id": "rack-junkun-2",
        "start_key": "",
        "end_key": "",
        "role": "learner",
        "count": 1,
        "location_labels": ["az","rack","host"],
        "label_constraints": [{"key": "rack", "op": "in", "values": ["junkun-2"]}]
      }
    ]
  }
]
To apply the configuration in rule.json, first back up the existing placement rules to default.json with the command below, then overwrite them with the contents of rule.json:
1. Back up: pd-ctl config placement-rules rule-bundle load --out="default.json"
2. Import: pd-ctl config placement-rules rule-bundle save --in="rule.json"
The import succeeded if the command prints success.
3. Check that the rules are loaded:
pd-ctl config placement-rules show
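The pd-ctl commands above are written bare; in a tiup-managed cluster they would typically be run through tiup ctl against one of the PD endpoints from section 4. A sketch, assuming the v6.5.2 control component and tidb-pd-01:

# Back up the current rules, import rule.json, then show the loaded rules.
tiup ctl:v6.5.2 pd -u http://10.2.83.138:2379 config placement-rules rule-bundle load --out="default.json"
tiup ctl:v6.5.2 pd -u http://10.2.83.138:2379 config placement-rules rule-bundle save --in="rule.json"
tiup ctl:v6.5.2 pd -u http://10.2.83.138:2379 config placement-rules show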
7. Configure DR Auto-Sync
Add the DR Auto-Sync configuration:
pd-ctl config set replication-mode dr-auto-sync
pd-ctl config set replication-mode dr-auto-sync label-key az
pd-ctl config set replication-mode dr-auto-sync primary flexium
pd-ctl config set replication-mode dr-auto-sync dr junkun
pd-ctl config set replication-mode dr-auto-sync primary-replicas 2
pd-ctl config set replication-mode dr-auto-sync dr-replicas 1
pd-ctl config set replication-mode dr-auto-sync wait-store-timeout 1m
Check that the configuration has taken effect:
pd-ctl config show replication-mode
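Beyond the configuration itself, the current synchronization state of DR Auto-Sync (sync, async, or recovering) can be read from PD's HTTP API; a minimal check against the tidb-pd-01 endpoint from section 4:

# The response includes the replication mode and, for dr-auto-sync, its current state.
curl http://10.2.83.138:2379/pd/api/v1/replication_mode/status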
8. Switchover Simulation
8.1 Add a PD node at the same-city DR center. The operation does not restart tidb, tikv, or pd, so it causes no flicker or interruption of cluster service.
scale_out_pd.yaml
pd_servers:
  - host: 10.2.83.145
    ssh_port: 22
    name: pd-145-2381
    client_port: 2381
    peer_port: 2382
    deploy_dir: /data/tidb/deploy/pd-2381
    data_dir: /data/tidb/data/pd-2381
Scale out:
tiup cluster scale-out tidb_test scale_out_pd.yaml
Scale in:
tiup cluster scale-in tidb_test -N 10.2.83.145:2381
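After the scale-out (and again after the scale-in) it is worth confirming that PD membership actually changed; a quick check, reusing the pd-ctl invocation sketched in section 6:

# The new PD on 10.2.83.145:2381 should appear in (and later disappear from) the member list.
tiup ctl:v6.5.2 pd -u http://10.2.83.138:2379 member
tiup cluster display tidb_test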