cilium 命令安装

1
2
3
4
5
6
7
# Install the cilium CLI: look up the release tag published in stable.txt,
# download the tarball for this CPU architecture together with its checksum
# file, verify it, unpack it into /usr/local/bin and remove the downloads.
CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
# amd64 is the default; arm64 is selected only when uname reports aarch64.
case "$(uname -m)" in
  aarch64) CLI_ARCH=arm64 ;;
  *) CLI_ARCH=amd64 ;;
esac
release_base="https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}"
cli_tarball="cilium-linux-${CLI_ARCH}.tar.gz"
# Brace expansion fetches both <tarball> and <tarball>.sha256sum.
curl -L --fail --remote-name-all "${release_base}/${cli_tarball}"{,.sha256sum}
sha256sum --check "${cli_tarball}.sha256sum"
sudo tar xzvfC "${cli_tarball}" /usr/local/bin
rm "${cli_tarball}"{,.sha256sum}

cilium 安装 helm 参数说明

  • version v1.15.0-rc.0 版本 查看版本https://helm.cilium.io/
  • k8sServiceHost master vip 地址
  • k8sServicePort master vip 端口
  • ipv4NativeRoutingCIDR 原生路由(不做 SNAT/伪装)的目标网段,一般填写集群的 pod cidr 地址
  • ipam.mode IPAM 模式,默认 cluster-pool;设为 kubernetes 时从 k8s v1.Node 对象的 podCIDR 字段读取可用 IP 池;alibabacloud、azure、eni 为各大公有云自己定制的 ipam 插件
  • ipam.operator.clusterPoolIPv4PodCIDRList 仅当 ipam.mode 配置为 cluster-pool 时该参数生效,用于指定 POD cidr 地址
  • l2podAnnouncements.interface 指定使用网卡
  • bandwidthManager.bbr 需要 5.18 及以上内核才能使用(bandwidthManager 本身需要 5.1 及以上内核)
  • 其它参数 请参考 https://github.com/cilium/cilium/tree/main/install/kubernetes/cilium

helm cilium 安装

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Add the cilium Helm repository
helm repo add cilium https://helm.cilium.io/
# Refresh the local chart index
helm repo update
# Install cilium 1.15.0-rc.0 into kube-system.
# - Native routing is selected with routingMode=native alone; the former
#   "tunnel=disabled" value was removed from the 1.15 chart and is dropped here.
# - ipam.mode=kubernetes reads pod CIDRs from the Node objects, so the
#   clusterPoolIPv4PodCIDRList value only matters if ipam.mode is switched
#   to cluster-pool.
# - The list-index argument is quoted so the shell never treats "[0]" as a
#   glob pattern.
# - k8sServiceHost/k8sServicePort, ipv4NativeRoutingCIDR and
#   l2podAnnouncements.interface are environment specific; adjust them.
helm install cilium cilium/cilium --version 1.15.0-rc.0 \
  --namespace=kube-system \
  --set k8sServiceHost=127.0.0.1 \
  --set k8sServicePort=6443 \
  --set nodeinit.enabled=true \
  --set routingMode=native \
  --set rollOutCiliumPods=true \
  --set bpf.masquerade=true \
  --set bpfClockProbe=true \
  --set bpf.preallocateMaps=true \
  --set bpf.tproxy=true \
  --set bpf.hostLegacyRouting=false \
  --set autoDirectNodeRoutes=true \
  --set localRedirectPolicy=true \
  --set enableCiliumEndpointSlice=true \
  --set enableK8sEventHandover=true \
  --set externalIPs.enabled=true \
  --set hostPort.enabled=true \
  --set socketLB.enabled=true \
  --set nodePort.enabled=true \
  --set sessionAffinity=true \
  --set annotateK8sNode=true \
  --set nat46x64Gateway.enabled=false \
  --set ipv6.enabled=false \
  --set pmtuDiscovery.enabled=true \
  --set enableIPv6BIGTCP=false \
  --set sctp.enabled=true \
  --set wellKnownIdentities.enabled=true \
  --set hubble.enabled=false \
  --set ipv4NativeRoutingCIDR=10.80.0.0/12 \
  --set ipam.mode=kubernetes \
  --set 'ipam.operator.clusterPoolIPv4PodCIDRList[0]=10.80.0.0/12' \
  --set installNoConntrackIptablesRules=true \
  --set enableIPv4BIGTCP=true \
  --set egressGateway.enabled=false \
  --set endpointRoutes.enabled=false \
  --set kubeProxyReplacement=true \
  --set highScaleIPcache.enabled=false \
  --set l2announcements.enabled=true \
  --set k8sClientRateLimit.qps=30 \
  --set k8sClientRateLimit.burst=40 \
  --set l2podAnnouncements.interface=eth0 \
  --set l2announcements.leaseDuration=3s \
  --set l2announcements.leaseRenewDeadline=1s \
  --set l2announcements.leaseRetryPeriod=200ms \
  --set image.useDigest=false \
  --set operator.image.useDigest=false \
  --set operator.rollOutPods=true \
  --set authentication.enabled=false \
  --set bandwidthManager.enabled=true \
  --set bandwidthManager.bbr=true

查看 安装状态

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
cilium status
root@Qist:/tmp# cilium status
/¯¯\
/¯¯\__/¯¯\ Cilium: OK
\__/¯¯\__/ Operator: OK
/¯¯\__/¯¯\ Envoy DaemonSet: disabled (using embedded mode)
\__/¯¯\__/ Hubble Relay: disabled
\__/ ClusterMesh: disabled

DaemonSet cilium Desired: 7, Ready: 7/7, Available: 7/7
Deployment cilium-operator Desired: 2, Ready: 2/2, Available: 2/2
Containers: cilium Running: 7
cilium-operator Running: 2
Cluster Pods: 26/26 managed by Cilium
Helm chart version: 1.15.0-rc.0
Image versions cilium quay.io/cilium/cilium:v1.15.0-rc.0: 7
cilium-operator quay.io/cilium/operator-generic:v1.15.0-rc.0: 2

测试集群是否正常

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Create a net-tools DaemonSet (one pod per node, tolerating all taints) that
# just sleeps, so its pods can be used for connectivity tests.
# The YAML indentation is restored; without it kubectl rejects the manifest.
cat <<EOF | kubectl create -f -
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: net-tools
  labels:
    k8s-app: net-tools
spec:
  selector:
    matchLabels:
      k8s-app: net-tools
  template:
    metadata:
      labels:
        k8s-app: net-tools
    spec:
      tolerations:
        - effect: NoSchedule
          operator: Exists
        - key: CriticalAddonsOnly
          operator: Exists
        - effect: NoExecute
          operator: Exists
      containers:
        - name: net-tools
          image: juestnow/net-tools
          command:
            - /bin/sh
            - "-c"
            - set -e -x; tail -f /dev/null
          resources:
            limits:
              memory: 30Mi
            requests:
              cpu: 50m
              memory: 20Mi
      dnsConfig:
        options:
          - name: single-request-reopen
EOF

root@Qist:/tmp# kubectl -n default get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
net-tools-29chz 1/1 Running 2 (5d18h ago) 14d 10.80.3.70 k8s-node-3 <none> <none>
net-tools-2ngh4 1/1 Running 2 (5d16h ago) 14d 10.80.1.74 k8s-master-3 <none> <none>
net-tools-7lsf2 1/1 Running 2 (5d16h ago) 14d 10.80.2.20 k8s-master-2 <none> <none>
net-tools-lpnfk 1/1 Running 2 (5d16h ago) 14d 10.80.6.251 k8s-node-1 <none> <none>
net-tools-p4bbq 1/1 Running 2 (5d16h ago) 14d 10.80.0.63 k8s-master-1 <none> <none>
net-tools-sdkhr 1/1 Running 2 (5d18h ago) 14d 10.80.5.232 k8s-node-4 <none> <none>
net-tools-sgjm2 1/1 Running 2 (5d16h ago) 14d 10.80.4.229 k8s-node-2 <none> <none>
# Open a shell in any of the pods to test network connectivity.
# The "--" separator is required: the form without it is deprecated by kubectl.
kubectl -n default exec -ti net-tools-29chz -- /bin/sh
/ # ping 10.80.1.74
PING 10.80.1.74 (10.80.1.74): 56 data bytes
64 bytes from 10.80.1.74: seq=0 ttl=62 time=1.399 ms
--- 10.80.1.74 ping statistics ---
1 packets transmitted, 1 packets received, 0% packet loss
round-trip min/avg/max = 1.399/1.399/1.399 ms

/ # dig www.qq.com

; <<>> DiG 9.14.8 <<>> www.qq.com
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 8279
;; flags: qr rd ra; QUERY: 1, ANSWER: 3, AUTHORITY: 0, ADDITIONAL: 1

;; OPT PSEUDOSECTION:
; EDNS: version: 0, flags:; udp: 4096
; COOKIE: e8e82d67cca18f90 (echoed)
;; QUESTION SECTION:
;www.qq.com. IN A

;; ANSWER SECTION:
www.qq.com. 30 IN CNAME ins-r23tsuuf.ias.tencent-cloud.net.
ins-r23tsuuf.ias.tencent-cloud.net. 30 IN A 121.14.77.221
ins-r23tsuuf.ias.tencent-cloud.net. 30 IN A 121.14.77.201

;; Query time: 30 msec
;; SERVER: 10.66.0.2#53(10.66.0.2)
;; WHEN: Wed Jan 17 02:21:42 UTC 2024
;; MSG SIZE rcvd: 209

/ # ping www.baidu.com -c3
PING www.baidu.com (183.2.172.42): 56 data bytes
64 bytes from 183.2.172.42: seq=0 ttl=50 time=7.681 ms
64 bytes from 183.2.172.42: seq=1 ttl=50 time=7.655 ms
64 bytes from 183.2.172.42: seq=2 ttl=50 time=7.747 ms

--- www.baidu.com ping statistics ---
3 packets transmitted, 3 packets received, 0% packet loss
round-trip min/avg/max = 7.655/7.694/7.747 ms
# 测试网络正常

cert-manager 是一种自动执行证书管理的工具

官网:https://cert-manager.io/

安装cert-manager:

1
2
3
4
5
6
7
8
9
10
# Register the Jetstack chart repository and refresh the local chart index
helm repo add jetstack https://charts.jetstack.io
helm repo update
# Install the cert-manager CRDs (release v1.13.3)
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.3/cert-manager.crds.yaml
# Install the cert-manager chart v1.13.3 into the cert-manager namespace,
# creating the namespace when it does not exist yet
helm install cert-manager jetstack/cert-manager \
  --version v1.13.3 \
  --namespace cert-manager \
  --create-namespace

创建自签CA

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Bootstrap a private CA with cert-manager:
#   1. a self-signed ClusterIssuer,
#   2. a CA Certificate issued by it and stored in the secret root-secret
#      (namespace cert-manager),
#   3. a CA ClusterIssuer (ca-issuer) that signs with root-secret.
# The YAML indentation is restored; without it kubectl rejects the manifest.
cat <<EOF | kubectl create -f -
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: selfsigned-issuer
spec:
  selfSigned: {}
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: selfsigned-ca
  namespace: cert-manager
spec:
  isCA: true
  commonName: selfsigned-ca
  secretName: root-secret
  privateKey:
    algorithm: ECDSA
    size: 256
  issuerRef:
    name: selfsigned-issuer
    kind: ClusterIssuer
    group: cert-manager.io
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: ca-issuer
spec:
  ca:
    secretName: root-secret
EOF

说明:

issuer与clusterissuer是两种签发资源:issuer只能在其所在的命名空间内签发证书,clusterissuer可以为所有命名空间签发证书。如果是issuer,则证书及其secret所属的namespace应与issuer一致;如果是clusterissuer,则它引用的secret(例如上面的root-secret)应放在cert-manager安装的namespace中。

上面用的是cert-manager的自签证书做为CA,也可以自已定义个CA放在secret里,然后做为clusterissuer来进行后续的签发。

应用后使用如下命令查看clusterissuer与certificate:

kubectl get clusterissuer

kubectl get certificate -A

状态READY为true说明签发正常,否则可以使用describe查看错误原因。

测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Create a test backend: a ClusterIP Service "nginx-test" exposing port 80 and
# a two-replica nginx Deployment selected by the label app=nginx.
# The YAML indentation is restored; without it kubectl rejects the manifest.
cat <<EOF | kubectl create -f -
apiVersion: v1
kind: Service
metadata:
  labels:
    app: nginx
  name: nginx-test
spec:
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 80
  selector:
    app: nginx
  type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-test
  labels:
    app: nginx
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          image: nginx:1.14.2
          ports:
            - containerPort: 80
EOF

# Create the Ingress. The cert-manager.io/cluster-issuer annotation requests a
# certificate for www.test.com from the ClusterIssuer "ca-issuer"; it is
# stored in the secret test-tls.
# The YAML indentation is restored; without it kubectl rejects the manifest.
cat <<EOF | kubectl apply -f -
kind: Ingress
apiVersion: networking.k8s.io/v1
metadata:
  name: nginx-test
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    cert-manager.io/cluster-issuer: ca-issuer
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - www.test.com
      secretName: test-tls
  rules:
    - host: www.test.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: nginx-test
                port:
                  name: http
EOF

在注解中定义cert-manager.io/cluster-issuer,并指定clusterissuer的名称;
如为issuer则使用cert-manager.io/issuer注解。
spec.tls[].secretName定义secret的名称,自动签发的证书会写在这个secret里。

应用后,会发现新生成secret:

1
2
3
root@Qist:~# kubectl get secrets
NAME TYPE DATA AGE
test-tls kubernetes.io/tls 3 84s

手动签发certificate,ingress直接使用这个secret(关闭注解)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Issue the certificate explicitly instead of through the Ingress annotation:
# a Certificate for www.test.com signed by the ClusterIssuer "ca-issuer" and
# written to the secret test-tls.
# The YAML indentation is restored; without it kubectl rejects the manifest.
cat <<EOF | kubectl apply -f -
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: test-tls
spec:
  dnsNames:
    - www.test.com
  issuerRef:
    group: cert-manager.io
    kind: ClusterIssuer
    name: ca-issuer
  secretName: test-tls
  duration: 87600h # 10 years
  usages:
    - digital signature
    - key encipherment
EOF

具体参数:https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec

卸载cert-manager

1
2
3
4
5
6
7
8
9
10
# List the cert-manager custom resources that still exist in the cluster
kubectl get Issuers,ClusterIssuers,Certificates,CertificateRequests,Orders,Challenges --all-namespaces
# Uninstall the cert-manager Helm release
helm --namespace cert-manager delete cert-manager
# Delete the namespace
kubectl delete namespace cert-manager
# Delete the CRDs; replace vX.Y.Z with the cert-manager version installed in the cluster
kubectl delete -f https://github.com/cert-manager/cert-manager/releases/download/vX.Y.Z/cert-manager.crds.yaml
# Delete the webhook APIService
kubectl delete apiservice v1beta1.webhook.cert-manager.io

环境说明:

#操作系统: Rocky Linux release 9.3
#containerd版本:1.6.26
#kubernetes版本:v1.28.2
#K8S master 节点IP:192.168.2.175
#K8S master 节点IP:192.168.2.176
#K8S master 节点IP:192.168.2.177
#K8S worker节点IP:192.168.2.185
#K8S worker节点IP:192.168.2.187
#K8S worker节点IP:192.168.3.62
#K8S worker节点IP:192.168.3.70
#VIP 192.168.3.251
#网络插件:flannel
#kube-proxy网络转发: ipvs
#kubernetes源: 阿里云镜像站
#service-cidr:10.96.0.0/16
#pod-network-cidr:10.244.0.0/16

  • 没做特殊说明就是在所有节点进行操作

部署准备:

操作在所有节点进行

1、修改内核参数

1
2
3
4
5
6
7
# Kernel parameters for Kubernetes networking. They are written to a drop-in
# file rather than edited into /etc/sysctl.conf by hand, so the step is
# non-interactive and can be re-run safely.
# The net.bridge.* keys only exist once br_netfilter is loaded, so load it first.
modprobe br_netfilter
cat <<EOF | tee /etc/sysctl.d/99-kubernetes.conf
vm.swappiness = 0
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-arptables = 1
EOF
# Apply the settings from all sysctl configuration files
sysctl --system

2、关闭swap

1
2
3
4
# Turn swap off immediately and tell the kernel to avoid swapping
swapoff -a && sysctl -w vm.swappiness=0
# Keep swap off after a reboot: comment out every active swap entry in
# /etc/fstab, for example a line such as
#   /dev/mapper/centos-swap swap swap defaults 0 0
sed -ri '/^[^#].*[[:space:]]swap[[:space:]]/s/^/#/' /etc/fstab

3、将 SELinux 设置为 disabled 模式

1
2
# Stop SELinux enforcement on the running system
setenforce 0
# Persist the change: a line that is exactly SELINUX=enforcing becomes SELINUX=disabled
sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config

4、cgroup2 开启(经常升级内核不建议执行不然会出现升级后不能启动,只能用旧内核启动的问题)

1
2
3
# Add systemd.unified_cgroup_hierarchy=1 to the boot arguments of every installed kernel
grubby \
--update-kernel=ALL \
--args="systemd.unified_cgroup_hierarchy=1"

5、内核模块加载

1
2
3
4
5
6
7
8
9
10
11
12
# Write the list of kernel modules (overlay, br_netfilter, nf_conntrack)
# to /etc/modules-load.d/k8s.conf
cat <<EOF | tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
nf_conntrack
EOF

# Write the IPVS module list to /etc/modules-load.d/ipvs.conf
# (presumably for kube-proxy in ipvs mode, which this guide configures later)
cat <<EOF | tee /etc/modules-load.d/ipvs.conf
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
EOF

6、重启系统使上面配置生效

reboot

7、安装依赖

1
# Install the helper packages used by the remaining steps
dnf install -y \
  dnf-utils \
  ipvsadm \
  telnet \
  wget \
  net-tools \
  conntrack \
  ipset \
  jq \
  iptables \
  curl \
  sysstat \
  libseccomp \
  socat \
  nfs-utils \
  fuse \
  fuse-devel

安装 containerd

1、导入containerd源

1
# Add the docker-ce repository (it provides containerd.io); fetched over HTTPS
# so the repository definition cannot be tampered with in transit
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo

2、安装containerd

1
# Install containerd without prompting, like the other install steps in this guide
yum install -y containerd.io

containerd 配置

1、配置 containerd

1
2
3
4
5
6
7
8
9
10
11
 #生成默认配置
containerd config default > /etc/containerd/config.toml
#修改配置
sandbox_image = "registry.k8s.io/pause:3.6" 改成国内地址
sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.9"
# 将 cgroup 驱动改为 systemd(SystemdCgroup = true):

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
...
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
SystemdCgroup = true

2、 创建存储挂载目录

1
2
3
4
5
6
7
8
9
10
11
12
13
# Keep containerd's data on the large disk by bind-mounting /apps/containerd
# onto /var/lib/containerd.
mkdir -p /var/lib/containerd/
mkdir -p /apps/containerd/ # change this to a path on your large disk

# Register the bind mount in /etc/fstab. The entry is appended only when it is
# not already present, so re-running this step does not duplicate it.
fstab_entry="/apps/containerd /var/lib/containerd none defaults,bind,nofail 0 0"
grep -qxF -- "$fstab_entry" /etc/fstab || echo "$fstab_entry" >>/etc/fstab

systemctl daemon-reload
# Mount everything listed in /etc/fstab
mount -a
# 查看是否挂在
[root@k8s-master-1 containerd]# mount | grep containerd
/dev/vda3 on /var/lib/containerd type xfs (rw,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota)

3 开启开机启动并启动

1
systemctl enable containerd.service --now

4、查看进程是否启动

1
systemctl status containerd.service 

5、 查看数据盘是否有文件,有则证明挂载正确

ll /apps/containerd/

6、 创建crictl 配置

1
2
3
4
5
6
7
8
# Write /etc/crictl.yaml so crictl uses the containerd socket for both the
# runtime and the image endpoint
cat <<EOF | tee /etc/crictl.yaml
runtime-endpoint: "unix:///var/run/containerd/containerd.sock"
image-endpoint: "unix:///var/run/containerd/containerd.sock"
timeout: 10
debug: false
pull-image-on-create: true
disable-pull-on-run: false
EOF

7、查看配置是否生效

1
2
3
4
[root@k8s-master-1 containerd]# crictl info|  grep sandboxImage
"sandboxImage": "registry.aliyuncs.com/google_containers/pause:3.6",
[root@k8s-master-1 containerd]# crictl info| grep SystemdCgroup
"SystemdCgroup": true

安装 kubelet kubeadm kubectl

1、 导入repo源

1
2
3
4
5
6
7
8
# Note: the el7 repository is used here; Google did not publish separate packages for rhel8/rhel9
# NOTE(review): gpgcheck=0 turns off package signature verification — confirm this is acceptable
cat <<EOF | tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=kubernetes
baseurl=https://mirrors.tuna.tsinghua.edu.cn/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
EOF
  • kubelet所有节点都需要安装
  • kubectl可以安装在任意机器,只要能远程连接到k8s的节点即可
  • kubeadm所有节点都需要安装
1
2
3
4
5
6
7
8
# Install the newest version available in the yum repository
# yum install -y kubelet kubeadm kubectl

# List the kubelet/kubeadm/kubectl versions offered by the current yum repository
# yum list kubelet kubeadm kubectl --showduplicates

# Install the pinned 1.28.2 packages with yum
yum install -y kubelet-1.28.2-0 kubeadm-1.28.2-0 kubectl-1.28.2-0

配置kubelet

1、 创建kubelet 存储挂载

1
2
3
4
5
6
7
8
9
# Keep kubelet's data on the large disk by bind-mounting /apps/kubelet onto
# /var/lib/kubelet. -p makes mkdir succeed when a directory already exists.
mkdir -p /var/lib/kubelet
mkdir -p /apps/kubelet

# Register the bind mount in /etc/fstab. The entry is appended only when it is
# not already present, so re-running this step does not duplicate it.
fstab_entry="/apps/kubelet /var/lib/kubelet none defaults,bind,nofail 0 0"
grep -qxF -- "$fstab_entry" /etc/fstab || echo "$fstab_entry" >>/etc/fstab

systemctl daemon-reload
# Mount everything listed in /etc/fstab
mount -a

2、 查看是否挂载

1
2
[root@k8s-master-1]# mount | grep kubelet
/dev/vda3 on /var/lib/kubelet type xfs (rw,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota)

6、刷新 service

1
2
3
4
5
6
# Reload systemd unit files
systemctl daemon-reload

# Enable kubelet at boot
systemctl enable kubelet.service
# Check its status
systemctl status kubelet.service

7、创建lb master1 节点

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Official documentation: https://github.com/kubernetes/kubeadm/blob/main/docs/ha-considerations.md#kube-vip
# The image juestnow/kube-vip:v0.6.4 can be used
# KVVERSION=$(curl -sL https://api.github.com/repos/kube-vip/kube-vip/releases | jq -r ".[0].name")
export KVVERSION='v0.6.4'
export VIP=192.168.3.251
export INTERFACE='eth0'
# Shortcut that runs the kube-vip binary from its container image.
# Defined as a function rather than an alias because bash does not expand
# aliases in non-interactive shells, so an alias breaks when this is run as a script.
kube-vip() {
  ctr run --rm --net-host "docker.io/juestnow/kube-vip:${KVVERSION}" vip /kube-vip "$@"
}

# Pull the image for ctr
ctr images pull "docker.io/juestnow/kube-vip:${KVVERSION}"

# Generate the static pod manifest
kube-vip manifest pod \
  --interface "$INTERFACE" \
  --vip "$VIP" \
  --controlplane \
  --arp \
  --leaderElection | tee /etc/kubernetes/manifests/kube-vip.yaml

# Change the image pull policy
sed -i 's/Always/IfNotPresent/g' /etc/kubernetes/manifests/kube-vip.yaml
# Point the manifest at the mirrored image. The tag is matched generically so
# the substitution still works when KVVERSION is changed above.
sed -i "s#ghcr.io/kube-vip/kube-vip:[^[:space:]]*#docker.io/juestnow/kube-vip:${KVVERSION}#g" /etc/kubernetes/manifests/kube-vip.yaml

# Pull the image through the CRI
crictl pull "docker.io/juestnow/kube-vip:${KVVERSION}"
# Show the resulting manifest
cat /etc/kubernetes/manifests/kube-vip.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  name: kube-vip
  namespace: kube-system
spec:
  containers:
  - args:
    - manager
    env:
    - name: vip_arp
      value: "true"
    - name: port
      value: "6443"
    - name: vip_interface
      value: eth0
    - name: vip_cidr
      value: "32"
    - name: cp_enable
      value: "true"
    - name: cp_namespace
      value: kube-system
    - name: vip_ddns
      value: "false"
    - name: vip_leaderelection
      value: "true"
    - name: vip_leasename
      value: plndr-cp-lock
    - name: vip_leaseduration
      value: "5"
    - name: vip_renewdeadline
      value: "3"
    - name: vip_retryperiod
      value: "1"
    - name: vip_address
      value: 192.168.3.251
    - name: prometheus_server
      value: :2112
    image: docker.io/juestnow/kube-vip:v0.6.4
    imagePullPolicy: IfNotPresent
    name: kube-vip
    resources: {}
    securityContext:
      capabilities:
        add:
        - NET_ADMIN
        - NET_RAW
    volumeMounts:
    - mountPath: /etc/kubernetes/admin.conf
      name: kubeconfig
  hostAliases:
  - hostnames:
    - kubernetes
    ip: 127.0.0.1
  hostNetwork: true
  volumes:
  - hostPath:
      path: /etc/kubernetes/admin.conf
    name: kubeconfig
status: {}

# Finally, place the manifest in /etc/kubernetes/manifests on the other control-plane nodes
for master_ip in 192.168.2.176 192.168.2.177; do
  scp -rp /etc/kubernetes/manifests/kube-vip.yaml "root@${master_ip}:/etc/kubernetes/manifests/"
done

初始化kubernetes master1 执行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Initialise the first control-plane node (run on master1 only).
# --control-plane-endpoint is the VIP, and --upload-certs lets the other
# control-plane nodes fetch the certificates when they join.
# --ignore-preflight-errors=all was passed twice; it is now given once.
# NOTE(review): ignoring every preflight error can hide real problems —
# consider listing only the checks that must be skipped.
kubeadm init --apiserver-advertise-address=0.0.0.0 \
  --apiserver-cert-extra-sans=127.0.0.1 \
  --kubernetes-version 1.28.2 \
  --image-repository=registry.aliyuncs.com/google_containers \
  --service-cidr=10.96.0.0/16 \
  --pod-network-cidr=10.244.0.0/16 \
  --ignore-preflight-errors=all \
  --upload-certs \
  --control-plane-endpoint=192.168.3.251 \
  --cri-socket=unix:///var/run/containerd/containerd.sock

#初始化过程
[root@k8s-master-1 tmp]# kubeadm init --apiserver-advertise-address=0.0.0.0 \
--apiserver-cert-extra-sans=127.0.0.1 \
--kubernetes-version 1.28.2 \
--image-repository=registry.aliyuncs.com/google_containers \
--ignore-preflight-errors=all \
--service-cidr=10.96.0.0/16 \
--pod-network-cidr=10.244.0.0/16 \
--ignore-preflight-errors=all \
--upload-certs \
--control-plane-endpoint=192.168.3.251 \
--cri-socket=unix:///var/run/containerd/containerd.sock
[init] Using Kubernetes version: v1.28.2
[preflight] Running pre-flight checks
[WARNING Hostname]: hostname "k8s-master-1" could not be reached
[WARNING Hostname]: hostname "k8s-master-1": lookup k8s-master-1 on 192.168.2.84:53: no such host
[WARNING FileContent--proc-sys-net-bridge-bridge-nf-call-iptables]: /proc/sys/net/bridge/bridge-nf-call-iptables does not exist
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
W0111 19:59:33.239720 811848 checks.go:835] detected that the sandbox image "registry.aliyuncs.com/google_containers/pause:3.6" of the container runtime is inconsistent with that used by kubeadm. It is recommended that using "registry.aliyuncs.com/google_containers/pause:3.9" as the CRI sandbox image.
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local rocky] and IPs [10.96.0.1 192.168.2.175 192.168.3.251 127.0.0.1]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [localhost k8s-master-1] and IPs [192.168.2.175 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [localhost k8s-master-1] and IPs [192.168.2.175 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 7.563962 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[upload-certs] Using certificate key:
c6a80e1929786899137bb0a765323fa0cb7c14fb8c0bedb61a0eaf1583a13abd
[mark-control-plane] Marking the node rocky as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node rocky as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
[bootstrap-token] Using token: u9ryln.7f9t2ih8v1es5d79
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of the control-plane node running the following command on each as root:

kubeadm join 192.168.3.251:6443 --token vx5j0a.7n1jgk7cj7hffkmy \
--discovery-token-ca-cert-hash sha256:6055c9951d7d92d1243006e973a41a375b71b8e20ae4ccdf35ac4a7edfd4531a \
--control-plane --certificate-key 5da3036c3748773980d0cc9ee4352ace20f6b3a5fbee5a5aad2a9ff0bba3ccd2

Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.3.251:6443 --token vx5j0a.7n1jgk7cj7hffkmy \
--discovery-token-ca-cert-hash sha256:6055c9951d7d92d1243006e973a41a375b71b8e20ae4ccdf35ac4a7edfd4531a

#错误排除
journalctl -u kubelet
# 查看集群状态
[root@k8s-master-1 apps]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
scheduler Healthy ok
controller-manager Healthy ok
etcd-0 Healthy ok
# 查看集群pod
[root@k8s-master-1 apps]# kubectl get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system coredns-66f779496c-dk8sr 0/1 Pending 0 4m40s
kube-system coredns-66f779496c-vmqcl 0/1 Pending 0 4m40s
kube-system etcd-k8s-master-1 1/1 Running 1 4m52s
kube-system kube-apiserver-k8s-master-1 1/1 Running 1 4m57s
kube-system kube-controller-manager-k8s-master-1 1/1 Running 1 4m52s
kube-system kube-proxy-rmc6j 1/1 Running 0 4m40s
kube-system kube-scheduler-k8s-master-1 1/1 Running 1 4m53s

# 修改 kube-proxy 为ipvs
kubectl -n kube-system edit cm kube-proxy
logging:
flushFrequency: 0
options:
json:
infoBufferSize: "0"
verbosity: 0
metricsBindAddress: ""
mode: "ipvs" # 添加ipvs
nodePortAddresses: null
# Apply the new configuration by restarting every kube-proxy pod. Restarting
# the DaemonSet avoids depending on a generated pod name and covers all nodes.
kubectl -n kube-system rollout restart daemonset kube-proxy
# 查看 kube-ipvs0 网卡是否创建
[root@k8s-master-1 apps]# ip a | grep kube-ipvs0
3: kube-ipvs0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
inet 10.96.0.1/32 scope global kube-ipvs0
inet 10.96.0.10/32 scope global kube-ipvs0
# 查看 ipvs 信息
[root@k8s-master-1 apps]# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 10.96.0.1:443 rr
-> 192.168.2.175:6443 Masq 1 0 0
TCP 10.96.0.10:53 rr
TCP 10.96.0.10:9153 rr
UDP 10.96.0.10:53 rr

部署 master2,master3 执行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
  kubeadm join 192.168.3.251:6443 --token u9ryln.7f9t2ih8v1es5d79 \
--discovery-token-ca-cert-hash sha256:2c8298c1e572f37919d6df24cb80984b421a25ffd06bcc8ba522afb0ce9a5f83 \
--control-plane --certificate-key c6a80e1929786899137bb0a765323fa0cb7c14fb8c0bedb61a0eaf1583a13abd

[root@k8s-master-2 tmp]# kubeadm join 192.168.3.251:6443 --token vx5j0a.7n1jgk7cj7hffkmy \
> --discovery-token-ca-cert-hash sha256:6055c9951d7d92d1243006e973a41a375b71b8e20ae4ccdf35ac4a7edfd4531a \
> --control-plane --certificate-key 5da3036c3748773980d0cc9ee4352ace20f6b3a5fbee5a5aad2a9ff0bba3ccd2
W0111 20:26:07.335263 8470 initconfiguration.go:120] Usage of CRI endpoints without URL scheme is deprecated and can cause kubelet errors in the future. Automatically prepending scheme "unix" to the "criSocket" with value "/var/run/cri-docker/cri-docker.sock". Please update your configuration!
[preflight] Running pre-flight checks
[WARNING Hostname]: hostname "k8s-master-2" could not be reached
[WARNING Hostname]: hostname "k8s-master-2": lookup k8s-master-2 on 192.168.2.84:53: no such host
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[preflight] Running pre-flight checks before initializing the new control plane instance
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[download-certs] Saving the certificates to the folder: "/etc/kubernetes/pki"
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [k8s-master-2 localhost] and IPs [192.168.2.176 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [k8s-master-2 localhost] and IPs [192.168.2.176 127.0.0.1 ::1]
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [k8s-master-2 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.2.176 192.168.3.251 127.0.0.1]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
[certs] Using the existing "sa" key
[kubeconfig] Generating kubeconfig files
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[check-etcd] Checking that the etcd cluster is healthy
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
[etcd] Announced new etcd member joining to the existing etcd cluster
[etcd] Creating static Pod manifest for "etcd"
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
[kubelet-check] Initial timeout of 40s passed.
The 'update-status' phase is deprecated and will be removed in a future release. Currently it performs no operation
[mark-control-plane] Marking the node k8s-master-2 as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node k8s-master-2 as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]

This node has joined the cluster and a new control plane instance was created:

* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.

To start administering your cluster from this node, you need to run the following as a regular user:

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

Run 'kubectl get nodes' to see this node join the cluster.

# Enable kubelet at boot
systemctl enable kubelet.service
# Check its status
systemctl status kubelet.service

# Troubleshooting: read the kubelet logs
journalctl -u kubelet

# master 节点查看节点
[root@k8s-master-1 tmp]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master-1 NotReady control-plane 5m56s v1.28.2
k8s-master-2 NotReady control-plane 10m v1.28.2
k8s-master-3 NotReady control-plane 10m v1.28.2

部署 node 节点(所有node节点执行)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
kubeadm join 192.168.3.251:6443 --token vx5j0a.7n1jgk7cj7hffkmy \
--discovery-token-ca-cert-hash sha256:6055c9951d7d92d1243006e973a41a375b71b8e20ae4ccdf35ac4a7edfd4531a
[root@k8s-node-1 ~]# kubeadm join 192.168.3.251:6443 --token vx5j0a.7n1jgk7cj7hffkmy \
> --discovery-token-ca-cert-hash sha256:6055c9951d7d92d1243006e973a41a375b71b8e20ae4ccdf35ac4a7edfd4531a
W0112 09:18:43.791610 356308 initconfiguration.go:120] Usage of CRI endpoints without URL scheme is deprecated and can cause kubelet errors in the future. Automatically prepending scheme "unix" to the "criSocket" with value "/var/run/cri-docker/cri-docker.sock". Please update your configuration!
[preflight] Running pre-flight checks
[WARNING Hostname]: hostname "k8s-node-1" could not be reached
[WARNING Hostname]: hostname "k8s-node-1": lookup k8s-node-1 on 192.168.2.84:53: no such host
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...

This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.

Run 'kubectl get nodes' on the control-plane to see this node join the cluster.

flannel cni 部署

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
cat <<EOF | kubectl create -f -
---
kind: Namespace
apiVersion: v1
metadata:
name: kube-flannel
labels:
k8s-app: flannel
pod-security.kubernetes.io/enforce: privileged
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
labels:
k8s-app: flannel
name: flannel
rules:
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- nodes/status
verbs:
- patch
- apiGroups:
- networking.k8s.io
resources:
- clustercidrs
verbs:
- list
- watch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
labels:
k8s-app: flannel
name: flannel
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: flannel
subjects:
- kind: ServiceAccount
name: flannel
namespace: kube-flannel
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-app: flannel
name: flannel
namespace: kube-flannel
---
kind: ConfigMap
apiVersion: v1
metadata:
name: kube-flannel-cfg
namespace: kube-flannel
labels:
tier: node
k8s-app: flannel
app: flannel
data:
cni-conf.json: |
{
"name": "cbr0",
"cniVersion": "0.3.1",
"plugins": [
{
"type": "flannel",
"delegate": {
"hairpinMode": true,
"isDefaultGateway": true
}
},
{
"type": "portmap",
"capabilities": {
"portMappings": true
}
}
]
}
net-conf.json: |
{
"Network": "10.244.0.0/16",
"Backend": {
"Type": "vxlan"
}
}
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: kube-flannel-ds
namespace: kube-flannel
labels:
tier: node
app: flannel
k8s-app: flannel
spec:
selector:
matchLabels:
app: flannel
template:
metadata:
labels:
tier: node
app: flannel
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/os
operator: In
values:
- linux
hostNetwork: true
priorityClassName: system-node-critical
tolerations:
- operator: Exists
effect: NoSchedule
serviceAccountName: flannel
initContainers:
- name: install-cni-plugin
image: docker.io/flannel/flannel-cni-plugin:v1.2.0
command:
- cp
args:
- -f
- /flannel
- /opt/cni/bin/flannel
volumeMounts:
- name: cni-plugin
mountPath: /opt/cni/bin
- name: install-cni
image: docker.io/flannel/flannel:v0.22.3
command:
- cp
args:
- -f
- /etc/kube-flannel/cni-conf.json
- /etc/cni/net.d/10-flannel.conflist
volumeMounts:
- name: cni
mountPath: /etc/cni/net.d
- name: flannel-cfg
mountPath: /etc/kube-flannel/
containers:
- name: kube-flannel
image: docker.io/flannel/flannel:v0.22.3
command:
- /opt/bin/flanneld
args:
- --ip-masq
- --kube-subnet-mgr
resources:
requests:
cpu: "100m"
memory: "50Mi"
securityContext:
privileged: false
capabilities:
add: ["NET_ADMIN", "NET_RAW"]
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: EVENT_QUEUE_DEPTH
value: "5000"
volumeMounts:
- name: run
mountPath: /run/flannel
- name: flannel-cfg
mountPath: /etc/kube-flannel/
- name: xtables-lock
mountPath: /run/xtables.lock
volumes:
- name: run
hostPath:
path: /run/flannel
- name: cni-plugin
hostPath:
path: /opt/cni/bin
- name: cni
hostPath:
path: /etc/cni/net.d
- name: flannel-cfg
configMap:
name: kube-flannel-cfg
- name: xtables-lock
hostPath:
path: /run/xtables.lock
type: FileOrCreate
EOF

集群测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
[root@k8s-master-1 tmp]# kubectl  get pod -A
kubectl get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-flannel kube-flannel-ds-775sk 1/1 Running 0 3m26s
kube-flannel kube-flannel-ds-px8vn 1/1 Running 0 3m26s
kube-system coredns-66f779496c-86psn 1/1 Running 0 13h
kube-system coredns-66f779496c-ptkdz 1/1 Running 0 13h
kube-system etcd-rocky 1/1 Running 3 13h
kube-system kube-apiserver-rocky 1/1 Running 14 (13h ago) 13h
kube-system kube-controller-manager-rocky 1/1 Running 6 (13h ago) 13h
kube-system kube-proxy-5rld2 1/1 Running 0 13h
kube-system kube-proxy-hkzts 1/1 Running 0 14m
kube-system kube-scheduler-rocky 1/1 Running 5 (13h ago) 13h
kube-system kube-vip-rocky 1/1 Running 9 (32m ago) 13h
# dns 测试
dig @10.96.0.10 www.qq.com
cat <<EOF | kubectl create -f -
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: net-tools
labels:
k8s-app: net-tools
spec:
selector:
matchLabels:
k8s-app: net-tools
template:
metadata:
labels:
k8s-app: net-tools
spec:
tolerations:
- effect: NoSchedule
operator: Exists
- key: CriticalAddonsOnly
operator: Exists
- effect: NoExecute
operator: Exists
containers:
- name: net-tools
image: juestnow/net-tools
command:
- /bin/sh
- "-c"
- set -e -x; tail -f /dev/null
resources:
limits:
memory: 30Mi
requests:
cpu: 50m
memory: 20Mi
dnsConfig:
options:
- name: single-request-reopen

EOF
[root@k8s-master-1 tmp]# kubectl get pod
NAME READY STATUS RESTARTS AGE
net-tools-8wxnf 0/1 ContainerCreating 0 18s
net-tools-bxdns 0/1 ContainerCreating 0 18s

[root@k8s-master-1 tmp]# kubectl get pod
NAME READY STATUS RESTARTS AGE
net-tools-8wxnf 1/1 Running 0 105s
net-tools-bxdns 1/1 Running 0 105s
[root@k8s-master-1 tmp]#
[root@k8s-master-1 tmp]#
[root@k8s-master-1 tmp]# kubectl exec -ti net-tools-8wxnf /bin/sh
/ # ping www.qq.com
PING www.qq.com (121.14.77.221): 56 data bytes
64 bytes from 121.14.77.221: seq=0 ttl=51 time=7.157 ms
^C
--- www.qq.com ping statistics ---
1 packets transmitted, 1 packets received, 0% packet loss
round-trip min/avg/max = 7.157/7.157/7.157 ms
#内网
/ # nc -vz kubernetes 443
kubernetes (10.96.0.1:443) open
/ # curl -k https://kubernetes
{
"kind": "Status",
"apiVersion": "v1",
"metadata": {},
"status": "Failure",
"message": "forbidden: User \"system:anonymous\" cannot get path \"/\"",
"reason": "Forbidden",
"details": {},
"code": 403
}/ #
# 内部解析正常
#证明集群网络正常

查看集群节点信息

1
2
3
4
5
6
7
8
NAME           STATUS   ROLES           AGE   VERSION   INTERNAL-IP     EXTERNAL-IP    OS-IMAGE                      KERNEL-VERSION                 CONTAINER-RUNTIME
k8s-master-1 Ready control-plane 13h v1.28.2 192.168.2.175 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
k8s-master-2 Ready control-plane 13h v1.28.2 192.168.2.176 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
k8s-master-3 Ready control-plane 13h v1.28.2 192.168.2.177 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
k8s-node-1 Ready <none> 13h v1.28.2 192.168.2.185 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
k8s-node-2 Ready <none> 13h v1.28.2 192.168.2.187 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
k8s-node-3 Ready <none> 13h v1.28.2 192.168.3.62 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26
k8s-node-4 Ready <none> 13h v1.28.2 192.168.3.70 <none> Rocky Linux 9.3 (Blue Onyx) 5.14.0-284.30.1.el9_2.x86_64 containerd://1.6.26

超大集群负载方案

由于 kube-vip 方案同一时间只有一个 master 对外提供服务,无法在多个 master 之间做负载均衡,下面改为在每个节点上启动一个本地代理(可以是 nginx 或 haproxy),集群内部组件统一通过 127.0.0.1 访问 apiserver。
以下使用镜像:

  • 项目地址 https://github.com/qist/k8s/tree/master/dockerfile/k8s-ha-master
  • nginx镜像 docker.io/juestnow/nginx-proxy:1.21.6
  • haproxy镜像 docker.io/juestnow/haproxy-proxy:2.5.4
  • prometheus 端口 8404
  • CP_HOSTS 后端 master ip 192.168.2.175,192.168.2.176,192.168.2.177
  • CPU_NUM 配置进程使用cpu 数量 4
  • BACKEND_PORT 后端端口 6443
  • HOST_PORT 代理监听端口 8443
  • 所有节点执行
  • kube-vip 的 VIP 地址留给 kubectl 和 CI/CD 工具使用
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
cat <<EOF | tee /etc/kubernetes/manifests/kube-lb.yaml
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: null
labels:
component: kube-lb
tier: control-plane
annotations:
prometheus.io/port: "8404"
prometheus.io/scrape: "true"
name: kube-lb
namespace: kube-system
spec:
containers:
- args:
- "CP_HOSTS=192.168.2.175,192.168.2.176,192.168.2.177"
image: docker.io/juestnow/haproxy-proxy:2.5.4
imagePullPolicy: IfNotPresent
name: kube-lb
env:
- name: CPU_NUM
value: "4"
- name: BACKEND_PORT
value: "6443"
- name: HOST_PORT
value: "8443"
- name: CP_HOSTS
value: "192.168.2.175,192.168.2.176,192.168.2.177"
hostNetwork: true
priorityClassName: system-cluster-critical
status: {}
EOF

# 查看是否部署完成
[root@k8s-master-1 ~]# kubectl -n kube-system get pod| grep kube-lb
kube-lb-k8s-master-1 1/1 Running 0 77s
kube-lb-k8s-master-2 1/1 Running 0 84s

# 进入节点查询端口是否监听
[root@k8s-master-1~]# ss -tnlp | grep 8443
LISTEN 0 4096 *:8443 *:* users:(("haproxy",pid=829813,fd=7))
# 替换 kubeconfig 文件中的 server 地址(master1 节点)
sed -i 's/192.168.2.175:6443/127.0.0.1:8443/g' /etc/kubernetes/controller-manager.conf
sed -i 's/192.168.2.175:6443/127.0.0.1:8443/g' /etc/kubernetes/scheduler.conf
sed -i 's/192.168.3.251:6443/127.0.0.1:8443/g' /etc/kubernetes/kubelet.conf

# controller-manager scheduler server 地址是本地ip
# master2 节点
sed -i 's/192.168.2.176:6443/127.0.0.1:8443/g' /etc/kubernetes/controller-manager.conf
sed -i 's/192.168.2.176:6443/127.0.0.1:8443/g' /etc/kubernetes/scheduler.conf
sed -i 's/192.168.3.251:6443/127.0.0.1:8443/g' /etc/kubernetes/kubelet.conf
# master3 节点
sed -i 's/192.168.2.177:6443/127.0.0.1:8443/g' /etc/kubernetes/controller-manager.conf
sed -i 's/192.168.2.177:6443/127.0.0.1:8443/g' /etc/kubernetes/scheduler.conf
sed -i 's/192.168.3.251:6443/127.0.0.1:8443/g' /etc/kubernetes/kubelet.conf

# 所有 node 节点
sed -i 's/192.168.3.251:6443/127.0.0.1:8443/g' /etc/kubernetes/kubelet.conf

# 重启 kubelet
systemctl restart kubelet

# 以修改 master1 为例
[root@k8s-master-1 ~]# netstat -tnp| grep kubelet
tcp 0 0 127.0.0.1:33892 127.0.0.1:8443 ESTABLISHED 832614/kubelet

# 查看 controller-manager scheduler CONTAINER ID
[root@master-1 ~]# crictl ps
CONTAINER IMAGE CREATED STATE NAME ATTEMPT POD ID POD
ea2189e1a86da bd4be6845ffba 12 minutes ago Running kube-lb 0 c11bebef002c1 kube-lb-master-1
62f1acd2683c4 ead0a4a53df89 38 minutes ago Running coredns 0 1c299fce6b19e coredns-66f779496c-86psn
619ccabe67ac7 ead0a4a53df89 38 minutes ago Running coredns 0 e8d5a7fc93544 coredns-66f779496c-ptkdz
d25e5a09017f4 e23f7ca36333c 38 minutes ago Running kube-flannel 0 7b27e08049458 kube-flannel-ds-775sk
983817065f5b3 35d002bc4cbfa About an hour ago Running kube-vip 9 e9d04ca9f5db9 kube-vip-master-1
1c2733db52682 cdcab12b2dd16 14 hours ago Running kube-apiserver 14 31488e8169f07 kube-apiserver-master-1
c16f1b235008f 55f13c92defb1 14 hours ago Running kube-controller-manager 6 9d14a61354bc4 kube-controller-manager-master-1
8b19001f00f0b 7a5d9d67a13f6 14 hours ago Running kube-scheduler 5 cc35049599e04 kube-scheduler-master-1
a1e3b1477ee15 c120fed2beb84 14 hours ago Running kube-proxy 0 c42361e6da312 kube-proxy-5rld2
24a99953e8dd5 73deb9a3f7025 14 hours ago Running etcd 3 933789ea5868d etcd-master-1

# 删除 controller-manager scheduler
crictl rm -f 8b19001f00f0b c16f1b235008f

# 查看 是否修改成功
[root@master-1 ~]# netstat -tnp| grep 8443
tcp 0 0 127.0.0.1:33892 127.0.0.1:8443 ESTABLISHED 832614/kubelet
tcp 0 0 127.0.0.1:36420 127.0.0.1:8443 ESTABLISHED 833501/kube-control
tcp 0 0 127.0.0.1:36446 127.0.0.1:8443 ESTABLISHED 833500/kube-schedul
tcp 0 0 127.0.0.1:36430 127.0.0.1:8443 ESTABLISHED 833500/kube-schedul
tcp6 0 0 127.0.0.1:8443 127.0.0.1:36430 ESTABLISHED 829813/haproxy
tcp6 0 0 127.0.0.1:8443 127.0.0.1:36446 ESTABLISHED 829813/haproxy
tcp6 0 0 127.0.0.1:8443 127.0.0.1:36420 ESTABLISHED 829813/haproxy
tcp6 0 0 127.0.0.1:8443 127.0.0.1:33892 ESTABLISHED 829813/haproxy

# 修改 kube-proxy kubeconfig 地址
kubectl -n kube-system edit cm kube-proxy

找到 server: https://192.168.3.251:6443
改成 server: https://127.0.0.1:8443

# 重启 kube-proxy pod
kubectl -n kube-system rollout restart daemonsets kube-proxy

# 查看是否启动成功
[root@master-1 ~]# netstat -tnp| grep kube-proxy
tcp 0 0 127.0.0.1:16228 127.0.0.1:8443 ESTABLISHED 836301/kube-proxy

在启用 Hyper-V 后,总是会产生各种端口被占用的问题

我遇到过的问题就有:

启动 IDEA 时,报错:
java.net.BindException: Address already in use: bind

使用 Clash for Windows 时,Could not connect to Clash Core,日志:
time="2020-07-28T07:08:37+08:00" level=error msg="External controller error: listen tcp 127.0.0.1:9090: bind: An attempt was made to access a socket in a way forbidden by its access permissions."

显然,这两个错误都是端口被占用造成的。

根据 Serge Baranov 在「IDEA Start Failed: Address already in use」中的回答,IDEA 会在 6942~6991 中寻找一个端口并 bind。由此可见,错误原因是这 50 个端口都已经被占用。

根据日志内容,第二个问题是 9090 端口被占用造成的

对于一般的端口占用问题,比如我的 27891 被占用,可以先查找出正在使用此端口的进程,再强行终止这个进程(如果失败可以试试以管理员身份):

1
2
3
4
5
6
PS C:\Users\hyuuko> netstat -ano | findstr 27891
TCP 127.0.0.1:8780 127.0.0.1:27891 SYN_SENT 6276
TCP 127.0.0.1:8802 127.0.0.1:27891 SYN_SENT 6276
PS C:\Users\hyuuko> taskkill /pid 6276 /F
成功: 已终止 PID 为 6276 的进程。
PS C:\Users\hyuuko>

然而,我使用 `netstat -ano | findstr 端口号` 命令时,发现 6942~6991 和 9090 并未被某个进程使用。这说明这些端口可能是被系统保留了,比如 Hyper-V。

根据「List of TCP and UDP port numbers - Wikipedia」所言,tcp/udp 端口号被分为 3 段:

端口类型 范围 用途
周知端口 0 - 1023 提供广泛使用的网络服务类型的系统进程使用
注册端口 1024 - 49151 给用户进程或应用程序使用,比如 IDEA
动态端口 49152-65535 用于私有或定制服务、临时目的以及临时端口的自动分配

Hyper-V 会将动态端口中的几段范围的端口保留给自己使用,用户的应用程序无法使用这些端口。从 Windows Vista 和 Windows Server 2008 起,Windows 将 49152~65535 划分为动态端口,见「Service overview and network port requirements for Windows」。然而在某次更新后,Windows 的动态端口范围变成了 1024~15000,我们可以查看动态端口范围和被保留的端口范围:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# 查看tcp ipv4动态端口范围
PS C:\Users\hyuuko> netsh int ipv4 show dynamicport tcp

协议 tcp 动态端口范围
---------------------------------
启动端口 : 1024
端口数 : 13977

# 查看tcp ipv4端口排除范围(被系统或者我们自己保留的端口)
PS C:\Users\hyuuko> netsh int ipv4 show excludedport tcp

协议 tcp 端口排除范围

开始端口 结束端口
---------- --------
1578 1677
1678 1777
太多了这里省略...
8974 9073
9074 9173
9174 9273
太多了这里省略...
11301 11400
11401 11500
50000 50059 *

* - 管理的端口排除。

PS C:\Users\hyuuko>

可以看到,9074~9173 等等范围内的端口被系统保留了(绝对是 Hyper-V 干的!),导致 clash 不能使用 9090 端口。现在知道原因了,有三种解决办法(我用的第二种)

第一种解决办法(不推荐)​​​​

更改 clash 使用的端口,将 9090 改成较高的 29091,可是治标不治本,因为 Hyper-V 下次可能就会将 29091 保留给自己用。再者,这还不能解决 IDEA 的问题,你不能改变 IDEA 想要使用的端口。

第二种解决办法​​​​

1
netsh int ipv4 set dynamicport tcp start=49152 num=16384

然后重启电脑。Hyper-V 就会从 49152~65535 范围内保留一部分端口,6942~6991 和 9090 不受影响。

查看一下此时的动态端口范围:

1
2
3
4
5
6
PS C:\Users\hyuuko> netsh int ipv4 show dynamicport tcp

协议 tcp 动态端口范围
---------------------------------
启动端口 : 49152
端口数 : 16384

第三种解决办法​

先以管理员身份打开 powershell,然后将 9090 等端口设置为排除端口给应用程序使用。

1
2
3
4
# 保留 6942~6951 这10个端口给应用程序使用
netsh int ipv4 add excludedportrange protocol=tcp startport=6942 numberofports=10
# 保留 9090 端口给应用程序使用
netsh int ipv4 add excludedportrange protocol=tcp startport=9090 numberofports=1

然后重启电脑。因为 9090 等端口被保留给应用程序使用了,Hyper-V 就无法将 9090 保留给自己使用了。

查看一下此时被保留的端口:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
PS C:\Users\hyuuko> netsh int ipv4 show excludedport tcp

协议 tcp 端口排除范围

开始端口 结束端口
---------- --------
1578 1677
1678 1777
太多了这里省略...
9090 9090 *
太多了这里省略...
11301 11400
11401 11500
太多了这里省略...

* - 管理的端口排除。

带星号的就是被管理员保留的端口,可以被应用程序使用

如果要取消保留端口,可以:

1
netsh int ipv4 delete excludedportrange protocol=tcp startport=9090 numberofports=1

环境说明:

#操作系统:CentOS Stream release 8 / Rocky Linux release 8.8
#containerd版本:1.7.11
#kubernetes版本:v1.28.4
#K8S master 节点IP:192.168.2.175
#K8S worker节点IP:192.168.3.62
#网络插件:flannel
#kube-proxy网络转发: ipvs
#kubernetes源: 阿里云镜像站
#service-cidr:10.96.0.0/16
#pod-network-cidr:10.244.0.0/16

部署准备:

操作在所有节点进行

1、修改内核参数

1
2
3
4
5
6
7
vim /etc/sysctl.conf
vm.swappiness=0
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-arptables=1
sysctl -p

2、关闭swap

1
2
3
4
swapoff -a && sysctl -w vm.swappiness=0
修改 fstab,不再挂载 swap(注释掉或删除下面这类 swap 行)
vi /etc/fstab
/dev/mapper/centos-swap swap swap defaults 0 0

3、将 SELinux 设置为 disabled 模式

1
2
setenforce 0
sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config

4、开启 cgroup2(经常升级内核的环境不建议执行,否则可能出现升级后新内核无法启动、只能用旧内核启动的问题)

1
2
3
grubby \
--update-kernel=ALL \
--args="systemd.unified_cgroup_hierarchy=1"

5、内核模块加载

1
2
3
4
5
6
7
8
9
10
11
12
cat <<EOF | tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
nf_conntrack
EOF

cat <<EOF | tee /etc/modules-load.d/ipvs.conf
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
EOF

6、重启系统使上面配置生效

reboot

7、安装依赖

1
dnf install -y   dnf-utils  ipvsadm  telnet  wget  net-tools  conntrack  ipset  jq  iptables  curl  sysstat  libseccomp  socat  nfs-utils  fuse  fuse-devel 

安装 containerd

1、下载containerd

1
2
3
4
5
cd /tmp
wget https://github.com/containerd/containerd/releases/download/v1.7.11/containerd-1.7.11-linux-amd64.tar.gz
tar -xvf containerd-1.7.11-linux-amd64.tar.gz
cd bin
cp -pdr * /usr/bin/

2、下载crictl

1
2
3
4
cd /tmp
wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.28.0/crictl-v1.28.0-linux-amd64.tar.gz
tar -xvf crictl-v1.28.0-linux-amd64.tar.gz
cp -pdr crictl /usr/bin/

3、下载cni

1
2
3
4
5
6
7
cd /tmp
mkdir -pv /opt/cni/bin
mkdir bin
cd bin
wget https://github.com/containernetworking/plugins/releases/download/v1.4.0/cni-plugins-linux-amd64-v1.4.0.tgz
tar -xvf cni-plugins-linux-amd64-v1.4.0.tgz
cp -pdr * /opt/cni/bin/

4、下载runc

1
2
3
4
cd /tmp
wget https://github.com/opencontainers/runc/releases/download/v1.1.10/runc.amd64
chmod +x runc.amd64
cp -pdr runc.amd64 /usr/bin/runc

containerd 配置

1、配置 containerd

1
2
3
4
5
6
7
8
9
10
11
12
 mkdir -p /etc/containerd/
#生成默认配置
containerd config default > /etc/containerd/config.toml
#修改配置
sandbox_image = "registry.k8s.io/pause:3.8" 改成国内地址
sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.8"
同时把 cgroup 驱动改为 systemd,即修改下面的配置项:

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
...
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
SystemdCgroup = true

2、创建启动配置文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
cat <<EOF | tee /usr/lib/systemd/system/containerd.service
[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=network.target

[Service]

ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/bin/containerd
KillMode=process
Delegate=yes
LimitNOFILE=655350
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity

[Install]
WantedBy=multi-user.target
EOF

3、创建存储挂载目录

1
2
3
4
5
6
7
8
9
10
11
12
13
mkdir -p /var/lib/containerd/
mkdir -p /apps/containerd/ # 改成你大硬盘路径

# 写入 /etc/fstab

echo "/apps/containerd /var/lib/containerd none defaults,bind,nofail 0 0" >>/etc/fstab

systemctl daemon-reload
# 挂载
mount -a
# 查看是否挂载
[root@k8s-master-1 containerd]# mount | grep containerd
/dev/vda3 on /var/lib/containerd type xfs (rw,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota)

4、设置开机自启并启动

1
systemctl enable containerd.service --now

5、查看进程是否启动

1
systemctl status containerd.service 

6、查看数据盘中是否有文件,有则证明挂载正确

ll /apps/containerd/

7、 创建crictl 配置

1
2
3
4
5
6
7
8
cat <<EOF | tee /etc/crictl.yaml
runtime-endpoint: "unix:///var/run/containerd/containerd.sock"
image-endpoint: "unix:///var/run/containerd/containerd.sock"
timeout: 10
debug: false
pull-image-on-create: true
disable-pull-on-run: false
EOF

8、查看配置是否生效

1
2
3
4
[root@k8s-master-1 containerd]# crictl info|  grep sandboxImage
"sandboxImage": "registry.aliyuncs.com/google_containers/pause:3.8",
[root@k8s-master-1 containerd]# crictl info| grep SystemdCgroup
"SystemdCgroup": true

安装 kubelet kubeadm kubectl

1、只需下载 kubernetes-node 包即可(其中已包含 kubeadm、kubectl、kubelet)

1
2
3
4
5
6
cd /tmp
wget https://dl.k8s.io/v1.28.4/kubernetes-node-linux-amd64.tar.gz

tar -xvf kubernetes-node-linux-amd64.tar.gz
cd kubernetes/node/bin
cp -pdr kubeadm kubectl kubelet /usr/bin/

配置kubelet

1、创建 kubelet 存储挂载

1
2
3
4
5
6
7
8
9
mkdir /var/lib/kubelet
mkdir /apps/kubelet
# 写入 /etc/fstab

echo "/apps/kubelet /var/lib/kubelet none defaults,bind,nofail 0 0" >>/etc/fstab

systemctl daemon-reload
# 挂载
mount -a

2、查看是否挂载

1
2
[root@k8s-master-1]# mount | grep kubelet
/dev/vda3 on /var/lib/kubelet type xfs (rw,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota)

3、创建kubelet 启动配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
mkdir -p  /usr/lib/systemd/system/kubelet.service.d/
cat <<EOF | tee /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf
# Note: This dropin only works with kubeadm and kubelet v1.11+
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
# This is a file that "kubeadm init" and "kubeadm join" generates at runtime, populating the KUBELET_KUBEADM_ARGS variable dynamically
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
# This is a file that the user can use for overrides of the kubelet args as a last resort. Preferably, the user should use
# the .NodeRegistration.KubeletExtraArgs object in the configuration files instead. KUBELET_EXTRA_ARGS should be sourced from this file.
EnvironmentFile=-/etc/sysconfig/kubelet
ExecStart=
ExecStart=/usr/bin/kubelet \$KUBELET_KUBECONFIG_ARGS \$KUBELET_CONFIG_ARGS \$KUBELET_KUBEADM_ARGS \$KUBELET_EXTRA_ARGS
EOF
cat <<EOF | tee /etc/sysconfig/kubelet
KUBELET_EXTRA_ARGS=
EOF

4、 创建 kubelet.service

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
cat <<EOF | tee /usr/lib/systemd/system/kubelet.service
[Unit]
Description=kubelet: The Kubernetes Node Agent
Documentation=https://kubernetes.io/docs/
Wants=network-online.target
After=network-online.target

[Service]
ExecStart=/usr/bin/kubelet
Restart=always
StartLimitInterval=0
RestartSec=10

[Install]
WantedBy=multi-user.target
EOF

5、刷新 service

1
systemctl daemon-reload

初始化kubernetes master

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
kubeadm init --apiserver-advertise-address=0.0.0.0 \
--apiserver-cert-extra-sans=127.0.0.1 \
--image-repository=registry.aliyuncs.com/google_containers \
--ignore-preflight-errors=all \
--service-cidr=10.96.0.0/16 \
--pod-network-cidr=10.244.0.0/16 \
--ignore-preflight-errors=all \
--cri-socket=unix:///var/run/containerd/containerd.sock

#初始化过程
[root@k8s-master-1 apps]# kubeadm init --apiserver-advertise-address=0.0.0.0 \
> --apiserver-cert-extra-sans=127.0.0.1 \
> --image-repository=registry.aliyuncs.com/google_containers \
> --ignore-preflight-errors=all \
> --service-cidr=10.96.0.0/16 \
> --pod-network-cidr=10.244.0.0/16 \
> --ignore-preflight-errors=all \
> --cri-socket=unix:///var/run/containerd/containerd.sock
[init] Using Kubernetes version: v1.28.4
[preflight] Running pre-flight checks
[WARNING FileExisting-tc]: tc not found in system path
[WARNING Hostname]: hostname "k8s-master-1" could not be reached
[WARNING Hostname]: hostname "k8s-master-1": lookup k8s-master-1 on 192.168.2.84:53: no such host
[WARNING Service-Kubelet]: kubelet service is not enabled, please run 'systemctl enable kubelet.service'
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
W1211 17:02:12.360608 38189 checks.go:835] detected that the sandbox image "registry.aliyuncs.com/google_containers/pause:3.8" of the container runtime is inconsistent with that used by kubeadm. It is recommended that using "registry.aliyuncs.com/google_containers/pause:3.9" as the CRI sandbox image.
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [k8s-master-1 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.2.175 127.0.0.1]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [k8s-master-1 localhost] and IPs [192.168.2.175 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [k8s-master-1 localhost] and IPs [192.168.2.175 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 8.504053 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --upload-certs
[mark-control-plane] Marking the node k8s-master-1 as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node k8s-master-1 as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
[bootstrap-token] Using token: j03vax.ed9rursqoz27olk6
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.2.175:6443 --token j03vax.ed9rursqoz27olk6 \
--discovery-token-ca-cert-hash sha256:8aa5fb17b6909dce425d1e8bafd41e85beeb18bfda6bba4025ff36662c2774f2
# 设置kubelet 开机启动
systemctl enable kubelet.service
# 查看启动状态
systemctl status kubelet.service

# 错误排查（查看 kubelet 日志）
journalctl -u kubelet
# 查看集群状态
[root@k8s-master-1 apps]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
scheduler Healthy ok
controller-manager Healthy ok
etcd-0 Healthy ok
# 查看集群pod
[root@k8s-master-1 apps]# kubectl get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system coredns-66f779496c-dk8sr 0/1 Pending 0 4m40s
kube-system coredns-66f779496c-vmqcl 0/1 Pending 0 4m40s
kube-system etcd-k8s-master-1 1/1 Running 1 4m52s
kube-system kube-apiserver-k8s-master-1 1/1 Running 1 4m57s
kube-system kube-controller-manager-k8s-master-1 1/1 Running 1 4m52s
kube-system kube-proxy-rmc6j 1/1 Running 0 4m40s
kube-system kube-scheduler-k8s-master-1 1/1 Running 1 4m53s

# 修改 kube-proxy 代理模式为 ipvs（编辑 ConfigMap 中 config.conf 的 mode 字段）
kubectl -n kube-system edit cm kube-proxy
    logging:
      flushFrequency: 0
      options:
        json:
          infoBufferSize: "0"
      verbosity: 0
    metricsBindAddress: ""
    mode: "ipvs" # 添加ipvs
    nodePortAddresses: null
# 让配置生效：删除 kube-proxy Pod，由 DaemonSet 自动重建并加载新配置（Pod 名称以实际环境为准）
kubectl -n kube-system delete pod kube-proxy-rmc6j
# 查看 kube-ipvs0 网卡是否创建
[root@k8s-master-1 apps]# ip a | grep kube-ipvs0
3: kube-ipvs0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
inet 10.96.0.1/32 scope global kube-ipvs0
inet 10.96.0.10/32 scope global kube-ipvs0
# 查看 ipvs 信息
[root@k8s-master-1 apps]# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 10.96.0.1:443 rr
-> 192.168.2.175:6443 Masq 1 0 0
TCP 10.96.0.10:53 rr
TCP 10.96.0.10:9153 rr
UDP 10.96.0.10:53 rr

部署 node 节点

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
kubeadm join 192.168.2.175:6443 --token j03vax.ed9rursqoz27olk6 \
--discovery-token-ca-cert-hash sha256:8aa5fb17b6909dce425d1e8bafd41e85beeb18bfda6bba4025ff36662c2774f2

[root@k8s-node-3 tmp]# kubeadm join 192.168.2.175:6443 --token j03vax.ed9rursqoz27olk6 \
> --discovery-token-ca-cert-hash sha256:8aa5fb17b6909dce425d1e8bafd41e85beeb18bfda6bba4025ff36662c2774f2
[preflight] Running pre-flight checks
[WARNING FileExisting-tc]: tc not found in system path
[WARNING Hostname]: hostname "k8s-node-3" could not be reached
[WARNING Hostname]: hostname "k8s-node-3": lookup k8s-node-3 on 192.168.2.84:53: no such host
[WARNING Service-Kubelet]: kubelet service is not enabled, please run 'systemctl enable kubelet.service'
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...

This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.

Run 'kubectl get nodes' on the control-plane to see this node join the cluster.

# 设置kubelet 开机启动
systemctl enable kubelet.service
# 查看启动状态
systemctl status kubelet.service

# 错误排查（查看 kubelet 日志）
journalctl -u kubelet

# master 节点查看节点
[root@k8s-master-1 tmp]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master-1 NotReady control-plane 31m v1.28.4
k8s-node-3 NotReady <none> 88s v1.28.4

flannel cni 部署

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# Deploy the flannel CNI (flannel v0.22.3, flannel-cni-plugin v1.2.0).
#
# The manifest creates, in order: the kube-flannel Namespace, the flannel
# ClusterRole / ClusterRoleBinding / ServiceAccount, the kube-flannel-cfg
# ConfigMap (CNI config + network config) and the kube-flannel-ds DaemonSet.
#
# NOTE(review): "Network" in net-conf.json (10.244.0.0/16) presumably has to
# match the pod CIDR the cluster was initialised with -- confirm against the
# kubeadm configuration used earlier in this document.
#
# The heredoc delimiter is quoted so the shell passes the YAML through
# verbatim (no parameter or command expansion).
kubectl create -f - <<'EOF'
---
kind: Namespace
apiVersion: v1
metadata:
  name: kube-flannel
  labels:
    k8s-app: flannel
    pod-security.kubernetes.io/enforce: privileged
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  labels:
    k8s-app: flannel
  name: flannel
rules:
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
- apiGroups:
  - networking.k8s.io
  resources:
  - clustercidrs
  verbs:
  - list
  - watch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  labels:
    k8s-app: flannel
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-flannel
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: flannel
  name: flannel
  namespace: kube-flannel
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-flannel
  labels:
    tier: node
    k8s-app: flannel
    app: flannel
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "10.244.0.0/16",
      "Backend": {
        "Type": "vxlan"
      }
    }
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-flannel
  labels:
    tier: node
    app: flannel
    k8s-app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      hostNetwork: true
      priorityClassName: system-node-critical
      tolerations:
      - operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      - name: install-cni-plugin
        image: docker.io/flannel/flannel-cni-plugin:v1.2.0
        command:
        - cp
        args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        volumeMounts:
        - name: cni-plugin
          mountPath: /opt/cni/bin
      - name: install-cni
        image: docker.io/flannel/flannel:v0.22.3
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
        image: docker.io/flannel/flannel:v0.22.3
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
        securityContext:
          privileged: false
          capabilities:
            add: ["NET_ADMIN", "NET_RAW"]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: EVENT_QUEUE_DEPTH
          value: "5000"
        volumeMounts:
        - name: run
          mountPath: /run/flannel
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
        - name: xtables-lock
          mountPath: /run/xtables.lock
      volumes:
      - name: run
        hostPath:
          path: /run/flannel
      - name: cni-plugin
        hostPath:
          path: /opt/cni/bin
      - name: cni
        hostPath:
          path: /etc/cni/net.d
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg
      - name: xtables-lock
        hostPath:
          path: /run/xtables.lock
          type: FileOrCreate
EOF

集群测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
[root@k8s-master-1 tmp]# kubectl  get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-flannel kube-flannel-ds-5mvkj 1/1 Running 0 57s
kube-flannel kube-flannel-ds-qccq4 1/1 Running 0 56s
kube-system coredns-66f779496c-dk8sr 1/1 Running 0 35m
kube-system coredns-66f779496c-vmqcl 1/1 Running 0 35m
kube-system etcd-k8s-master-1 1/1 Running 1 35m
kube-system kube-apiserver-k8s-master-1 1/1 Running 1 35m
kube-system kube-controller-manager-k8s-master-1 1/1 Running 1 35m
kube-system kube-proxy-65tbm 1/1 Running 0 5m51s
kube-system kube-proxy-rg882 1/1 Running 0 27m
kube-system kube-scheduler-k8s-master-1 1/1 Running 1 35m
# dns 测试
dig @10.96.0.10 www.qq.com
# Deploy a throw-away "net-tools" DaemonSet (one pod per node, default
# namespace) used for the in-cluster DNS / connectivity checks below.
#
# - The tolerations let the pod run on tainted nodes as well
#   (NoSchedule / NoExecute / CriticalAddonsOnly).
# - The container just idles on `tail -f /dev/null` so it can be entered
#   with `kubectl exec`.
#
# The heredoc delimiter is quoted so the shell passes the YAML through
# verbatim (no parameter or command expansion).
kubectl create -f - <<'EOF'
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: net-tools
  labels:
    k8s-app: net-tools
spec:
  selector:
    matchLabels:
      k8s-app: net-tools
  template:
    metadata:
      labels:
        k8s-app: net-tools
    spec:
      tolerations:
        - effect: NoSchedule
          operator: Exists
        - key: CriticalAddonsOnly
          operator: Exists
        - effect: NoExecute
          operator: Exists
      containers:
        - name: net-tools
          image: juestnow/net-tools
          command:
            - /bin/sh
            - "-c"
            - set -e -x; tail -f /dev/null
          resources:
            limits:
              memory: 30Mi
            requests:
              cpu: 50m
              memory: 20Mi
      dnsConfig:
        options:
          - name: single-request-reopen
EOF
[root@k8s-master-1 tmp]# kubectl get pod
NAME READY STATUS RESTARTS AGE
net-tools-8wxnf 0/1 ContainerCreating 0 18s
net-tools-bxdns 0/1 ContainerCreating 0 18s

[root@k8s-master-1 tmp]# kubectl get pod
NAME READY STATUS RESTARTS AGE
net-tools-8wxnf 1/1 Running 0 105s
net-tools-bxdns 1/1 Running 0 105s
[root@k8s-master-1 tmp]#
[root@k8s-master-1 tmp]#
[root@k8s-master-1 tmp]# kubectl exec -ti net-tools-8wxnf -- /bin/sh
/ # ping www.qq.com
PING www.qq.com (121.14.77.221): 56 data bytes
64 bytes from 121.14.77.221: seq=0 ttl=51 time=7.157 ms
^C
--- www.qq.com ping statistics ---
1 packets transmitted, 1 packets received, 0% packet loss
round-trip min/avg/max = 7.157/7.157/7.157 ms
# 内网（集群内部 Service）连通性测试
/ # nc -vz kubernetes 443
kubernetes (10.96.0.1:443) open
/ # curl -k https://kubernetes
{
"kind": "Status",
"apiVersion": "v1",
"metadata": {},
"status": "Failure",
"message": "forbidden: User \"system:anonymous\" cannot get path \"/\"",
"reason": "Forbidden",
"details": {},
"code": 403
}/ #
# 集群内部域名解析正常
# 以上结果证明集群网络正常