Notes on installing Kubernetes (K8s)
Kubespray makes it easy to stand up a production-ready Kubernetes cluster quickly. Beyond the initial build, day-to-day maintenance can also be done with it, saving a lot of Kubernetes setup and maintenance work. https://kubespray.io
The main concern at deployment time is high availability: https://kubespray.io/#/docs/ha-mode. By default kubespray sets up an nginx-based proxy on every worker node; each worker node talks to the nginx on localhost, and that nginx in turn connects to the control-plane nodes.
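As an illustration only (kubespray renders this file itself and the exact template differs), the localhost proxy on a worker node is conceptually an nginx stream proxy in front of the apiservers; the addresses below are the k8s-1..k8s-3 nodes defined later in /etc/hosts:
stream {
  upstream kube_apiserver {
    server 192.168.5.141:6443;
    server 192.168.5.142:6443;
    server 192.168.5.143:6443;
  }
  server {
    # workloads on the node reach the apiservers via 127.0.0.1:6443
    listen 127.0.0.1:6443;
    proxy_pass kube_apiserver;
  }
}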
Deployment
In the lab environment, prepare the Rocky Linux machines and finish the base OS setup first. Recommended specs: 2 CPU + 1.5 GB RAM.
allow IPv4 forwarding
firewalld
to be filled in (sketch below)
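Both items above are marked as to-do; a minimal sketch of what they usually amount to on Rocky Linux (the full port list and sysctl settings appear again in the installation steps below):
# enable IPv4 forwarding and make it persistent
echo 'net.ipv4.ip_forward = 1' | sudo tee /etc/sysctl.d/99-ipv4-forward.conf
sudo sysctl --system
# open the apiserver port in firewalld (remaining ports are listed further below)
sudo firewall-cmd --permanent --add-port=6443/tcp
sudo firewall-cmd --reload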
Prepare Ansible
Ansible lets repetitive work be run as scripts (playbooks).
https://computingforgeeks.com/install-kubernetes-cluster-on-rocky-linux-with-kubeadm-crio/
# Install dependencies from ``requirements.txt``
sudo pip3 install -r requirements.txt
# Copy ``inventory/sample`` as ``inventory/mycluster``
cp -rfp inventory/sample inventory/mycluster
# Update Ansible inventory file with inventory builder
declare -a IPS=(10.10.1.3 10.10.1.4 10.10.1.5)
CONFIG_FILE=inventory/mycluster/hosts.yaml python3 contrib/inventory_builder/inventory.py ${IPS[@]}
# Review and change parameters under ``inventory/mycluster/group_vars``
cat inventory/mycluster/group_vars/all/all.yml
cat inventory/mycluster/group_vars/k8s_cluster/k8s-cluster.yml
# Deploy Kubespray with Ansible Playbook - run the playbook as root
# The option `--become` is required, as for example writing SSL keys in /etc/,
# installing packages and interacting with various systemd daemons.
# Without --become the playbook will fail to run!
# syntax check
ansible-playbook --syntax-check -i inventory/mycluster/hosts.yaml --become --become-user=root cluster.yml
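Before running the full playbook it is worth a quick ad-hoc connectivity check against the generated inventory (a sketch using standard Ansible modules):
# confirm SSH and privilege escalation work on every host
ansible -i inventory/mycluster/hosts.yaml all -m ping --become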
Remove the Rocky Linux hostname setting.
Start the deployment:
ansible-playbook -i inventory/mycluster/hosts.yaml --become --become-user=root cluster.yml
Install Rocky Linux 8
Finish the IP and hostname configuration, then copy the SSH keys:
ssh-keygen -t rsa -b 4096 -N ''
# Copy SSH keys to all Kubernetes cluster nodes
for host in k8s-{1..3}; do
ssh-copy-id root@$host
done
# Worker Nodes
for host in k8s-{4..6}; do
ssh-copy-id root@$host
done
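A quick loop to confirm key-based root login works on every node before handing them over to Kubespray (a sketch; BatchMode makes ssh fail instead of prompting for a password):
for host in k8s-{1..6}; do
  ssh -o BatchMode=yes root@$host hostname
done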
Deploy the nodes with Kubespray
git clone https://github.com/kubernetes-sigs/kubespray.git
Installation
#set the system hostname and update DNS in your /etc/hosts
cat <<EOF>> /etc/hosts
192.168.5.141 k8s-1
192.168.5.142 k8s-2
192.168.5.143 k8s-3
192.168.5.200 k8s-endpoint.localdomain
EOF
# disable swap
sudo sed -i '/swap/d' /etc/fstab
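The sed above only removes the fstab entry, so swap stays active until the next reboot; it is common to also turn it off immediately:
sudo swapoff -a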
# install package
yum -y install git wget curl vim bash-completion jq tar iproute-tc
# config SELinux
sed -i --follow-symlinks 's/SELINUX=enforcing/SELINUX=permissive/g' /etc/sysconfig/selinux
reboot
# firewall
firewall-cmd --permanent --add-port=6443/tcp
firewall-cmd --permanent --add-port=2379-2380/tcp
firewall-cmd --permanent --add-port=10250/tcp
firewall-cmd --permanent --add-port=10251/tcp
firewall-cmd --permanent --add-port=10252/tcp
firewall-cmd --permanent --add-port=10255/tcp
firewall-cmd --reload
# containerd
cat <<EOF | sudo tee /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF
sudo modprobe overlay
sudo modprobe br_netfilter
# Setup required sysctl params, these persist across reboots.
cat <<EOF | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
# Apply sysctl params without reboot
sudo sysctl --system
dnf config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
yum install -y containerd.io
sudo mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml
sudo systemctl restart containerd
nano /etc/containerd/config.toml
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
...
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
SystemdCgroup = true
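If you prefer a non-interactive edit, a sed one-liner can flip the flag in the default config generated above (assuming the stock SystemdCgroup = false line), followed by a restart so it takes effect:
sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
sudo systemctl restart containerd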
sudo systemctl enable --now containerd
# install kubernetes
cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-\$basearch
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
exclude=kubelet kubeadm kubectl
EOF
sudo yum install -y kubelet kubeadm kubectl --disableexcludes=kubernetes
# kubelet extra args: let the kubelet start even with swap enabled
vi /etc/sysconfig/kubelet
KUBELET_EXTRA_ARGS="--fail-swap-on=false"
sudo systemctl enable --now kubelet
Init: single node
Create the first master node
kubeadm init --pod-network-cidr=172.16.0.0/16
# install cni
wget https://docs.projectcalico.org/manifests/tigera-operator.yaml
wget https://docs.projectcalico.org/manifests/custom-resources.yaml
# edit the cidr: field in custom-resources.yaml to match the pod network CIDR
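The stock custom-resources.yaml ships with a default pool CIDR (192.168.0.0/16 at the time of writing) that has to match the --pod-network-cidr passed to kubeadm; for example:
sed -i 's#cidr: 192.168.0.0/16#cidr: 172.16.0.0/16#' custom-resources.yaml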
kubectl create -f https://docs.projectcalico.org/manifests/tigera-operator.yaml
kubectl create -f https://docs.projectcalico.org/manifests/custom-resources.yaml
# check status
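For the status check, something along these lines:
kubectl get nodes
kubectl get pods -A
kubectl get pods -n calico-system   # Calico pods should reach Running once the CNI is up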
# to undo the steps above:
# kubeadm reset
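For a genuine single-node cluster you usually also have to remove the control-plane taint so ordinary pods can be scheduled (the taint key shown is for recent Kubernetes releases; older ones used node-role.kubernetes.io/master):
kubectl taint nodes --all node-role.kubernetes.io/control-plane-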
Init: multiple nodes (HA)
High availability with kube-vip: generating a manifest
export VIP=192.168.5.200
export INTERFACE=ens160
# Get the latest version of the kube-vip
KVVERSION=$(curl -sL https://api.github.com/repos/kube-vip/kube-vip/releases | jq -r ".[0].name")
alias kube-vip="ctr run --rm --net-host ghcr.io/kube-vip/kube-vip:$KVVERSION vip /kube-vip"
ctr image pull ghcr.io/kube-vip/kube-vip:$KVVERSION
# if kubeadm reset was run, this step must be redone
# generate the manifest
kube-vip manifest pod \
--interface $INTERFACE \
--address $VIP \
--controlplane \
--services \
--arp \
--leaderElection | tee /etc/kubernetes/manifests/kube-vip.yaml
# create the cluster (either endpoint form points at the VIP; see /etc/hosts above)
kubeadm init --control-plane-endpoint "k8s-endpoint.localdomain:6443" --upload-certs
kubeadm init --control-plane-endpoint 192.168.5.200:6443 --pod-network-cidr=172.16.0.0/16 --upload-certs
# install cni
wget https://docs.projectcalico.org/manifests/tigera-operator.yaml
wget https://docs.projectcalico.org/manifests/custom-resources.yaml
# edit the cidr: field in custom-resources.yaml (same sed as in the single-node section above)
kubectl create -f https://docs.projectcalico.org/manifests/tigera-operator.yaml
kubectl create -f https://docs.projectcalico.org/manifests/custom-resources.yaml
Cluster created. The kubeadm output gives the follow-up steps:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:
kubeadm join 192.168.5.200:6443 --token fzxw27.lnfylsxol4m2cdby \
--discovery-token-ca-cert-hash sha256:5f331fcdfcfe7c66bdfe8400be35d224494de1eebd51f0da90a69a345b9856e5 \
--control-plane
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.5.200:6443 --token fzxw27.lnfylsxol4m2cdby \
--discovery-token-ca-cert-hash sha256:5f331fcdfcfe7c66bdfe8400be35d224494de1eebd51f0da90a69a345b9856e5
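A few quick checks that the kube-vip VIP is actually fronting the API (illustrative; substitute your own interface and VIP):
ip addr show ens160 | grep 192.168.5.200    # the VIP should be bound on the current leader
curl -k https://192.168.5.200:6443/version  # the apiserver should answer through the VIP
kubectl get nodes -o wide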
Troubleshooting
# node STATUS NotReady
systemctl status kubelet
journalctl -xeu kubelet
kubectl get nodes
kubectl get pods -A
kubectl logs -n tigera-operator -l k8s-app=tigera-operator
# cleanup
kubectl config get-clusters
kubectl config delete-cluster kubernetes
Additional material
dnf install keepalived haproxy -y
# keepalived
mv /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf.old
nano /etc/keepalived/keepalived.conf
# official sample
! /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 3
weight -2
fall 10
rise 2
}
vrrp_instance VI_1 {
state ${STATE}
interface ${INTERFACE}
virtual_router_id ${ROUTER_ID}
priority ${PRIORITY}
authentication {
auth_type PASS
auth_pass ${AUTH_PASS}
}
virtual_ipaddress {
${APISERVER_VIP}
}
track_script {
check_apiserver
}
}
# placeholders to fill in
There are some placeholders in bash variable style to fill in:
${STATE} is MASTER for one and BACKUP for all other hosts, hence the virtual IP will initially be assigned to the MASTER.
${INTERFACE} is the network interface taking part in the negotiation of the virtual IP, e.g. eth0.
${ROUTER_ID} should be the same for all keepalived cluster hosts while unique amongst all clusters in the same subnet. Many distros pre-configure its value to 51.
${PRIORITY} should be higher on the control plane node than on the backups. Hence 101 and 100 respectively will suffice.
${AUTH_PASS} should be the same for all keepalived cluster hosts, e.g. 42
${APISERVER_VIP} is the virtual IP address negotiated between the keepalived cluster hosts.
Actual config (master)
! /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 3
weight -2
fall 10
rise 2
}
vrrp_instance VI_1 {
state MASTER
interface ens160
virtual_router_id 101
priority 101
authentication {
auth_type PASS
auth_pass vu-al-541931u/4
}
virtual_ipaddress {
192.168.5.200/24
}
track_script {
check_apiserver
}
}
Actual config (backup)
! /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 3
weight -2
fall 10
rise 2
}
vrrp_instance VI_1 {
state BACKUP
interface ens160
virtual_router_id 101
priority 100
authentication {
auth_type PASS
auth_pass vu-al-541931u/4
}
virtual_ipaddress {
192.168.5.200/24
}
track_script {
check_apiserver
}
}
check script /etc/keepalived/check_apiserver.sh
#!/bin/sh
errorExit() {
echo "*** $*" 1>&2
exit 1
}
curl --silent --max-time 2 --insecure https://localhost:${APISERVER_DEST_PORT}/ -o /dev/null || errorExit "Error GET https://localhost:${APISERVER_DEST_PORT}/"
if ip addr | grep -q ${APISERVER_VIP}; then
curl --silent --max-time 2 --insecure https://${APISERVER_VIP}:${APISERVER_DEST_PORT}/ -o /dev/null || errorExit "Error GET https://${APISERVER_VIP}:${APISERVER_DEST_PORT}/"
fi
Placeholders to fill in:
${APISERVER_VIP} is the virtual IP address negotiated between the keepalived cluster hosts.
${APISERVER_DEST_PORT} the port through which Kubernetes will talk to the API Server.
Actual example
#!/bin/sh
errorExit() {
echo "*** $*" 1>&2
exit 1
}
curl --silent --max-time 2 --insecure https://localhost:6443/ -o /dev/null || errorExit "Error GET https://localhost:6443/"
if ip addr | grep -q 192.168.5.200; then
curl --silent --max-time 2 --insecure https://192.168.5.200:6443/ -o /dev/null || errorExit "Error GET https://192.168.5.200:6443/"
fi
Start the services
firewall-cmd --permanent --zone=public --add-protocol=vrrp
firewall-cmd --reload
chmod +x /etc/keepalived/check_apiserver.sh
systemctl enable keepalived
systemctl restart keepalived
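On the MASTER node the VIP should now show up on the interface; a quick sanity check (sketch):
systemctl status keepalived
ip addr show ens160 | grep 192.168.5.200
journalctl -u keepalived --no-pager | tail -n 20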
haproxy
# /etc/haproxy/haproxy.cfg
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
log /dev/log local0
log /dev/log local1 notice
daemon
#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
mode http
log global
option httplog
option dontlognull
option http-server-close
option forwardfor except 127.0.0.0/8
option redispatch
retries 1
timeout http-request 10s
timeout queue 20s
timeout connect 5s
timeout client 20s
timeout server 20s
timeout http-keep-alive 10s
timeout check 10s
#---------------------------------------------------------------------
# apiserver frontend which proxys to the control plane nodes
#---------------------------------------------------------------------
frontend apiserver
bind *:${APISERVER_DEST_PORT}
mode tcp
option tcplog
default_backend apiserver
#---------------------------------------------------------------------
# round robin balancing for apiserver
#---------------------------------------------------------------------
backend apiserver
option httpchk GET /healthz
http-check expect status 200
mode tcp
option ssl-hello-chk
balance roundrobin
server ${HOST1_ID} ${HOST1_ADDRESS}:${APISERVER_SRC_PORT} check
# [...]
Placeholders to fill in:
${APISERVER_DEST_PORT} the port through which Kubernetes will talk to the API Server.
${APISERVER_SRC_PORT} the port used by the API Server instances
${HOST1_ID} a symbolic name for the first load-balanced API Server host
${HOST1_ADDRESS} a resolvable address (DNS name, IP address) for the first load-balanced API Server host
additional server lines, one for each load-balanced API Server host
Actual example
# /etc/haproxy/haproxy.cfg
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
log /dev/log local0
log /dev/log local1 notice
daemon
#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
mode http
log global
option httplog
option dontlognull
option http-server-close
option forwardfor except 127.0.0.0/8
option redispatch
retries 1
timeout http-request 10s
timeout queue 20s
timeout connect 5s
timeout client 20s
timeout server 20s
timeout http-keep-alive 10s
timeout check 10s
#---------------------------------------------------------------------
# apiserver frontend which proxys to the control plane nodes
#---------------------------------------------------------------------
frontend apiserver
bind *:6443
mode tcp
option tcplog
default_backend apiserver
#---------------------------------------------------------------------
# round robin balancing for apiserver
#---------------------------------------------------------------------
backend apiserver
option httpchk GET /healthz
http-check expect status 200
mode tcp
option ssl-hello-chk
balance roundrobin
# control-plane nodes (addresses from the /etc/hosts entries above)
server k8s-1 192.168.5.141:6443 check
server k8s-2 192.168.5.142:6443 check
server k8s-3 192.168.5.143:6443 check
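After filling in the backend, haproxy still has to be opened in the firewall and started (a sketch; note that if haproxy shares a node with an apiserver already bound to 6443, the frontend port must differ):
firewall-cmd --permanent --add-port=6443/tcp
firewall-cmd --reload
systemctl enable --now haproxy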
Container runtime: CRI-O
# Create the .conf file to load the modules at bootup
cat <<EOF | sudo tee /etc/modules-load.d/crio.conf
overlay
br_netfilter
EOF
sudo modprobe overlay
sudo modprobe br_netfilter
# Set up required sysctl params, these persist across reboots.
cat <<EOF | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
sudo sysctl --system
# Install CRI-O
curl https://raw.githubusercontent.com/cri-o/cri-o/main/scripts/get | bash
# write the cgroup-manager drop-in before starting crio so it takes effect on first start
cat <<EOF | sudo tee /etc/crio/crio.conf.d/02-cgroup-manager.conf
[crio.runtime]
conmon_cgroup = "pod"
cgroup_manager = "cgroupfs"
EOF
sudo systemctl daemon-reload
sudo systemctl enable crio --now
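A quick check that CRI-O started with the override, plus a reminder: with cgroup_manager = "cgroupfs" the kubelet has to be configured with the matching cgroup driver (sketch):
systemctl status crio
journalctl -u crio --no-pager | tail -n 20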