  1. #!/usr/bin/env bash
  2. # Reference: https://raw.githubusercontent.com/lework/kainstall/v1.4.9/kainstall-centos.sh
  3. [[ -n $DEBUG ]] && set -x
  4. set -o errtrace # Make sure any error trap is inherited
  5. set -o nounset # Disallow expansion of unset variables
  6. set -o pipefail # Use last non-zero exit code in a pipeline
  7. # Component versions
  8. KUBE_VERSION="${KUBE_VERSION:-latest}"
  9. FLANNEL_VERSION="${FLANNEL_VERSION:-0.17.0}"
  10. CALICO_VERSION="${CALICO_VERSION:-3.22.1}"
  11. CILIUM_VERSION="${CILIUM_VERSION:-1.9.13}"
  12. HELM_VERSION="${HELM_VERSION:-3.10.1}"
  13. INGRESS_NGINX="${INGRESS_NGINX:-4.2.5}"
  14. RANCHER_VERSION="${RANCHER_VERSION:-2.6.9}"
  15. #METRICS_SERVER_VERSION="${METRICS_SERVER_VERSION:-0.6.1}"
  16. #KUBE_PROMETHEUS_VERSION="${KUBE_PROMETHEUS_VERSION:-0.10.0}"
  17. #ELASTICSEARCH_VERSION="${ELASTICSEARCH_VERSION:-8.1.0}"
  18. #ROOK_VERSION="${ROOK_VERSION:-1.8.7}"
  19. #LONGHORN_VERSION="${LONGHORN_VERSION:-1.2.4}"
  20. # Cluster configuration
  21. KUBE_DNSDOMAIN="${KUBE_DNSDOMAIN:-cluster.local}"
  22. KUBE_APISERVER="${KUBE_APISERVER:-apiserver.$KUBE_DNSDOMAIN}"
  23. KUBE_POD_SUBNET="${KUBE_POD_SUBNET:-10.244.0.0/16}"
  24. KUBE_SERVICE_SUBNET="${KUBE_SERVICE_SUBNET:-10.96.0.0/16}"
  25. KUBE_IMAGE_REPO="${KUBE_IMAGE_REPO:-registry.cn-hangzhou.aliyuncs.com/kainstall}"
  26. KUBE_NETWORK="${KUBE_NETWORK:-flannel}"
  27. KUBE_INGRESS="${KUBE_INGRESS:-nginx}"
  28. KUBE_MONITOR="${KUBE_MONITOR:-prometheus}"
  29. KUBE_STORAGE="${KUBE_STORAGE:-rook}"
  30. KUBE_LOG="${KUBE_LOG:-elasticsearch}"
  31. KUBE_FLANNEL_TYPE="${KUBE_FLANNEL_TYPE:-vxlan}"
  32. KUBE_CRI="${KUBE_CRI:-docker}"
  33. KUBE_CRI_VERSION="${KUBE_CRI_VERSION:-latest}"
  34. KUBE_CRI_ENDPOINT="${KUBE_CRI_ENDPOINT:-/var/run/dockershim.sock}"
  35. # Master and worker node addresses, comma-separated
  36. MASTER_NODES="${MASTER_NODES:-}"
  37. WORKER_NODES="${WORKER_NODES:-}"
  38. # Node on which management operations run
  39. MGMT_NODE="${MGMT_NODE:-127.0.0.1}"
  40. # SSH connection settings for the nodes
  41. SSH_USER="${SSH_USER:-root}"
  42. SSH_PASSWORD="${SSH_PASSWORD:-}"
  43. SSH_PRIVATE_KEY="${SSH_PRIVATE_KEY:-}"
  44. SSH_PORT="${SSH_PORT:-22}"
  45. SUDO_USER="${SUDO_USER:-root}"
  46. # Node settings
  47. HOSTNAME_PREFIX="${HOSTNAME_PREFIX:-k8s}"
  48. # nginx port configuration
  49. NGINX_HTTP_PORT="${NGINX_HTTP_PORT:-80}"
  50. # Script settings
  51. TMP_DIR="$(rm -rf /tmp/kainstall* && mktemp -d -t kainstall.XXXXXXXXXX)"
  52. LOG_FILE="${TMP_DIR}/kainstall.log"
  53. SSH_OPTIONS="-o ConnectTimeout=600 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
  54. ERROR_INFO="\n\033[31mERROR Summary: \033[0m\n "
  55. ACCESS_INFO="\n\033[32mACCESS Summary: \033[0m\n "
  56. COMMAND_OUTPUT=""
  57. SCRIPT_PARAMETER="$*"
  58. OFFLINE_DIR="/tmp/kainstall-offline-file/"
  59. OFFLINE_FILE=""
  60. OS_SUPPORT="centos7 centos8 rocky8.7"
  61. GITHUB_PROXY="${GITHUB_PROXY:-https://ghproxy.com/}"
  62. GCR_PROXY="${GCR_PROXY:-k8sgcr.lework.workers.dev}"
  63. SKIP_UPGRADE_PLAN=${SKIP_UPGRADE_PLAN:-false}
  64. SKIP_SET_OS_REPO=${SKIP_SET_OS_REPO:-false}
  65. trap trap::info 1 2 3 15 EXIT
  66. ######################################################################################################
  67. # Common functions
  68. ######################################################################################################
  69. # Signal handling
  70. function trap::info() {
  71. [[ ${#ERROR_INFO} -gt 37 ]] && echo -e "$ERROR_INFO"
  72. [[ ${#ACCESS_INFO} -gt 38 ]] && echo -e "$ACCESS_INFO"
  73. [ -f "$LOG_FILE" ] && echo -e "\n\n See detailed log >>> $LOG_FILE \n\n"
  74. trap '' EXIT
  75. exit
  76. }
  77. # Error log
  78. function log::error() {
  79. local item; item="[$(date +'%Y-%m-%dT%H:%M:%S.%N%z')]: \033[31mERROR: \033[0m$*"
  80. ERROR_INFO="${ERROR_INFO}${item}\n "
  81. echo -e "${item}" | tee -a "$LOG_FILE"
  82. }
  83. # Info log
  84. function log::info() {
  85. printf "[%s]: \033[32mINFO: \033[0m%s\n" "$(date +'%Y-%m-%dT%H:%M:%S.%N%z')" "$*" | tee -a "$LOG_FILE"
  86. }
  87. # Warning log
  88. function log::warning() {
  89. printf "[%s]: \033[33mWARNING: \033[0m%s\n" "$(date +'%Y-%m-%dT%H:%M:%S.%N%z')" "$*" | tee -a "$LOG_FILE"
  90. }
  91. # Access info log
  92. function log::access() {
  93. ACCESS_INFO="${ACCESS_INFO}$*\n "
  94. printf "[%s]: \033[32mINFO: \033[0m%s\n" "$(date +'%Y-%m-%dT%H:%M:%S.%N%z')" "$*" | tee -a "$LOG_FILE"
  95. }
  96. # Exec log
  97. function log::exec() {
  98. printf "[%s]: \033[34mEXEC: \033[0m%s\n" "$(date +'%Y-%m-%dT%H:%M:%S.%N%z')" "$*" >> "$LOG_FILE"
  99. }
  100. # Convert a version string to a comparable number
  101. function utils::version_to_number() {
  102. echo "$@" | awk -F. '{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }';
  103. }
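# Illustrative examples (not executed) of the padding scheme above: version
# strings of up to four dot-separated fields become zero-padded integers, so
# they can be compared numerically:
#   utils::version_to_number "1.23.4"  ->  1023004000
#   utils::version_to_number "1.9.13"  ->  1009013000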
  104. # Retry a command with exponential backoff
  105. function utils::retry() {
  106. local retries=$1
  107. shift
  108. local count=0
  109. until eval "$*"; do
  110. exit=$?
  111. wait=$((2 ** count))
  112. count=$((count + 1))
  113. if [ "$count" -lt "$retries" ]; then
  114. echo "Retry $count/$retries exited $exit, retrying in $wait seconds..."
  115. sleep $wait
  116. else
  117. echo "Retry $count/$retries exited $exit, no more retries left."
  118. return $exit
  119. fi
  120. done
  121. return 0
  122. }
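# Usage sketch (not executed): make up to 5 attempts at a flaky command with
# exponential backoff; the waits between attempts are 1s, 2s, 4s and 8s. The
# URL below is only a placeholder:
#   utils::retry 5 "curl -sfL https://example.com/healthz"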
  123. # Quote arguments, escaping embedded single quotes
  124. function utils::quote() {
  125. # shellcheck disable=SC2046
  126. if [ $(echo "$*" | tr -d "\n" | wc -c) -eq 0 ]; then
  127. echo "''"
  128. elif [ $(echo "$*" | tr -d "[a-z][A-Z][0-9]:,.=~_/\n-" | wc -c) -gt 0 ]; then
  129. printf "%s" "$*" | sed -e "1h;2,\$H;\$!d;g" -e "s/'/\'\"\'\"\'/g" | sed -e "1h;2,\$H;\$!d;g" -e "s/^/'/g" -e "s/$/'/g"
  130. else
  131. echo "$*"
  132. fi
  133. }
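# Illustrative behaviour (not executed), assuming typical inputs:
#   utils::quote ""                  ->  ''                   (empty input)
#   utils::quote "a=1,b=2"           ->  a=1,b=2              (safe characters pass through)
#   utils::quote "echo hello world"  ->  'echo hello world'   (spaces trigger single-quoting)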
  134. # Download a file
  135. function utils::download_file() {
  136. local url="$1"
  137. local dest="$2"
  138. local unzip_tag="${3:-1}"
  139. local dest_dirname; dest_dirname=$(dirname "$dest")
  140. local filename; filename=$(basename "$dest")
  141. log::info "[download]" "${filename}"
  142. command::exec "${MGMT_NODE}" "
  143. set -e
  144. if [ ! -f \"${dest}\" ]; then
  145. [ ! -d \"${dest_dirname}\" ] && mkdir -pv \"${dest_dirname}\"
  146. wget --timeout=10 --waitretry=3 --tries=5 --retry-connrefused --no-check-certificate \"${url}\" -O \"${dest}\"
  147. if [[ \"${unzip_tag}\" == \"unzip\" ]]; then
  148. command -v unzip 2>/dev/null || yum install -y unzip
  149. unzip -o \"${dest}\" -d \"${dest_dirname}\"
  150. fi
  151. else
  152. echo \"${dest} is exists!\"
  153. fi
  154. "
  155. local status="$?"
  156. check::exit_code "$status" "download" "${filename}" "exit"
  157. return "$status"
  158. }
  159. # Check whether an element exists in an array
  160. function utils::is_element_in_array() {
  161. local -r element="${1}"
  162. local -r array=("${@:2}")
  163. local walker=''
  164. for walker in "${array[@]}"
  165. do
  166. [[ "${walker}" = "${element}" ]] && return 0
  167. done
  168. return 1
  169. }
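# Usage sketch (not executed): returns 0 when the first argument matches an
# array element, 1 otherwise. The addresses below are placeholders:
#   nodes=("192.168.1.10" "192.168.1.11")
#   utils::is_element_in_array "192.168.1.10" "${nodes[@]}" && echo "known node"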
  170. # Execute a command locally or over SSH
  171. function command::exec() {
  172. local host=${1:-}
  173. shift
  174. local command="$*"
  175. if [[ "${SUDO_TAG:-}" == "1" ]]; then
  176. sudo_options="sudo -H -n -u ${SUDO_USER}"
  177. if [[ "${SUDO_PASSWORD:-}" != "" ]]; then
  178. sudo_options="${sudo_options// -n/} -p \"\" -S <<< \"${SUDO_PASSWORD}\""
  179. fi
  180. command="$sudo_options bash -c $(utils::quote "$command")"
  181. fi
  182. command="$(utils::quote "$command")"
  183. if [[ "${host}" == "127.0.0.1" || "${host}" == "10.0.93.197" ]]; then
  184. # local execution
  185. log::exec "[command]" "bash -c $(printf "%s" "${command//${SUDO_PASSWORD:-}/zzzzzz}")"
  186. # shellcheck disable=SC2094
  187. COMMAND_OUTPUT=$(eval bash -c "${command}" 2>> "$LOG_FILE" | tee -a "$LOG_FILE")
  188. local status=$?
  189. else
  190. # remote execution
  191. local ssh_cmd="ssh"
  192. if [[ "${SSH_PASSWORD}" != "" ]]; then
  193. ssh_cmd="sshpass -p \"${SSH_PASSWORD}\" ${ssh_cmd}"
  194. elif [[ "$SSH_PRIVATE_KEY" != "" ]]; then
  195. [ -f "${SSH_PRIVATE_KEY}" ] || { log::error "[exec]" "ssh private_key:${SSH_PRIVATE_KEY} not found."; exit 1; }
  196. ssh_cmd="${ssh_cmd} -i $SSH_PRIVATE_KEY"
  197. fi
  198. log::exec "[command]" "${ssh_cmd//${SSH_PASSWORD:-}/zzzzzz} ${SSH_OPTIONS} ${SSH_USER}@${host} -p ${SSH_PORT} bash -c $(printf "%s" "${command//${SUDO_PASSWORD:-}/zzzzzz}")"
  199. # shellcheck disable=SC2094
  200. COMMAND_OUTPUT=$(eval "${ssh_cmd} ${SSH_OPTIONS} ${SSH_USER}@${host} -p ${SSH_PORT}" bash -c '"${command}"' 2>> "$LOG_FILE" | tee -a "$LOG_FILE")
  201. local status=$?
  202. fi
  203. return $status
  204. }
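# Usage sketch (not executed): the same call path covers local and remote
# execution; stdout is captured in COMMAND_OUTPUT and appended to the log.
# The 192.168.1.10 address is a placeholder:
#   command::exec "127.0.0.1" "uname -r"                        # local, via bash -c
#   command::exec "192.168.1.10" "systemctl is-active kubelet"  # remote, via ssh/sshpass
#   echo "$COMMAND_OUTPUT"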
  205. # Copy files to a node
  206. function command::scp() {
  207. local host=${1:-}
  208. local src=${2:-}
  209. local dest=${3:-/tmp/}
  210. if [[ "${host}" == "127.0.0.1" ]]; then
  211. local command="cp -rf ${src} ${dest}"
  212. log::exec "[command]" "bash -c \"${command}\""
  213. # shellcheck disable=SC2094
  214. COMMAND_OUTPUT=$(bash -c "${command}" 2>> "$LOG_FILE" | tee -a "$LOG_FILE")
  215. local status=$?
  216. else
  217. local scp_cmd="scp"
  218. if [[ "${SSH_PASSWORD}" != "" ]]; then
  219. scp_cmd="sshpass -p \"${SSH_PASSWORD}\" ${scp_cmd}"
  220. elif [[ "$SSH_PRIVATE_KEY" != "" ]]; then
  221. [ -f "${SSH_PRIVATE_KEY}" ] || { log::error "[exec]" "ssh private_key:${SSH_PRIVATE_KEY} not found."; exit 1; }
  222. scp_cmd="${scp_cmd} -i $SSH_PRIVATE_KEY"
  223. fi
  224. log::exec "[command]" "${scp_cmd} ${SSH_OPTIONS} -P ${SSH_PORT} -r ${src} ${SSH_USER}@${host}:${dest}" >> "$LOG_FILE"
  225. # shellcheck disable=SC2094
  226. COMMAND_OUTPUT=$(eval "${scp_cmd} ${SSH_OPTIONS} -P ${SSH_PORT} -r ${src} ${SSH_USER}@${host}:${dest}" 2>> "$LOG_FILE" | tee -a "$LOG_FILE")
  227. local status=$?
  228. fi
  229. return $status
  230. }
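# Usage sketch (not executed): copy a file or directory to a node, falling back
# to plain cp for 127.0.0.1; SSH_PASSWORD or SSH_PRIVATE_KEY selects the auth
# method. The address is a placeholder:
#   command::scp "192.168.1.10" "/tmp/kubeadm-linux-amd64" "/tmp/"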
  231. # Check whether a command exists, installing its package if missing
  232. function check::command_exists() {
  233. local cmd=${1}
  234. local package=${2}
  235. if command -V "$cmd" > /dev/null 2>&1; then
  236. log::info "[check]" "$cmd command exists."
  237. else
  238. log::warning "[check]" "I require $cmd but it's not installed."
  239. log::warning "[check]" "install $package package."
  240. command::exec "127.0.0.1" "yum install -y ${package}"
  241. check::exit_code "$?" "check" "$package install" "exit"
  242. fi
  243. }
  244. ######################################################################################################
  245. # Install functions
  246. ######################################################################################################
  247. # Node initialization script
  248. function script::init_node() {
  249. # clean
  250. sed -i -e "/$KUBE_APISERVER/d" -e '/-worker-/d' -e '/-master-/d' /etc/hosts
  251. sed -i '/## Kainstall managed start/,/## Kainstall managed end/d' /etc/security/limits.conf /etc/systemd/system.conf /etc/bashrc /etc/rc.local /etc/audit/rules.d/audit.rules
  252. # Disable selinux
  253. sed -i '/SELINUX/s/enforcing/disabled/' /etc/selinux/config
  254. setenforce 0
  255. # Disable swap
  256. swapoff -a && sysctl -w vm.swappiness=0
  257. sed -ri '/^[^#]*swap/s@^@#@' /etc/fstab
  258. # Disable firewalld
  259. for target in firewalld python-firewall firewalld-filesystem iptables; do
  260. systemctl stop $target &>/dev/null || true
  261. systemctl disable $target &>/dev/null || true
  262. done
  263. # repo
  264. [[ -f /etc/yum.repos.d/CentOS-Base.repo && "${SKIP_SET_OS_REPO,,}" == "false" ]] && sed -e 's!^#baseurl=!baseurl=!g' \
  265. -e 's!^mirrorlist=!#mirrorlist=!g' \
  266. -e 's!mirror.centos.org!mirrors.aliyun.com!g' \
  267. -i /etc/yum.repos.d/CentOS-Base.repo
  268. [[ "${OFFLINE_TAG:-}" != "1" && "${SKIP_SET_OS_REPO,,}" == "false" ]] && yum install -y epel-release
  269. [[ -f /etc/yum.repos.d/epel.repo && "${SKIP_SET_OS_REPO,,}" == "false" ]] && sed -e 's!^mirrorlist=!#mirrorlist=!g' \
  270. -e 's!^metalink=!#metalink=!g' \
  271. -e 's!^#baseurl=!baseurl=!g' \
  272. -e 's!//download.*/pub!//mirrors.aliyun.com!g' \
  273. -e 's!http://mirrors\.aliyun!https://mirrors.aliyun!g' \
  274. -i /etc/yum.repos.d/epel.repo
  275. # Change limits
  276. [ ! -f /etc/security/limits.conf_bak ] && cp /etc/security/limits.conf{,_bak}
  277. cat << EOF >> /etc/security/limits.conf
  278. ## Kainstall managed start
  279. root soft nofile 655360
  280. root hard nofile 655360
  281. root soft nproc 655360
  282. root hard nproc 655360
  283. root soft core unlimited
  284. root hard core unlimited
  285. * soft nofile 655360
  286. * hard nofile 655360
  287. * soft nproc 655360
  288. * hard nproc 655360
  289. * soft core unlimited
  290. * hard core unlimited
  291. ## Kainstall managed end
  292. EOF
  293. # /etc/systemd/system.conf
  294. [ -f /etc/security/limits.d/20-nproc.conf ] && sed -i 's#4096#655360#g' /etc/security/limits.d/20-nproc.conf
  295. cat << EOF >> /etc/systemd/system.conf
  296. ## Kainstall managed start
  297. DefaultLimitCORE=infinity
  298. DefaultLimitNOFILE=655360
  299. DefaultLimitNPROC=655360
  300. DefaultTasksMax=75%
  301. ## Kainstall managed end
  302. EOF
  303. # Change sysctl
  304. cat << EOF > /etc/sysctl.d/99-kube.conf
  305. # https://www.kernel.org/doc/Documentation/sysctl/
  306. #############################################################################################
  307. # 调整虚拟内存
  308. #############################################################################################
  309. # Default: 30
  310. # 0 - 任何情况下都不使用swap。
  311. # 1 - 除非内存不足(OOM),否则不使用swap。
  312. vm.swappiness = 0
  313. # 内存分配策略
  314. #0 - 表示内核将检查是否有足够的可用内存供应用进程使用;如果有足够的可用内存,内存申请允许;否则,内存申请失败,并把错误返回给应用进程。
  315. #1 - 表示内核允许分配所有的物理内存,而不管当前的内存状态如何。
  316. #2 - 表示内核允许分配超过所有物理内存和交换空间总和的内存
  317. vm.overcommit_memory=1
  318. # OOM时处理
  319. # 1关闭,等于0时,表示当内存耗尽时,内核会触发OOM killer杀掉最耗内存的进程。
  320. vm.panic_on_oom=0
  321. # vm.dirty_background_ratio 用于调整内核如何处理必须刷新到磁盘的脏页。
  322. # Default value is 10.
  323. # 该值是系统内存总量的百分比,在许多情况下将此值设置为5是合适的。
  324. # 此设置不应设置为零。
  325. vm.dirty_background_ratio = 5
  326. # 内核强制同步操作将其刷新到磁盘之前允许的脏页总数
  327. # 也可以通过更改 vm.dirty_ratio 的值(将其增加到默认值30以上(也占系统内存的百分比))来增加
  328. # 推荐 vm.dirty_ratio 的值在60到80之间。
  329. vm.dirty_ratio = 60
  330. # vm.max_map_count 计算当前的内存映射文件数。
  331. # mmap 限制(vm.max_map_count)的最小值是打开文件的ulimit数量(cat /proc/sys/fs/file-max)。
  332. # 每128KB系统内存 map_count应该大约为1。 因此,在32GB系统上,max_map_count为262144。
  333. # Default: 65530
  334. vm.max_map_count = 2097152
  335. #############################################################################################
  336. # 调整文件
  337. #############################################################################################
  338. fs.may_detach_mounts = 1
  339. # 增加文件句柄和inode缓存的大小,并限制核心转储。
  340. fs.file-max = 2097152
  341. fs.nr_open = 2097152
  342. fs.suid_dumpable = 0
  343. # 文件监控
  344. fs.inotify.max_user_instances=8192
  345. fs.inotify.max_user_watches=524288
  346. fs.inotify.max_queued_events=16384
  347. #############################################################################################
  348. # 调整网络设置
  349. #############################################################################################
  350. # 为每个套接字的发送和接收缓冲区分配的默认内存量。
  351. net.core.wmem_default = 25165824
  352. net.core.rmem_default = 25165824
  353. # 为每个套接字的发送和接收缓冲区分配的最大内存量。
  354. net.core.wmem_max = 25165824
  355. net.core.rmem_max = 25165824
  356. # 除了套接字设置外,发送和接收缓冲区的大小
  357. # 必须使用net.ipv4.tcp_wmem和net.ipv4.tcp_rmem参数分别设置TCP套接字。
  358. # 使用三个以空格分隔的整数设置这些整数,分别指定最小,默认和最大大小。
  359. # 最大大小不能大于使用net.core.wmem_max和net.core.rmem_max为所有套接字指定的值。
  360. # 合理的设置是最小4KiB,默认64KiB和最大2MiB缓冲区。
  361. net.ipv4.tcp_wmem = 20480 12582912 25165824
  362. net.ipv4.tcp_rmem = 20480 12582912 25165824
  363. # 增加最大可分配的总缓冲区空间
  364. # 以页为单位(4096字节)进行度量
  365. net.ipv4.tcp_mem = 65536 25165824 262144
  366. net.ipv4.udp_mem = 65536 25165824 262144
  367. # 为每个套接字的发送和接收缓冲区分配的最小内存量。
  368. net.ipv4.udp_wmem_min = 16384
  369. net.ipv4.udp_rmem_min = 16384
  370. # 启用TCP窗口缩放,客户端可以更有效地传输数据,并允许在代理方缓冲该数据。
  371. net.ipv4.tcp_window_scaling = 1
  372. # 提高同时接受连接数。
  373. net.ipv4.tcp_max_syn_backlog = 10240
  374. # 将net.core.netdev_max_backlog的值增加到大于默认值1000
  375. # 可以帮助突发网络流量,特别是在使用数千兆位网络连接速度时,
  376. # 通过允许更多的数据包排队等待内核处理它们。
  377. net.core.netdev_max_backlog = 65536
  378. # 增加选项内存缓冲区的最大数量
  379. net.core.optmem_max = 25165824
  380. # 被动TCP连接的SYNACK次数。
  381. net.ipv4.tcp_synack_retries = 2
  382. # 允许的本地端口范围。
  383. net.ipv4.ip_local_port_range = 2048 65535
  384. # 防止TCP时间等待
  385. # Default: net.ipv4.tcp_rfc1337 = 0
  386. net.ipv4.tcp_rfc1337 = 1
  387. # 减少tcp_fin_timeout连接的时间默认值
  388. net.ipv4.tcp_fin_timeout = 15
  389. # 积压套接字的最大数量。
  390. # Default is 128.
  391. net.core.somaxconn = 32768
  392. # 打开syncookies以进行SYN洪水攻击保护。
  393. net.ipv4.tcp_syncookies = 1
  394. # 避免Smurf攻击
  395. # 发送伪装的ICMP数据包,目的地址设为某个网络的广播地址,源地址设为要攻击的目的主机,
  396. # 使所有收到此ICMP数据包的主机都将对目的主机发出一个回应,使被攻击主机在某一段时间内收到成千上万的数据包
  397. net.ipv4.icmp_echo_ignore_broadcasts = 1
  398. # 为icmp错误消息打开保护
  399. net.ipv4.icmp_ignore_bogus_error_responses = 1
  400. # 启用自动缩放窗口。
  401. # 如果延迟证明合理,这将允许TCP缓冲区超过其通常的最大值64K。
  402. net.ipv4.tcp_window_scaling = 1
  403. # 打开并记录欺骗,源路由和重定向数据包
  404. net.ipv4.conf.all.log_martians = 1
  405. net.ipv4.conf.default.log_martians = 1
  406. # 告诉内核有多少个未附加的TCP套接字维护用户文件句柄。 万一超过这个数字,
  407. # 孤立的连接会立即重置,并显示警告。
  408. # Default: net.ipv4.tcp_max_orphans = 65536
  409. net.ipv4.tcp_max_orphans = 65536
  410. # 不要在关闭连接时缓存指标
  411. net.ipv4.tcp_no_metrics_save = 1
  412. # 启用RFC1323中定义的时间戳记:
  413. # Default: net.ipv4.tcp_timestamps = 1
  414. net.ipv4.tcp_timestamps = 1
  415. # 启用选择确认。
  416. # Default: net.ipv4.tcp_sack = 1
  417. net.ipv4.tcp_sack = 1
  418. # 增加 tcp-time-wait 存储桶池大小,以防止简单的DOS攻击。
  419. # net.ipv4.tcp_tw_recycle 已从Linux 4.12中删除。请改用net.ipv4.tcp_tw_reuse。
  420. net.ipv4.tcp_max_tw_buckets = 14400
  421. net.ipv4.tcp_tw_reuse = 1
  422. # accept_source_route 选项使网络接口接受设置了严格源路由(SSR)或松散源路由(LSR)选项的数据包。
  423. # 以下设置将丢弃设置了SSR或LSR选项的数据包。
  424. net.ipv4.conf.all.accept_source_route = 0
  425. net.ipv4.conf.default.accept_source_route = 0
  426. # 打开反向路径过滤
  427. net.ipv4.conf.all.rp_filter = 1
  428. net.ipv4.conf.default.rp_filter = 1
  429. # 禁用ICMP重定向接受
  430. net.ipv4.conf.all.accept_redirects = 0
  431. net.ipv4.conf.default.accept_redirects = 0
  432. net.ipv4.conf.all.secure_redirects = 0
  433. net.ipv4.conf.default.secure_redirects = 0
  434. # 禁止发送所有IPv4 ICMP重定向数据包。
  435. net.ipv4.conf.all.send_redirects = 0
  436. net.ipv4.conf.default.send_redirects = 0
  437. # 开启IP转发.
  438. net.ipv4.ip_forward = 1
  439. # 禁止IPv6
  440. net.ipv6.conf.lo.disable_ipv6=1
  441. net.ipv6.conf.all.disable_ipv6 = 1
  442. net.ipv6.conf.default.disable_ipv6 = 1
  443. # 要求iptables不对bridge的数据进行处理
  444. net.bridge.bridge-nf-call-ip6tables = 1
  445. net.bridge.bridge-nf-call-iptables = 1
  446. net.bridge.bridge-nf-call-arptables = 1
  447. # arp缓存
  448. # 存在于 ARP 高速缓存中的最少层数,如果少于这个数,垃圾收集器将不会运行。缺省值是 128
  449. net.ipv4.neigh.default.gc_thresh1=2048
  450. # 保存在 ARP 高速缓存中的最多的记录软限制。垃圾收集器在开始收集前,允许记录数超过这个数字 5 秒。缺省值是 512
  451. net.ipv4.neigh.default.gc_thresh2=4096
  452. # 保存在 ARP 高速缓存中的最多记录的硬限制,一旦高速缓存中的数目高于此,垃圾收集器将马上运行。缺省值是 1024
  453. net.ipv4.neigh.default.gc_thresh3=8192
  454. # 持久连接
  455. net.ipv4.tcp_keepalive_time = 600
  456. net.ipv4.tcp_keepalive_intvl = 30
  457. net.ipv4.tcp_keepalive_probes = 10
  458. # conntrack表
  459. net.nf_conntrack_max=1048576
  460. net.netfilter.nf_conntrack_max=1048576
  461. net.netfilter.nf_conntrack_buckets=262144
  462. net.netfilter.nf_conntrack_tcp_timeout_fin_wait=30
  463. net.netfilter.nf_conntrack_tcp_timeout_time_wait=30
  464. net.netfilter.nf_conntrack_tcp_timeout_close_wait=15
  465. net.netfilter.nf_conntrack_tcp_timeout_established=300
  466. #############################################################################################
  467. # 调整内核参数
  468. #############################################################################################
  469. # 地址空间布局随机化(ASLR)是一种用于操作系统的内存保护过程,可防止缓冲区溢出攻击。
  470. # 这有助于确保与系统上正在运行的进程相关联的内存地址不可预测,
  471. # 因此,与这些流程相关的缺陷或漏洞将更加难以利用。
  472. # Accepted values: 0 = 关闭, 1 = 保守随机化, 2 = 完全随机化
  473. kernel.randomize_va_space = 2
  474. # 调高 PID 数量
  475. kernel.pid_max = 65536
  476. kernel.threads-max=30938
  477. # coredump
  478. kernel.core_pattern=core
  479. # 决定了检测到soft lockup时是否自动panic,缺省值是0
  480. kernel.softlockup_all_cpu_backtrace=1
  481. kernel.softlockup_panic=1
  482. EOF
  483. # history
  484. cat << EOF >> /etc/bashrc
  485. ## Kainstall managed start
  486. # history actions record,include action time, user, login ip
  487. HISTFILESIZE=5000
  488. HISTSIZE=5000
  489. USER_IP=\$(who -u am i 2>/dev/null | awk '{print \$NF}' | sed -e 's/[()]//g')
  490. if [ -z \$USER_IP ]
  491. then
  492. USER_IP=\$(hostname -i)
  493. fi
  494. HISTTIMEFORMAT="%Y-%m-%d %H:%M:%S \$USER_IP:\$(whoami) "
  495. export HISTFILESIZE HISTSIZE HISTTIMEFORMAT
  496. # PS1
  497. PS1='\[\033[0m\]\[\033[1;36m\][\u\[\033[0m\]@\[\033[1;32m\]\h\[\033[0m\] \[\033[1;31m\]\w\[\033[0m\]\[\033[1;36m\]]\[\033[33;1m\]\\$ \[\033[0m\]'
  498. ## Kainstall managed end
  499. EOF
  500. # journal
  501. mkdir -p /var/log/journal /etc/systemd/journald.conf.d
  502. cat << EOF > /etc/systemd/journald.conf.d/99-prophet.conf
  503. [Journal]
  504. # Persist logs to disk
  505. Storage=persistent
  506. # Compress rotated logs
  507. Compress=yes
  508. SyncIntervalSec=5m
  509. RateLimitInterval=30s
  510. RateLimitBurst=1000
  511. # Cap total journal disk usage at 2G
  512. SystemMaxUse=2G
  513. # Cap individual journal files at 100M
  514. SystemMaxFileSize=100M
  515. # Keep logs for 3 weeks
  516. MaxRetentionSec=3week
  517. # Do not forward to syslog
  518. ForwardToSyslog=no
  519. EOF
  520. # motd
  521. cat << EOF > /etc/profile.d/zz-ssh-login-info.sh
  522. #!/bin/sh
  523. #
  524. # @Time : 2020-02-04
  525. # @Author : lework
  526. # @Desc : ssh login banner
  527. export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:\$PATH
  528. #shopt -q login_shell && : || return 0
  529. # os
  530. upSeconds="\$(cut -d. -f1 /proc/uptime)"
  531. secs=\$((\${upSeconds}%60))
  532. mins=\$((\${upSeconds}/60%60))
  533. hours=\$((\${upSeconds}/3600%24))
  534. days=\$((\${upSeconds}/86400))
  535. UPTIME_INFO=\$(printf "%d days, %02dh %02dm %02ds" "\$days" "\$hours" "\$mins" "\$secs")
  536. if [ -f /etc/redhat-release ] ; then
  537. PRETTY_NAME=\$(< /etc/redhat-release)
  538. elif [ -f /etc/debian_version ]; then
  539. DIST_VER=\$(</etc/debian_version)
  540. PRETTY_NAME="\$(grep PRETTY_NAME /etc/os-release | sed -e 's/PRETTY_NAME=//g' -e 's/"//g') (\$DIST_VER)"
  541. else
  542. PRETTY_NAME=\$(cat /etc/*-release | grep "PRETTY_NAME" | sed -e 's/PRETTY_NAME=//g' -e 's/"//g')
  543. fi
  544. if [[ -d "/system/app/" && -d "/system/priv-app" ]]; then
  545. model="\$(getprop ro.product.brand) \$(getprop ro.product.model)"
  546. elif [[ -f /sys/devices/virtual/dmi/id/product_name ||
  547. -f /sys/devices/virtual/dmi/id/product_version ]]; then
  548. model="\$(< /sys/devices/virtual/dmi/id/product_name)"
  549. model+=" \$(< /sys/devices/virtual/dmi/id/product_version)"
  550. elif [[ -f /sys/firmware/devicetree/base/model ]]; then
  551. model="\$(< /sys/firmware/devicetree/base/model)"
  552. elif [[ -f /tmp/sysinfo/model ]]; then
  553. model="\$(< /tmp/sysinfo/model)"
  554. fi
  555. MODEL_INFO=\${model}
  556. KERNEL=\$(uname -srmo)
  557. USER_NUM=\$(who -u | wc -l)
  558. RUNNING=\$(ps ax | wc -l | tr -d " ")
  559. # disk
  560. totaldisk=\$(df -h -x devtmpfs -x tmpfs -x debugfs -x aufs -x overlay --total 2>/dev/null | tail -1)
  561. disktotal=\$(awk '{print \$2}' <<< "\${totaldisk}")
  562. diskused=\$(awk '{print \$3}' <<< "\${totaldisk}")
  563. diskusedper=\$(awk '{print \$5}' <<< "\${totaldisk}")
  564. DISK_INFO="\033[0;33m\${diskused}\033[0m of \033[1;34m\${disktotal}\033[0m disk space used (\033[0;33m\${diskusedper}\033[0m)"
  565. # cpu
  566. cpu=\$(awk -F':' '/^model name/ {print \$2}' /proc/cpuinfo | uniq | sed -e 's/^[ \t]*//')
  567. cpun=\$(grep -c '^processor' /proc/cpuinfo)
  568. cpuc=\$(grep '^cpu cores' /proc/cpuinfo | tail -1 | awk '{print \$4}')
  569. cpup=\$(grep '^physical id' /proc/cpuinfo | wc -l)
  570. CPU_INFO="\${cpu} \${cpup}P \${cpuc}C \${cpun}L"
  571. # get the load averages
  572. read one five fifteen rest < /proc/loadavg
  573. LOADAVG_INFO="\033[0;33m\${one}\033[0m / \${five} / \${fifteen} with \033[1;34m\$(( cpun*cpuc ))\033[0m core(s) at \033[1;34m\$(grep '^cpu MHz' /proc/cpuinfo | tail -1 | awk '{print \$4}')\033[0m MHz"
  574. # mem
  575. MEM_INFO="\$(cat /proc/meminfo | awk '/MemTotal:/{total=\$2/1024/1024;next} /MemAvailable:/{use=total-\$2/1024/1024; printf("\033[0;33m%.2fGiB\033[0m of \033[1;34m%.2fGiB\033[0m RAM used (\033[0;33m%.2f%%\033[0m)",use,total,(use/total)*100);}')"
  576. # network
  577. # extranet_ip=" and \$(curl -s ip.cip.cc)"
  578. IP_INFO="\$(ip a|grep -E '^[0-9]+: em*|^[0-9]+: eno*|^[0-9]+: enp*|^[0-9]+: ens*|^[0-9]+: eth*|^[0-9]+: wlp*' -A2|grep inet|awk -F ' ' '{print \$2}'|cut -f1 -d/|xargs echo)"
  579. # Container info
  580. CONTAINER_INFO="\$(sudo /usr/bin/crictl ps -a -o yaml 2> /dev/null | awk '/^ state: /{gsub("CONTAINER_", "", \$NF); ++S[\$NF]}END{for(m in S) printf "%s%s:%s ",substr(m,1,1),tolower(substr(m,2)),S[m]}')Images:\$(sudo /usr/bin/crictl images -q 2> /dev/null | wc -l)"
  581. # info
  582. echo -e "
  583. Information as of: \033[1;34m\$(date +"%Y-%m-%d %T")\033[0m
  584. \033[0;1;31mProduct\033[0m............: \${MODEL_INFO}
  585. \033[0;1;31mOS\033[0m.................: \${PRETTY_NAME}
  586. \033[0;1;31mKernel\033[0m.............: \${KERNEL}
  587. \033[0;1;31mCPU\033[0m................: \${CPU_INFO}
  588. \033[0;1;31mHostname\033[0m...........: \033[1;34m\$(hostname)\033[0m
  589. \033[0;1;31mIP Addresses\033[0m.......: \033[1;34m\${IP_INFO}\033[0m
  590. \033[0;1;31mUptime\033[0m.............: \033[0;33m\${UPTIME_INFO}\033[0m
  591. \033[0;1;31mMemory\033[0m.............: \${MEM_INFO}
  592. \033[0;1;31mLoad Averages\033[0m......: \${LOADAVG_INFO}
  593. \033[0;1;31mDisk Usage\033[0m.........: \${DISK_INFO}
  594. \033[0;1;31mUsers online\033[0m.......: \033[1;34m\${USER_NUM}\033[0m
  595. \033[0;1;31mRunning Processes\033[0m..: \033[1;34m\${RUNNING}\033[0m
  596. \033[0;1;31mContainer Info\033[0m.....: \${CONTAINER_INFO}
  597. "
  598. EOF
  599. chmod +x /etc/profile.d/zz-ssh-login-info.sh
  600. echo 'ALL ALL=(ALL) NOPASSWD:/usr/bin/crictl' > /etc/sudoers.d/crictl
  601. # time sync
  602. ntpd --help >/dev/null 2>&1 && yum remove -y ntp
  603. [[ "${OFFLINE_TAG:-}" != "1" ]] && yum install -y chrony
  604. [ ! -f /etc/chrony.conf_bak ] && cp /etc/chrony.conf{,_bak} # back up the default config
  605. cat << EOF > /etc/chrony.conf
  606. server ntp.aliyun.com iburst
  607. server cn.ntp.org.cn iburst
  608. server ntp.shu.edu.cn iburst
  609. server 0.cn.pool.ntp.org iburst
  610. server 1.cn.pool.ntp.org iburst
  611. server 2.cn.pool.ntp.org iburst
  612. server 3.cn.pool.ntp.org iburst
  613. driftfile /var/lib/chrony/drift
  614. makestep 1.0 3
  615. logdir /var/log/chrony
  616. EOF
  617. timedatectl set-timezone Asia/Shanghai
  618. chronyd -q -t 1 'server cn.pool.ntp.org iburst maxsamples 1'
  619. systemctl enable chronyd
  620. systemctl start chronyd
  621. chronyc sources -v
  622. chronyc sourcestats
  623. hwclock --systohc
  624. # package
  625. [[ "${OFFLINE_TAG:-}" != "1" ]] && yum install -y curl wget
  626. # ipvs
  627. [[ "${OFFLINE_TAG:-}" != "1" ]] && yum install -y ipvsadm ipset sysstat conntrack libseccomp
  628. module=(
  629. ip_vs
  630. ip_vs_rr
  631. ip_vs_wrr
  632. ip_vs_sh
  633. overlay
  634. nf_conntrack
  635. br_netfilter
  636. )
  637. [ -f /etc/modules-load.d/ipvs.conf ] && cp -f /etc/modules-load.d/ipvs.conf{,_bak}
  638. for kernel_module in "${module[@]}";do
  639. /sbin/modinfo -F filename "$kernel_module" |& grep -qv ERROR && echo "$kernel_module" >> /etc/modules-load.d/ipvs.conf
  640. done
  641. systemctl restart systemd-modules-load
  642. systemctl enable systemd-modules-load
  643. sysctl --system
  644. # audit
  645. [[ "${OFFLINE_TAG:-}" != "1" ]] && yum install -y audit audit-libs
  646. # /etc/audit/rules.d/audit.rules
  647. cat << EOF >> /etc/audit/rules.d/audit.rules
  648. ## Kainstall managed start
  649. # Ignore errors
  650. -i
  651. # SYSCALL
  652. -a always,exit -F arch=b64 -S kill,tkill,tgkill -F a1=9 -F key=trace_kill_9
  653. -a always,exit -F arch=b64 -S kill,tkill,tgkill -F a1=15 -F key=trace_kill_15
  654. # docker
  655. -w /usr/bin/dockerd -k docker
  656. -w /var/lib/docker -k docker
  657. -w /etc/docker -k docker
  658. -w /usr/lib/systemd/system/docker.service -k docker
  659. -w /etc/systemd/system/docker.service -k docker
  660. -w /usr/lib/systemd/system/docker.socket -k docker
  661. -w /etc/default/docker -k docker
  662. -w /etc/sysconfig/docker -k docker
  663. -w /etc/docker/daemon.json -k docker
  664. # containerd
  665. -w /usr/bin/containerd -k containerd
  666. -w /var/lib/containerd -k containerd
  667. -w /usr/lib/systemd/system/containerd.service -k containerd
  668. -w /etc/containerd/config.toml -k containerd
  669. # cri-o
  670. -w /usr/bin/crio -k cri-o
  671. -w /etc/crio -k cri-o
  672. # runc
  673. -w /usr/bin/runc -k runc
  674. # kube
  675. -w /usr/bin/kubeadm -k kubeadm
  676. -w /usr/bin/kubelet -k kubelet
  677. -w /usr/bin/kubectl -k kubectl
  678. -w /var/lib/kubelet -k kubelet
  679. -w /etc/kubernetes -k kubernetes
  680. ## Kainstall managed end
  681. EOF
  682. chmod 600 /etc/audit/rules.d/audit.rules
  683. sed -i 's#max_log_file =.*#max_log_file = 80#g' /etc/audit/auditd.conf
  684. if [ -f /usr/libexec/initscripts/legacy-actions/auditd/restart ]; then
  685. /usr/libexec/initscripts/legacy-actions/auditd/restart
  686. else
  687. systemctl stop auditd && systemctl start auditd
  688. fi
  689. systemctl enable auditd
  690. grep single-request-reopen /etc/resolv.conf || sed -i '1ioptions timeout:2 attempts:3 rotate single-request-reopen' /etc/resolv.conf
  691. ipvsadm --clear
  692. iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
  693. }
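# script::init_node runs on each target node; like the other script::* helpers
# it is shipped there by serializing the function with `declare -f` and running
# it through command::exec. A minimal sketch (not executed) of that pattern, as
# used later by install::package and init::upgrade_kernel:
#   command::exec "${host}" "
#     export OFFLINE_TAG=${OFFLINE_TAG:-0}
#     $(declare -f script::init_node)
#     script::init_node
#   "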
  694. # Upgrade the kernel
  695. function script::upgrade_kernel() {
  696. local ver; ver=$(rpm --eval "%{centos_ver}")
  697. [[ "${OFFLINE_TAG:-}" != "1" ]] && yum install -y "https://www.elrepo.org/elrepo-release-${ver}.el${ver}.elrepo.noarch.rpm"
  698. sed -e "s/^mirrorlist=/#mirrorlist=/g" \
  699. -e "s/elrepo.org\/linux/mirrors.tuna.tsinghua.edu.cn\/elrepo/g" \
  700. -i /etc/yum.repos.d/elrepo.repo
  701. [[ "${OFFLINE_TAG:-}" != "1" ]] && yum install -y --disablerepo="*" --enablerepo=elrepo-kernel kernel-lt{,-devel}
  702. grub2-set-default 0 && grub2-mkconfig -o /etc/grub2.cfg
  703. grubby --default-kernel
  704. grubby --args="user_namespace.enable=1" --update-kernel="$(grubby --default-kernel)"
  705. }
  706. # Upgrade node kube components (kubeadm, kubelet, kubectl)
  707. function script::upgrage_kube() {
  708. local role=${1:-init}
  709. local version="-${2:-latest}"
  710. version="${version#-latest}"
  711. set -e
  712. echo '[install] kubeadm'
  713. kubeadm version
  714. yum install -y "kubeadm${version}" --disableexcludes=kubernetes
  715. kubeadm version
  716. echo '[upgrade]'
  717. if [[ "$role" == "init" ]]; then
  718. local plan_info; plan_info=$(kubeadm upgrade plan)
  719. local v; v=$(printf "%s" "$plan_info" | grep 'kubeadm upgrade apply ' | awk '{print $4}'| tail -1 )
  720. printf "%s\n" "${plan_info}"
  721. kubeadm upgrade apply "${v}" -y
  722. else
  723. kubeadm upgrade node
  724. fi
  725. echo '[install] kubelet kubectl'
  726. kubectl version --client=true
  727. yum install -y "kubelet${version}" "kubectl${version}" --disableexcludes=kubernetes
  728. kubectl version --client=true
  729. [ -f /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf ] && \
  730. sed -i 's#^\[Service\]#[Service]\nCPUAccounting=true\nMemoryAccounting=true#g' /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf
  731. systemctl daemon-reload
  732. systemctl restart kubelet
  733. }
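# Usage sketch (not executed): on the first control-plane node the helper runs
# the full `kubeadm upgrade apply` plan, on any other node it only runs
# `kubeadm upgrade node`. The version below is a placeholder:
#   script::upgrage_kube init 1.23.17
#   script::upgrage_kube node 1.23.17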
  734. # Install docker
  735. function script::install_docker() {
  736. #local version="-${1:-latest}"
  737. #version="${version#-latest}"
  738. cat << EOF > /etc/yum.repos.d/docker-ce.repo
  739. [docker-ce-stable]
  740. name=Docker CE Stable - \$basearch
  741. baseurl=https://mirrors.aliyun.com/docker-ce/linux/centos/$(rpm --eval '%{centos_ver}')/\$basearch/stable
  742. enabled=1
  743. gpgcheck=1
  744. gpgkey=https://mirrors.aliyun.com/docker-ce/linux/centos/gpg
  745. EOF
  746. if [[ "${OFFLINE_TAG:-}" != "1" ]];then
  747. [ -f "$(which docker)" ] && yum remove -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin docker-ce-rootless-extras
  748. yum install -y "docker-ce-20.10.24-3.el7" "docker-ce-cli-20.10.24-3.el7" containerd.io bash-completion
  749. fi
  750. [ -f /usr/share/bash-completion/completions/docker ] && \
  751. cp -f /usr/share/bash-completion/completions/docker /etc/bash_completion.d/
  752. [ ! -d /etc/docker ] && mkdir /etc/docker
  753. # /etc/docker/daemon.json
  754. cat << EOF > /etc/docker/daemon.json
  755. {
  756. "data-root": "/var/lib/docker",
  757. "log-driver": "json-file",
  758. "log-opts": {
  759. "max-size": "200m",
  760. "max-file": "3"
  761. },
  762. "default-ulimits": {
  763. "nofile": {
  764. "Name": "nofile",
  765. "Hard": 655360,
  766. "Soft": 655360
  767. },
  768. "nproc": {
  769. "Name": "nproc",
  770. "Hard": 655360,
  771. "Soft": 655360
  772. }
  773. },
  774. "live-restore": true,
  775. "max-concurrent-downloads": 10,
  776. "max-concurrent-uploads": 10,
  777. "exec-opts": ["native.cgroupdriver=systemd"],
  778. "registry-mirrors": [
  779. ],
  780. "insecure-registries": [
  781. ]
  782. }
  783. EOF
  784. sed -i 's|#oom_score = 0|oom_score = -999|' /etc/containerd/config.toml
  785. # /etc/crictl.yaml
  786. cat << EOF > /etc/crictl.yaml
  787. runtime-endpoint: unix:///var/run/dockershim.sock
  788. image-endpoint: unix:///var/run/dockershim.sock
  789. timeout: 2
  790. debug: false
  791. pull-image-on-create: true
  792. disable-pull-on-run: false
  793. EOF
  794. systemctl enable containerd
  795. systemctl restart containerd
  796. systemctl enable docker
  797. systemctl restart docker
  798. }
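# Quick verification sketch (not executed), assuming the services came up
# cleanly on the node:
#   systemctl is-active docker containerd
#   docker info --format '{{.CgroupDriver}}'   # expected: systemd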
  799. # Install containerd
  800. function script::install_containerd() {
  801. local version="-${1:-latest}"
  802. version="${version#-latest}"
  803. # /etc/yum.repos.d/docker-ce.repo
  804. cat << EOF > /etc/yum.repos.d/docker-ce.repo
  805. [docker-ce-stable]
  806. name=Docker CE Stable - \$basearch
  807. baseurl=https://mirrors.aliyun.com/docker-ce/linux/centos/$(rpm --eval '%{centos_ver}')/\$basearch/stable
  808. enabled=1
  809. gpgcheck=1
  810. gpgkey=https://mirrors.aliyun.com/docker-ce/linux/centos/gpg
  811. EOF
  812. if [[ "${OFFLINE_TAG:-}" != "1" ]];then
  813. [ -f "$(which runc)" ] && yum remove -y runc
  814. [ -f "$(which containerd)" ] && yum remove -y containerd.io
  815. yum install -y containerd.io"${version}" containernetworking bash-completion
  816. fi
  817. [ -d /etc/bash_completion.d ] && crictl completion bash > /etc/bash_completion.d/crictl
  818. containerd config default > /etc/containerd/config.toml
  819. sed -i -e "s#k8s.gcr.io#registry.cn-hangzhou.aliyuncs.com/kainstall#g" \
  820. -e "s#SystemdCgroup = false#SystemdCgroup = true#g" \
  821. -e "s#oom_score = 0#oom_score = -999#" \
  822. -e "s#max_concurrent_downloads = 3#max_concurrent_downloads = 10#g" /etc/containerd/config.toml
  823. # /etc/crictl.yaml
  824. cat << EOF > /etc/crictl.yaml
  825. runtime-endpoint: unix:///run/containerd/containerd.sock
  826. image-endpoint: unix:///run/containerd/containerd.sock
  827. timeout: 2
  828. debug: false
  829. pull-image-on-create: true
  830. disable-pull-on-run: false
  831. EOF
  832. systemctl restart containerd
  833. systemctl enable containerd
  834. }
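# Quick verification sketch (not executed); crictl picks up the endpoint
# written to /etc/crictl.yaml above:
#   systemctl is-active containerd
#   crictl info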
  835. # Install cri-o
  836. function script::install_cri-o() {
  837. local version="${1:-latest}"
  838. version="${version#-latest}"
  839. os="CentOS_$(rpm --eval '%{centos_ver}')" && echo "${os}"
  840. # /etc/yum.repos.d/devel_kubic_libcontainers_stable.repo
  841. cat << EOF > /etc/yum.repos.d/devel_kubic_libcontainers_stable.repo
  842. [devel_kubic_libcontainers_stable]
  843. name=Stable Releases of Upstream github.com/containers packages
  844. type=rpm-md
  845. baseurl=https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/
  846. gpgcheck=1
  847. gpgkey=https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/repodata/repomd.xml.key
  848. enabled=1
  849. [devel_kubic_libcontainers_stable_cri-o]
  850. name=devel:kubic:libcontainers:stable:cri-o
  851. type=rpm-md
  852. baseurl=https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/${version}/${os}/
  853. gpgcheck=1
  854. gpgkey=https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/${version}/${os}/repodata/repomd.xml.key
  855. enabled=1
  856. EOF
  857. if [[ "${OFFLINE_TAG:-}" != "1" ]];then
  858. [ -f "$(which runc)" ] && yum remove -y runc
  859. [ -f "$(which crio)" ] && yum remove -y cri-o
  860. [ -f "$(which docker)" ] && yum remove -y docker-ce docker-ce-cli containerd.io
  861. yum install -y runc cri-o bash-completion --disablerepo=docker-ce-stable || yum install -y runc cri-o bash-completion
  862. fi
  863. [ -d /etc/bash_completion.d ] && \
  864. { crictl completion bash > /etc/bash_completion.d/crictl; \
  865. crio completion bash > /etc/bash_completion.d/crio; \
  866. crio-status completion bash > /etc/bash_completion.d/crio-status; }
  867. [ ! -f /etc/crio/crio.conf ] && crio config --default > /etc/crio/crio.conf
  868. sed -i -e "s#k8s.gcr.io#registry.cn-hangzhou.aliyuncs.com/kainstall#g" \
  869. -e 's|#registries = \[|registries = ["docker.io", "quay.io"]|g' /etc/crio/crio.conf
  870. # /etc/crio/crio.conf
  871. cat << EOF >> /etc/crio/crio.conf
  872. [crio.image]
  873. pause_image = "registry.cn-hangzhou.aliyuncs.com/kainstall/pause:3.6"
  874. EOF
  875. # /etc/containers/registries.conf.d/000-dockerio.conf
  876. [ -d /etc/containers/registries.conf.d ] && cat << EOF > /etc/containers/registries.conf.d/000-dockerio.conf
  877. [[registry]]
  878. prefix = "docker.io"
  879. insecure = false
  880. blocked = false
  881. location = "docker.io"
  882. EOF
  883. # /etc/crictl.yaml
  884. cat << EOF > /etc/crictl.yaml
  885. runtime-endpoint: unix:///var/run/crio/crio.sock
  886. image-endpoint: unix:///var/run/crio/crio.sock
  887. timeout: 2
  888. debug: false
  889. pull-image-on-create: true
  890. disable-pull-on-run: false
  891. EOF
  892. # /etc/cni/net.d/100-crio-bridge.conf
  893. sed -i "s#10.85.0.0/16#${KUBE_POD_SUBNET:-10.85.0.0/16}#g" /etc/cni/net.d/100-crio-bridge.conf
  894. # /etc/cni/net.d/10-crio.conf
  895. cat << EOF > /etc/cni/net.d/10-crio.conf
  896. {
  897. $(grep cniVersion /etc/cni/net.d/100-crio-bridge.conf)
  898. "name": "crio",
  899. "type": "flannel"
  900. }
  901. EOF
  902. mv /etc/cni/net.d/100-crio-bridge.conf /etc/cni/net.d/10-crio.conf /etc/cni/net.d/200-loopback.conf /tmp/
  903. systemctl restart crio
  904. systemctl enable crio
  905. }
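# Quick verification sketch (not executed); crictl uses the crio.sock endpoint
# from /etc/crictl.yaml:
#   systemctl is-active crio
#   crictl info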
  906. # Install kube components (kubeadm, kubelet, kubectl)
  907. function script::install_kube() {
  908. local version="-${1:-latest}"
  909. version="${version#-latest}"
  910. # /etc/yum.repos.d/kubernetes.repo
  911. cat <<EOF > /etc/yum.repos.d/kubernetes.repo
  912. [kubernetes]
  913. name=Kubernetes
  914. baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
  915. enabled=1
  916. gpgcheck=0
  917. repo_gpgcheck=0
  918. gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
  919. EOF
  920. if [[ "${OFFLINE_TAG:-}" != "1" ]];then
  921. [ -f /usr/bin/kubeadm ] && yum remove -y kubeadm
  922. [ -f /usr/bin/kubelet ] && yum remove -y kubelet
  923. [ -f /usr/bin/kubectl ] && yum remove -y kubectl
  924. yum install -y "kubeadm${version}" "kubelet${version}" "kubectl${version}" --disableexcludes=kubernetes
  925. fi
  926. [ -d /etc/bash_completion.d ] && \
  927. { kubectl completion bash > /etc/bash_completion.d/kubectl; \
  928. kubeadm completion bash > /etc/bash_completion.d/kubeadm; }
  929. [ ! -d /usr/lib/systemd/system/kubelet.service.d ] && mkdir -p /usr/lib/systemd/system/kubelet.service.d
  930. cat << EOF > /usr/lib/systemd/system/kubelet.service.d/11-cgroup.conf
  931. [Service]
  932. CPUAccounting=true
  933. MemoryAccounting=true
  934. BlockIOAccounting=true
  935. ExecStartPre=/bin/bash -c '/bin/mkdir -p /sys/fs/cgroup/{cpuset,memory,hugetlb,systemd,pids,"cpu,cpuacct"}/{system,kube,kubepods}.slice||:'
  936. Slice=kube.slice
  937. EOF
  938. systemctl daemon-reload
  939. systemctl enable kubelet
  940. systemctl restart kubelet
  941. }
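# Quick check sketch (not executed): until `kubeadm init` or `kubeadm join`
# drops a config on the node, the kubelet restarts in a loop, so only the
# installed versions are worth verifying at this point:
#   kubeadm version -o short
#   kubelet --version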
  942. # Install haproxy
  943. function script::install_haproxy() {
  944. local api_servers="$*"
  945. if [[ "${OFFLINE_TAG:-}" != "1" ]];then
  946. [ -f /usr/bin/haproxy ] && yum remove -y haproxy
  947. yum install -y haproxy
  948. fi
  949. # /etc/haproxy/haproxy.cfg
  950. [ ! -f /etc/haproxy/haproxy.cfg_bak ] && cp /etc/haproxy/haproxy.cfg{,_bak}
  951. cat << EOF > /etc/haproxy/haproxy.cfg
  952. global
  953. log /dev/log local0
  954. log /dev/log local1 notice
  955. tune.ssl.default-dh-param 2048
  956. defaults
  957. log global
  958. mode http
  959. option dontlognull
  960. timeout connect 5000ms
  961. timeout client 600000ms
  962. timeout server 600000ms
  963. listen stats
  964. bind :19090
  965. mode http
  966. balance
  967. stats uri /haproxy_stats
  968. stats auth admin:admin123
  969. stats admin if TRUE
  970. frontend kube-apiserver-https
  971. mode tcp
  972. option tcplog
  973. bind :6443
  974. default_backend kube-apiserver-backend
  975. backend kube-apiserver-backend
  976. mode tcp
  977. balance roundrobin
  978. stick-table type ip size 200k expire 30m
  979. stick on src
  980. $(index=1;for h in $api_servers;do echo " server apiserver${index} $h:6443 check";index=$((index+1));done)
  981. EOF
  982. systemctl enable haproxy
  983. systemctl restart haproxy
  984. }
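# Usage sketch (not executed): worker nodes reach the control plane through
# this local haproxy on port 6443; the stats page listens on :19090 with the
# credentials configured above. The addresses are placeholders:
#   script::install_haproxy "192.168.1.10 192.168.1.11 192.168.1.12"
#   curl -su admin:admin123 http://127.0.0.1:19090/haproxy_stats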
  985. # Install helm
  986. function script::install_helm() {
  987. local version="${1:-3.10.1}"
  988. version="${version#-3.10.1}"
  989. local path="/tmp"
  990. cd $path
  991. # Download the tarball (CN mirror)
  992. wget https://mirrors.huaweicloud.com/helm/v$version/helm-v$version-linux-amd64.tar.gz
  993. # Extract
  994. tar -zxvf helm-v$version-linux-amd64.tar.gz
  995. # Install
  996. sudo mv linux-amd64/helm /usr/local/bin/
  997. # Clean up
  998. rm -rf helm-v$version-linux-amd64.tar.gz linux-amd64
  999. # Verify
  1000. helm version
  1001. cd ~
  1002. }
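# Usage sketch (not executed): installs the helm binary from the Huawei Cloud
# mirror into /usr/local/bin:
#   script::install_helm 3.10.1
#   helm version --short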
  1003. # Check required commands
  1004. function check::command() {
  1005. check::command_exists ssh openssh-clients
  1006. check::command_exists sshpass sshpass
  1007. check::command_exists wget wget
  1008. [[ "${OFFLINE_TAG:-}" == "1" ]] && check::command_exists tar tar
  1009. }
  1010. # Check SSH connectivity
  1011. function check::ssh_conn() {
  1012. for host in $MASTER_NODES $WORKER_NODES
  1013. do
  1014. [ "$host" == "127.0.0.1" ] && continue
  1015. command::exec "${host}" "echo 0"
  1016. check::exit_code "$?" "check" "ssh $host connection" "exit"
  1017. done
  1018. }
  1019. # Check OS support
  1020. function check::os() {
  1021. log::info "[check]" "os support: ${OS_SUPPORT}"
  1022. for host in $MASTER_NODES $WORKER_NODES
  1023. do
  1024. command::exec "${host}" "
  1025. [ -f /etc/os-release ] && source /etc/os-release
  1026. echo client_os:\${ID:-}\${VERSION_ID:-}
  1027. if [[ \"${OS_SUPPORT}\" == *\"\${ID:-}\${VERSION_ID:-}\"* ]]; then
  1028. exit 0
  1029. fi
  1030. exit 1
  1031. "
  1032. check::exit_code "$?" "check" "$host os support" "exit"
  1033. done
  1034. }
  1035. # Check OS kernel version
  1036. function check::kernel() {
  1037. local version=${1:-}
  1038. log::info "[check]" "kernel version not less than ${version}"
  1039. version=$(echo "${version}" | awk -F. '{ printf("%d%03d%03d\n", $1,$2,$3); }')
  1040. for host in $MASTER_NODES $WORKER_NODES
  1041. do
  1042. command::exec "${host}" "
  1043. kernel_version=\$(uname -r)
  1044. kernel_version=\$(echo \${kernel_version/-*} | awk -F. '{ printf(\"%d%03d%03d\n\", \$1,\$2,\$3); }')
  1045. echo kernel_version \${kernel_version}
  1046. [[ \${kernel_version} -ge ${version} ]] && exit 0 || exit 1
  1047. "
  1048. check::exit_code "$?" "check" "$host kernel version" "exit"
  1049. done
  1050. }
  1051. # Check api-server connectivity
  1052. function check::apiserver_conn() {
  1053. command::exec "${MGMT_NODE}" "kubectl get node"
  1054. check::exit_code "$?" "check" "conn apiserver" "exit"
  1055. }
  1056. # Check exit code
  1057. function check::exit_code() {
  1058. local code=${1:-}
  1059. local app=${2:-}
  1060. local desc=${3:-}
  1061. local exit_script=${4:-}
  1062. if [[ "${code}" == "0" ]]; then
  1063. log::info "[${app}]" "${desc} succeeded."
  1064. else
  1065. log::error "[${app}]" "${desc} failed."
  1066. [[ "$exit_script" == "exit" ]] && exit "$code"
  1067. fi
  1068. }
  1069. # Preflight checks
  1070. function check::preflight() {
  1071. # check command
  1072. check::command
  1073. # check ssh conn
  1074. check::ssh_conn
  1075. # check os
  1076. check::os
  1077. # check os kernel
  1078. [[ "${KUBE_NETWORK:-}" == "cilium" ]] && check::kernel 4.9.17
  1079. # check api-server conn
  1080. if [[ $(( ${ADD_TAG:-0} + ${DEL_TAG:-0} + ${UPGRADE_TAG:-0} + ${RENEW_CERT_TAG:-0} )) -gt 0 ]]; then
  1081. check::apiserver_conn
  1082. fi
  1083. }
  1084. # Install packages
  1085. function install::package() {
  1086. # Determine the latest stable k8s version
  1087. if [[ "${KUBE_CRI}" == "cri-o" && "${KUBE_CRI_VERSION}" == "latest" ]]; then
  1088. KUBE_CRI_VERSION="${KUBE_VERSION}"
  1089. if [[ "${KUBE_CRI_VERSION}" == "latest" ]]; then
  1090. if command::exec "127.0.0.1" "wget https://storage.googleapis.com/kubernetes-release/release/stable.txt -q -O -"; then
  1091. KUBE_CRI_VERSION="${COMMAND_OUTPUT#v}"
  1092. else
  1093. log::error "[install]" "get kubernetes stable version error. Please specify the version!"
  1094. exit 1
  1095. fi
  1096. fi
  1097. KUBE_CRI_VERSION="${KUBE_CRI_VERSION%.*}"
  1098. fi
  1099. # Install CRI and kube on every node
  1100. for host in $MASTER_NODES $WORKER_NODES
  1101. do
  1102. # install cri
  1103. log::info "[install]" "install ${KUBE_CRI} on $host."
  1104. command::exec "${host}" "
  1105. export OFFLINE_TAG=${OFFLINE_TAG:-0}
  1106. $(declare -f script::install_"${KUBE_CRI}")
  1107. script::install_${KUBE_CRI} $KUBE_CRI_VERSION
  1108. "
  1109. check::exit_code "$?" "install" "install ${KUBE_CRI} on $host"
  1110. # install kube
  1111. log::info "[install]" "install kube on $host"
  1112. command::exec "${host}" "
  1113. export OFFLINE_TAG=${OFFLINE_TAG:-0}
  1114. $(declare -f script::install_kube)
  1115. script::install_kube $KUBE_VERSION
  1116. "
  1117. check::exit_code "$?" "install" "install kube on $host"
  1118. done
1119. # Configure kube
  1120. local apiservers=$MASTER_NODES
  1121. if [[ "$apiservers" == "127.0.0.1" ]]; then
  1122. command::exec "${MGMT_NODE}" "ip -o route get to 8.8.8.8 | sed -n 's/.*src \([0-9.]\+\).*/\1/p'"
  1123. get::command_output "apiservers" "$?"
  1124. fi
1125. # Get api-servers info
  1126. if [[ "${ADD_TAG:-}" == "1" ]]; then
  1127. command::exec "${MGMT_NODE}" "
  1128. kubectl get node --selector='node-role.kubernetes.io/master' -o jsonpath='{$.items[*].status.addresses[?(@.type==\"InternalIP\")].address}'
  1129. "
  1130. get::command_output "apiservers" "$?"
  1131. fi
1132. # Install haproxy
  1133. for host in $WORKER_NODES
  1134. do
  1135. # install haproxy
  1136. log::info "[install]" "install haproxy on $host"
  1137. command::exec "${host}" "
  1138. export OFFLINE_TAG=${OFFLINE_TAG:-0}
  1139. $(declare -f script::install_haproxy)
  1140. script::install_haproxy \"$apiservers\"
  1141. "
  1142. check::exit_code "$?" "install" "install haproxy on $host"
  1143. done
1144. # 10-year certificates
  1145. if [[ "${CERT_YEAR_TAG:-}" == "1" ]]; then
  1146. local version="${KUBE_VERSION}"
  1147. if [[ "${version}" == "latest" ]]; then
  1148. if command::exec "127.0.0.1" "wget https://storage.googleapis.com/kubernetes-release/release/stable.txt -q -O -"; then
  1149. version="${COMMAND_OUTPUT#v}"
  1150. else
  1151. log::error "[install]" "get kubernetes stable version error. Please specify the version!"
  1152. exit 1
  1153. fi
  1154. fi
  1155. log::info "[install]" "download kubeadm 10 years certs client"
  1156. local certs_file="${OFFLINE_DIR}/bins/kubeadm-linux-amd64"
  1157. MGMT_NODE="127.0.0.1" utils::download_file "${GITHUB_PROXY}https://github.com/lework/kubeadm-certs/releases/download/v${version}/kubeadm-linux-amd64" "${certs_file}"
  1158. for host in $MASTER_NODES $WORKER_NODES
  1159. do
  1160. log::info "[install]" "scp kubeadm client to $host"
  1161. command::scp "${host}" "${certs_file}" "/tmp/kubeadm-linux-amd64"
  1162. check::exit_code "$?" "install" "scp kubeadm client to $host" "exit"
  1163. command::exec "${host}" "
  1164. set -e
  1165. if [[ -f /tmp/kubeadm-linux-amd64 ]]; then
  1166. [[ -f /usr/bin/kubeadm && ! -f /usr/bin/kubeadm_src ]] && mv -fv /usr/bin/kubeadm{,_src}
  1167. mv -fv /tmp/kubeadm-linux-amd64 /usr/bin/kubeadm
  1168. chmod +x /usr/bin/kubeadm
  1169. else
  1170. echo \"not found /tmp/kubeadm-linux-amd64\"
  1171. exit 1
  1172. fi
  1173. "
  1174. check::exit_code "$?" "install" "$host: use kubeadm 10 years certs client"
  1175. done
  1176. fi
1177. # Install helm
  1178. for host in $MASTER_NODES $WORKER_NODES
  1179. do
  1180. log::info "[install]" "install helm on $host"
  1181. command::exec "${host}" "
  1182. export OFFLINE_TAG=${OFFLINE_TAG:-0}
  1183. $(declare -f script::install_helm)
  1184. script::install_helm $HELM_VERSION
  1185. "
  1186. check::exit_code "$?" "install" "install helm on $host"
  1187. done
  1188. }
1189. # Upgrade the node kernel
  1190. function init::upgrade_kernel() {
  1191. [[ "${UPGRADE_KERNEL_TAG:-}" != "1" ]] && return
  1192. for host in $MASTER_NODES $WORKER_NODES
  1193. do
  1194. log::info "[init]" "upgrade kernel: $host"
  1195. command::exec "${host}" "
  1196. export OFFLINE_TAG=${OFFLINE_TAG:-0}
  1197. $(declare -f script::upgrade_kernel)
  1198. script::upgrade_kernel
  1199. "
  1200. check::exit_code "$?" "init" "upgrade kernel $host" "exit"
  1201. done
  1202. for host in $MASTER_NODES $WORKER_NODES
  1203. do
  1204. command::exec "${host}" "bash -c 'sleep 15 && reboot' &>/dev/null &"
  1205. check::exit_code "$?" "init" "$host: Wait for 15s to restart"
  1206. done
  1207. log::info "[notice]" "Please execute the command again!"
  1208. log::access "[command]" "bash $0 ${SCRIPT_PARAMETER// --upgrade-kernel/}"
  1209. exit 0
  1210. }
1211. # Renew node certificates
  1212. function cert::renew_node() {
  1213. local role="${1:-master}"
  1214. local hosts=""
  1215. local kubelet_config=""
  1216. command::exec "${MGMT_NODE}" "
  1217. kubectl get node --selector='node-role.kubernetes.io/${role}' -o jsonpath='{range.items[*]}{.metadata.name } {end}'
  1218. "
  1219. get::command_output "hosts" "$?"
  1220. for host in ${hosts}
  1221. do
  1222. log::info "[cert]" "drain $host"
  1223. command::exec "${MGMT_NODE}" "kubectl drain $host --force --ignore-daemonsets --delete-local-data"
  1224. check::exit_code "$?" "cert" "$host: drain"
  1225. sleep 5
  1226. if [[ "${role}" == "master" ]]; then
  1227. command::exec "${host}" "cp -rf /etc/kubernetes /etc/kubernetes_\$(date +%Y-%m-%d)"
  1228. check::exit_code "$?" "cert" "$host: backup kubernetes config"
  1229. command::exec "${host}" "kubeadm certs renew all 2>/dev/null|| kubeadm alpha certs renew all"
  1230. check::exit_code "$?" "cert" "$host: renew certs"
  1231. command::exec "${host}" "
  1232. $(declare -f utils::retry)
  1233. kill -s SIGHUP \$(pidof etcd) && \
  1234. utils::retry 10 \"echo -n | openssl s_client -connect localhost:2379 2>&1 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | openssl x509 -text -noout | grep Not\"
  1235. "
  1236. check::exit_code "$?" "cert" "$host: restart etcd"
  1237. command::exec "${host}" "
  1238. $(declare -f utils::retry)
  1239. kill -s SIGHUP \$(pidof kube-apiserver) && \
  1240. utils::retry 10 \"echo -n | openssl s_client -connect localhost:6443 2>&1 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | openssl x509 -text -noout | grep Not\"
  1241. "
  1242. check::exit_code "$?" "cert" "$host: restart kube-apiserver"
  1243. command::exec "${host}" "
  1244. $(declare -f utils::retry)
  1245. kill -s SIGHUP \$(pidof kube-controller-manager) && \
  1246. utils::retry 10 \"echo -n | openssl s_client -connect localhost:10257 2>&1 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | openssl x509 -text -noout | grep Not\"
  1247. "
  1248. check::exit_code "$?" "cert" "$host: restart kube-controller-manager"
  1249. command::exec "${host}" "
  1250. $(declare -f utils::retry)
  1251. kill -s SIGHUP \$(pidof kube-scheduler) && \
  1252. utils::retry 10 \"echo -n | openssl s_client -connect localhost:10259 2>&1 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | openssl x509 -text -noout | grep Not\"
  1253. "
  1254. check::exit_code "$?" "cert" "$host: restart kube-scheduler"
  1255. fi
  1256. log::info "[cert]" "get kubelet config"
  1257. command::exec "${MGMT_NODE}" "
  1258. kubeadm kubeconfig user --org system:nodes --client-name system:node:${host} --config /etc/kubernetes/kubeadmcfg.yaml || kubeadm alpha kubeconfig user --org system:nodes --client-name system:node:${host} --config /etc/kubernetes/kubeadmcfg.yaml
  1259. "
  1260. get::command_output "kubelet_config" "$?" "exit"
  1261. if [[ "$kubelet_config" != "" ]]; then
  1262. log::info "[cert]" "copy kubelet config"
  1263. command::exec "${host}" "
  1264. cp /etc/kubernetes/kubelet.conf /etc/kubernetes/kubelet.conf_bak
  1265. echo '$(printf "%s" "${kubelet_config}" | sed 's#https://.*:#https://127.0.0.1:#g')' > /etc/kubernetes/kubelet.conf
  1266. "
  1267. check::exit_code "$?" "cert" "$host: copy kubelet config"
  1268. command::exec "${host}" "rm -rfv /var/lib/kubelet/pki/*"
  1269. check::exit_code "$?" "cert" "$host: delete kubelet pki files"
  1270. command::exec "${host}" "
  1271. $(declare -f utils::retry)
  1272. systemctl restart kubelet && \
  1273. utils::retry 10 \"echo -n | openssl s_client -connect localhost:10250 2>&1 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | openssl x509 -text -noout | grep Not\"
  1274. "
  1275. local status="$?"
  1276. check::exit_code "${status}" "cert" "$host: restart kubelet"
  1277. if [[ "${status}" == "0" ]]; then
  1278. sleep 5
  1279. command::exec "${MGMT_NODE}" "kubectl uncordon ${host}"
  1280. check::exit_code "$?" "cert" "uncordon ${host} node"
  1281. fi
  1282. fi
  1283. done
  1284. }
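# The openssl retry loops in cert::renew_node poll each component's TLS endpoint until
# the renewed certificate is served: etcd 2379, kube-apiserver 6443,
# kube-controller-manager 10257, kube-scheduler 10259 and kubelet 10250.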
1285. # Certificate renewal
  1286. function cert::renew() {
  1287. log::info "[cert]" "renew cluster cert"
  1288. cert::renew_node "master"
  1289. cert::renew_node "worker"
  1290. log::info "[cert]" "cluster status"
  1291. command::exec "${MGMT_NODE}" "
  1292. echo
  1293. kubectl get node
  1294. echo
  1295. kubeadm certs check-expiration 2>/dev/null || kubeadm alpha certs check-expiration
  1296. " && printf "%s" "${COMMAND_OUTPUT}"
  1297. }
1298. # Initialize node configuration
  1299. function init::node_config() {
  1300. local master_index=${master_index:-1}
  1301. local worker_index=${worker_index:-1}
1302. # Get the internal IP of the MGMT_NODE host
  1303. if [[ "$MGMT_NODE" == "127.0.0.1" || "$MGMT_NODE_IP" == "" ]]; then
  1304. log::info "[init]" "Get $MGMT_NODE InternalIP."
  1305. command::exec "${MGMT_NODE}" "
  1306. ip -4 route get 8.8.8.8 2>/dev/null | head -1 | awk '{print \$7}'
  1307. "
  1308. get::command_output "MGMT_NODE_IP" "$?" "exit"
  1309. if [[ "$MGMT_NODE" != "$MGMT_NODE_IP" ]]; then
1310. log::warning "[init]" "IP addresses differ: $MGMT_NODE(MGMT_NODE) != $MGMT_NODE_IP(MGMT_NODE_IP)"
  1311. fi
  1312. else
  1313. MGMT_NODE_IP=$MGMT_NODE
  1314. fi
  1315. # master
  1316. for host in $MASTER_NODES
  1317. do
  1318. log::info "[init]" "master: $host"
  1319. command::exec "${host}" "
  1320. export OFFLINE_TAG=${OFFLINE_TAG:-0} KUBE_APISERVER=${KUBE_APISERVER} SKIP_SET_OS_REPO=${SKIP_SET_OS_REPO:-false}
  1321. $(declare -f script::init_node)
  1322. script::init_node
  1323. "
  1324. check::exit_code "$?" "init" "init master $host" "exit"
1325. # Set the hostname and hosts resolution
  1326. command::exec "${host}" "
  1327. printf \"\\n${MGMT_NODE_IP} $KUBE_APISERVER\\n$node_hosts\" >> /etc/hosts
  1328. hostnamectl set-hostname ${HOSTNAME_PREFIX}-master-node${master_index}
  1329. "
  1330. check::exit_code "$?" "init" "$host set hostname and hostname resolution"
  1331. # set audit-policy
  1332. log::info "[init]" "$host: set audit-policy file."
  1333. command::exec "${host}" "
1334. [ ! -d /etc/kubernetes ] && mkdir -p /etc/kubernetes
  1335. cat << EOF > /etc/kubernetes/audit-policy.yaml
  1336. # Log all requests at the Metadata level.
  1337. apiVersion: audit.k8s.io/v1
  1338. kind: Policy
  1339. rules:
  1340. - level: Metadata
  1341. EOF
  1342. "
  1343. check::exit_code "$?" "init" "$host: set audit-policy file" "exit"
  1344. master_index=$((master_index + 1))
  1345. done
  1346. # worker
  1347. for host in $WORKER_NODES
  1348. do
  1349. log::info "[init]" "worker: $host"
  1350. command::exec "${host}" "
  1351. export OFFLINE_TAG=${OFFLINE_TAG:-0} KUBE_APISERVER=${KUBE_APISERVER} SKIP_SET_OS_REPO=${SKIP_SET_OS_REPO:-false}
  1352. $(declare -f script::init_node)
  1353. script::init_node
  1354. "
  1355. check::exit_code "$?" "init" "init worker $host" "exit"
1356. # Set the hostname and hosts resolution
  1357. command::exec "${host}" "
  1358. printf \"\\n127.0.0.1 $KUBE_APISERVER\\n$node_hosts\" >> /etc/hosts
  1359. hostnamectl set-hostname ${HOSTNAME_PREFIX}-worker-node${worker_index}
  1360. "
  1361. worker_index=$((worker_index + 1))
  1362. done
  1363. }
1364. # Initialize nodes
  1365. function init::node() {
  1366. init::upgrade_kernel
  1367. local node_hosts=""
  1368. local i=1
  1369. for h in $MASTER_NODES
  1370. do
  1371. node_hosts="${node_hosts}\n$h ${HOSTNAME_PREFIX}-master-node${i}"
  1372. i=$((i + 1))
  1373. done
  1374. local i=1
  1375. for h in $WORKER_NODES
  1376. do
  1377. node_hosts="${node_hosts}\n$h ${HOSTNAME_PREFIX}-worker-node${i}"
  1378. i=$((i + 1))
  1379. done
  1380. init::node_config
  1381. }
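# Illustrative /etc/hosts entries built by the loops above (addresses invented,
# assuming HOSTNAME_PREFIX is "k8s"):
#   192.168.1.11 k8s-master-node1
#   192.168.1.21 k8s-worker-node1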
1382. # Initialize the nodes being added
  1383. function init::add_node() {
  1384. init::upgrade_kernel
  1385. local master_index=0
  1386. local worker_index=0
  1387. local node_hosts=""
  1388. local add_node_hosts=""
  1389. command::exec "${MGMT_NODE}" "
  1390. kubectl get node --selector='node-role.kubernetes.io/master' -o jsonpath='{range.items[*]}{.status.addresses[?(@.type==\"InternalIP\")].address } {end}' | awk '{print \$1}'
  1391. "
  1392. get::command_output "MGMT_NODE" "$?" "exit"
1393. # Get the hostnames of existing cluster nodes
  1394. command::exec "${MGMT_NODE}" "
  1395. kubectl get node -o jsonpath='{range.items[*]}{.status.addresses[?(@.type==\"InternalIP\")].address} {.metadata.name }\\n{end}'
  1396. "
  1397. get::command_output "node_hosts" "$?" "exit"
  1398. for host in $MASTER_NODES $WORKER_NODES
  1399. do
  1400. if [[ $node_hosts == *"$host"* ]]; then
  1401. log::error "[init]" "The host $host is already in the cluster!"
  1402. exit 1
  1403. fi
  1404. done
  1405. if [[ "$MASTER_NODES" != "" ]]; then
  1406. command::exec "${MGMT_NODE}" "
  1407. kubectl get node --selector='node-role.kubernetes.io/master' -o jsonpath='{\$.items[*].metadata.name}' |grep -Eo 'node[0-9]*'|grep -Eo '[0-9]*'|awk -F ' ' 'BEGIN {max = 0} {if (\$0+0 > max+0) max=\$0} END {print max}'
  1408. "
  1409. get::command_output "master_index" "$?" "exit"
  1410. master_index=$(( master_index + 1 ))
  1411. local i=$master_index
  1412. for host in $MASTER_NODES
  1413. do
  1414. add_node_hosts="${add_node_hosts}\n${host:-} ${HOSTNAME_PREFIX}-master-node${i}"
  1415. i=$((i + 1))
  1416. done
  1417. fi
  1418. if [[ "$WORKER_NODES" != "" ]]; then
  1419. command::exec "${MGMT_NODE}" "
  1420. kubectl get node --selector='node-role.kubernetes.io/worker' -o jsonpath='{\$.items[*].metadata.name}'| grep -Eo 'node[0-9]*'|grep -Eo '[0-9]*'|awk 'BEGIN {max = 0} {if (\$0+0 > max+0) max=\$0} END {print max}' || echo 0
  1421. "
  1422. get::command_output "worker_index" "$?" "exit"
  1423. worker_index=$(( worker_index + 1 ))
  1424. local i=$worker_index
  1425. for host in $WORKER_NODES
  1426. do
  1427. add_node_hosts="${add_node_hosts}\n${host:-} ${HOSTNAME_PREFIX}-worker-node${i}"
  1428. i=$((i + 1))
  1429. done
  1430. fi
1431. # Add hostname resolution for the new nodes to existing cluster nodes
  1432. for host in $(echo -ne "$node_hosts" | awk '{print $1}')
  1433. do
  1434. command::exec "${host}" "
  1435. printf \"$add_node_hosts\" >> /etc/hosts
  1436. "
  1437. check::exit_code "$?" "init" "$host add new node hostname resolution"
  1438. done
  1439. node_hosts="${node_hosts}\n${add_node_hosts}"
  1440. init::node_config
  1441. }
1442. # Cluster initialization
  1443. function kubeadm::init() {
  1444. log::info "[kubeadm init]" "kubeadm init on ${MGMT_NODE}"
  1445. log::info "[kubeadm init]" "${MGMT_NODE}: set kubeadmcfg.yaml"
  1446. command::exec "${MGMT_NODE}" "
  1447. PAUSE_VERSION=$(kubeadm config images list 2>/dev/null | awk -F: '/pause/ {print $2}')
  1448. cat << EOF > /etc/kubernetes/kubeadmcfg.yaml
  1449. ---
  1450. apiVersion: kubeadm.k8s.io/v1beta2
  1451. kind: InitConfiguration
  1452. ${kubelet_nodeRegistration}
  1453. ---
  1454. apiVersion: kubeproxy.config.k8s.io/v1alpha1
  1455. kind: KubeProxyConfiguration
  1456. mode: ipvs
  1457. ipvs:
  1458. minSyncPeriod: 5s
  1459. syncPeriod: 5s
1460. # ipvs load-balancing scheduler
  1461. scheduler: 'wrr'
  1462. ---
  1463. apiVersion: kubelet.config.k8s.io/v1beta1
  1464. kind: KubeletConfiguration
  1465. maxPods: 200
  1466. cgroupDriver: systemd
  1467. runtimeRequestTimeout: 5m
1468. # This setting allows kubelet to start even when swap is enabled
  1469. failSwapOn: false
  1470. nodeStatusUpdateFrequency: 5s
  1471. rotateCertificates: true
  1472. imageGCLowThresholdPercent: 70
  1473. imageGCHighThresholdPercent: 80
1474. # Soft eviction thresholds
  1475. evictionSoft:
  1476. imagefs.available: 15%
  1477. memory.available: 512Mi
  1478. nodefs.available: 15%
  1479. nodefs.inodesFree: 10%
1480. # How long a soft threshold must be exceeded before eviction is triggered
  1481. evictionSoftGracePeriod:
  1482. imagefs.available: 3m
  1483. memory.available: 1m
  1484. nodefs.available: 3m
  1485. nodefs.inodesFree: 1m
1486. # Hard eviction thresholds
  1487. evictionHard:
  1488. imagefs.available: 10%
  1489. memory.available: 256Mi
  1490. nodefs.available: 10%
  1491. nodefs.inodesFree: 5%
  1492. evictionMaxPodGracePeriod: 30
1493. # Node resource reservations
  1494. kubeReserved:
  1495. cpu: 200m\$(if [[ \$(cat /proc/meminfo | awk '/MemTotal/ {print \$2}') -gt 3670016 ]]; then echo -e '\n memory: 256Mi';fi)
  1496. ephemeral-storage: 1Gi
  1497. systemReserved:
  1498. cpu: 300m\$(if [[ \$(cat /proc/meminfo | awk '/MemTotal/ {print \$2}') -gt 3670016 ]]; then echo -e '\n memory: 512Mi';fi)
  1499. ephemeral-storage: 1Gi
  1500. kubeReservedCgroup: /kube.slice
  1501. systemReservedCgroup: /system.slice
  1502. enforceNodeAllocatable:
  1503. - pods
  1504. ---
  1505. apiVersion: kubeadm.k8s.io/v1beta2
  1506. kind: ClusterConfiguration
  1507. kubernetesVersion: $KUBE_VERSION
  1508. controlPlaneEndpoint: $KUBE_APISERVER:6443
  1509. networking:
  1510. dnsDomain: $KUBE_DNSDOMAIN
  1511. podSubnet: $KUBE_POD_SUBNET
  1512. serviceSubnet: $KUBE_SERVICE_SUBNET
  1513. imageRepository: $KUBE_IMAGE_REPO
  1514. apiServer:
  1515. certSANs:
  1516. - 127.0.0.1
  1517. - $KUBE_APISERVER
  1518. $(for h in $MASTER_NODES;do echo " - $h";done)
  1519. extraArgs:
  1520. event-ttl: '720h'
  1521. service-node-port-range: '30000-50000'
1522. # Audit log configuration
  1523. audit-log-maxage: '20'
  1524. audit-log-maxbackup: '10'
  1525. audit-log-maxsize: '100'
  1526. audit-log-path: /var/log/kube-audit/audit.log
  1527. audit-policy-file: /etc/kubernetes/audit-policy.yaml
  1528. extraVolumes:
  1529. - name: audit-config
  1530. hostPath: /etc/kubernetes/audit-policy.yaml
  1531. mountPath: /etc/kubernetes/audit-policy.yaml
  1532. readOnly: true
  1533. pathType: File
  1534. - name: audit-log
  1535. hostPath: /var/log/kube-audit
  1536. mountPath: /var/log/kube-audit
  1537. pathType: DirectoryOrCreate
  1538. - name: localtime
  1539. hostPath: /etc/localtime
  1540. mountPath: /etc/localtime
  1541. readOnly: true
  1542. pathType: File
  1543. controllerManager:
  1544. extraArgs:
  1545. bind-address: 0.0.0.0
  1546. node-cidr-mask-size: '24'
  1547. deployment-controller-sync-period: '10s'
  1548. node-monitor-grace-period: '20s'
  1549. pod-eviction-timeout: '2m'
  1550. terminated-pod-gc-threshold: '30'
  1551. experimental-cluster-signing-duration: 87600h
  1552. feature-gates: RotateKubeletServerCertificate=true
  1553. extraVolumes:
  1554. - hostPath: /etc/localtime
  1555. mountPath: /etc/localtime
  1556. name: localtime
  1557. readOnly: true
  1558. pathType: File
  1559. scheduler:
  1560. extraArgs:
  1561. bind-address: 0.0.0.0
  1562. extraVolumes:
  1563. - hostPath: /etc/localtime
  1564. mountPath: /etc/localtime
  1565. name: localtime
  1566. readOnly: true
  1567. pathType: File
  1568. $(if [[ "${KUBE_VERSION}" == "1.21.1" ]]; then
  1569. echo "dns:
  1570. type: CoreDNS
  1571. imageRepository: docker.io
  1572. imageTag: 1.8.0"
  1573. fi)
  1574. EOF
  1575. "
  1576. check::exit_code "$?" "kubeadm init" "${MGMT_NODE}: set kubeadmcfg.yaml" "exit"
  1577. log::info "[kubeadm init]" "${MGMT_NODE}: kubeadm init start."
  1578. command::exec "${MGMT_NODE}" "kubeadm init --config=/etc/kubernetes/kubeadmcfg.yaml --upload-certs"
  1579. check::exit_code "$?" "kubeadm init" "${MGMT_NODE}: kubeadm init" "exit"
  1580. sleep 3
  1581. log::info "[kubeadm init]" "${MGMT_NODE}: set kube config."
  1582. command::exec "${MGMT_NODE}" "
  1583. mkdir -p \$HOME/.kube
  1584. sudo cp -f /etc/kubernetes/admin.conf \$HOME/.kube/config
  1585. "
  1586. check::exit_code "$?" "kubeadm init" "${MGMT_NODE}: set kube config" "exit"
  1587. if [[ "$(echo "$MASTER_NODES" | wc -w)" == "1" ]]; then
  1588. log::info "[kubeadm init]" "${MGMT_NODE}: delete master taint"
  1589. command::exec "${MGMT_NODE}" "kubectl taint nodes --all node-role.kubernetes.io/master-"
  1590. check::exit_code "$?" "kubeadm init" "${MGMT_NODE}: delete master taint"
  1591. fi
  1592. command::exec "${MGMT_NODE}" "
  1593. kubectl create clusterrolebinding node-client-auto-approve-csr --clusterrole=system:certificates.k8s.io:certificatesigningrequests:nodeclient --user=kubelet-bootstrap
  1594. kubectl create clusterrolebinding node-client-auto-renew-crt --clusterrole=system:certificates.k8s.io:certificatesigningrequests:selfnodeclient --group=system:nodes
  1595. kubectl create clusterrolebinding node-server-auto-renew-crt --clusterrole=system:certificates.k8s.io:certificatesigningrequests:selfnodeserver --group=system:nodes
  1596. "
  1597. check::exit_code "$?" "kubeadm init" "Auto-Approve kubelet cert csr" "exit"
  1598. }
1599. # Join the cluster
  1600. function kubeadm::join() {
  1601. log::info "[kubeadm join]" "master: get join token and cert info"
  1602. command::exec "${MGMT_NODE}" "
  1603. openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
  1604. "
  1605. get::command_output "CACRT_HASH" "$?" "exit"
  1606. command::exec "${MGMT_NODE}" "
  1607. kubeadm init phase upload-certs --upload-certs --config /etc/kubernetes/kubeadmcfg.yaml 2>> /dev/null | tail -1
  1608. "
  1609. get::command_output "INTI_CERTKEY" "$?" "exit"
  1610. command::exec "${MGMT_NODE}" "
  1611. kubeadm token create
  1612. "
  1613. get::command_output "INIT_TOKEN" "$?" "exit"
  1614. command::exec "${MGMT_NODE}" "
  1615. kubeadm config images list 2>/dev/null | awk -F: '/pause/ {print \$2}'
  1616. "
  1617. get::command_output "PAUSE_VERSION" "$?"
  1618. for host in $MASTER_NODES
  1619. do
  1620. [[ "${MGMT_NODE}" == "$host" ]] && continue
  1621. log::info "[kubeadm join]" "master $host join cluster."
  1622. command::exec "${host}" "
  1623. cat << EOF > /etc/kubernetes/kubeadmcfg.yaml
  1624. ---
  1625. apiVersion: kubeadm.k8s.io/v1beta2
  1626. kind: JoinConfiguration
  1627. discovery:
  1628. bootstrapToken:
  1629. apiServerEndpoint: $KUBE_APISERVER:6443
  1630. caCertHashes:
  1631. - sha256:${CACRT_HASH:-}
  1632. token: ${INIT_TOKEN}
  1633. timeout: 5m0s
  1634. controlPlane:
  1635. certificateKey: ${INTI_CERTKEY:-}
  1636. ${kubelet_nodeRegistration}
  1637. EOF
  1638. kubeadm join --config /etc/kubernetes/kubeadmcfg.yaml
  1639. "
  1640. check::exit_code "$?" "kubeadm join" "master $host join cluster"
  1641. log::info "[kubeadm join]" "$host: set kube config."
  1642. command::exec "${host}" "
  1643. mkdir -p \$HOME/.kube
  1644. sudo cp -f /etc/kubernetes/admin.conf \$HOME/.kube/config
  1645. "
  1646. check::exit_code "$?" "kubeadm join" "$host: set kube config" "exit"
  1647. command::exec "${host}" "
  1648. sed -i 's#.*$KUBE_APISERVER#127.0.0.1 $KUBE_APISERVER#g' /etc/hosts
  1649. "
  1650. done
  1651. for host in $WORKER_NODES
  1652. do
  1653. log::info "[kubeadm join]" "worker $host join cluster."
  1654. command::exec "${host}" "
  1655. mkdir -p /etc/kubernetes/manifests
  1656. cat << EOF > /etc/kubernetes/kubeadmcfg.yaml
  1657. ---
  1658. apiVersion: kubeadm.k8s.io/v1beta2
  1659. kind: JoinConfiguration
  1660. discovery:
  1661. bootstrapToken:
  1662. apiServerEndpoint: $KUBE_APISERVER:6443
  1663. caCertHashes:
  1664. - sha256:${CACRT_HASH:-}
  1665. token: ${INIT_TOKEN}
  1666. timeout: 5m0s
  1667. ${kubelet_nodeRegistration}
  1668. EOF
  1669. kubeadm join --config /etc/kubernetes/kubeadmcfg.yaml
  1670. "
  1671. check::exit_code "$?" "kubeadm join" "worker $host join cluster"
  1672. log::info "[kubeadm join]" "set $host worker node role."
  1673. command::exec "${MGMT_NODE}" "
  1674. kubectl get node --selector='!node-role.kubernetes.io/master' | grep '<none>' | awk '{print \"kubectl label node \" \$1 \" node-role.kubernetes.io/worker= --overwrite\" }' | bash
  1675. "
  1676. check::exit_code "$?" "kubeadm join" "set $host worker node role"
  1677. done
  1678. }
1679. # Wait for resources to become ready
  1680. function kube::wait() {
  1681. local app=$1
  1682. local namespace=$2
  1683. local resource=$3
  1684. local selector=${4:-}
  1685. sleep 3
  1686. log::info "[waiting]" "waiting $app"
  1687. command::exec "${MGMT_NODE}" "
  1688. $(declare -f utils::retry)
  1689. utils::retry 6 kubectl wait --namespace ${namespace} \
  1690. --for=condition=ready ${resource} \
  1691. --selector=$selector \
  1692. --timeout=60s
  1693. "
  1694. local status="$?"
  1695. check::exit_code "$status" "waiting" "$app ${resource} ready"
  1696. return "$status"
  1697. }
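# Illustrative call (mirrors how this helper is used later in the script); it retries
# "kubectl wait" up to 6 times, each with a 60s timeout:
#   kube::wait "flannel" "kube-system" "pods" "app=flannel"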
1698. # Apply a manifest
  1699. function kube::apply() {
  1700. local file=$1
  1701. log::info "[apply]" "$file"
  1702. command::exec "${MGMT_NODE}" "
  1703. $(declare -f utils::retry)
  1704. if [ -f \"$file\" ]; then
  1705. utils::retry 6 kubectl apply --wait=true --timeout=10s -f \"$file\"
  1706. else
  1707. utils::retry 6 \"cat <<EOF | kubectl apply --wait=true --timeout=10s -f -
  1708. \$(printf \"%s\" \"${2:-}\")
  1709. EOF
  1710. \"
  1711. fi
  1712. "
  1713. local status="$?"
  1714. check::exit_code "$status" "apply" "add $file" "exit"
  1715. return "$status"
  1716. }
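# Illustrative usage: pass a manifest file path, or a short name plus an inline
# manifest when no such file exists (the namespace shown is invented):
#   kube::apply "${OFFLINE_DIR}/manifests/calico.yaml"
#   kube::apply "demo namespace" "
#   apiVersion: v1
#   kind: Namespace
#   metadata:
#     name: demo
#   "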
1717. # Cluster status
  1718. function kube::status() {
  1719. sleep 5
  1720. log::info "[cluster]" "cluster status"
  1721. command::exec "${MGMT_NODE}" "
  1722. echo
  1723. kubectl get node -o wide
  1724. echo
  1725. kubectl get pods -A
  1726. " && printf "%s" "${COMMAND_OUTPUT}"
  1727. }
1728. # Add or remove haproxy backend servers
  1729. function config::haproxy_backend() {
  1730. local action=${1:-add}
  1731. local action_cmd=""
  1732. local master_nodes
  1733. if [[ "$MASTER_NODES" == "" || "$MASTER_NODES" == "127.0.0.1" ]]; then
  1734. return
  1735. fi
  1736. command::exec "${MGMT_NODE}" "
  1737. kubectl get node --selector='node-role.kubernetes.io/master' -o jsonpath='{\$.items[*].status.addresses[?(@.type==\"InternalIP\")].address}'
  1738. "
  1739. get::command_output "master_nodes" "$?" "exit"
  1740. for m in $MASTER_NODES
  1741. do
  1742. if [[ "${action}" == "add" ]]; then
  1743. num=$(echo "${m}"| awk -F'.' '{print $4}')
  1744. action_cmd="${action_cmd}\necho \" server apiserver${num} ${m}:6443 check\" >> /etc/haproxy/haproxy.cfg"
  1745. else
  1746. [[ "${master_nodes}" == *"${m}"* ]] || return
  1747. action_cmd="${action_cmd}\n sed -i -e \"/${m}/d\" /etc/haproxy/haproxy.cfg"
  1748. fi
  1749. done
  1750. command::exec "${MGMT_NODE}" "
  1751. kubectl get node --selector='!node-role.kubernetes.io/master' -o jsonpath='{\$.items[*].status.addresses[?(@.type==\"InternalIP\")].address}'
  1752. "
  1753. get::command_output "worker_nodes" "$?"
  1754. for host in ${worker_nodes:-}
  1755. do
  1756. log::info "[config]" "worker ${host}: ${action} apiserver from haproxy"
  1757. command::exec "${host}" "
  1758. $(echo -ne "${action_cmd}")
  1759. haproxy -c -f /etc/haproxy/haproxy.cfg && systemctl reload haproxy
  1760. "
  1761. check::exit_code "$?" "config" "worker ${host}: ${action} apiserver(${m}) from haproxy"
  1762. done
  1763. }
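# Example of the backend line appended to haproxy.cfg for each new master when
# action=add (address invented); the server name uses the last octet of the master IP:
#   server apiserver11 192.168.1.11:6443 check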
1764. # Update the etcd backup replica count
  1765. function config::etcd_snapshot() {
  1766. command::exec "${MGMT_NODE}" "
  1767. count=\$(kubectl get node --selector='node-role.kubernetes.io/master' --no-headers | wc -l)
  1768. kubectl -n kube-system patch cronjobs etcd-snapshot --patch \"
  1769. spec:
  1770. jobTemplate:
  1771. spec:
  1772. completions: \${count:-1}
  1773. parallelism: \${count:-1}
  1774. \"
  1775. "
  1776. check::exit_code "$?" "config" "etcd-snapshot completions options"
  1777. }
1778. # Get the command output
  1779. function get::command_output() {
  1780. local app="$1"
  1781. local status="$2"
  1782. local is_exit="${3:-}"
  1783. if [[ "$status" == "0" && "${COMMAND_OUTPUT}" != "" ]]; then
  1784. log::info "[command]" "get $app value succeeded."
  1785. eval "$app=\"${COMMAND_OUTPUT}\""
  1786. else
  1787. log::error "[command]" "get $app value failed."
  1788. [[ "$is_exit" == "exit" ]] && exit "$status"
  1789. fi
  1790. return "$status"
  1791. }
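# Typical pattern (variable name invented): run a command via command::exec, then copy
# the captured COMMAND_OUTPUT into a named variable, exiting the script on failure:
#   command::exec "${MGMT_NODE}" "kubectl get node -o name"
#   get::command_output "node_names" "$?" "exit"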
1792. # Get the ingress connection address
  1793. function get::ingress_conn() {
  1794. local port="${1:-80}"
  1795. local ingress_name="${2:-ingress-${KUBE_INGRESS}-controller}"
  1796. command::exec "${MGMT_NODE}" "
  1797. kubectl get node -o jsonpath='{range .items[*]}{ .status.addresses[?(@.type==\"InternalIP\")].address} {.status.conditions[?(@.status == \"True\")].status}{\"\\n\"}{end}' | awk '{if(\$2==\"True\")a=\$1}END{print a}'
  1798. "
  1799. get::command_output "node_ip" "$?"
  1800. command::exec "${MGMT_NODE}" "
  1801. kubectl get svc --all-namespaces -o go-template=\"{{range .items}}{{if eq .metadata.name \\\"${ingress_name}\\\"}}{{range.spec.ports}}{{if eq .port ${port}}}{{.nodePort}}{{end}}{{end}}{{end}}{{end}}\"
  1802. "
  1803. get::command_output "node_port" "$?"
  1804. INGRESS_CONN="${node_ip:-nodeIP}:${node_port:-nodePort}"
  1805. }
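# On success INGRESS_CONN holds "<InternalIP of a Ready node>:<nodePort of the ingress
# service>", e.g. 192.168.1.11:30080 (address shown for illustration only).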
  1806. ######################################################################################################
1807. # Main invocation logic
  1808. ######################################################################################################
1809. # Add the network component
  1810. function add::network() {
  1811. if [[ "$KUBE_NETWORK" == "flannel" ]]; then
  1812. log::info "[network]" "add flannel"
  1813. local flannel_file="${OFFLINE_DIR}/manifests/kube-flannel.yml"
  1814. utils::download_file "https://cdn.jsdelivr.net/gh/coreos/flannel@v${FLANNEL_VERSION}/Documentation/kube-flannel.yml" "${flannel_file}"
  1815. command::exec "${MGMT_NODE}" "
  1816. sed -i -e 's#10.244.0.0/16#${KUBE_POD_SUBNET}#g' \
  1817. -e 's#quay.io/coreos#${KUBE_IMAGE_REPO}#g' \
  1818. -e 's#\"Type\": \"vxlan\"#\"Type\": \"${KUBE_FLANNEL_TYPE}\"#g' \"${flannel_file}\"
  1819. if [[ \"${KUBE_FLANNEL_TYPE}\" == \"vxlan\" ]]; then
  1820. sed -i 's#\"Type\": \"vxlan\"#\"Type\": \"vxlan\", \"DirectRouting\": true#g' \"${flannel_file}\"
  1821. fi
  1822. "
  1823. check::exit_code "$?" "flannel" "change flannel pod subnet"
  1824. kube::apply "${flannel_file}"
  1825. kube::wait "flannel" "kube-system" "pods" "app=flannel"
  1826. elif [[ "$KUBE_NETWORK" == "calico" ]]; then
  1827. log::info "[network]" "add calico"
  1828. utils::download_file "https://projectcalico.docs.tigera.io/archive/v${CALICO_VERSION%.*}/manifests/calico.yaml" "${OFFLINE_DIR}/manifests/calico.yaml"
  1829. utils::download_file "https://projectcalico.docs.tigera.io/archive/v${CALICO_VERSION%.*}/manifests/calicoctl.yaml" "${OFFLINE_DIR}/manifests/calicoctl.yaml"
  1830. command::exec "${MGMT_NODE}" "
  1831. sed -i \"s#:v.*#:v${CALICO_VERSION}#g\" \"${OFFLINE_DIR}/manifests/calico.yaml\"
  1832. sed -i 's#value: \"Always\"#value: \"CrossSubnet\"#g' \"${OFFLINE_DIR}/manifests/calico.yaml\"
  1833. sed -i \"s#:v.*#:v${CALICO_VERSION}#g\" \"${OFFLINE_DIR}/manifests/calicoctl.yaml\"
  1834. "
  1835. check::exit_code "$?" "network" "change calico version to ${CALICO_VERSION}"
  1836. kube::apply "${OFFLINE_DIR}/manifests/calico.yaml"
  1837. kube::apply "${OFFLINE_DIR}/manifests/calicoctl.yaml"
  1838. kube::wait "calico-kube-controllers" "kube-system" "pods" "k8s-app=calico-kube-controllers"
  1839. kube::wait "calico-node" "kube-system" "pods" "k8s-app=calico-node"
  1840. elif [[ "$KUBE_NETWORK" == "cilium" ]]; then
  1841. log::info "[network]" "add cilium"
  1842. local cilium_file="${OFFLINE_DIR}/manifests/cilium.yml"
  1843. local cilium_hubble_file="${OFFLINE_DIR}/manifests/cilium_hubble.yml"
  1844. utils::download_file "https://cdn.jsdelivr.net/gh/cilium/cilium@${CILIUM_VERSION}/install/kubernetes/quick-install.yaml" "${cilium_file}"
  1845. utils::download_file "https://cdn.jsdelivr.net/gh/cilium/cilium@${CILIUM_VERSION}/install/kubernetes/quick-hubble-install.yaml" "${cilium_hubble_file}"
  1846. local all_node=""
  1847. if [[ "${MASTER_NODES}" == "" && "${WORKER_NODES}" == "" ]]; then
  1848. command::exec "${MGMT_NODE}" "
  1849. kubectl get node -o jsonpath='{range.items[*]}{.status.addresses[?(@.type==\"InternalIP\")].address} {end}'
  1850. "
  1851. get::command_output "all_node" "$?"
  1852. else
  1853. all_node="${MASTER_NODES} ${WORKER_NODES}"
  1854. fi
  1855. for host in $all_node
  1856. do
  1857. command::exec "${host}" "mount bpffs -t bpf /sys/fs/bpf"
  1858. check::exit_code "$?" "network" "${host}: mount bpf filesystem"
  1859. done
  1860. command::exec "${MGMT_NODE}" "
  1861. sed -i \"s#10.0.0.0/8#${KUBE_POD_SUBNET}#g\" \"${cilium_file}\"
  1862. "
  1863. kube::apply "${cilium_file}"
  1864. kube::wait "cilium-node" "kube-system" "pods" "k8s-app=cilium"
  1865. kube::wait "cilium-operator" "kube-system" "pods" "name=cilium-operator"
  1866. kube::apply "${cilium_hubble_file}"
  1867. kube::wait "hubble-relay" "kube-system" "pods" "k8s-app=hubble-relay"
  1868. log::info "[monitor]" "add hubble-ui ingress"
  1869. kube::apply "hubble-ui ingress" "
  1870. ---
  1871. apiVersion: networking.k8s.io/v1
  1872. kind: Ingress
  1873. metadata:
  1874. name: hubble-ui
  1875. namespace: kube-system
  1876. annotations:
  1877. kubernetes.io/ingress.class: ${KUBE_INGRESS}
  1878. spec:
  1879. rules:
  1880. - host: hubble-ui.cluster.local
  1881. http:
  1882. paths:
  1883. - path: /
  1884. pathType: Prefix
  1885. backend:
  1886. service:
  1887. name: hubble-ui
  1888. port:
  1889. number: 80
  1890. "
  1891. # shellcheck disable=SC2181
  1892. if [[ "$?" == "0" ]]; then
  1893. get::ingress_conn
  1894. log::access "[ingress]" "curl -H 'Host:hubble-ui.cluster.local' http://${INGRESS_CONN}"
  1895. fi
  1896. else
  1897. log::warning "[network]" "No $KUBE_NETWORK config."
  1898. fi
  1899. }
1900. # Add the ingress component
1901. function add::ingress() {
1902. # Install ingress-nginx
  1903. log::info "[ingress]" "add ingress-nginx"
  1904. command::exec "${MGMT_NODE}" "
  1905. $(declare -f utils::retry)
  1906. helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
  1907. utils::retry 6 helm install ingress-nginx ingress-nginx/ingress-nginx \
  1908. --namespace ingress-nginx --create-namespace \
  1909. --version ${INGRESS_NGINX} \
  1910. --set controller.admissionWebhooks.patch.image.registry=registry.hub.docker.com \
  1911. --set controller.admissionWebhooks.patch.image.image=k8sgcrioingressnginx/kube-webhook-certgen \
  1912. --set controller.admissionWebhooks.patch.image.digest= \
  1913. --set controller.admissionWebhooks.enabled=true \
  1914. --set controller.admissionWebhooks.extraEnvs\[0\].name=\"TZ\" \
  1915. --set controller.admissionWebhooks.extraEnvs\[0\].value=\"Asia/Shanghai\" \
  1916. --set controller.kind=DaemonSet \
  1917. --set controller.replicaCount=1 \
  1918. --set controller.minAvailable=1 \
  1919. --set controller.image.registry=registry.hub.docker.com \
  1920. --set controller.image.image=k8sgcrioingressnginx/controller \
  1921. --set controller.image.digest= \
  1922. --set controller.ingressClassResource.name=nginx \
  1923. --set controller.ingressClassResource.enable=true \
  1924. --set controller.ingressClassResource.default=false \
  1925. --set controller.service.enabled=true \
  1926. --set controller.service.type=NodePort \
  1927. --set controller.service.enableHttp=true \
  1928. --set controller.service.enableHttps=true \
  1929. --set controller.service.nodePorts.http=30080 \
  1930. --set controller.service.nodePorts.https=30443 \
  1931. --set controller.extraEnvs\[0\].name=\"TZ\" \
  1932. --set controller.extraEnvs\[0\].value=\"Asia/Shanghai\" \
  1933. --set defaultBackend.image.registry=registry.hub.docker.com \
  1934. --set defaultBackend.image.image=gcmirrors/defaultbackend-amd64 \
  1935. --set defaultBackend.enabled=true \
  1936. --set defaultBackend.name=defaultbackend \
  1937. --set defaultBackend.replicaCount=1 \
  1938. --set defaultBackend.minAvailable=1 \
  1939. --set defaultBackend.extraEnvs\[0\].name=\"TZ\" \
  1940. --set defaultBackend.extraEnvs\[0\].value=\"Asia/Shanghai\" \
  1941. --set rbac.create=true \
  1942. --set serviceAccount.create=true \
  1943. --set podSecurityPolicy.enabled=true
  1944. kubectl get pod -n ingress-nginx -o wide
  1945. kubectl get svc -n ingress-nginx -o wide
  1946. "
1947. # Install nginx
  1948. log::info "[nginx]" "add nginx"
  1949. command::exec "${MGMT_NODE}" "
  1950. sudo yum -y install nginx
  1951. nginx -v
  1952. sudo systemctl enable nginx
  1953. sudo service nginx start
  1954. cat << EOF > /etc/nginx/conf.d/k8s.ingress.conf
  1955. upstream k8s-ingress {
  1956. $(for h in $MASTER_NODES $WORKER_NODES;do echo " server $h:30080 max_fails=1 fail_timeout=15s;";done)
  1957. keepalive 128;
  1958. }
  1959. server {
  1960. listen ${NGINX_HTTP_PORT};
  1961. location / {
  1962. proxy_http_version 1.1;
  1963. proxy_set_header Connection \"\";
  1964. proxy_next_upstream error;
  1965. proxy_set_header X-Real-IP \\\$remote_addr;
  1966. proxy_set_header X-Forwarded-For \\\$proxy_add_x_forwarded_for;
  1967. proxy_set_header Host \\\$http_host;
  1968. proxy_set_header X-Nginx-Proxy true;
  1969. proxy_pass http://k8s-ingress/;
  1970. }
  1971. }
  1972. EOF
  1973. sudo nginx -s reload
  1974. "
  1975. }
1976. # Add addon components
1977. function add::addon() {
1978. # TODO add addon
1979. log::warning "[TODO]" "add addon"
1980. }
1981. # Add the monitoring component
1982. function add::monitor() {
1983. # TODO add monitor
1984. log::warning "[TODO]" "add monitor"
1985. }
1986. # Add the logging component
1987. function add::log() {
1988. # TODO add log
1989. log::warning "[TODO]" "add log"
1990. }
1991. # Add storage
1992. function add::storage() {
1993. # TODO add storage
1994. log::warning "[TODO]" "add storage"
1995. }
1996. # Add the web UI
1997. function add::ui() {
1998. local path="/tmp"
1999. # Install rancher
  2000. log::info "[rancher]" "add rancher"
  2001. command::exec "${MGMT_NODE}" "
  2002. $(declare -f utils::retry)
  2003. cd ${path}
  2004. helm repo add rancher-stable https://releases.rancher.com/server-charts/stable
  2005. utils::retry 6 helm pull rancher-stable/rancher --version ${RANCHER_VERSION} --untar
  2006. cat << EOF > rancher/templates/service.yaml
  2007. apiVersion: v1
  2008. kind: Service
  2009. metadata:
  2010. name: {{ template \"rancher.fullname\" . }}
  2011. labels:
  2012. {{ include \"rancher.labels\" . | indent 4 }}
  2013. spec:
  2014. type: NodePort
  2015. ports:
  2016. - port: 80
  2017. targetPort: 80
  2018. protocol: TCP
  2019. name: http
2020. # Use a fixed nodePort
  2021. nodePort: 31080
  2022. - port: 443
  2023. targetPort: 444
  2024. protocol: TCP
  2025. name: https-internal
2026. # Use a fixed nodePort
  2027. nodePort: 31443
  2028. selector:
  2029. app: {{ template \"rancher.fullname\" . }}
  2030. EOF
  2031. helm install rancher ./rancher \
  2032. --namespace cattle-system --create-namespace \
  2033. --set replicas=1 \
  2034. --set extraEnv\[0\].name=\"TZ\" \
  2035. --set extraEnv\[0\].value=\"Asia/Shanghai\" \
  2036. --set ingress.tls.source=secret \
  2037. --set ingress.enabled=false
  2038. "
2039. log::info "[rancher]" "Get the initial password: kubectl get secret --namespace cattle-system bootstrap-secret -o go-template='{{.data.bootstrapPassword|base64decode}}{{ \"\n\" }}'"
2040. log::info "[rancher]" "Reset the initial password: kubectl -n cattle-system exec \$(kubectl -n cattle-system get pods -l app=rancher | grep '1/1' | head -1 | awk '{ print \$1 }') -- reset-password"
  2041. }
2042. # Operations tasks
  2043. function add::ops() {
  2044. local master_num
  2045. master_num=$(awk '{print NF}' <<< "${MASTER_NODES}")
  2046. log::info "[ops]" "add anti-affinity strategy to coredns"
  2047. command::exec "${MGMT_NODE}" """
  2048. kubectl -n kube-system patch deployment coredns --patch '{\"spec\": {\"template\": {\"spec\": {\"affinity\":{\"podAntiAffinity\":{\"preferredDuringSchedulingIgnoredDuringExecution\":[{\"weight\":100,\"podAffinityTerm\":{\"labelSelector\":{\"matchExpressions\":[{\"key\":\"k8s-app\",\"operator\":\"In\",\"values\":[\"kube-dns\"]}]},\"topologyKey\":\"kubernetes.io/hostname\"}}]}}}}}}' --record
  2049. """
  2050. check::exit_code "$?" "ops" "add anti-affinity strategy to coredns"
  2051. log::info "[ops]" "add etcd snapshot cronjob"
  2052. command::exec "${MGMT_NODE}" "
  2053. kubeadm config images list --config=/etc/kubernetes/kubeadmcfg.yaml 2>/dev/null | grep etcd:
  2054. "
  2055. get::command_output "etcd_image" "$?"
  2056. command::exec "${MGMT_NODE}" "
  2057. kubectl get node --selector='node-role.kubernetes.io/master' --no-headers | wc -l
  2058. "
  2059. get::command_output "master_num" "$?"
  2060. [[ "${master_num:-0}" == "0" ]] && master_num=1
  2061. kube::apply "etcd-snapshot" """
  2062. ---
  2063. apiVersion: batch/v1beta1
  2064. kind: CronJob
  2065. metadata:
  2066. name: etcd-snapshot
  2067. namespace: kube-system
  2068. spec:
  2069. schedule: '0 */6 * * *'
  2070. successfulJobsHistoryLimit: 3
  2071. suspend: false
  2072. concurrencyPolicy: Allow
  2073. failedJobsHistoryLimit: 3
  2074. jobTemplate:
  2075. spec:
  2076. backoffLimit: 6
  2077. parallelism: ${master_num}
  2078. completions: ${master_num}
  2079. template:
  2080. metadata:
  2081. labels:
  2082. app: etcd-snapshot
  2083. spec:
  2084. affinity:
  2085. podAntiAffinity:
  2086. requiredDuringSchedulingIgnoredDuringExecution:
  2087. - labelSelector:
  2088. matchExpressions:
  2089. - key: app
  2090. operator: In
  2091. values:
  2092. - etcd-snapshot
  2093. topologyKey: 'kubernetes.io/hostname'
  2094. containers:
  2095. - name: etcd-snapshot
  2096. image: ${etcd_image:-${KUBE_IMAGE_REPO}/etcd:3.4.13-0}
  2097. imagePullPolicy: IfNotPresent
  2098. args:
  2099. - -c
  2100. - etcdctl --endpoints=https://127.0.0.1:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt
  2101. --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key
  2102. snapshot save /backup/etcd-snapshot-\\\\\\\$(date +%Y-%m-%d_%H:%M:%S_%Z).db
  2103. && echo 'delete old backups' && { find /backup -type f -mtime +30 -exec rm -fv {} \\; || echo error; }
  2104. command:
  2105. - /usr/bin/bash
  2106. env:
  2107. - name: ETCDCTL_API
  2108. value: '3'
  2109. resources: {}
  2110. terminationMessagePath: /dev/termination-log
  2111. terminationMessagePolicy: File
  2112. volumeMounts:
  2113. - name: etcd-certs
  2114. mountPath: /etc/kubernetes/pki/etcd
  2115. readOnly: true
  2116. - name: backup
  2117. mountPath: /backup
  2118. - name: etc
  2119. mountPath: /etc
  2120. - name: bin
  2121. mountPath: /usr/bin
  2122. - name: lib64
  2123. mountPath: /lib64
  2124. dnsPolicy: ClusterFirst
  2125. hostNetwork: true
  2126. nodeSelector:
  2127. node-role.kubernetes.io/master: ''
  2128. tolerations:
  2129. - effect: NoSchedule
  2130. operator: Exists
  2131. restartPolicy: OnFailure
  2132. schedulerName: default-scheduler
  2133. securityContext: {}
  2134. terminationGracePeriodSeconds: 30
  2135. volumes:
  2136. - name: etcd-certs
  2137. hostPath:
  2138. path: /etc/kubernetes/pki/etcd
  2139. type: DirectoryOrCreate
  2140. - name: backup
  2141. hostPath:
  2142. path: /var/lib/etcd/backups
  2143. type: DirectoryOrCreate
  2144. - name: etc
  2145. hostPath:
  2146. path: /etc
  2147. - name: bin
  2148. hostPath:
  2149. path: /usr/bin
  2150. - name: lib64
  2151. hostPath:
  2152. path: /lib64
  2153. """
  2154. # shellcheck disable=SC2181
  2155. [[ "$?" == "0" ]] && log::access "[ops]" "etcd backup directory: /var/lib/etcd/backups"
  2156. command::exec "${MGMT_NODE}" "
  2157. jobname=\"etcd-snapshot-$(date +%s)\"
  2158. kubectl create job --from=cronjob/etcd-snapshot \${jobname} -n kube-system && \
  2159. kubectl wait --for=condition=complete job/\${jobname} -n kube-system
  2160. "
  2161. check::exit_code "$?" "ops" "trigger etcd backup"
  2162. }
2163. # Reset a node
  2164. function reset::node() {
  2165. local host=$1
  2166. log::info "[reset]" "node $host"
  2167. command::exec "${host}" "
  2168. set +ex
  2169. cri_socket=\"\"
  2170. [ -S /var/run/crio/crio.sock ] && cri_socket=\"--cri-socket /var/run/crio/crio.sock\"
  2171. [ -S /run/containerd/containerd.sock ] && cri_socket=\"--cri-socket /run/containerd/containerd.sock\"
  2172. kubeadm reset -f \$cri_socket
  2173. [ -f \"\$(which kubelet)\" ] && { systemctl stop kubelet; find /var/lib/kubelet | xargs -n 1 findmnt -n -o TARGET -T | sort | uniq | xargs -r umount -v; yum remove -y kubeadm kubelet kubectl; }
  2174. [ -d /etc/kubernetes ] && rm -rf /etc/kubernetes/* /var/lib/kubelet/* /var/lib/etcd/* \$HOME/.kube /etc/cni/net.d/* /var/lib/dockershim/* /var/lib/cni/* /var/run/kubernetes/*
  2175. [ -f \"\$(which docker)\" ] && { docker rm -f -v \$(docker ps | grep kube | awk '{print \$1}'); systemctl stop docker; rm -rf \$HOME/.docker /etc/docker/* /var/lib/docker/*; yum remove -y docker; }
  2176. [ -f \"\$(which containerd)\" ] && { crictl rm \$(crictl ps -a -q); systemctl stop containerd; rm -rf /etc/containerd/* /var/lib/containerd/*; yum remove -y containerd.io; }
  2177. [ -f \"\$(which crio)\" ] && { crictl rm \$(crictl ps -a -q); systemctl stop crio; rm -rf /etc/crictl.yaml /etc/crio/* /var/run/crio/*; yum remove -y cri-o; }
  2178. [ -f \"\$(which runc)\" ] && { find /run/containers/ /var/lib/containers/ | xargs -n 1 findmnt -n -o TARGET -T | sort | uniq | xargs -r umount -v; rm -rf /var/lib/containers/* /var/run/containers/*; yum remove -y runc; }
  2179. [ -f \"\$(which haproxy)\" ] && { systemctl stop haproxy; rm -rf /etc/haproxy/*; yum remove -y haproxy; }
  2180. sed -i -e \"/$KUBE_APISERVER/d\" -e '/-worker-/d' -e '/-master-/d' /etc/hosts
  2181. sed -i '/## Kainstall managed start/,/## Kainstall managed end/d' /etc/security/limits.conf /etc/systemd/system.conf /etc/bashrc /etc/rc.local /etc/audit/rules.d/audit.rules
  2182. [ -d /var/lib/elasticsearch ] && rm -rf /var/lib/elasticsearch/*
  2183. [ -d /var/lib/longhorn ] && rm -rf /var/lib/longhorn/*
  2184. [ -d \"${OFFLINE_DIR:-/tmp/abc}\" ] && rm -rf \"${OFFLINE_DIR:-/tmp/abc}\"
  2185. for repo in kubernetes.repo docker-ce.repo devel_kubic_libcontainers_stable.repo elrepo.repo
  2186. do
  2187. [ -f /etc/yum.repos.d/\${repo} ] && rm -f /etc/yum.repos.d/\${repo}
  2188. done
  2189. ipvsadm --clear
  2190. iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
  2191. for int in kube-ipvs0 cni0 docker0 dummy0 flannel.1 cilium_host cilium_net cilium_vxlan lxc_health nodelocaldns
  2192. do
  2193. [ -d /sys/class/net/\${int} ] && ip link delete \${int}
  2194. done
  2195. modprobe -r ipip
  2196. echo done.
  2197. "
  2198. check::exit_code "$?" "reset" "$host: reset"
  2199. }
2200. # Reset all nodes
  2201. function reset::cluster() {
  2202. local all_node=""
  2203. command::exec "${MGMT_NODE}" "
  2204. kubectl get node -o jsonpath='{range.items[*]}{.status.addresses[?(@.type==\"InternalIP\")].address} {end}'
  2205. "
  2206. get::command_output "all_node" "$?"
  2207. all_node=$(echo "${WORKER_NODES} ${MASTER_NODES} ${all_node}" | awk '{for (i=1;i<=NF;i++) if (!a[$i]++) printf("%s%s",$i,FS)}')
  2208. for host in $all_node
  2209. do
  2210. reset::node "$host"
  2211. done
  2212. }
2213. # Load the offline package onto nodes
  2214. function offline::load() {
  2215. local role="${1:-}"
  2216. local hosts=""
  2217. if [[ "${role}" == "master" ]]; then
  2218. hosts="${MASTER_NODES}"
  2219. elif [[ "${role}" == "worker" ]]; then
  2220. hosts="${WORKER_NODES}"
  2221. fi
  2222. for host in ${hosts}
  2223. do
  2224. log::info "[offline]" "${role} ${host}: load offline file"
  2225. command::exec "${host}" "[[ ! -d \"${OFFLINE_DIR}\" ]] && { mkdir -pv \"${OFFLINE_DIR}\"; chmod 777 \"${OFFLINE_DIR}\"; } ||:"
  2226. check::exit_code "$?" "offline" "$host: mkdir offline dir" "exit"
  2227. if [[ "${UPGRADE_KERNEL_TAG:-}" == "1" ]]; then
  2228. command::scp "${host}" "${TMP_DIR}/packages/kernel/*" "${OFFLINE_DIR}"
  2229. check::exit_code "$?" "offline" "scp kernel file to $host" "exit"
  2230. else
  2231. log::info "[offline]" "${role} ${host}: copy offline file"
  2232. command::scp "${host}" "${TMP_DIR}/packages/kubeadm/*" "${OFFLINE_DIR}"
  2233. check::exit_code "$?" "offline" "scp kube file to $host" "exit"
  2234. command::scp "${host}" "${TMP_DIR}/packages/all/*" "${OFFLINE_DIR}"
  2235. check::exit_code "$?" "offline" "scp all file to $host" "exit"
  2236. if [[ "${role}" == "worker" ]]; then
  2237. command::scp "${host}" "${TMP_DIR}/packages/worker/*" "${OFFLINE_DIR}"
  2238. check::exit_code "$?" "offline" "scp worker file to $host" "exit"
  2239. fi
  2240. command::scp "${host}" "${TMP_DIR}/images/${role}.tgz" "${OFFLINE_DIR}"
  2241. check::exit_code "$?" "offline" "scp ${role} images to $host" "exit"
  2242. command::scp "${host}" "${TMP_DIR}/images/all.tgz" "${OFFLINE_DIR}"
  2243. check::exit_code "$?" "offline" "scp all images to $host" "exit"
  2244. fi
  2245. log::info "[offline]" "${role} ${host}: install package"
  2246. command::exec "${host}" "yum localinstall -y --skip-broken ${OFFLINE_DIR}/*.rpm"
  2247. check::exit_code "$?" "offline" "${role} ${host}: install package" "exit"
  2248. if [[ "${UPGRADE_KERNEL_TAG:-}" != "1" ]]; then
  2249. command::exec "${host}" "
  2250. set -e
  2251. for target in firewalld python-firewall firewalld-filesystem iptables; do
  2252. systemctl stop \$target &>/dev/null || true
  2253. systemctl disable \$target &>/dev/null || true
  2254. done
  2255. systemctl start docker && \
  2256. cd ${OFFLINE_DIR} && \
  2257. gzip -d -c ${1}.tgz | docker load && gzip -d -c all.tgz | docker load
  2258. "
  2259. check::exit_code "$?" "offline" "$host: load images" "exit"
  2260. fi
  2261. command::exec "${host}" "rm -rf ${OFFLINE_DIR:-/tmp/abc}"
  2262. check::exit_code "$?" "offline" "$host: clean offline file"
  2263. done
  2264. command::scp "${MGMT_NODE}" "${TMP_DIR}/manifests" "${OFFLINE_DIR}"
  2265. check::exit_code "$?" "offline" "scp manifests file to ${MGMT_NODE}" "exit"
  2266. command::scp "${MGMT_NODE}" "${TMP_DIR}/bins" "${OFFLINE_DIR}"
  2267. check::exit_code "$?" "offline" "scp bins file to ${MGMT_NODE}" "exit"
  2268. }
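# Offline package layout assumed by the scp calls above, relative to TMP_DIR
# (reconstructed from this function, not an authoritative spec):
#   packages/kernel/*  packages/kubeadm/*  packages/all/*  packages/worker/*
#   images/master.tgz  images/worker.tgz  images/all.tgz
#   manifests/  bins/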
2269. # Load the offline package onto cluster nodes
  2270. function offline::cluster() {
  2271. [ ! -f "${OFFLINE_FILE}" ] && { log::error "[offline]" "not found ${OFFLINE_FILE}" ; exit 1; }
  2272. log::info "[offline]" "Unzip offline package on local."
  2273. tar zxf "${OFFLINE_FILE}" -C "${TMP_DIR}/"
  2274. check::exit_code "$?" "offline" "Unzip offline package"
  2275. offline::load "master"
  2276. offline::load "worker"
  2277. }
2278. # Initialize the cluster
2279. function init::cluster() {
2280. MGMT_NODE=$(echo "${MASTER_NODES}" | awk '{print $1}')
2281. # Load the offline package
2282. [[ "${OFFLINE_TAG:-}" == "1" ]] && offline::cluster
2283. # 1. Initialize nodes
2284. #init::node
2285. # 2. Install packages
2286. #install::package
2287. # 3. Initialize kubeadm
2288. #kubeadm::init
2289. # 4. Join the cluster
2290. #kubeadm::join
2291. # 5. Add network
2292. #add::network
2293. # 6. Install addons
2294. #add::addon
2295. # 7. Add ingress
2296. #add::ingress
2297. # 8. Add storage
2298. [[ "${STORAGE_TAG:-}" == "1" ]] && add::storage
2299. # 9. Add web ui
2300. add::ui
2301. # 10. Add monitor
2302. [[ "${MONITOR_TAG:-}" == "1" ]] && add::monitor
2303. # 11. Add log
2304. [[ "${LOG_TAG:-}" == "1" ]] && add::log
2305. # 12. Operations tasks
2306. #add::ops
2307. # 13. Check cluster status
2308. kube::status
  2309. }
2310. # Add nodes
  2311. function add::node() {
2312. # Load the offline package
  2313. [[ "${OFFLINE_TAG:-}" == "1" ]] && offline::cluster
2314. # If KUBE_VERSION is not specified, use the cluster's current version
  2315. if [[ "${KUBE_VERSION}" == "" || "${KUBE_VERSION}" == "latest" ]]; then
  2316. command::exec "${MGMT_NODE}" "
  2317. kubectl get node --selector='node-role.kubernetes.io/master' -o jsonpath='{range.items[*]}{.status.nodeInfo.kubeletVersion } {end}' | awk -F'v| ' '{print \$2}'
  2318. "
  2319. get::command_output "KUBE_VERSION" "$?" "exit"
  2320. fi
2321. # 1. Initialize nodes
2322. init::add_node
2323. # 2. Install packages
2324. install::package
2325. # 3. Join the cluster
2326. kubeadm::join
2327. # 4. Add the apiserver to haproxy
2328. config::haproxy_backend "add"
2329. # 5. Update etcd snapshot replicas
2330. config::etcd_snapshot
2331. # 6. Check cluster status
  2332. kube::status
  2333. }
2334. # Delete nodes
  2335. function del::node() {
  2336. config::haproxy_backend "remove"
  2337. local cluster_nodes=""
  2338. local del_hosts_cmd=""
  2339. command::exec "${MGMT_NODE}" "
  2340. kubectl get node -o jsonpath='{range.items[*]}{.status.addresses[?(@.type==\"InternalIP\")].address} {.metadata.name }\\n{end}'
  2341. "
  2342. get::command_output "cluster_nodes" "$?" exit
  2343. for host in $MASTER_NODES
  2344. do
  2345. command::exec "${MGMT_NODE}" "
  2346. etcd_pod=\$(kubectl -n kube-system get pods -l component=etcd --field-selector=status.phase=Running -o jsonpath='{\$.items[0].metadata.name}')
  2347. etcd_node=\$(kubectl -n kube-system exec \$etcd_pod -- sh -c \"export ETCDCTL_API=3 ETCDCTL_CACERT=/etc/kubernetes/pki/etcd/ca.crt ETCDCTL_CERT=/etc/kubernetes/pki/etcd/server.crt ETCDCTL_KEY=/etc/kubernetes/pki/etcd/server.key ETCDCTL_ENDPOINTS=https://127.0.0.1:2379; etcdctl member list\"| grep $host | awk -F, '{print \$1}')
  2348. echo \"\$etcd_pod \$etcd_node\"
  2349. kubectl -n kube-system exec \$etcd_pod -- sh -c \"export ETCDCTL_API=3 ETCDCTL_CACERT=/etc/kubernetes/pki/etcd/ca.crt ETCDCTL_CERT=/etc/kubernetes/pki/etcd/server.crt ETCDCTL_KEY=/etc/kubernetes/pki/etcd/server.key ETCDCTL_ENDPOINTS=https://127.0.0.1:2379; etcdctl member remove \$etcd_node; etcdctl member list\"
  2350. "
  2351. check::exit_code "$?" "del" "remove $host etcd member"
  2352. done
  2353. for host in $MASTER_NODES $WORKER_NODES
  2354. do
  2355. log::info "[del]" "node $host"
  2356. local node_name; node_name=$(echo -ne "${cluster_nodes}" | grep "${host}" | awk '{print $2}')
  2357. if [[ "${node_name}" == "" ]]; then
  2358. log::warning "[del]" "node $host not found."
  2359. read -r -t 10 -n 1 -p "Do you need to reset the node (y/n)? " answer
  2360. [[ -z "$answer" || "$answer" != "y" ]] && exit || echo
  2361. else
  2362. log::info "[del]" "drain $host"
  2363. command::exec "${MGMT_NODE}" "kubectl drain $node_name --force --ignore-daemonsets --delete-local-data"
  2364. check::exit_code "$?" "del" "$host: drain"
  2365. log::info "[del]" "delete node $host"
  2366. command::exec "${MGMT_NODE}" "kubectl delete node $node_name"
  2367. check::exit_code "$?" "del" "$host: delete"
  2368. sleep 3
  2369. fi
  2370. reset::node "$host"
  2371. del_hosts_cmd="${del_hosts_cmd}\nsed -i "/$host/d" /etc/hosts"
  2372. done
  2373. for host in $(echo -ne "${cluster_nodes}" | awk '{print $1}')
  2374. do
  2375. log::info "[del]" "$host: remove del node hostname resolution"
  2376. command::exec "${host}" "
  2377. $(echo -ne "${del_hosts_cmd}")
  2378. "
  2379. check::exit_code "$?" "del" "remove del node hostname resolution"
  2380. done
  2381. [ "$MASTER_NODES" != "" ] && config::etcd_snapshot
  2382. kube::status
  2383. }
2384. # Upgrade the cluster
  2385. function upgrade::cluster() {
  2386. log::info "[upgrade]" "upgrade to $KUBE_VERSION"
  2387. log::info "[upgrade]" "backup cluster"
  2388. add::ops
  2389. local stable_version="2"
  2390. command::exec "127.0.0.1" "wget https://storage.googleapis.com/kubernetes-release/release/stable.txt -q -O -"
  2391. get::command_output "stable_version" "$?" && stable_version="${stable_version#v}"
  2392. local node_hosts="$MASTER_NODES $WORKER_NODES"
  2393. if [[ "$node_hosts" == " " ]]; then
  2394. command::exec "${MGMT_NODE}" "
  2395. kubectl get node -o jsonpath='{range.items[*]}{.metadata.name } {end}'
  2396. "
  2397. get::command_output "node_hosts" "$?" exit
  2398. fi
  2399. local skip_plan=${SKIP_UPGRADE_PLAN,,}
  2400. for host in ${node_hosts}
  2401. do
  2402. log::info "[upgrade]" "node: $host"
  2403. local local_version=""
  2404. command::exec "${host}" "kubectl version --client --short | awk '{print \$3}'"
  2405. get::command_output "local_version" "$?" && local_version="${local_version#v}"
  2406. if [[ "${KUBE_VERSION}" != "latest" ]]; then
  2407. if [[ "${KUBE_VERSION}" == "${local_version}" ]];then
  2408. log::warning "[check]" "The specified version(${KUBE_VERSION}) is consistent with the local version(${local_version})!"
  2409. continue
  2410. fi
  2411. if [[ $(utils::version_to_number "$KUBE_VERSION") -lt $(utils::version_to_number "${local_version}") ]];then
  2412. log::warning "[check]" "The specified version($KUBE_VERSION) is less than the local version(${local_version})!"
  2413. continue
  2414. fi
  2415. if [[ $(utils::version_to_number "$KUBE_VERSION") -gt $(utils::version_to_number "${stable_version}") ]];then
  2416. log::warning "[check]" "The specified version($KUBE_VERSION) is more than the stable version(${stable_version})!"
  2417. continue
  2418. fi
  2419. else
  2420. if [[ $(utils::version_to_number "${local_version}") -ge $(utils::version_to_number "${stable_version}") ]];then
  2421. log::warning "[check]" "The local version($local_version) is greater or equal to the stable version(${stable_version})!"
  2422. continue
  2423. fi
  2424. fi
  2425. command::exec "${MGMT_NODE}" "kubectl drain ${host} --ignore-daemonsets --delete-local-data"
  2426. check::exit_code "$?" "upgrade" "drain ${host} node" "exit"
  2427. sleep 5
    if [[ "${skip_plan}" == "false" ]]; then
      command::exec "${host}" "$(declare -f script::upgrage_kube); script::upgrage_kube 'init' '$KUBE_VERSION'"
      check::exit_code "$?" "upgrade" "plan and upgrade cluster on ${host}" "exit"
      command::exec "${host}" "$(declare -f utils::retry); utils::retry 10 kubectl get node"
      check::exit_code "$?" "upgrade" "${host}: upgrade" "exit"
      skip_plan=true
    else
      command::exec "${host}" "$(declare -f script::upgrage_kube); script::upgrage_kube 'node' '$KUBE_VERSION'"
      check::exit_code "$?" "upgrade" "upgrade ${host} node" "exit"
    fi

    command::exec "${MGMT_NODE}" "kubectl wait --for=condition=Ready node/${host} --timeout=120s"
    check::exit_code "$?" "upgrade" "${host} ready"
    sleep 5
    command::exec "${MGMT_NODE}" "$(declare -f utils::retry); utils::retry 6 kubectl uncordon ${host}"
    check::exit_code "$?" "upgrade" "uncordon ${host} node"
    sleep 5
  done

  kube::status
}
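# Example (illustrative) for the upgrade command:
#   bash 02k8s-install-centos.sh upgrade --version 1.20.4 --user root --password 123456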
# Update this script file
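# Note: this overwrites $0 in place with the jsdelivr mirror of
# lework/kainstall@master, so any local modifications are lost.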
function update::self() {
  log::info "[update]" "download kainstall script to $0"
  command::exec "127.0.0.1" "
    wget --timeout=10 --waitretry=3 --tries=5 --retry-connrefused https://cdn.jsdelivr.net/gh/lework/kainstall@master/kainstall-centos.sh -O /tmp/kainstall-centos.sh || exit 1
    /bin/mv -fv /tmp/kainstall-centos.sh \"$0\"
    chmod +x \"$0\"
  "
  check::exit_code "$?" "update" "kainstall script"
}
# Transform input data and enforce limits
function transform::data() {
  MASTER_NODES=$(echo "${MASTER_NODES}" | tr ',' ' ')
  WORKER_NODES=$(echo "${WORKER_NODES}" | tr ',' ' ')

  if ! utils::is_element_in_array "$KUBE_CRI" docker containerd cri-o ; then
    log::error "[limit]" "$KUBE_CRI is not supported, only [docker,containerd,cri-o]"
    exit 1
  fi

  [[ "$KUBE_CRI" != "docker" && "${OFFLINE_TAG:-}" == "1" ]] && { log::error "[limit]" "$KUBE_CRI is not supported offline, only docker"; exit 1; }
  [[ "$KUBE_CRI" == "containerd" && "${KUBE_CRI_ENDPOINT}" == "/var/run/dockershim.sock" ]] && KUBE_CRI_ENDPOINT="unix:///run/containerd/containerd.sock"
  [[ "$KUBE_CRI" == "cri-o" && "${KUBE_CRI_ENDPOINT}" == "/var/run/dockershim.sock" ]] && KUBE_CRI_ENDPOINT="unix:///var/run/crio/crio.sock"
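  # YAML snippet with the runtime-specific kubelet node registration; it is
  # presumably spliced into the kubeadm init/join configuration rendered
  # elsewhere in this script.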
  kubelet_nodeRegistration="nodeRegistration:
  criSocket: ${KUBE_CRI_ENDPOINT:-/var/run/dockershim.sock}
  kubeletExtraArgs:
    runtime-cgroups: /system.slice/${KUBE_CRI//-/}.service
    pod-infra-container-image: ${KUBE_IMAGE_REPO}/pause:${PAUSE_VERSION:-3.6}
"
}
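# Rough illustration of what the snippet expands to with --cri containerd
# (KUBE_IMAGE_REPO shown as a placeholder for the configured registry):
#   nodeRegistration:
#     criSocket: unix:///run/containerd/containerd.sock
#     kubeletExtraArgs:
#       runtime-cgroups: /system.slice/containerd.service
#       pod-infra-container-image: <KUBE_IMAGE_REPO>/pause:3.6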
# Usage help
function help::usage() {
  cat << EOF
Install a Kubernetes cluster using kubeadm.

Usage:
  $(basename "$0") [command]

Available Commands:
  init          Initialize a Kubernetes cluster
  reset         Reset the Kubernetes cluster
  add           Add nodes to the cluster
  del           Remove nodes from the cluster
  renew-cert    Renew all available certificates
  upgrade       Upgrade the kubeadm cluster
  update        Update this script file

Flags:
  -m,--master           master node(s), comma separated, default: ''
  -w,--worker           worker node(s), comma separated, default: ''
  -u,--user             ssh user, default: ${SSH_USER}
  -p,--password         ssh password
     --private-key      ssh private key
  -P,--port             ssh port, default: ${SSH_PORT}
  -v,--version          kube version, default: ${KUBE_VERSION}
  -n,--network          cluster network, choose: [flannel,calico,cilium], default: ${KUBE_NETWORK}
  -i,--ingress          ingress controller, choose: [nginx], default: ${KUBE_INGRESS}
  -ui,--ui              cluster web ui, use: [rancher]
  -a,--addon            cluster addon, use: []
  -M,--monitor          cluster monitoring, use: [prometheus]
  -l,--log              cluster logging, choose: [elasticsearch]
  -s,--storage          cluster storage, choose: [rook]
     --cri              cri tools, choose: [docker,containerd,cri-o], default: ${KUBE_CRI}
     --cri-version      cri version, default: ${KUBE_CRI_VERSION}
     --cri-endpoint     cri endpoint, default: ${KUBE_CRI_ENDPOINT}
  -U,--upgrade-kernel   upgrade the kernel
  -of,--offline-file    offline file to load
     --10years          issue certificates valid for 10 years
     --sudo             sudo mode
     --sudo-user        sudo user
     --sudo-password    sudo user password
Example:
  [init cluster]
  $0 init \\
    --master 192.168.77.130,192.168.77.131,192.168.77.132 \\
    --worker 192.168.77.133,192.168.77.134,192.168.77.135 \\
    --user root \\
    --password 123456 \\
    --version 1.20.4

  [reset cluster]
  $0 reset \\
    --user root \\
    --password 123456

  [add node]
  $0 add \\
    --master 192.168.77.140,192.168.77.141 \\
    --worker 192.168.77.143,192.168.77.144 \\
    --user root \\
    --password 123456 \\
    --version 1.20.4

  [del node]
  $0 del \\
    --master 192.168.77.140,192.168.77.141 \\
    --worker 192.168.77.143,192.168.77.144 \\
    --user root \\
    --password 123456

  [other]
  $0 renew-cert --user root --password 123456
  $0 upgrade --version 1.20.4 --user root --password 123456
  $0 update
  $0 add --ingress traefik
  $0 add --monitor prometheus
  $0 add --log elasticsearch
  $0 add --storage rook
  $0 add --ui dashboard
  $0 add --addon nodelocaldns
EOF
  exit 1
}
######################################################################################################
# main
######################################################################################################
[ "$#" == "0" ] && help::usage

while [ "${1:-}" != "" ]; do
  case $1 in
    init ) INIT_TAG=1
      ;;
    reset ) RESET_TAG=1
      ;;
    add ) ADD_TAG=1
      ;;
    del ) DEL_TAG=1
      ;;
    renew-cert ) RENEW_CERT_TAG=1
      ;;
    upgrade ) UPGRADE_TAG=1
      ;;
    update ) UPDATE_TAG=1
      ;;
    -m | --master ) shift
      MASTER_NODES=${1:-$MASTER_NODES}
      ;;
    -w | --worker ) shift
      WORKER_NODES=${1:-$WORKER_NODES}
      ;;
    -u | --user ) shift
      SSH_USER=${1:-$SSH_USER}
      ;;
    -p | --password ) shift
      SSH_PASSWORD=${1:-$SSH_PASSWORD}
      ;;
    --private-key ) shift
      SSH_PRIVATE_KEY=${1:-$SSH_PRIVATE_KEY}
      ;;
    -P | --port ) shift
      SSH_PORT=${1:-$SSH_PORT}
      ;;
    -v | --version ) shift
      KUBE_VERSION=${1:-$KUBE_VERSION}
      ;;
    -n | --network ) shift
      NETWORK_TAG=1
      KUBE_NETWORK=${1:-$KUBE_NETWORK}
      ;;
    -i | --ingress ) shift
      INGRESS_TAG=1
      KUBE_INGRESS=${1:-$KUBE_INGRESS}
      ;;
    -M | --monitor ) shift
      MONITOR_TAG=1
      KUBE_MONITOR=${1:-$KUBE_MONITOR}
      ;;
    -l | --log ) shift
      LOG_TAG=1
      KUBE_LOG=${1:-$KUBE_LOG}
      ;;
    -s | --storage ) shift
      STORAGE_TAG=1
      KUBE_STORAGE=${1:-$KUBE_STORAGE}
      ;;
    -ui | --ui ) shift
      UI_TAG=1
      ;;
    -a | --addon ) shift
      ADDON_TAG=1
      ;;
    --cri ) shift
      KUBE_CRI=${1:-$KUBE_CRI}
      ;;
    --cri-version ) shift
      KUBE_CRI_VERSION=${1:-$KUBE_CRI_VERSION}
      ;;
    --cri-endpoint ) shift
      KUBE_CRI_ENDPOINT=${1:-$KUBE_CRI_ENDPOINT}
      ;;
    -U | --upgrade-kernel ) UPGRADE_KERNEL_TAG=1
      ;;
    --mgmt-node-ip ) MGMT_NODE_IP="first_master_ip"
      ;;
    -of | --offline-file ) shift
      OFFLINE_TAG=1
      OFFLINE_FILE=${1:-$OFFLINE_FILE}
      ;;
    --10years ) CERT_YEAR_TAG=1
      ;;
    --sudo ) SUDO_TAG=1
      ;;
    --sudo-user ) shift
      SUDO_USER=${1:-$SUDO_USER}
      ;;
    --sudo-password ) shift
      SUDO_PASSWORD=${1:-}
      ;;
    * ) help::usage
      exit 1
  esac
  shift
done
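# Note: flags that take a value fall back to their current defaults when the
# value is missing or empty (the ${1:-...} pattern above).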
# Start
log::info "[start]" "bash $0 ${SCRIPT_PARAMETER//${SSH_PASSWORD:-${SUDO_PASSWORD:-}}/zzzzzz}"

# Transform data
transform::data

# Preflight checks
check::preflight

# Actions
if [[ "${INIT_TAG:-}" == "1" ]]; then
  [[ "$MASTER_NODES" == "" ]] && MASTER_NODES="127.0.0.1"
  init::cluster
elif [[ "${ADD_TAG:-}" == "1" ]]; then
  [[ "${NETWORK_TAG:-}" == "1" ]] && { add::network; add=1; }
  [[ "${INGRESS_TAG:-}" == "1" ]] && { add::ingress; add=1; }
  [[ "${STORAGE_TAG:-}" == "1" ]] && { add::storage; add=1; }
  [[ "${MONITOR_TAG:-}" == "1" ]] && { add::monitor; add=1; }
  [[ "${LOG_TAG:-}" == "1" ]] && { add::log; add=1; }
  [[ "${UI_TAG:-}" == "1" ]] && { add::ui; add=1; }
  [[ "${ADDON_TAG:-}" == "1" ]] && { add::addon; add=1; }
  [[ "$MASTER_NODES" != "" || "$WORKER_NODES" != "" ]] && { add::node; add=1; }
  [[ "${add:-}" != "1" ]] && help::usage
elif [[ "${DEL_TAG:-}" == "1" ]]; then
  if [[ "$MASTER_NODES" != "" || "$WORKER_NODES" != "" ]]; then del::node; else help::usage; fi
elif [[ "${RESET_TAG:-}" == "1" ]]; then
  reset::cluster
elif [[ "${RENEW_CERT_TAG:-}" == "1" ]]; then
  cert::renew
elif [[ "${UPGRADE_TAG:-}" == "1" ]]; then
  upgrade::cluster
elif [[ "${UPDATE_TAG:-}" == "1" ]]; then
  update::self
else
  help::usage
fi

# bash <(curl -s http://git.yvanui.com/lizhiwei/jztd-deploy/raw/master/sd_dsl/02k8s-install-centos.sh) [cmd]