1. Overview

This document describes how to deploy a Prometheus server in a Kubernetes cluster as the data-collection server for monitoring, which makes it easy to collect and monitor cluster, pod, and node metrics.

2. Download the image and push it to the local Harbor registry

First pull the Prometheus server image and push it into the local image registry.

Image version:

docker pull prom/prometheus:v2.33.1
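
To push the image to the local Harbor registry, a minimal sketch, assuming Harbor is reachable at 172.20.58.152 (the registry address used by the image reference in the Deployment below):

docker tag prom/prometheus:v2.33.1 172.20.58.152/prom/prometheus:v2.33.1
docker push 172.20.58.152/prom/prometheus:v2.33.1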

3. Add a label to the node

To make sure the Prometheus data stays on the same host after a restart or rescheduling, you can label a node and pin the pod to it via that label; a PVC could of course be used instead (see the sketch after the labeling step below).

3.1. View the node labels

[root@nccztsjb-node-23 ~]# kubectl get nodes --show-labels
NAME               STATUS   ROLES                  AGE   VERSION   LABELS
nccztsjb-node-23   Ready    control-plane,master   23d   v1.23.2   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=nccztsjb-node-23,kubernetes.io/os=linux,node-role.kubernetes.io/control-plane=,node-role.kubernetes.io/master=,node.kubernetes.io/exclude-from-external-load-balancers=
nccztsjb-node-24   Ready    <none>                 23d   v1.23.2   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=nccztsjb-node-24,kubernetes.io/os=linux
nccztsjb-node-25   Ready    ingress                23d   v1.23.2   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=nccztsjb-node-25,kubernetes.io/os=linux,node-role.kubernetes.io/ingress=
[root@nccztsjb-node-23 ~]# 

3.2. Add the label to nccztsjb-node-25

[root@nccztsjb-node-23 ~]# kubectl label node nccztsjb-node-25 node-role.kubernetes.io/prometheus-server=
node/nccztsjb-node-25 labeled
[root@nccztsjb-node-23 ~]# kubectl get nodes
NAME               STATUS   ROLES                       AGE   VERSION
nccztsjb-node-23   Ready    control-plane,master        23d   v1.23.2
nccztsjb-node-24   Ready    <none>                      23d   v1.23.2
nccztsjb-node-25   Ready    ingress,prometheus-server   23d   v1.23.2

The label has been added.
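
For reference, a rough sketch of the PVC alternative mentioned in section 3, assuming the cluster has a usable StorageClass (the name standard below is hypothetical); the claim would then replace the hostPath volume in the Deployment of section 4:

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: prometheus-data
  namespace: monitoring
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: standard   # hypothetical StorageClass name, adjust to the cluster
  resources:
    requests:
      storage: 10Gi

With a PVC, pinning the pod to one node would no longer be needed for data persistence.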

4. Create the YAML manifest

cat >prometheus.yaml <<EOF
---
# Create the namespace
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
---
# Create the ServiceAccount
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitoring
---
# Bind the ServiceAccount to the cluster-admin ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: monitoring
---
# Create the ConfigMap with the Prometheus configuration; this example scrapes cAdvisor (container) metrics
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  labels:
    name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |-
    global:
      scrape_interval: 5s
      evaluation_interval: 5s
    rule_files:
      - /etc/prometheus/prometheus.rules
    alerting:
      alertmanagers:
      - scheme: http
        static_configs:
        - targets:
          - "alertmanager.monitoring.svc:9093"

    scrape_configs:
      - job_name: "kubernetes-cadvisor"

        scrape_interval: 10s
        scrape_timeout: 10s

        scheme: https  # required, otherwise the default http scheme is used

        metrics_path: /metrics/cadvisor

        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true

        authorization:
          credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token

        kubernetes_sd_configs:
          - role: node

        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
---
# Create the Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: prometheus
  replicas: 1
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      securityContext:
        runAsUser: 0
        runAsGroup: 0
        fsGroup: 0
      serviceAccountName: prometheus
      containers:
      - name: prometheus
        image: 172.20.58.152/prom/prometheus:v2.33.1
        ports:
        - containerPort: 9090
          name: default
        resources:
          requests:
            cpu: 500m
            memory: 500M
          limits:
            cpu: 1
            memory: 1Gi
        volumeMounts:
        - name: config-volume
          mountPath: /etc/prometheus
        - name: prometheus-data
          mountPath: /prometheus
      volumes:
      - name: config-volume
        configMap:
         name: prometheus-config
      - name: prometheus-data  # store the data on the node's local disk
        hostPath:
          path: /data/prometheus/data
          type: ""
      nodeSelector:
        node-role.kubernetes.io/prometheus-server: "" # the label added in section 3
---
# Create a Service for external access on port 30909
kind: Service
apiVersion: v1
metadata:
  name: prometheus
  namespace: monitoring
spec:
  selector:
    app: prometheus
  type: LoadBalancer
  ports:
  - protocol: TCP
    port: 9090
    targetPort: 9090
    nodePort: 30909
EOF
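
The hostPath volume above uses type: "", so no checks are performed before mounting; depending on the container runtime the directory may not be created automatically. To be safe, create the data directory on nccztsjb-node-25 beforehand (or switch the hostPath type to DirectoryOrCreate):

mkdir -p /data/prometheus/data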

5. Install the Deployment

Deploy the resources using the YAML manifest created in section 4.

[root@nccztsjb-node-23 ~]# kubectl apply -f prometheus.yaml
namespace/monitoring created
serviceaccount/prometheus created
clusterrolebinding.rbac.authorization.k8s.io/prometheus created
configmap/prometheus-config created
deployment.apps/prometheus created
service/prometheus created

Check the created pod:

[root@nccztsjb-node-23 ~]# kubectl get pod -n monitoring -o wide
NAME                          READY   STATUS    RESTARTS   AGE   IP             NODE               NOMINATED NODE   READINESS GATES
prometheus-55b9769449-2tq4l   1/1     Running   0          51s   172.39.21.81   nccztsjb-node-25   <none>           <none>
[root@nccztsjb-node-23 ~]# 

The pod is running, and it is scheduled on node nccztsjb-node-25.
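
If the pod does not reach Running, the container log usually shows the cause (for example a permission problem on the hostPath directory or an error in prometheus.yml):

kubectl logs -n monitoring deploy/prometheus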

6. Access Prometheus

Prometheus ships with a built-in web UI, which can be reached directly through the Service.

Check the Service:

[root@nccztsjb-node-23 ~]# kubectl get svc -n monitoring
NAME         TYPE           CLUSTER-IP     EXTERNAL-IP   PORT(S)          AGE
prometheus   LoadBalancer   10.98.172.41   <pending>     9090:30909/TCP   2m28s
[root@nccztsjb-node-23 ~]# 

The EXTERNAL-IP stays <pending> because there is no cloud load-balancer controller, so use any node IP on port 30909 to reach the UI.
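
A quick reachability check from any machine that can reach the nodes (<node-ip> is a placeholder for a real node IP):

curl http://<node-ip>:30909/-/healthy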

Status -> Targets shows the scrape targets.

OK, the configured job and its targets are visible.
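
The same target information is also exposed through the HTTP API, for example:

curl http://<node-ip>:30909/api/v1/targets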

That is the whole process of deploying a Prometheus server in Kubernetes. To change the prometheus.yml configuration, just edit the ConfigMap.
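
Note that this Deployment does not pass --web.enable-lifecycle, and Prometheus only re-reads its configuration on startup or on SIGHUP, so after editing the ConfigMap the simplest way to pick up the change is to restart the pod, for example:

kubectl edit configmap prometheus-config -n monitoring
kubectl rollout restart deployment/prometheus -n monitoring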

The deployment of node exporter and visualization with Grafana will be covered in later posts.
