1.在管理节点上,创建放置配置文件的目录并进入该目录,用 ceph-deploy 执行如下步骤
mkdir /opt/cluster-ceph
cd /opt/cluster-ceph
ceph-deploy new master1 master2 master3
2.添加epel源
yum install -y yum-utils && yum-config-manager --add-repo https://dl.fedoraproject.org/pub/epel/7/x86_64/ && yum install --nogpgcheck -y epel-release && rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-EPEL-7 && rm -f /etc/yum.repos.d/dl.fedoraproject.org*
直接进行ceph的安装,会报如下的错误:
--> Finished Dependency Resolution
Error: Package: 2:ceph-common-10.2.11-0.el7.x86_64 (ceph)
Requires: libbabeltrace-ctf.so.1()(64bit)
Error: Package: 2:ceph-osd-10.2.11-0.el7.x86_64 (ceph)
Requires: libleveldb.so.1()(64bit)
Error: Package: 2:ceph-mon-10.2.11-0.el7.x86_64 (ceph)
Requires: libleveldb.so.1()(64bit)
Error: Package: 2:librbd1-10.2.11-0.el7.x86_64 (ceph)
Requires: liblttng-ust.so.0()(64bit)
Error: Package: 2:ceph-base-10.2.11-0.el7.x86_64 (ceph)
Requires: liblttng-ust.so.0()(64bit)
Error: Package: 2:librgw2-10.2.11-0.el7.x86_64 (ceph)
Requires: libfcgi.so.0()(64bit)
Error: Package: 2:ceph-common-10.2.11-0.el7.x86_64 (ceph)
Requires: libbabeltrace.so.1()(64bit)
Error: Package: 2:librados2-10.2.11-0.el7.x86_64 (ceph)
Requires: liblttng-ust.so.0()(64bit)
3. 安装 Ceph(先把依赖包与 ceph 的 rpm 一并下载到本地)
[root@localhost ~]# yum install --downloadonly --downloaddir=/tmp/ceph ceph
在每台主机上安装ceph
[root@localhost ~]# yum localinstall -C -y --disablerepo=* /tmp/ceph/*.rpm
配置初始 monitor(s)、并收集所有密钥
# 请务必在 ceph-cluster 目录下
[root@admin ceph-cluster]# ceph-deploy mon create-initial
初始化 ceph.osd 节点
创建存储空间
[root@osd1 ~]# mkdir -p /data/ceph-osd
[root@osd1 ~]# chown ceph.ceph /data/ceph-osd/ -R
[root@osd2 ~]# mkdir -p /data/ceph-osd
[root@osd2 ~]# chown ceph.ceph /data/ceph-osd/ -R
[root@osd3 ~]# mkdir -p /data/ceph-osd
[root@osd3 ~]# chown ceph.ceph /data/ceph-osd/ -R
[root@osd4 ~]# mkdir -p /data/ceph-osd
[root@osd4 ~]# chown ceph.ceph /data/ceph-osd/ -R
创建OSD:
[root@admin ceph-cluster]# ceph-deploy osd prepare node1:/data/ceph-osd node2:/data/ceph-osd node3:/data/ceph-osd node4:/data/ceph-osd
激活 OSD
[root@admin ceph-cluster]# ceph-deploy osd activate node1:/data/ceph-osd node2:/data/ceph-osd node3:/data/ceph-osd node4:/data/ceph-osd
用 ceph-deploy 把配置文件和 admin 密钥拷贝到管理节点和 Ceph 节点,这样你每次执行 Ceph 命令行时就无需指定 monitor 地址和 ceph.client.admin.keyring 了
[root@admin ceph-cluster]# ceph-deploy admin master1 master2 master3 node1 node2 node3 node4
确保你对 ceph.client.admin.keyring 有正确的操作权限。
chmod +r /etc/ceph/ceph.client.admin.keyring (所有机器)
如果配置文件更改,需要同步配置文件到所有节点
[root@admin ceph-cluster]# ceph-deploy --overwrite-conf admin master1 master2 master3 node1 node2 node3 node4
官网的zookeeper yaml,去掉了亲和性
apiVersion: v1
kind: Service
metadata:
namespace: testsubject
name: zk-hs
labels:
app: zk
spec:
ports:
- port: 2888
name: server
- port: 3888
name: leader-election
clusterIP: None
selector:
app: zk
---
apiVersion: v1
kind: Service
metadata:
namespace: testsubject
name: zk-cs
labels:
app: zk
spec:
ports:
- port: 2181
name: client
selector:
app: zk
---
apiVersion: policy/v1beta1
kind: PodDisruptionBudget
metadata:
namespace: testsubject
name: zk-pdb
spec:
selector:
matchLabels:
app: zk
maxUnavailable: 1
---
apiVersion: apps/v1beta2
kind: StatefulSet
metadata:
namespace: testsubject
name: zk
spec:
selector:
matchLabels:
app: zk
serviceName: zk-hs
replicas: 3
updateStrategy:
type: RollingUpdate
podManagementPolicy: Parallel
template:
metadata:
labels:
app: zk
spec:
containers:
- name: kubernetes-zookeeper
imagePullPolicy: Always
image: "192.168.200.10/senyint/kubernetes-zookeeper:1.0-3.4.10"
resources:
requests:
memory: "4Gi"
cpu: "1"
ports:
- containerPort: 2181
name: client
- containerPort: 2888
name: server
- containerPort: 3888
name: leader-election
command:
- sh
- -c
- "start-zookeeper \
--servers=3 \
--data_dir=/var/lib/zookeeper/data \
--data_log_dir=/var/lib/zookeeper/data/log \
--conf_dir=/opt/zookeeper/conf \
--client_port=2181 \
--election_port=3888 \
--server_port=2888 \
--tick_time=2000 \
--init_limit=10 \
--sync_limit=5 \
--heap=512M \
--max_client_cnxns=60 \
--snap_retain_count=3 \
--purge_interval=12 \
--max_session_timeout=40000 \
--min_session_timeout=4000 \
--log_level=OFF"
readinessProbe:
exec:
command:
- sh
- -c
- "zookeeper-ready 2181"
initialDelaySeconds: 10
timeoutSeconds: 5
livenessProbe:
exec:
command:
- sh
- -c
- "zookeeper-ready 2181"
initialDelaySeconds: 10
timeoutSeconds: 5
volumeMounts:
- name: datazk
mountPath: /var/lib/zookeeper
volumeClaimTemplates:
- metadata:
name: datazk
spec:
accessModes: [ "ReadWriteOnce" ]
storageClassName: ceph-rbd-database
resources:
requests:
storage: 20Gi
ceph-secret.yaml
# Ceph admin keyring for the rbd provisioner, scoped to the testsubject
# namespace. `key` is the base64-encoded output of `ceph auth get-key client.admin`.
apiVersion: v1
kind: Secret
metadata:
  name: ceph-secret-admin-testsubject
  namespace: testsubject
type: "kubernetes.io/rbd"
data:
  key: QVFERkcvQmF5ckFkSnhBQVVkM2VCdC82K3dOTnZIM3V0ZHpnTnc9PQo=
rbd-storage-data-class.yaml
# StorageClass used by the zookeeper volumeClaimTemplates above.
# NOTE(review): StorageClass is cluster-scoped; the namespace field here is
# ignored by the API server — secret lookup uses adminSecretNamespace instead.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ceph-rbd-database
  namespace: testsubject
provisioner: kubernetes.io/rbd
parameters:
  monitors: 192.168.200.11:6789,192.168.200.12:6789,192.168.200.13:6789
  adminId: admin
  adminSecretName: ceph-secret-admin-testsubject
  adminSecretNamespace: "testsubject"
  pool: fengjian
  userId: admin
  userSecretName: ceph-secret-admin-testsubject
  imageFormat: "2"
  imageFeatures: "layering"
不建立pv, 直接 使用storageclass,然后建立pvc, deployment 指定 claimName
ceph-secret.yaml
# Ceph admin keyring (default namespace) for the ceph-rbd-provisioner StorageClass.
apiVersion: v1
kind: Secret
metadata:
  name: ceph-secret-admin
type: "kubernetes.io/rbd"
data:
  key: QVFERkcvQmF5ckFkSnhBQVVkM2VCdC82K3dOTnZIM3V0ZHpnTnc9PQo=
rbd-storage-data-class.yaml
# StorageClass for the kafka StatefulSet. The original paste collapsed all
# nine `parameters` keys onto one line, which is invalid YAML — restored to
# one key per line.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ceph-rbd-provisioner
provisioner: kubernetes.io/rbd
parameters:
  monitors: 192.168.200.11:6789,192.168.200.12:6789,192.168.200.13:6789
  adminId: admin
  adminSecretName: ceph-secret-admin
  adminSecretNamespace: default
  pool: fengjian
  userId: admin
  userSecretName: ceph-secret-admin
  imageFormat: "2"
  imageFeatures: "layering"
redis.yaml
# Single-replica redis master Deployment + Service; data persisted via the
# pre-created PVC redis-master-rbd-pvc (provisioned by the StorageClass, no
# manual PV needed).
apiVersion: apps/v1 # for versions before 1.9.0 use apps/v1beta2
kind: Deployment
metadata:
  name: redis-master
spec:
  selector:
    matchLabels:
      app: redis
      role: master
      tier: backend
  replicas: 1
  template:
    metadata:
      labels:
        app: redis
        role: master
        tier: backend
    spec:
      containers:
      - name: master
        image: 192.168.200.10/redis/redis:master
        resources:
          requests:
            cpu: 100m
            memory: 100Mi
        ports:
        - containerPort: 6379
        volumeMounts:
        - name: datadir
          mountPath: /data
      volumes:
      - name: datadir
        persistentVolumeClaim:
          claimName: redis-master-rbd-pvc
---
apiVersion: v1
kind: Service
metadata:
  name: redis-master
  labels:
    app: redis
    role: master
    tier: backend
spec:
  ports:
  - port: 6379
    targetPort: 6379
  selector:
    app: redis
    role: master
    tier: backend
kafka配置文件
参考 : https://kow3ns.github.io/kubernetes-kafka/manifests/
[root@master1 ceph_rbd]# cat kafka.yaml
# Kafka: headless Service, PDB, and a 3-replica StatefulSet.
# Anti-affinity spreads brokers across nodes; soft affinity co-locates
# brokers with zk pods. Storage comes from the ceph-rbd-provisioner class.
apiVersion: v1
kind: Service
metadata:
  name: kafka-hs
  labels:
    app: kafka
spec:
  ports:
  - port: 9093
    name: server
  clusterIP: None
  selector:
    app: kafka
---
apiVersion: policy/v1beta1
kind: PodDisruptionBudget
metadata:
  name: kafka-pdb
spec:
  selector:
    matchLabels:
      app: kafka
  maxUnavailable: 1
---
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
  name: kafka
spec:
  serviceName: kafka-hs
  replicas: 3
  podManagementPolicy: Parallel
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: kafka
    spec:
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: "app"
                operator: In
                values:
                - kafka
            topologyKey: "kubernetes.io/hostname"
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 1
            podAffinityTerm:
              labelSelector:
                matchExpressions:
                - key: "app"
                  operator: In
                  values:
                  - zk
              topologyKey: "kubernetes.io/hostname"
      terminationGracePeriodSeconds: 300
      containers:
      - name: k8skafka
        imagePullPolicy: Always
        image: 192.168.200.10/source/kubernetes-kafka:1.0-10.2.1
        resources:
          requests:
            memory: "12Gi"
            cpu: 4
        ports:
        - containerPort: 9093
          name: server
        # broker.id is derived from the ordinal in the pod hostname (kafka-0 -> 0).
        command:
        - sh
        - -c
        - "exec kafka-server-start.sh /opt/kafka/config/server.properties --override broker.id=${HOSTNAME##*-} \
          --override listeners=PLAINTEXT://:9093 \
          --override zookeeper.connect=zk-cs.default.svc.cluster.local:2181 \
          --override log.dir=/var/lib/kafka \
          --override auto.create.topics.enable=true \
          --override auto.leader.rebalance.enable=true \
          --override background.threads=10 \
          --override compression.type=producer \
          --override delete.topic.enable=false \
          --override leader.imbalance.check.interval.seconds=300 \
          --override leader.imbalance.per.broker.percentage=10 \
          --override log.flush.interval.messages=9223372036854775807 \
          --override log.flush.offset.checkpoint.interval.ms=60000 \
          --override log.flush.scheduler.interval.ms=9223372036854775807 \
          --override log.retention.bytes=-1 \
          --override log.retention.hours=168 \
          --override log.roll.hours=168 \
          --override log.roll.jitter.hours=0 \
          --override log.segment.bytes=1073741824 \
          --override log.segment.delete.delay.ms=60000 \
          --override message.max.bytes=1000012 \
          --override min.insync.replicas=1 \
          --override num.io.threads=8 \
          --override num.network.threads=3 \
          --override num.recovery.threads.per.data.dir=1 \
          --override num.replica.fetchers=1 \
          --override offset.metadata.max.bytes=4096 \
          --override offsets.commit.required.acks=-1 \
          --override offsets.commit.timeout.ms=5000 \
          --override offsets.load.buffer.size=5242880 \
          --override offsets.retention.check.interval.ms=600000 \
          --override offsets.retention.minutes=1440 \
          --override offsets.topic.compression.codec=0 \
          --override offsets.topic.num.partitions=50 \
          --override offsets.topic.replication.factor=3 \
          --override offsets.topic.segment.bytes=104857600 \
          --override queued.max.requests=500 \
          --override quota.consumer.default=9223372036854775807 \
          --override quota.producer.default=9223372036854775807 \
          --override replica.fetch.min.bytes=1 \
          --override replica.fetch.wait.max.ms=500 \
          --override replica.high.watermark.checkpoint.interval.ms=5000 \
          --override replica.lag.time.max.ms=10000 \
          --override replica.socket.receive.buffer.bytes=65536 \
          --override replica.socket.timeout.ms=30000 \
          --override request.timeout.ms=30000 \
          --override socket.receive.buffer.bytes=102400 \
          --override socket.request.max.bytes=104857600 \
          --override socket.send.buffer.bytes=102400 \
          --override unclean.leader.election.enable=true \
          --override zookeeper.session.timeout.ms=6000 \
          --override zookeeper.set.acl=false \
          --override broker.id.generation.enable=true \
          --override connections.max.idle.ms=600000 \
          --override controlled.shutdown.enable=true \
          --override controlled.shutdown.max.retries=3 \
          --override controlled.shutdown.retry.backoff.ms=5000 \
          --override controller.socket.timeout.ms=30000 \
          --override default.replication.factor=1 \
          --override fetch.purgatory.purge.interval.requests=1000 \
          --override group.max.session.timeout.ms=300000 \
          --override group.min.session.timeout.ms=6000 \
          --override inter.broker.protocol.version=0.10.2-IV0 \
          --override log.cleaner.backoff.ms=15000 \
          --override log.cleaner.dedupe.buffer.size=134217728 \
          --override log.cleaner.delete.retention.ms=86400000 \
          --override log.cleaner.enable=true \
          --override log.cleaner.io.buffer.load.factor=0.9 \
          --override log.cleaner.io.buffer.size=524288 \
          --override log.cleaner.io.max.bytes.per.second=1.7976931348623157E308 \
          --override log.cleaner.min.cleanable.ratio=0.5 \
          --override log.cleaner.min.compaction.lag.ms=0 \
          --override log.cleaner.threads=1 \
          --override log.cleanup.policy=delete \
          --override log.index.interval.bytes=4096 \
          --override log.index.size.max.bytes=10485760 \
          --override log.message.timestamp.difference.max.ms=9223372036854775807 \
          --override log.message.timestamp.type=CreateTime \
          --override log.preallocate=false \
          --override log.retention.check.interval.ms=300000 \
          --override max.connections.per.ip=2147483647 \
          --override num.partitions=1 \
          --override producer.purgatory.purge.interval.requests=1000 \
          --override replica.fetch.backoff.ms=1000 \
          --override replica.fetch.max.bytes=1048576 \
          --override replica.fetch.response.max.bytes=10485760 \
          --override reserved.broker.max.id=1000 "
        env:
        - name: KAFKA_HEAP_OPTS
          value: "-Xmx2G -Xms2G"
        - name: KAFKA_OPTS
          value: "-Dlogging.level=INFO"
        volumeMounts:
        - name: datadir
          mountPath: /var/lib/kafka
        readinessProbe:
          tcpSocket:
            port: 9093
          initialDelaySeconds: 30
          periodSeconds: 10
      securityContext:
        runAsUser: 1000
        fsGroup: 1000
  volumeClaimTemplates:
  - metadata:
      name: datadir
      annotations:
        volume.beta.kubernetes.io/storage-class: "ceph-rbd-provisioner"
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 100Gi
zookeeper.yaml
https://github.com/kow3ns
# Upstream (kow3ns) ZooKeeper manifests, default namespace, with pod
# anti-affinity kept. Uses the default StorageClass (no storageClassName).
apiVersion: v1
kind: Service
metadata:
  name: zk-hs
  labels:
    app: zk
spec:
  ports:
  - port: 2888
    name: server
  - port: 3888
    name: leader-election
  clusterIP: None
  selector:
    app: zk
---
apiVersion: v1
kind: Service
metadata:
  name: zk-cs
  labels:
    app: zk
spec:
  ports:
  - port: 2181
    name: client
  selector:
    app: zk
---
apiVersion: policy/v1beta1
kind: PodDisruptionBudget
metadata:
  name: zk-pdb
spec:
  selector:
    matchLabels:
      app: zk
  maxUnavailable: 1
---
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
  name: zk
spec:
  serviceName: zk-hs
  replicas: 3
  podManagementPolicy: Parallel
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: zk
    spec:
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: "app"
                operator: In
                values:
                - zk
            topologyKey: "kubernetes.io/hostname"
      containers:
      - name: kubernetes-zookeeper
        imagePullPolicy: Always
        image: "gcr.io/google_containers/kubernetes-zookeeper:1.0-3.4.10"
        resources:
          requests:
            memory: "4Gi"
            cpu: "2"
        ports:
        - containerPort: 2181
          name: client
        - containerPort: 2888
          name: server
        - containerPort: 3888
          name: leader-election
        command:
        - sh
        - -c
        - "start-zookeeper \
          --servers=3 \
          --data_dir=/var/lib/zookeeper/data \
          --data_log_dir=/var/lib/zookeeper/data/log \
          --conf_dir=/opt/zookeeper/conf \
          --client_port=2181 \
          --election_port=3888 \
          --server_port=2888 \
          --tick_time=2000 \
          --init_limit=10 \
          --sync_limit=5 \
          --heap=3G \
          --max_client_cnxns=60 \
          --snap_retain_count=3 \
          --purge_interval=12 \
          --max_session_timeout=40000 \
          --min_session_timeout=4000 \
          --log_level=INFO"
        readinessProbe:
          exec:
            command:
            - sh
            - -c
            - "zookeeper-ready 2181"
          initialDelaySeconds: 10
          timeoutSeconds: 5
        livenessProbe:
          exec:
            command:
            - sh
            - -c
            - "zookeeper-ready 2181"
          initialDelaySeconds: 10
          timeoutSeconds: 5
        volumeMounts:
        - name: datadir
          mountPath: /var/lib/zookeeper
      securityContext:
        runAsUser: 1000
        fsGroup: 1000
  volumeClaimTemplates:
  - metadata:
      name: datadir
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 250Gi