vsm add new osd workflow
Backend retrieves the available devices
|
Select an available device and fill in the OSD-related information
|
Click the Add button: the OSD to be deployed is appended to the list of items to deploy
|
Click the Submit button: the devices in the item list are added to the ceph cluster
Below we analyze the backend flow that retrieves the available devices, and the backend flows triggered by the Add and Submit buttons.
Code analysis: backend flow for retrieving available devices
Choosing a server in the Select Server dropdown triggers the JS function ChangeServer
virtual-storage-manager-2.2.0\source\vsm-dashboard\static\dashboard\js\addosd.js
function ChangeServer(){
//reset the add osd form
ResetForm();
server = Server.Create();
//Update the upload field post url
//$formFileUpload.action="/dashboard/vsm/devices-management/add_new_osd2/?service_id="+server.server_id;
//Update the OSD form data
PostData("get_available_disks",{"server_id":server.node_id});
}
ChangeServer calls PostData to fetch the available disks.
URL:
/dashboard/vsm/devices-management/get_available_disks/
Code: source\vsm-dashboard\vsm_dashboard\dashboards\vsm\devices-management\views.py:get_available_disks
def get_available_disks(request):
search_opts = json.loads(request.body)
server_id = search_opts["server_id"]
ret = vsmapi.get_available_disks(request, search_opts={
"server_id": server_id
,"result_mode":"get_disks",
})
disks = ret['disks']
if disks is None:disks=[]
disk_data = json.dumps(disks)
return HttpResponse(disk_data)
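For reference, the PostData helper simply posts a JSON body to this URL. A minimal sketch of the same request with the Python requests library (the host, session cookie and CSRF token are placeholders, not from the source) would be:
# Minimal sketch, assuming an authenticated dashboard session; host and tokens are placeholders.
import json
import requests

url = "http://vsm-dashboard.example.com/dashboard/vsm/devices-management/get_available_disks/"
payload = {"server_id": 1}   # node_id of the server chosen in the Select Server dropdown
resp = requests.post(url,
                     data=json.dumps(payload),
                     cookies={"sessionid": "<session>"},      # placeholder auth cookie
                     headers={"X-CSRFToken": "<csrf-token>"}) # placeholder CSRF token
# The view returns json.dumps(disks), i.e. a list of
# {"disk_name": ..., "by_path": ..., "by_uuid": ...} dictionaries.
print(resp.json())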
views.py:get_available_disks then calls source\vsm-dashboard\vsm_dashboard\api\vsm.py:get_available_disks
Code:
source\vsm-dashboard\vsm_dashboard\api\vsm.py:get_available_disks
def get_available_disks(request, search_opts):
return vsmclient(request).devices.get_available_disks( search_opts=search_opts)
Control then passes to the vsmclient, at virtual-storage-manager-2.2.0\source\python-vsmclient\vsmclient\v1\devices.py:DeviceManager.get_available_disks
In get_available_disks, a REST request is sent to vsm-api to fetch the available disks
vsm\vsm\api\v1\devices.py:Controller.get_available_disks
def get_available_disks(self,req,):
context = req.environ['vsm.context']
server_id = req.GET.get('server_id',None)
body = {'server_id':server_id}
disk_list = self.scheduler_api.get_available_disks(context, body)
return {"available_disks":disk_list}
Controller.get_available_disks then calls
source\vsm\vsm\scheduler\api.py:API.get_available_disks -> source\vsm\vsm\scheduler\rpcapi.py:SchedulerAPI.get_available_disks
def get_available_disks(self, ctxt, body=None):
return self.call(ctxt, self.make_msg('get_available_disks', body=body))
In this way the vsm-api process makes a remote call to get_available_disks in the scheduler process, landing in
source\vsm\vsm\scheduler\manager.py:SchedulerManager.get_available_disks. In SchedulerManager.get_available_disks:
1. First get the server_id from the request body.
2. Query the DB for the server's status using server_id.
3. If the server status is Active, call source\vsm\vsm\agent\rpcapi.py:AgentAPI.get_available_disks
SchedulerManager.get_available_disks:
def get_available_disks(self, context, body):
server_id = body['server_id']
server = db.init_node_get_by_id(context,id=server_id)
status = server['status']
if status == 'Active':
res = self._agent_rpcapi.get_available_disks(context,server['host'])
return res
AgentAPI.get_available_disks:
def get_available_disks(self, context, host):
topic = rpc.queue_get_for(context, self.topic, host)
res = self.call(context,
self.make_msg('get_available_disks'),
topic, version='1.0', timeout=6000)
return res
AgentAPI.get_available_disks makes a remote call to get_available_disks in the agent process, landing in virtual-storage-manager-2.2.0\source\vsm\vsm\agent\manager.py:AgentManager.get_available_disks
def get_available_disks(self, context):
#LOG.info('333333333')
available_disk_name = self.ceph_driver.get_available_disks(context)
LOG.info('available_disk_name=====%s'%available_disk_name)
devices = db.device_get_all_by_service_id(context,self._service_id)
dev_used_by_ceph = [dev.journal for dev in devices]
available_disk_info_list = []
name_by_path_dict = self.ceph_driver.get_disks_name_by_path_dict(available_disk_name)
name_by_uuid_dict = self.ceph_driver.get_disks_name_by_uuid_dict(available_disk_name)
for disk in available_disk_name:
by_path_name = name_by_path_dict.get(disk,'')
by_uuid_name = name_by_uuid_dict.get(disk,'')
if not disk in dev_used_by_ceph and not by_path_name in dev_used_by_ceph and not by_uuid_name in dev_used_by_ceph:
available_disk_info_list.append({'disk_name':disk,
'by_path':by_path_name,
'by_uuid':by_uuid_name,})
LOG.info('available_disk_info_list=====%s'%available_disk_info_list)
return available_disk_info_list
1. Run "blockdev --report" to list the block devices.
2. Remove bare whole-disk devices such as /dev/sda.
3. Run "mount -l" to list the mounted devices.
4. Remove the mounted devices.
5. Run "pvs --rows" to list the devices used by LVM.
6. Remove the LVM devices.
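The driver code that does this filtering is not quoted here; a rough Python sketch of those six steps (the command parsing and function names are my assumptions, not the driver's actual implementation) could look like:
# Rough sketch only: approximates the six filtering steps above, not CephDriver's real code.
import subprocess

def _run(cmd):
    return subprocess.check_output(cmd, shell=True).decode()

def sketch_available_disks():
    # 1. "blockdev --report" lists block devices; the device path is the last column
    devices = [line.split()[-1] for line in _run("blockdev --report").splitlines()[1:] if line.strip()]
    # 2. drop bare whole-disk devices such as /dev/sda that carry partitions
    devices = [d for d in devices if not any(o != d and o.startswith(d) for o in devices)]
    # 3/4. "mount -l" lists mounted devices; drop anything already mounted
    mounted = {line.split()[0] for line in _run("mount -l").splitlines() if line.startswith("/dev/")}
    devices = [d for d in devices if d not in mounted]
    # 5/6. "pvs" lists LVM physical volumes (the source uses "pvs --rows"); drop devices used by LVM
    lvm = set(_run("pvs --noheadings -o pv_name 2>/dev/null || true").split())
    return [d for d in devices if d not in lvm]

print(sketch_available_disks())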
Code analysis: the Add button on the add_new_osd page
After filling in the fields required to add an OSD and clicking the Add button, the JS function CheckOSDForm is triggered
virtual-storage-manager-2.2.0\source\vsm-dashboard\static\dashboard\js\addosd.js
function CheckOSDForm(){
//Check the field is should not null
if( $ctrlJournalDevice.value == ""
|| $ctrlDataDevice.value == ""
|| $ctrlWeight.value == ""
|| $ctrlStorageGroup.value == ""){
showTip("error","The field is marked as '*' should not be empty");
return false;
}
//Check the device path is avaliable or not
var path_data = {
"server_id":Server.Create().node_id,
"journal_device_path":$ctrlJournalDevice.value,
"data_device_path":$ctrlDataDevice.value
}
//Post the data and check
//if the check result is successful, add the osd
PostData("check_device_path",path_data);
}
1. Check whether the device paths are paths of available disks.
2. If check 1 passes, put the fields just filled in on the page into the item list.
3. If check 1 fails, report an error.
Flow analysis: checking whether a device path belongs to an available disk
Let's now walk through the flow that checks whether the device path is the path of an available disk.
CheckOSDForm posts the path data via PostData to check the device paths.
URL:
/dashboard/vsm/devices-management/check_device_path/
Code:
source\vsm-dashboard\vsm_dashboard\dashboards\vsm\devices-management\views.py:check_device_path
def check_device_path(request):
search_opts = json.loads(request.body)
server_id = search_opts["server_id"]
data_device_path = search_opts["data_device_path"]
journal_device_path = search_opts["journal_device_path"]
if data_device_path == journal_device_path:
status_json = {"status":"Failed",'message':'data_device_path and journal_device_path can not be the same hard disk'}
else:
ret = vsmapi.get_available_disks(request, search_opts={
"server_id": server_id
,"path":[data_device_path,journal_device_path]
})
if ret["ret"] == 1 :
status_json = {"status":"OK"}
else:
status_json = {"status":"Failed",'message':ret.get('message')}
status_data = json.dumps(status_json)
return HttpResponse(status_data)
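For clarity, the JSON that CheckOSDForm posts and the possible replies from this view look roughly like this (the device paths are illustrative):
Request body:   {"server_id": 1, "journal_device_path": "/dev/sdc1", "data_device_path": "/dev/sdb"}
Success reply:  {"status": "OK"}
Failure reply:  {"status": "Failed", "message": "There is no /dev/sdb "}
                (or "data_device_path and journal_device_path can not be the same hard disk")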
In views.py:check_device_path, source\vsm-dashboard\vsm_dashboard\api\vsm.py:get_available_disks is called.
Code:
source\vsm-dashboard\vsm_dashboard\api\vsm.py:get_available_disks
def get_available_disks(request, search_opts):
return vsmclient(request).devices.get_available_disks( search_opts=search_opts)
Control then passes to the vsmclient, at virtual-storage-manager-2.2.0\source\python-vsmclient\vsmclient\v1\devices.py:DeviceManager.get_available_disks
In get_available_disks:
1. Send a REST request to vsm-api to fetch the available disks.
2. If result_mode=get_disks, return the available disks (the "retrieve available devices" flow described earlier).
3. In path mode, check each requested path against the paths of the available disks.
def get_available_disks(self, search_opts=None):
"""
Get a list of available disks
"""
if search_opts is None:
search_opts = {}
qparams = {}
for opt, val in search_opts.iteritems():
if val:
qparams[opt] = val
query_string = "?%s" % urllib.urlencode(qparams) if qparams else ""
resp, body = self.api.client.get("/devices/get_available_disks%s" % (query_string))
body = body.get("available_disks")
result_mode = search_opts.get('result_mode')
if result_mode == 'get_disks':
return {'disks': body}
ret = {"ret":1}
message = []
paths = search_opts.get("path")
disks = []
for disk in body:
disk_name = disk.get('disk_name','')
by_path = disk.get('by_path','')
by_uuid = disk.get('by_uuid','')
if disk_name:
disks.append(disk_name)
if by_path:
disks.append(by_path)
if by_uuid:
disks.append(by_uuid)
if paths:
unaviable_paths = [path for path in paths if path not in disks]
if unaviable_paths:
message.append('There is no %s '%(','.join(unaviable_paths)))
if message:
ret = {"ret":0,'message':'.'.join(message)}
return ret
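A hedged usage sketch of the two result modes (the client object here is assumed to be an already-constructed, authenticated vsmclient instance; it is not built in this snippet):
# Sketch only: 'client' stands for an authenticated vsmclient instance.
# get_disks mode -- returns the raw disk list (the "retrieve available devices" flow)
client.devices.get_available_disks(search_opts={"server_id": 1, "result_mode": "get_disks"})
# => {'disks': [{'disk_name': '/dev/sdb', 'by_path': '...', 'by_uuid': '...'}, ...]}

# path mode -- verifies that every given path belongs to an available disk (the check_device_path flow)
client.devices.get_available_disks(search_opts={"server_id": 1, "path": ["/dev/sdb", "/dev/sdc1"]})
# => {'ret': 1} on success, or {'ret': 0, 'message': 'There is no /dev/sdc1 '} on failure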
Code analysis: the Submit button on the add_new_osd page
Clicking the Submit button triggers the JS function AddOSD()
virtual-storage-manager-2.2.0\source\vsm-dashboard\static\dashboard\js\addosd.js
function AddOSD(){
var osd_list = [];
var OSD_Items = $(".osd-item");
if(OSD_Items.length == 0){
showTip("error","Please add the OSD");
return false;
}
for(var i=0;i<OSD_Items.length;i++){
var osd = {
"server_name":OSD_Items[i].children[1].innerHTML,
"storage_group_name":OSD_Items[i].children[3].innerHTML,
"osd_location":OSD_Items[i].children[4].innerHTML,
"weight":OSD_Items[i].children[2].innerHTML,
"journal":OSD_Items[i].children[5].innerHTML,
"data":OSD_Items[i].children[6].innerHTML
}
osd_list.push(osd);
}
var post_data = {
"disks":[]
}
//generate the server data
var server_list = []
for(var i=0;i<osd_list.length;i++){
var isExsitServer = false;
for(var j=0;j<server_list.length;j++){
if(osd_list[i].server_name == server_list[j].server_name){
isExsitServer = true;
break;
}
}
if(isExsitServer == false){
server = {
"server_name":osd_list[i].server_name,
"osdinfo":[]
};
server_list.push(server)
}
}
//generate the osd data
for(var i=0;i<osd_list.length;i++){
for(var j=0;j<server_list.length;j++){
if(osd_list[i].server_name == server_list[j].server_name){
var osd = {
"storage_group_name":osd_list[i].storage_group_name,
"osd_location":osd_list[i].osd_location,
"weight":osd_list[i].weight,
"journal":osd_list[i].journal,
"data":osd_list[i].data
}
server_list[j].osdinfo.push(osd);
}
}
}
//exe add osd
post_data.disks = server_list;
console.log(post_data);
PostData("add_new_osd_action",post_data);
}
1. Collect the OSDs from the OSD items and group them by server (see the example post_data below).
2. Post the result to add_new_osd_action.
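The post_data assembled by AddOSD therefore has the following shape (the keys come from the JS above; the values are illustrative):
{"disks": [
    {"server_name": "node-1",
     "osdinfo": [
         {"storage_group_name": "capacity",
          "osd_location": "zone_a",
          "weight": "1.0",
          "journal": "/dev/sdc1",
          "data": "/dev/sdb"}
     ]}
]}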
AddOSD sends the assembled data via PostData to add_new_osd_action.
URL:
/dashboard/vsm/devices-management/add_new_osd_action/
Code:
source\vsm-dashboard\vsm_dashboard\dashboards\vsm\devices-management\views.py:add_new_osd_action
from vsm_dashboard.api import vsm as vsmapi
...
def add_new_osd_action(request):
data = json.loads(request.body)
print 'data----7777==',data
#vsmapi.add_new_disks_to_cluster(request,data)
vsmapi.add_batch_new_disks_to_cluster(request,data)
status_json = {"status":"OK"}
status_data = json.dumps(status_json)
return HttpResponse(status_data)
From the imports at the top of views.py, vsmapi is source\vsm-dashboard\vsm_dashboard\api\vsm.py, so the call goes to vsm.py:add_batch_new_disks_to_cluster
def add_batch_new_disks_to_cluster(request, body):
return vsmclient(request).osds.add_batch_new_disks_to_cluster(body)
Control then passes to the vsmclient, at source\python-vsmclient\vsmclient\v1\osds.py:OsdManager.add_batch_new_disks_to_cluster
def add_batch_new_disks_to_cluster(self, body):
"""
:param context:
:param body: {"disks":[
{'server_id':'1','osdinfo':[{'storage_group_id':
"weight":
"jounal":
"data":},{}]},
{'server_id':'2','osdinfo':[{'storage_group_id':
"weight":
"jounal":
"data":},{}]},
]
}
:return:
"""
url = '/osds/add_batch_new_disks_to_cluster'
return self.api.client.post(url, body=body)
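A hedged call sketch (again, 'client' stands for an assumed authenticated vsmclient instance). Note that the docstring above describes server_id/storage_group_id keys, while the dashboard actually sends server_name/storage_group_name; as shown later, the scheduler accepts either server_id or server_name:
# Sketch only: 'client' stands for an authenticated vsmclient instance; values are illustrative.
client.osds.add_batch_new_disks_to_cluster({
    "disks": [
        {"server_name": "node-1",
         "osdinfo": [{"storage_group_name": "capacity",
                      "osd_location": "zone_a",
                      "weight": "1.0",
                      "journal": "/dev/sdc1",
                      "data": "/dev/sdb"}]}
    ]
})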
OsdManager.add_batch_new_disks_to_cluster sends a REST request to vsm-api to add the OSDs in batch
vsm\vsm\api\v1\osds.py:Controller.add_batch_new_disks_to_cluster
def add_batch_new_disks_to_cluster(self, req, body):
context = req.environ['vsm.context']
LOG.info("batch_osd_add body= %s" % body)
ret = self.scheduler_api.add_batch_new_disks_to_cluster(context, body)
LOG.info("batch_osd_add ret= %s" % ret)
return ret
Controller.add_batch_new_disks_to_cluster then calls
source\vsm\vsm\scheduler\api.py:API.add_batch_new_disks_to_cluster -> source\vsm\vsm\scheduler\rpcapi.py:SchedulerAPI.add_batch_new_disks_to_cluster
def add_batch_new_disks_to_cluster(self, ctxt, body=None):
return self.call(ctxt, self.make_msg('add_batch_new_disks_to_cluster', body=body))
In this way the vsm-api process makes a remote call to add_batch_new_disks_to_cluster in the scheduler process, landing in
source\vsm\vsm\scheduler\manager.py:SchedulerManager.add_batch_new_disks_to_cluster. This function takes the OSDs to be added on each server
and adds them by calling SchedulerManager.add_new_disks_to_cluster once per server.
def add_batch_new_disks_to_cluster(self, context, body):
"""
:param context:
:param body: {"disks":[
{'server_id':'1','osdinfo':[{'storage_group_id':
"weight":
"journal":
"data":},{}]},
{'server_id':'2','osdinfo':[{'storage_group_id':
"weight":
"journal":
"data":},{}]},
]
}
:return:
"""
disks = body.get('disks',[])
try:
for disk_in_same_server in disks:
self.add_new_disks_to_cluster(context, disk_in_same_server)
except:
return {"message":"data error"}
return {"message": "success"}
def add_new_disks_to_cluster(self, context, body):
server_id = body.get('server_id',None)
server_name = body.get('server_name',None)
if server_id is not None:
server = db.init_node_get_by_id(context,id=server_id)
elif server_name is not None:
server = db.init_node_get_by_host(context,host=server_name)
self._agent_rpcapi.add_new_disks_to_cluster(context, body, server['host'])
new_osd_count = int(server["data_drives_number"]) + len(body['osdinfo'])
values = {"data_drives_number": new_osd_count}
self._conductor_rpcapi.init_node_update(context,
server["id"],
values)
SchedulerManager.add_new_disks_to_cluster performs the following steps:
1. Query the server record from the database.
2. Call source\vsm\vsm\agent\rpcapi.py:AgentAPI.add_new_disks_to_cluster to add the OSDs to the ceph cluster.
3. Call source\vsm\vsm\conductor\rpcapi.py:ConductorAPI.init_node_update to update the server's OSD (data drive) count in the database.
Let's now look at how source\vsm\vsm\agent\rpcapi.py:AgentAPI.add_new_disks_to_cluster adds the OSDs to the ceph cluster.
def add_new_disks_to_cluster(self, context, body, host):
topic = rpc.queue_get_for(context, self.topic, host)
res = self.call(context,
self.make_msg('add_new_disks_to_cluster',
body=body),
topic,
version='1.0', timeout=6000)
AgentAPI.add_new_disks_to_cluster makes a synchronous remote call to add_new_disks_to_cluster in the vsm-agent process, i.e. source\vsm\vsm\agent\manager.py:AgentManager.add_new_disks_to_cluster
def add_new_disks_to_cluster(self, context, body):
all_disk = glob.glob('/dev/disk/by-path/*')
all_disk_name_dict = self.ceph_driver.get_disks_name(context,all_disk)
for disk in body['osdinfo']:
for key,value in all_disk_name_dict.items():
if value == disk['data']:
disk['data'] = key
if value == disk['journal']:
disk['journal'] = key
osd_id = self.add_disk_to_db(context,disk)
self.osd_add(context,osd_id)
return True
In this function:
1. self.ceph_driver.get_disks_name(context,all_disk)
runs the "ls -l /dev/disk/by-path/*" command and puts all of the server's block devices into a dictionary,
e.g.
key=/dev/disk/by-path/pci-0000:00:10.0-scsi-0:0:1:0
value=/dev/sdb
2. Compare against the osdinfo list to be added and convert each device to the /dev/disk/by-path/pci-0000:00:10.0-scsi-0:0:1:0 form.
3. osd_id = self.add_disk_to_db(context,disk)
writes the OSD record into the osd table in the database.
4. self.osd_add(context,osd_id), analyzed in detail below.
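A rough sketch of steps 1 and 2 (the symlink-based parsing is my assumption based on the "ls -l /dev/disk/by-path/*" description; it is not the driver's exact code):
# Rough sketch: build the by-path -> device mapping and rewrite osdinfo entries into by-path form.
import glob
import os

def sketch_get_disks_name(paths):
    # each /dev/disk/by-path/* entry is a symlink to ../../sdX; resolving it yields the device name,
    # which is the same mapping "ls -l /dev/disk/by-path/*" shows
    return {p: os.path.realpath(p) for p in paths}

all_disk = glob.glob('/dev/disk/by-path/*')
name_dict = sketch_get_disks_name(all_disk)
# e.g. {'/dev/disk/by-path/pci-0000:00:10.0-scsi-0:0:1:0': '/dev/sdb', ...}

osdinfo = [{'data': '/dev/sdb', 'journal': '/dev/sdc1'}]   # illustrative entries
for disk in osdinfo:
    for key, value in name_dict.items():
        if value == disk['data']:
            disk['data'] = key      # now in /dev/disk/by-path/... form
        if value == disk['journal']:
            disk['journal'] = key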
Now let's analyze the osd_add flow.
def osd_add(self, context, osd_id):
return self.ceph_driver.add_osd(context, host_id=None, osd_id_in=osd_id)
It calls
source\vsm\vsm\agent\driver.py:CephDriver.add_osd, which performs the following steps:
1. If osd_id_in is not None, fetch the OSD and host information from the database.
2. Add the OSD:
1) Remotely call the conductor's host_storage_groups_devices function to get strg_list.
2) Update the init_node table.
3) Run the "ceph osd create" command, which returns the osd id allocated by the ceph cluster.
4) Update the crushmap relationships in the database.
5) Update the ceph cluster's crushmap.
6) Call the _add_osd function to add the OSD to the ceph cluster:
i. Get the OSD path and the OSD keyring path.
ii. Make sure the data partition of the device is not mounted.
iii. Format the data partition.
iv. Change the owner and group of the data and journal directories to ceph.
v. Mount the data partition at the OSD path and empty the OSD path directory.
vi. Update ceph.conf.
vii. ceph-osd -i $osd_id --mkfs --mkkey
ceph auth del osd.$osd_id
ceph auth add osd.$osd_id osd allow * mon allow rwx -i $osd_keyring_pth
ceph osd crush add osd.$osd_id $weight $osd_location_str
viii. Start the osd daemon, then
ceph osd crush create-or-move osd.$osd_id $weight $osd_location_str
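A condensed sketch of steps i–viii for a single OSD (the OSD path layout, mkfs options and the daemon start command are my assumptions; the real _add_osd also rewrites ceph.conf, handles errors, and orders a few steps differently):
# Sketch only: the shell sequence for steps i-viii above, not the real _add_osd implementation.
import subprocess

def run(cmd):
    print(cmd)
    subprocess.check_call(cmd, shell=True)

def sketch_add_osd(osd_id, data_dev, journal_dev, weight, osd_location_str, fs='xfs'):
    osd_path = '/var/lib/ceph/osd/ceph-%s' % osd_id          # i. assumed default OSD path
    keyring = '%s/keyring' % osd_path                        # i. assumed keyring path
    run('umount %s 2>/dev/null || true' % data_dev)          # ii. ensure the data partition is not mounted
    run('mkfs.%s -f %s' % (fs, data_dev))                    # iii. format the data partition
    run('mkdir -p %s' % osd_path)
    run('mount %s %s && rm -rf %s/*' % (data_dev, osd_path, osd_path))   # v. mount and empty the osd path
    run('chown -R ceph:ceph %s %s' % (osd_path, journal_dev))            # iv. data/journal owned by ceph
    # vi. updating ceph.conf is omitted in this sketch
    run('ceph-osd -i %s --mkfs --mkkey' % osd_id)                        # vii.
    run('ceph auth del osd.%s || true' % osd_id)
    run("ceph auth add osd.%s osd 'allow *' mon 'allow rwx' -i %s" % (osd_id, keyring))
    run('ceph osd crush add osd.%s %s %s' % (osd_id, weight, osd_location_str))
    run('service ceph start osd.%s' % osd_id)                            # viii. start the osd daemon (assumed command)
    run('ceph osd crush create-or-move osd.%s %s %s' % (osd_id, weight, osd_location_str))
The actual CephDriver.add_osd (the database and crushmap part of the flow) is quoted below.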
def add_osd(self, context, host_id, osd_id_in=None):
if osd_id_in is not None:
osd_obj = db.osd_get(context, osd_id_in)
host_obj = db.init_node_get_by_device_id(context, osd_obj.device_id)
host_id = host_obj.id
LOG.info("begin to add osd %s from host %s"%(osd_obj.device_id,host_id))
LOG.info('start to ceph osd on %s' % host_id)
strg_list = self._conductor_api.\
host_storage_groups_devices(context, host_id)
LOG.info('strg_list %s' % strg_list)
#added_to_crushmap = False
osd_cnt = len(strg_list)
if osd_id_in is not None:
osd_cnt = 1
count = 0
for strg in strg_list:
if osd_id_in is not None and strg.get("dev_id") != osd_obj.device_id:
continue
LOG.info('>> Step 1: start to ceph osd %s' % strg)
count = count + 1
if osd_id_in is None:
self._conductor_api.init_node_update(context, host_id, {"status": "add_osd %s/%s"%(count,osd_cnt)})
# Create osd from # ceph osd create
stdout = utils.execute("ceph",
"osd",
"create",
run_as_root=True)[0]
osd_id = str(int(stdout))
LOG.info(' gen osd_id success: %s' % osd_id)
# step 1 end
host = strg['host']
zone = strg['zone']
#TODO strg['storage_group']
# stands for the storage_group_name fetch from DB.
if strg.get('storage_group',None) is None:
default_storage_group = db.storage_group_get_all(context)[0]
strg['storage_group'] = default_storage_group['name']
strg['storage_group_id'] = default_storage_group['id']
storage_group = strg['storage_group']
crush_dict = {"root": "vsm",
"storage_group": storage_group,
"zone": "_".join([zone, storage_group]),
"host": "_".join([host, storage_group, zone]),}
osd_conf_dict = {"host": host,
"primary_public_ip": strg['primary_public_ip'],
"secondary_public_ip": strg['secondary_public_ip'],
"cluster_ip": strg['cluster_ip'],
"dev_name": strg['dev_name'],
"dev_journal": strg['dev_journal'],
"file_system": strg['file_system']}
osd_state = {}
osd_state['osd_name'] = 'osd.%s' % osd_id
osd_state['device_id'] = strg['dev_id']
osd_state['storage_group_id'] = strg['storage_group_id']
osd_state['service_id'] = strg['service_id']
osd_state['cluster_id'] = strg['cluster_id']
osd_state['state'] = FLAGS.osd_in_up
# modify by yanghc 2017-03-21
# the osd_state did nod need set in here,
# should is the value of osd_state_ref['weight']
#osd_state['weight'] = 1.0
osd_state['operation_status'] = FLAGS.vsm_status_present
osd_state['public_ip'] = strg['secondary_public_ip']
osd_state['cluster_ip'] = strg['cluster_ip']
osd_state['deleted'] = 0
osd_state['zone_id'] = strg['zone_id']
if osd_id_in is not None:
osd_state_ref = db.osd_state_update(context,osd_id_in,osd_state)
else:
osd_state_ref = self._conductor_api.osd_state_create(context, osd_state)
osd_state['osd_location'] = osd_state_ref['osd_location']
osd_state['weight'] = osd_state_ref['weight'] and float(osd_state_ref['weight']) or 1.0
LOG.info('>> crush_dict %s' % crush_dict)
LOG.info('>> osd_conf_dict %s' % osd_conf_dict)
LOG.info('>> osd_state %s' % osd_state)
values = {}
#if not added_to_crushmap:
# LOG.info('>> add crushmap ')
crushmap = self.get_crushmap_json_format()
types = crushmap.get_all_types()
types.sort(key=operator.itemgetter('type_id'))
if self.is_new_storage_group(crush_dict['storage_group']):
self._crushmap_mgmt.add_storage_group(crush_dict['storage_group'],\
crush_dict['root'],types=types)
# zones = db.zone_get_all_not_in_crush(context)
# for item in zones:
# zone_item = item['name'] + '_' + crush_dict['storage_group']
# self._crushmap_mgmt.add_zone(zone_item, \
# crush_dict['storage_group'],types=types)
#
if zone == FLAGS.default_zone:
self._crushmap_mgmt.add_rule(crush_dict['storage_group'], 'host')
else:
self._crushmap_mgmt.add_rule(crush_dict['storage_group'], 'zone')
#TODO update rule_id and status in DB
rule_dict = self.get_crush_rule_dump_by_name(crush_dict['storage_group'])
LOG.info("rule_dict:%s" % rule_dict)
values['rule_id'] = rule_dict['rule_id']
if self.is_new_zone(crush_dict['zone']):
self._crushmap_mgmt.add_zone(crush_dict['zone'], \
crush_dict['storage_group'], types=types)
self._crushmap_mgmt.add_host(crush_dict['host'],
crush_dict['zone'], types=types)
# added_to_crushmap = True
#There must be at least 3 hosts in every storage group when the status is "IN"
zones, hosts = self._conductor_rpcapi.zones_hosts_get_by_storage_group(context, \
crush_dict['storage_group'])
#LOG.info("storage group:%s" % crush_dict['storage_group'])
#LOG.info("zones:%s" % zones)
#LOG.info("hosts:%s" % hosts)
#no zone and zone version
if zones:
if zones[0] == FLAGS.default_zone:
if host not in hosts and len(hosts) >= 2:
values['status'] = FLAGS.storage_group_in
else:
if zone not in zones and len(zones) >= 2:
values['status'] = FLAGS.storage_group_in
if values:
db.storage_group_update_by_name(context, crush_dict['storage_group'], values)
# other steps
LOG.info('>> _add_osd start ')
self._add_osd(context,
osd_id,
crush_dict,
osd_conf_dict,
osd_state)
try:
self.run_add_disk_hook(context)
except:
LOG.info('run add_disk error')