python执行sql文件
之前也找了不少时间,没找到python执行sql文件的方法,有需求就自己写了一个
正则水平不足,将就用了

不过原则上是需要导入有大量数据的sql文件不要使用python来做,mysql自身效率会高很多
写这个东西目的
1、安全初始化数据库结构
2、避免使用系统中的mysql程序,达到完全由python实现初始化数据库功能


修正了一下,不再返回字符串、返回列表
最新修正,加入了一些新参数和校验


点击(此处)折叠或打开

  1. def format_sql_file(init_sql_file, drop_table=False, with_data=False):
  2.     """
  3.     格式化mysqldump导出的sql文件
  4.     @param init_sql_file: sql文件地址
  5.     @param drop_table: 是否执行drop table命令
  6.     @param with_data: 是否输出对数据有操作的语句
  7.     @raise ParameterIllegal: 通用错误抛出
  8.     @return: 可以直接被python exec的sql语句组成的列表
  9.     """
  10.     if drop_table not in (True, False):
  11.         raise ParameterIllegal('drop_table value error')
  12.     if not os.path.exists(init_sql_file):
  13.         raise ParameterIllegal('init_sql_file can not be found')
  14.     try:
  15.         size = os.path.getsize(init_sql_file)
  16.     except OSError:
  17.         raise ParameterIllegal('init_sql_file get size of init_sql_file catch error')
  18.     if size >= util.MAX_SQL_FILE_SIZE:
  19.         raise ParameterIllegal('size of init_sql_file too large')
  20.     try:
  21.         f = open(init_sql_file, 'r')
  22.     except OSError:
  23.         raise ParameterIllegal('open init_sql_file catch error')
  24.     # drop table的语句列表
  25.     drop_table_list = []
  26.     # 其他drop语句(存储过程,函数,触发器等
  27.     other_drop_list = []
  28.     # 创建语句列表
  29.     create_list = []
  30.     # 包含在delimiter中的sql语句
  31.     sql_delimiter_list = []
  32.     # 创建view语句
  33.     view_sql_list = []
  34.     # alter 语句
  35.     alter_list = []
  36.     # insert 语句
  37.     insert_list = []
  38.     # update 语句
  39.     update_list = []
  40.     # delete 语句
  41.     delete_list = []
  42.     # truncate 语句
  43.     truncate_list = []
  44.     try:
  45.         line_list = f.readlines()
  46.     except IOError:
  47.         f.close()
  48.         raise ParameterIllegal('read from file io error')
  49.     except OSError:
  50.         f.close()
  51.         raise ParameterIllegal('read from file os error')
  52.     f.close()
  53.     checked_line_list = []
  54.     for line in line_list:
  55.         # 标准sql结束符为单独一行直接报错,其实可以处理一下 直接塞到前一行的结尾
  56.         # 但是还要对前一行内容做校验 太麻烦
  57.         if line.strip() == ';':
  58.             raise ParameterIllegal('first string is [;],error sql file')
  59.         if len(line) >= 2:
  60.             if line[:2] == '--':
  61.                 continue
  62.         checked_line_list.append(line)
  63.     temp_string = ''.join(checked_line_list)
  64.     # 去除BOMB头
  65.     if temp_string[:3] == BOM_HEAD:
  66.         temp_string = temp_string[3:]
  67.     if len(temp_string) < 10:
  68.         raise ParameterIllegal('query_string_in_one_line less then 10')
  69.     # 表分区语句
  70.     partion_in_annotations = re.compile('(/\*\!50100\s+(PARTITION\s+BY\s+[\S\s]+?)\s+\*/ {0,1});{0,1}')
  71.     # drop 和 view 开头的语句
  72.     drop_and_view_in_annotations = \
  73.         re.compile('(drop.+?(\*/){0,1};)|(VIEW\s+?[\S]+?\s+?as\s+?select[\s\S]+?(\*/){0,1};)', re.IGNORECASE)
  74.     # 包含在以DELIMITER开头DELIMITER结尾的部分
  75.     delimiter_and_annotations = re.compile('(DELIMITER[\s\S]+?DELIMITER\s+?;)|(/\*[\s\S]+?\*/ {0,1};{0,1})',
  76.                                            re.IGNORECASE)
  77.     # DELIMITER结束标记 group(2) 表示存在DELIMITER后没有定义分割符就换行
  78.     delimiter_end_intance = re.compile('(DELIMITER\s+?;\s*)|(DELIMITER\s*)', re.IGNORECASE)
  79.     # 获取在DELIMITER与注释中的字符串
  80.     delimiter_and_annotations_res = re.findall(delimiter_and_annotations, temp_string)
  81.     # create语句正则
  82.     create_intance = re.compile('(/\*!\d{1,10}\s+){0,1}create\s+?(DEFINER=\S+ ){0,1}\s*', re.IGNORECASE)
  83.     delimiter_start = 0
  84.     # 结束正则
  85.     end_intance = None
  86.     # 常用的4中 DELIMITER重定义结束符
  87.     end_intance_1 = re.compile('(end (\*/){0,1}\s*\$\$)|(end\s*$)|(\$\$)', re.IGNORECASE)
  88.     end_intance_2 = re.compile('(end (\*/){0,1}\s*\;;)|(end\s*$)|(;;)', re.IGNORECASE)
  89.     end_intance_3 = re.compile('(end (\*/){0,1}\s*!!)|(end\s*$)|(!!)', re.IGNORECASE)
  90.     end_intance_4 = re.compile('(end (\*/){0,1}\s*!!)|(end\s*$)|(//)', re.IGNORECASE)
  91.     temp_list = []
  92.     if len(delimiter_and_annotations_res) > 0:
  93.         for res_tuple in delimiter_and_annotations_res:
  94.             delimiter = res_tuple[0]
  95.             annotations = res_tuple[1]
  96.             if delimiter != '': # DELIMITER字符串
  97.                 delimiter_list = delimiter.split('\n')
  98.                 for line in delimiter_list:
  99.                     if line.strip() == '':
  100.                         continue
  101.                     if not delimiter_start:
  102.                         # delimiter开始标记
  103.                         delimiter_start = 1
  104.                         delimiter_mark = re.sub('delimiter', '', line, flags=re.IGNORECASE).strip()
  105.                         if delimiter_mark == '$$':
  106.                             end_intance = end_intance_1
  107.                         elif delimiter_mark == ';;':
  108.                             end_intance = end_intance_2
  109.                         elif delimiter_mark == '!!':
  110.                             end_intance = end_intance_3
  111.                         elif delimiter_mark == '//':
  112.                             end_intance = end_intance_4
  113.                         else:
  114.                             raise ParameterIllegal('unknown delimiter_mark')
  115.                         continue
  116.                     else:
  117.                         delimiter_end = re.search(delimiter_end_intance, line)
  118.                         if delimiter_end is not None:
  119.                              # delimiter结束
  120.                             delimiter_start = 0
  121.                             if delimiter_end.group(1) is None:
  122.                                 raise ParameterIllegal('find a delimiter with out start or end mark')
  123.                             end_intance = None
  124.                             continue
  125.                     search_res = re.search(create_intance, line)
  126.                     if search_res is not None:
  127.                         temp_list.append('CREATE %s' % re.sub(create_intance, '', line))
  128.                         continue
  129.                     if end_intance is not None:
  130.                         end_res = re.search(end_intance, line)
  131.                         if end_res is not None:
  132.                             # end字符单独一行
  133.                             if end_res.group(3) is not None:
  134.                                 raise ParameterIllegal('find key word end in one line')
  135.                             # 其他行commit_mark字符串
  136.                             if end_res.group(4) is not None:
  137.                                 raise ParameterIllegal('find commit_mark line')
  138.                             # 修改存储过程结尾
  139.                             temp_list.append('END')
  140.                             sql_string = '\n'.join(temp_list)
  141.                             temp_list = []
  142.                             sql_delimiter_list.append(sql_string)
  143.                             continue
  144.                     temp_list.append(line)
  145.             if annotations != '': # 注释字符串
  146.                 # mysql 5.1的dump语句会把DROP PROCEDURE放注释中
  147.                 # 获取在注释中的drop语句
  148.                 drop_and_view_in_annotations_res = re.search(drop_and_view_in_annotations, annotations)
  149.                 if drop_and_view_in_annotations_res is not None:
  150.                     drop_in_annotations_string = drop_and_view_in_annotations_res.group(1)
  151.                     drop_ex_string = drop_and_view_in_annotations_res.group(2)
  152.                     view_string = drop_and_view_in_annotations_res.group(3)
  153.                     view_ex_string = drop_and_view_in_annotations_res.group(4)
  154.                     # drop语句
  155.                     if drop_in_annotations_string is not None:
  156.                         if drop_ex_string is not None:
  157.                             drop_in_annotations_string = drop_in_annotations_string.replace(drop_ex_string, '')
  158.                         other_drop_list.append(drop_in_annotations_string)
  159.                     # view语句
  160.                     if view_string is not None:
  161.                         if view_ex_string is not None:
  162.                             view_string = view_string.replace(view_ex_string, '')
  163.                         view_sql_list.append('CREATE %s' % view_string)
  164.     # 还在匹配到delimiter开始的部分,说明没有匹配到delimiter结束符
  165.     if delimiter_start:
  166.         raise ParameterIllegal('delimiter not end')
  167.     partion_string_list = re.findall(partion_in_annotations, temp_string)
  168.     # 去除partion语句的注释
  169.     for find_string in partion_string_list:
  170.         temp_string = temp_string.replace(find_string[0], find_string[1], 1)
  171.     # 获取去除注释和delimiter的部分
  172.     no_annotations_string = re.sub(delimiter_and_annotations, '', temp_string)
  173.     del line_list
  174.     del checked_line_list
  175.     del temp_string
  176.     create_table_intance \
  177.         = re.compile('CREATE\s+(TABLE|VIEW)\s+?(([\s\S]*?)CREATE\s+(TABLE|VIEW)){0,}[\s\S]+?;\s*$', re.IGNORECASE)
  178.     re_string = '(^CREATE DATABASE.+)|(^use .+?;\s*$)|' + \
  179.                 '(^insert into .+?;\s*$)|(^update .+?set .+?;\s*$)|(^alter table .+?;\s*$)|' + \
  180.                 '(^delete from .+?where .+?;\s*$)|(^truncate table\s+?.+?;\s*$)|' + \
  181.                 '(CREATE\s+?(DEFINER=.+?\s+?){0,1}(PROCEDURE|FUNCTION|TRIGGER){1}.*)|' + \
  182.                 '(DEFINER=.+?\s)|(^drop table.+?;\s*$)|(^drop\s+?database\s+?.+?;\s*$)|(drop {1}.+?;\s*$)|(.+?;\s*$)'
  183.     split_intance = re.compile(re_string, re.IGNORECASE)
  184.     auto_increment_intance = re.compile('AUTO_INCREMENT=\d{1,20} {1}', re.IGNORECASE)
  185.     view_intance = re.compile('create\s+?(ALGORITHM=.+?\s+?){0,1}' +
  186.                               '(DEFINER=.+?\s+?){0,1}(SQL\s+?SECURITY\s+?DEFINER\s+?)view', re.IGNORECASE)
  187.     # 分行校验
  188.     no_annotations_string_list = no_annotations_string.split('\n')
  189.     temp_list = []
  190.     for line in no_annotations_string_list:
  191.         if line.strip() == '':
  192.             continue
  193.         match_res = re.search(split_intance, line)
  194.         if match_res is not None:
  195.             # 包含创建数据库语句
  196.             if match_res.group(1) is not None:
  197.                 raise ParameterIllegal('create database key word find, error')
  198.             # 包含use 数据库语句
  199.             if match_res.group(2) is not None:
  200.                 raise ParameterIllegal('use databse key word find, error')
  201.             # ===========这里可以一定程度保证存储过程和函数在sql_delimiter_list中
  202.             # 包含insert语句
  203.             if match_res.group(3) is not None:
  204.                 insert_list.append(match_res.group(3))
  205.                 continue
  206.              # 包含update语句
  207.             if match_res.group(4) is not None:
  208.                 update_list.append(match_res.group(4))
  209.                 continue
  210.             # 包含alter语句
  211.             if match_res.group(5) is not None:
  212.                 alter_list.append(match_res.group(5))
  213.                 continue
  214.             # 包含delete from语句
  215.             if match_res.group(6) is not None:
  216.                 delete_list.append(match_res.group(6))
  217.                 continue
  218.             # 包含truncate table语句
  219.             if match_res.group(7) is not None:
  220.                 truncate_list.append(match_res.group(7))
  221.                 continue
  222.              # 包含PROCEDURE|FUNCTION|trigger语句
  223.              # 这些语句必须被包含在上边已经处理过的DELIMITER语句中,否则报错
  224.             if match_res.group(8) is not None:
  225.                 print match_res.group(8)
  226.                 raise ParameterIllegal('CREATE PROCEDURE|FUNCTION|TRIGGER not between two DELIMITER')
  227.              # 包含DEFINER=字段
  228.             if match_res.group(11) is not None:
  229.                 if re.search(view_intance, line) is not None:
  230.                     view_sql_list.append(re.sub(view_intance, 'CREATE VIEW', line))
  231.                     continue
  232.                 else:
  233.                     raise ParameterIllegal('DEFINER key word find')
  234.             # 表drop语句
  235.             if match_res.group(12) is not None:
  236.                 drop_table_list.append(match_res.group(12))
  237.                 continue
  238.             # drop database 语句
  239.             if match_res.group(13) is not None:
  240.                 raise ParameterIllegal('drop database sql find')
  241.             # 其他drop语句
  242.             if match_res.group(14) is not None:
  243.                 other_drop_list.append(match_res.group(14))
  244.                 continue
  245.             # 匹配到结束符号;
  246.             if match_res.group(15) is not None:
  247.                 temp_list.append(line)
  248.                 # 合并成sql语句
  249.                 sql_string = '\n'.join(temp_list)
  250.                 temp_list = []
  251.                 # sql语句检查
  252.                 create_match = re.match(create_table_intance, sql_string)
  253.                 if create_match is None:
  254.                     raise ParameterIllegal('sql [%s] not sql of create table or view' % sql_string)
  255.                 # 发现多个create在当前语句中
  256.                 if create_match.group(2) is not None:
  257.                     raise ParameterIllegal('sql [%s] find over one create in one' % sql_string)
  258.                 # 创建的是view
  259.                 if create_match.group(1).lower() == 'view':
  260.                     view_sql_list.append(sql_string)
  261.                 else:
  262.                     create_list.append(sql_string)
  263.                 continue
  264.         # 除去auto_increment
  265.         line = re.sub(auto_increment_intance, '', line)
  266.         temp_list.append(line)
  267.     full_string_list = other_drop_list
  268.     if drop_table:
  269.         full_string_list += drop_table_list
  270.     full_string_list = full_string_list + create_list + alter_list + view_sql_list
  271.     if len(sql_delimiter_list) > 0:
  272.         # 插入源文件中DELIMITER包含的内容
  273.         full_string_list += sql_delimiter_list
  274.     if with_data:
  275.         full_string_list = full_string_list + truncate_list + delete_list + insert_list + update_list
  276.     if len(full_string_list) == 0:
  277.         raise ParameterIllegal('no sql line find in file')
  278.     return ['set autocommit=1;'] + full_string_list



10-05 02:57