.
点
- 不包括换行
import re
ret = re.match(".*","hello\\\nhello")
print(ret.group())
- 包括换行
import re
ret = re.match(".*","hello\\\nhello",re.DOTALL)
print(ret.group())
re.DOTALL表示点(.)可以匹配包括换行符在内的所有字符。
^
行首
- 普通行首
import re
ret = re.match("^hel.*","hello\\\nhello")
print(ret.group())
- 多行匹配
import re
ret = re.match("^hel.*\n^hel.*","hello\\\nhello",re.MULTILINE)
print(ret.group())
注意:这里有两个^,以及添加了re.MULTILINE。
$
行末尾
- 非多行
import re
ret = re.findall(".+$","hello world\\\nhello")
print(ret)
- 多行
import re
ret = re.findall(".+$","hello world\\\nhello",re.MULTILINE)
print(ret)
*
匹配前一个表达式0次或多次(贪婪匹配,尽量多的匹配)
import re
ret = re.findall(".*","hello world\\\nhello")
print(ret)
+
匹配前一个表达式1次或多次(贪婪匹配,尽量多的匹配)
import re
ret = re.findall(".+","hello world\\\nhello")
print(ret)
?
匹配前一个表达式0次或1次
import re
ret = re.findall(".?","hello world\\\nhello")
print(ret)
*? +? ??
非贪婪匹配,尽量少的匹配
- 贪婪模式
import re
ret = re.findall("<.*>","<html></html>")
print(ret)
- 非贪婪模式
import re
ret = re.findall("<.*?>","<html></html>")
print(ret)
{m}
严格匹配m个重复字符
import re
ret = re.findall("a{1}","a<html>a</html>")
print(ret)
{m,n}
至少m个,最多n个。
import re
ret = re.findall("a{1,5}","aaaaa<html>a</html>")
print(ret)
{m,n}?
非贪婪模式匹配
import re
ret = re.findall("a{1,5}?","aaaaa<html>a</html>")
print(ret)
[]
集合中任选一个
- 一一列举
import re
ret = re.findall("[a<]+","aaaaa<html>a</html>")
print(ret)
- 范围罗列
import re
ret = re.findall("[a-z]+","aaaaa<html>a</html>")
print(ret)
- 集合
import re
ret = re.findall("[\\w]+","aaaaa<html>a</html>")
print(ret)
- 取反
import re
ret = re.findall("[^\\w]+","aaaaa<html>a</html>")
print(ret)
|
多个表达式,从左往右匹配。
import re
ret = re.findall("(a|h)+","aaaaa<html>a</html>")
print(ret)
(...)
创建组
import re
ret = re.search("(<html>)a\\1","aaaaa<html>a<html>")
print(ret)
(?...)
扩展语法,通常不创建组((?P<name>...)除外)
(?aiLmsux)
内联标志:为整个正则表达式(而不是局部)设置对应的标志,例如i表示忽略大小写,m表示多行,s表示点匹配换行。应写在表达式的开头。
(?:...)
import re
ret = re.search("(?:<html>)a(</html>)","aaaaa<html>a</html>")
print(ret.groups())
(?aiLmsux-imsx:...)
只在该组内部局部启用标志,或者用-局部关闭标志,可以嵌套使用
import re
ret = re.search("(?i:(?:a|b)+(?-i:c+))","aaaaaAAAbbbccC")
print(ret)
(?P<name>...)
创建一个命名组
- 在同一个规则中引用
import re
ret = re.search("(?P<header><html>)a\\1","aaaaa<html>a<html>")
print(ret,ret.groups())
import re
ret = re.search("(?P<header><html>)a(?P=header)","aaaaa<html>a<html>")
print(ret,ret.groups())
- 在结果中访问
import re
ret = re.search("(?P<header><html>)a(?P=header)","aaaaa<html>a<html>")
print(ret,ret.group("header"))
- 在subn方法中使用
import re
ret = re.subn("(?P<header><html>)","<\\g<header>>","aaaaa<html>a<html>")
print(ret)
import re
ret = re.subn("(?P<header><html>)","<\\g<1>>","aaaaa<html>a<html>")
print(ret)
import re
ret = re.subn("(?P<header><html>)","<\\1>","aaaaa<html>a<html>")
print(ret)
(?P=name)
与前面的(?P<name>...)相呼应:引用该命名组,匹配与该组已匹配内容相同的文本。
(?#...)
import re
ret = re.search("(?P<header><html>)(?# this is a comment)","aaaaa<html>a<html>")
print(ret,ret.groups())
(?=...)
import re
ret = re.findall("(?P<header><html>)","aaaaa<html>a<html>")
print(ret)
ret = re.findall("(?P<header><html>(?=$))","aaaaa<html>a<html>")
print(ret)
(?!...)
(?<=...)
import re
ret = re.findall("(?<=aa|bb)<html>","aaaaa<html>a<html>")
print(ret)
(?<!...)
(?(id/name)yes-pattern|no-pattern)
import re
ret = re.match("(<html>)(<html>)?(<html>)?a(?(3)</html>|)(?(2)</html>|)(?(1)</html>|$)",
"<html><html>a</html></html>")
print(ret)
上面的(<html>)?需要与后面对应的条件表达式(?(id/name)yes-pattern|no-pattern)进行搭配。不然会出错。