from xml.sax import saxutils
# Sample HTML document used as input for the escaping demos below.
html_str = """<!DOCTYPE html>
<html>
<head>
<title>name</title>
</head>
<body>
<h1>namejr</h1>
<p>my name is namejr</p>
<span>my age is <b>22</b> years old</span>
<p>other string, such as "*", "@"</p>
</body>
</html>"""
# xml.sax.saxutils.escape(data, entities={}) escapes markup characters in data.
# Without extra entities it replaces "&", "<" and ">" with "&amp;", "&lt;"
# and "&gt;" respectively; quote characters are left untouched.
print(saxutils.escape(html_str))
# Output of `python index.py` (kept as comments rather than a bare string
# literal so that a pasted shell prompt containing backslashes cannot trigger
# invalid-escape-sequence warnings):
#
# &lt;!DOCTYPE html&gt;
# &lt;html&gt;
# &lt;head&gt;
# &lt;title&gt;name&lt;/title&gt;
# &lt;/head&gt;
# &lt;body&gt;
# &lt;h1&gt;namejr&lt;/h1&gt;
# &lt;p&gt;my name is namejr&lt;/p&gt;
# &lt;span&gt;my age is &lt;b&gt;22&lt;/b&gt; years old&lt;/span&gt;
# &lt;p&gt;other string, such as "*", "@"&lt;/p&gt;
# &lt;/body&gt;
# &lt;/html&gt;
# To escape additional, user-chosen strings, pass them through the `entities`
# mapping: each key found in the data is replaced by its value, in addition to
# the "&", "<" and ">" replacements that are always performed.
entities = {'*': '不知道写啥', '@': '更不知道写啥'}
print(saxutils.escape(html_str, entities=entities))
# Output of `python index.py` (abridged; kept as comments rather than a bare
# string literal so that a pasted shell prompt containing backslashes cannot
# trigger invalid-escape-sequence warnings):
#
# &lt;!DOCTYPE html&gt;
# ...
# &lt;span&gt;my age is &lt;b&gt;22&lt;/b&gt; years old&lt;/span&gt;
# &lt;p&gt;other string, such as "不知道写啥", "更不知道写啥"&lt;/p&gt;
# &lt;/body&gt;
# &lt;/html&gt;
# xml.sax.saxutils.unescape() is the inverse of xml.sax.saxutils.escape().
from xml.sax import saxutils
# xml.sax.saxutils.quoteattr() is similar to escape(), but it also wraps the
# result in quotes so it can be used directly as an attribute value. The quote
# character is chosen from the data to avoid encoding quotes where possible:
# if only one kind of quote occurs in the data, the other kind is used as the
# delimiter and nothing is encoded; if both single and double quotes occur,
# double quotes delimit the value and embedded double quotes are encoded.
html_str = "<element attr={}>".format(
    saxutils.quoteattr("ab' cd\"ef")
)
# Prints: <element attr="ab' cd&quot;ef">
print(html_str)