文章目录
一、命令UI界面进化和Plugins的发展及失败
-
UI接口进化
①命令行,简称CLI,例如UNIX/Linux Shell
②图形界面,简称GUI,例如WINDOWS
③语言界面,简称CUI或LUI,例如GPT
④脑机接口,简称BCI -
补充
自然语言接口,简称NLI -
插件Plugin调用流程
-
Plugins失败分析
1、缺少强Agent调度,只能手工选三个Plugins
2、不在场景中,不能提供端到端一揽子服务
3、延迟非常高(至少两次GPT-4生成,和一次Web API调用)
二、Function Calling的机制
- 原理图
1)示例 1:调用本地函数
- 需求
需求:实现一个回答问题的 AI。题目中如果有加法,必须能精确计算。 - 代码
# Initialization
from openai import OpenAI
from dotenv import load_dotenv, find_dotenv
import os
import json
# Load variables from the nearest .env file into the environment.
# NOTE(review): presumably this supplies the API credentials the client reads -- confirm.
_ = load_dotenv(find_dotenv())
# Shared OpenAI client used by every get_completion variant below.
client = OpenAI()
def print_json(data):
    """Print a value, pretty-printing structured data as JSON.

    An object exposing ``model_dump_json`` is first converted to plain
    Python data through that method. Lists and dicts are printed as
    indented JSON with non-ASCII characters left unescaped; any other
    value is printed as-is.

    Objects exposing ``model_dump_json`` that are nested inside a list or
    dict (for example a conversation history that mixes plain dicts and
    model reply objects) are converted the same way instead of raising
    ``TypeError``.
    """
    def _to_plain(obj):
        # Serialization hook for nested objects json.dumps cannot handle.
        if hasattr(obj, 'model_dump_json'):
            return json.loads(obj.model_dump_json())
        raise TypeError(
            f"Object of type {type(obj).__name__} is not JSON serializable"
        )

    if hasattr(data, 'model_dump_json'):
        data = json.loads(data.model_dump_json())

    if isinstance(data, (list, dict)):
        print(json.dumps(
            data,
            indent=4,
            ensure_ascii=False,
            default=_to_plain,
        ))
    else:
        print(data)
def get_completion(messages, model="gpt-3.5-turbo"):
    """Send the conversation to the chat model with a single ``sum`` tool.

    Args:
        messages: Conversation history passed as ``messages`` to the API.
        model: Name of the chat model to use.

    Returns:
        The message of the first choice in the response. The caller checks
        its ``tool_calls`` attribute to see whether ``sum`` was requested.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,  # output randomness; 0 means least random
        # Functions are described as JSON. Several may be defined; the model
        # decides which one to call, and it may call none of them.
        tools=[{
            "type": "function",
            "function": {
                "name": "sum",
                "description": "加法器,计算一组数的和",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "numbers": {
                            "type": "array",
                            "items": {
                                "type": "number"
                            }
                        }
                    },
                    # The caller indexes args["numbers"] directly, so the
                    # argument must always be present.
                    "required": ["numbers"],
                }
            }
        }],
    )
    return response.choices[0].message
from math import *
prompt = "Tell me the sum of 1, 2, 3, 4, 5, 6, 7, 8, 9, 10."
# Alternative prompts to try:
# prompt = "桌上有 2 个苹果,四个桃子和 3 本书,一共有几个水果?"
# prompt = "1+2+3...+99+100"
# prompt = "1024 乘以 1024 是多少?"  # no multiplication tool is defined -- what happens?
# prompt = "太阳从哪边升起?"  # no addition is needed -- what happens?

messages = [
    {"role": "system", "content": "你是一个数学家"},
    {"role": "user", "content": prompt}
]
response = get_completion(messages)

# Add the model's reply to the conversation history.
messages.append(response)

print("=====GPT回复=====")
print_json(response)

# If the model requested tool calls, run them and feed the results back.
if response.tool_calls is not None:
    answered = False
    # A single reply may contain several tool calls; answer each of them
    # rather than only the first one.
    for tool_call in response.tool_calls:
        if tool_call.function.name != "sum":
            continue

        # Run the local sum function on the arguments chosen by the model.
        args = json.loads(tool_call.function.arguments)
        result = sum(args["numbers"])
        print("=====函数返回=====")
        print(result)

        # Add the function result to the conversation history.
        messages.append(
            {
                "tool_call_id": tool_call.id,  # identifies which call this answers
                "role": "tool",
                "name": "sum",
                "content": str(result)  # the numeric result must be sent as a string
            }
        )
        answered = True

    if answered:
        # Ask the model again, now that it can see the function results.
        print("=====最终回复=====")
        print(get_completion(messages).content)
- 答复
2)示例 2:多 Function 调用
- 需求
查询某个地点附近的酒店、餐厅、景点等信息。即,查询某个 POI 附近的 POI。 - 代码
def get_completion(messages, model="gpt-3.5-turbo"):
    """Query the chat model with the two map-lookup tools available.

    Args:
        messages: Conversation history passed as ``messages`` to the API.
        model: Name of the chat model to use.

    Returns:
        The message of the first choice in the response.
    """
    # Tool 1: resolve a POI name to its coordinates.
    coordinate_tool = {
        "type": "function",
        "function": {
            "name": "get_location_coordinate",
            "description": "根据POI名称,获得POI的经纬度坐标",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "POI名称,必须是中文",
                    },
                    "city": {
                        "type": "string",
                        "description": "POI所在的城市名,必须是中文",
                    }
                },
                "required": ["location", "city"],
            }
        }
    }
    # Tool 2: search for POIs around a coordinate.
    nearby_tool = {
        "type": "function",
        "function": {
            "name": "search_nearby_pois",
            "description": "搜索给定坐标附近的poi",
            "parameters": {
                "type": "object",
                "properties": {
                    "longitude": {
                        "type": "string",
                        "description": "中心点的经度",
                    },
                    "latitude": {
                        "type": "string",
                        "description": "中心点的纬度",
                    },
                    "keyword": {
                        "type": "string",
                        "description": "目标poi的关键字",
                    }
                },
                "required": ["longitude", "latitude", "keyword"],
            }
        }
    }

    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,  # output randomness; 0 means least random
        seed=1024,  # fixed seed: with the same temperature and prompt the output stays the same
        # "auto" is the default: the model decides between a function call
        # and a text reply. A specific function can also be forced; see the
        # official documentation.
        tool_choice="auto",
        tools=[coordinate_tool, nearby_tool],
    )
    first_choice = completion.choices[0]
    return first_choice.message
---------------------------------------------------
import requests
# AMap Web API key. Prefer the AMAP_KEY environment variable so the key does
# not have to live in source; the literal is kept only as a backward-compatible
# fallback. NOTE(review): a key committed to a public document should be rotated.
amap_key = os.environ.get("AMAP_KEY", "6d672e6194caa3b639fccf2caf06c342")
def get_location_coordinate(location, city):
    """Look up a POI by name through the AMap text-search endpoint.

    Args:
        location: POI name used as the ``keywords`` query parameter.
        city: City name used as the ``region`` query parameter.

    Returns:
        The first entry of the ``pois`` list in the JSON response, or
        ``None`` when the response has no non-empty ``pois`` list.
    """
    # The query separator must be "&region="; it had been corrupted to
    # "®ion=", which silently dropped the city filter from the request.
    url = f"https://restapi.amap.com/v5/place/text?key={amap_key}&keywords={location}&region={city}"
    print(url)
    # A timeout keeps a stalled connection from hanging the tool call forever.
    r = requests.get(url, timeout=10)
    result = r.json()
    if "pois" in result and result["pois"]:
        return result["pois"][0]
    return None
def search_nearby_pois(longitude, latitude, keyword):
    """Search the AMap around-search endpoint for POIs near a coordinate.

    Args:
        longitude: Longitude of the search center.
        latitude: Latitude of the search center.
        keyword: Keyword used as the ``keywords`` query parameter.

    Returns:
        A text summary (name, address, distance) of at most the first three
        entries of ``pois`` in the response, or an empty string when there
        are none.
    """
    url = f"https://restapi.amap.com/v5/place/around?key={amap_key}&keywords={keyword}&location={longitude},{latitude}"
    print(url)
    # A timeout keeps a stalled connection from hanging the tool call forever.
    r = requests.get(url, timeout=10)
    result = r.json()
    pois = result["pois"] if "pois" in result and result["pois"] else []
    # Only the first three hits are reported back to the model.
    return "".join(
        f"{poi['name']}\n{poi['address']}\n距离:{poi['distance']}米\n\n"
        for poi in pois[:3]
    )
-------------------------------------------------------
prompt = "我想在北京五道口附近喝咖啡,给我推荐几个"
# Alternative prompt to try:
# prompt = "我到北京出差,给我推荐三里屯的酒店,和五道口附近的咖啡"

messages = [
    {"role": "system", "content": "你是一个地图通,你可以找到任何地址。"},
    {"role": "user", "content": prompt}
]
response = get_completion(messages)
messages.append(response)  # add the model's reply to the conversation
print("=====GPT回复=====")
print_json(response)

# Keep serving tool calls until the model answers with plain text.
while response.tool_calls is not None:
    # Models from the 1106 release onward may return several tool call
    # requests in one reply, so every one of them is handled.
    for tool_call in response.tool_calls:
        args = json.loads(tool_call.function.arguments)
        print("函数参数展开:")
        print_json(args)

        if tool_call.function.name == "get_location_coordinate":
            print("Call: get_location_coordinate")
            result = get_location_coordinate(**args)
        elif tool_call.function.name == "search_nearby_pois":
            print("Call: search_nearby_pois")
            result = search_nearby_pois(**args)
        else:
            # Unknown tool name: tell the model, rather than reusing a stale
            # result from a previous call or hitting an undefined variable.
            result = f"Unknown function: {tool_call.function.name}"

        print("=====函数返回=====")
        print_json(result)

        messages.append({
            "tool_call_id": tool_call.id,  # identifies which call this answers
            "role": "tool",
            "name": tool_call.function.name,
            "content": str(result)  # the result must be sent as a string
        })

    response = get_completion(messages)
    messages.append(response)  # add the model's reply to the conversation

print("=====最终回复=====")
print(response.content)
- 答复
=====GPT回复=====
{
"content": null,
"role": "assistant",
"function_call": null,
"tool_calls": [
{
"id": "call_WVpqDkc09ATorLhUqWOr155W",
"function": {
"arguments": "{\n \"location\": \"北京五道口\",\n \"city\": \"北京\"\n}",
"name": "get_location_coordinate"
},
"type": "function"
}
]
}
函数参数展开:
{
"location": "北京五道口",
"city": "北京"
}
Call: get_location_coordinate
https://restapi.amap.com/v5/place/text?key=6d672e6194caa3b639fccf2caf06c342&keywords=北京五道口&region=北京
=====函数返回=====
{
"parent": "",
"address": "海淀区",
"distance": "",
"pcode": "110000",
"adcode": "110108",
"pname": "北京市",
"cityname": "北京市",
"type": "地名地址信息;热点地名;热点地名",
"typecode": "190700",
"adname": "海淀区",
"citycode": "010",
"name": "五道口",
"location": "116.338611,39.992552",
"id": "B000A8WSBH"
}
函数参数展开:
{
"longitude": "116.338611",
"latitude": "39.992552",
"keyword": "咖啡"
}
Call: search_nearby_pois
https://restapi.amap.com/v5/place/around?key=6d672e6194caa3b639fccf2caf06c342&keywords=咖啡&location=116.338611,39.992552
=====函数返回=====
星巴克(北京五道口购物中心店)
成府路28号1层101-10B及2层201-09号
距离:40米
瑞幸咖啡(五道口购物中心店)
成府路28号五道口购物中心负一层101号
距离:67米
MANNER COFFEE(五道口购物中心店)
成府路28号五道口购物中心一层东侧L1-04
距离:82米
=====最终回复=====
我为您找到了几个在北京五道口附近的咖啡店:
1. 星巴克(北京五道口购物中心店)
地址:成府路28号1层101-10B及2层201-09号
距离:40米
2. 瑞幸咖啡(五道口购物中心店)
地址:成府路28号五道口购物中心负一层101号
距离:67米
3. MANNER COFFEE(五道口购物中心店)
地址:成府路28号五道口购物中心一层东侧L1-04
距离:82米
您可以根据距离和个人喜好选择其中一家前往品尝咖啡。祝您享受愉快的咖啡时光!
3)示例 3:用 Function Calling 获取 JSON 结构
- 备注
Function calling 生成 JSON 的稳定性比较高。 - 需求
从一段文字中抽取联系人姓名、地址和电话 - 代码
def get_completion(messages, model="gpt-3.5-turbo"):
    """Query the chat model with the ``add_contact`` tool available.

    The tool is used here only to obtain structured JSON (name, address,
    phone) from free text.

    Args:
        messages: Conversation history passed as ``messages`` to the API.
        model: Name of the chat model to use.

    Returns:
        The message of the first choice in the response.
    """
    contact_fields = {
        "name": {
            "type": "string",
            "description": "联系人姓名"
        },
        "address": {
            "type": "string",
            "description": "联系人地址"
        },
        "tel": {
            "type": "string",
            "description": "联系人电话"
        },
    }
    add_contact_tool = {
        "type": "function",
        "function": {
            "name": "add_contact",
            "description": "添加联系人",
            "parameters": {
                "type": "object",
                "properties": contact_fields,
            }
        }
    }

    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,  # output randomness; 0 means least random
        tools=[add_contact_tool],
    )
    return completion.choices[0].message
prompt = "帮我寄给王卓然,地址是北京市朝阳区亮马桥外交办公大楼,电话13012345678。"
messages = [
    {"role": "system", "content": "你是一个联系人录入员。"},
    {"role": "user", "content": prompt}
]
response = get_completion(messages)
print("====GPT回复====")
print_json(response)

# The model may answer in plain text instead of calling the tool; only
# decode the arguments when a tool call is actually present.
if response.tool_calls:
    args = json.loads(response.tool_calls[0].function.arguments)
    print("====函数参数====")
    print_json(args)
- 答复
4)示例 4:通过 Function Calling 查询数据库
- 需求
需求:从订单表中查询各种信息,比如某个用户的订单数量、某个商品的销量、某个用户的消费总额等等。 - 代码
def get_sql_completion(messages, model="gpt-3.5-turbo"):
    """Query the chat model with the ``ask_database`` tool available.

    The tool description embeds the module-level ``database_schema_string``
    at call time, so the model sees whichever schema is currently assigned.

    Args:
        messages: Conversation history passed as ``messages`` to the API.
        model: Name of the chat model to use.

    Returns:
        The message of the first choice in the response.
    """
    query_description = f"""
SQL query extracting info to answer the user's question.
SQL should be written using this database schema:
{database_schema_string}
The query should be returned in plain text, not in JSON.
The query should only contain grammars supported by SQLite.
"""
    # Tool definition adapted from the official OpenAI cookbook example:
    # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_call_functions_with_chat_models.ipynb
    ask_database_tool = {
        "type": "function",
        "function": {
            "name": "ask_database",
            "description": (
                "Use this function to answer user questions about business. "
                "Output should be a fully formed SQL query."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": query_description,
                    }
                },
                "required": ["query"],
            }
        }
    }

    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
        tools=[ask_database_tool],
    )
    return completion.choices[0].message
--------------------------------------
# Schema of the orders table. The same text is executed by SQLite and shown to
# the model inside the ask_database tool description.
# product_id is declared TEXT: the previous "STR" is not a text type in SQLite
# and gets NUMERIC affinity, which coerces numeric-looking ids to numbers.
database_schema_string = """
CREATE TABLE orders (
id INT PRIMARY KEY NOT NULL, -- 主键,不允许为空
customer_id INT NOT NULL, -- 客户ID,不允许为空
product_id TEXT NOT NULL, -- 产品ID,不允许为空
price DECIMAL(10,2) NOT NULL, -- 价格,不允许为空
status INT NOT NULL, -- 订单状态,整数类型,不允许为空。0代表待支付,1代表已支付,2代表已退款
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, -- 创建时间,默认为当前时间
pay_time TIMESTAMP -- 支付时间,可以为空
);
"""
---------------------------------------------
import sqlite3
# Create an in-memory database connection.
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()

# Create the orders table.
cursor.execute(database_schema_string)

# Five fixed mock records:
# (id, customer_id, product_id, price, status, create_time, pay_time).
# pay_time is kept after create_time; the earlier August pay dates preceded
# the October creation dates, which is impossible for a real order.
mock_data = [
    (1, 1001, 'TSHIRT_1', 50.00, 0, '2023-10-12 10:00:00', None),
    (2, 1001, 'TSHIRT_2', 75.50, 1, '2023-10-16 11:00:00', '2023-10-16 12:00:00'),
    (3, 1002, 'SHOES_X2', 25.25, 2, '2023-10-17 12:30:00', '2023-10-17 13:00:00'),
    (4, 1003, 'HAT_Z112', 60.75, 1, '2023-10-20 14:00:00', '2023-10-20 15:00:00'),
    (5, 1002, 'WATCH_X001', 90.00, 0, '2023-10-28 16:00:00', None)
]

# Insert all rows with one parameterized statement.
cursor.executemany('''
    INSERT INTO orders (id, customer_id, product_id, price, status, create_time, pay_time)
    VALUES (?, ?, ?, ?, ?, ?, ?)
''', mock_data)

# Commit the transaction.
conn.commit()
----------------------------------------------
def ask_database(query):
    """Run a SQL statement on the shared cursor and return all rows.

    Args:
        query: SQL text to execute.

    Returns:
        The list of rows produced by ``fetchall()``.

    NOTE(review): in this tutorial the SQL comes from the model's tool call
    arguments and is executed as-is; do not point this at a database that
    holds data you cannot afford to lose without adding a confirmation step.
    """
    return cursor.execute(query).fetchall()
prompt = "10月的销售额"
# Alternative prompts to try:
# prompt = "统计每月每件商品的销售额"
# prompt = "哪个用户消费最高?消费多少?"

messages = [
    {"role": "system", "content": "基于 order 表回答用户问题"},
    {"role": "user", "content": prompt}
]
response = get_sql_completion(messages)
# NOTE(review): presumably a None content was rejected when the message is
# sent back to the API, hence the empty-string substitute -- confirm.
if response.content is None:
    response.content = ""
messages.append(response)
print("====Function Calling====")
print_json(response)

if response.tool_calls is not None:
    answered = False
    # Answer every tool call in the reply rather than only the first one.
    for tool_call in response.tool_calls:
        if tool_call.function.name != "ask_database":
            continue

        arguments = tool_call.function.arguments
        args = json.loads(arguments)
        print("====SQL====")
        print(args["query"])
        result = ask_database(args["query"])
        print("====DB Records====")
        print(result)

        messages.append({
            "tool_call_id": tool_call.id,
            "role": "tool",
            "name": "ask_database",
            "content": str(result)
        })
        answered = True

    if answered:
        # Let the model phrase the final answer from the query results.
        response = get_sql_completion(messages)
        print("====最终回复====")
        print(response.content)
- 返回结果
====Function Calling====
{
"content": "",
"role": "assistant",
"function_call": null,
"tool_calls": [
{
"id": "call_6K8wpQeTWXg3ka1G1x0nbERl",
"function": {
"arguments": "{\n \"query\": \"SELECT SUM(price) FROM orders WHERE strftime('%m', create_time) = '10' AND status = 1\"\n}",
"name": "ask_database"
},
"type": "function"
}
]
}
====SQL====
SELECT SUM(price) FROM orders WHERE strftime('%m', create_time) = '10' AND status = 1
====DB Records====
[(136.25,)]
====最终回复====
10月的销售额为136.25。
5)示例 5:用 Function Calling 实现多表查询
- 代码
# Schema of the three tables (customers, products, orders) used for the
# multi-table example. Reassigning this name changes the schema text that
# get_sql_completion embeds in its tool description on the next call.
database_schema_string = """
CREATE TABLE customers (
id INT PRIMARY KEY NOT NULL, -- 主键,不允许为空
customer_name VARCHAR(255) NOT NULL, -- 客户名,不允许为空
email VARCHAR(255) UNIQUE, -- 邮箱,唯一
register_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP -- 注册时间,默认为当前时间
);
CREATE TABLE products (
id INT PRIMARY KEY NOT NULL, -- 主键,不允许为空
product_name VARCHAR(255) NOT NULL, -- 产品名称,不允许为空
price DECIMAL(10,2) NOT NULL -- 价格,不允许为空
);
CREATE TABLE orders (
id INT PRIMARY KEY NOT NULL, -- 主键,不允许为空
customer_id INT NOT NULL, -- 客户ID,不允许为空
product_id INT NOT NULL, -- 产品ID,不允许为空
price DECIMAL(10,2) NOT NULL, -- 价格,不允许为空
status INT NOT NULL, -- 订单状态,整数类型,不允许为空。0代表待支付,1代表已支付,2代表已退款
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, -- 创建时间,默认为当前时间
pay_time TIMESTAMP -- 支付时间,可以为空
);
"""
prompt = "统计每月每件商品的销售额"
# Alternative prompt to try:
# prompt = "这星期消费最高的用户是谁?他买了哪些商品? 每件商品买了几件?花费多少?"

messages = [
    {"role": "system", "content": "基于 order 表回答用户问题"},
    {"role": "user", "content": prompt}
]
response = get_sql_completion(messages)

# The model may reply in plain text; only print the generated SQL arguments
# when a tool call is present, and show the text reply otherwise.
if response.tool_calls:
    print(response.tool_calls[0].function.arguments)
else:
    print(response.content)
- 返回
{
"query": "SELECT strftime('%Y-%m', create_time) AS month, product_name, SUM(price) AS total_sales FROM orders JOIN products ON orders.product_id = products.id WHERE status = 1 GROUP BY month, product_name ORDER BY month, product_name"
}
6)示例 6:Stream 模式
- 注意
流式(stream)输出不会一次返回完整 JSON 结构,所以需要拼接后再使用。 - 代码
def get_completion(messages, model="gpt-3.5-turbo"):
    """Start a streamed chat completion with the ``sum`` tool available.

    Args:
        messages: Conversation history passed as ``messages`` to the API.
        model: Name of the chat model to use.

    Returns:
        The object returned by the API call made with ``stream=True``; the
        caller iterates over it to receive the reply piece by piece.
    """
    sum_tool = {
        "type": "function",
        "function": {
            "name": "sum",
            "description": "计算一组数的加和",
            "parameters": {
                "type": "object",
                "properties": {
                    "numbers": {
                        "type": "array",
                        "items": {
                            "type": "number"
                        }
                    }
                }
            }
        }
    }
    return client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
        tools=[sum_tool],
        stream=True,  # enable streaming output
    )
prompt = "1+2+3"
# Alternative prompt to try:
# prompt = "你是谁"

messages = [
    {"role": "system", "content": "你是一个小学数学老师,你要教学生加法"},
    {"role": "user", "content": prompt}
]
response = get_completion(messages)

function_name = ""
arg_parts, text_parts = [], []

print("====Streaming====")

# The streamed pieces must be concatenated to recover the complete call.
for msg in response:
    # Skip chunks that carry no choice, so indexing [0] cannot fail.
    if not msg.choices:
        continue
    delta = msg.choices[0].delta
    if delta.tool_calls:
        call_delta = delta.tool_calls[0].function
        if not function_name:
            function_name = call_delta.name
        # A fragment may carry no argument text; treat that as empty so the
        # concatenation never fails on None.
        args_delta = call_delta.arguments or ""
        print(args_delta)  # show each fragment as it arrives
        arg_parts.append(args_delta)
    elif delta.content:
        text_delta = delta.content
        print(text_delta)
        text_parts.append(text_delta)

args = "".join(arg_parts)
text = "".join(text_parts)

print("====done!====")

if function_name or args:
    print(function_name)
    print_json(args)
if text:
    print(text)
- 返回
====Streaming====
{
"
numbers
":
[
1
,
2
,
3
]
}
====done!====
sum
{
"numbers": [1, 2, 3]
}
三、Function Calling的注意事项
1、只有 gpt-3.5-turbo-1106 和 gpt-4-1106-preview 可用本次课介绍的方法
2、gpt-3.5-turbo 是 gpt-3.5-turbo-1106 的别名
3、gpt-4 和 gpt-4-1106-preview 是两个不同的模型
4、OpenAI 针对 Function Calling 做了 fine-tuning,以尽可能保证函数调用参数的正确。
5、函数声明是消耗 token 的。要在功能覆盖、省钱、节约上下文窗口之间找到最佳平衡
6、Function Calling 不仅可以调用读函数,也能调用写函数。但官方强烈建议,在写之前,一定要有人做确认
四、支持 Function Calling 的国产大模型
1、百度文心大模型
官方文档:https://cloud.baidu.com/doc/WENXINWORKSHOP/index.html
2、MiniMax
官方文档:https://api.minimax.chat/document/guides/chat-pro?id=64b79fa3e74cddc5215939f4
- 这是个公众不大知道,但其实挺强的大模型,尤其角色扮演能力
- 如果你曾经在一个叫 Glow 的 app 流连忘返,那么你已经用过它了
- 应该是最早支持 Function Calling 的国产大模型
- Function Calling 的 API 和 OpenAI 1106 版之前完全一样,但其它 API 有很大的特色
3、ChatGLM3-6B
官方文档:https://github.com/THUDM/ChatGLM3/blob/main/tool_using/README.md
- 最著名的国产开源大模型,生态最好
- 早就使用 `tools` 而不是 `function` 来做参数,其它和 OpenAI 1106 版之前完全一样
4、讯飞星火 3.0
官方文档:https://www.xfyun.cn/doc/spark/Web.html#_2-function-call%E8%AF%B4%E6%98%8E
和 OpenAI 1106 版之前完全一样