简介

用爬虫获取目标网站数据后,可能会遇到时间字段显示为"刚刚"、"N分钟前"、"N小时前"、"N天前"等相对时间格式的情况,需要将其转换成具体日期,如图
Python处理刚刚,分钟,小时,天前等时间-LMLPHP

解决问题:

写了一个工具类来处理该问题,其中封装了两个函数

1. 将时间中的中文数字转换成阿拉伯数字

    def exchange_num(date):
        """Convert Chinese numerals in *date* to Arabic digits.

        Each of 零一二三四五六七八九 is mapped positionally to 0-9, so
        "二零二三" becomes "2023". The character 十 (ten) is expanded
        according to its neighbours so that numbers up to 99 read correctly:
        "十" -> "10", "十五" -> "15", "二十" -> "20", "二十三" -> "23".

        Args:
            date: Text that may contain Chinese numerals, e.g. "一天前".

        Returns:
            The text with those numerals replaced; every other character is
            left untouched.
        """
        import re  # local import keeps this snippet self-contained

        units = '一二三四五六七八九'
        # Rewrite 十 into plain digit characters first. The order matters:
        # each step assumes the patterns handled by earlier steps are gone.
        date = re.sub(f'(?<=[{units}])十(?=[{units}])', '', date)  # 二十三 -> 二三
        date = re.sub(f'(?<=[{units}])十', '零', date)  # 二十 -> 二零
        date = re.sub(f'十(?=[{units}])', '一', date)  # 十五 -> 一五
        date = date.replace('十', '一零')  # lone 十 -> 一零
        # A single translate pass replaces the ten chained str.replace calls.
        return date.translate(str.maketrans('零一二三四五六七八九', '0123456789'))

2. 按照"刚刚"、"分钟前"、"小时前"、"天前"等时间格式进行判定,并转换成对应的日期

    def deal_time(zh_time, now=None):
        """Convert a relative Chinese time expression to a 'YYYY-MM-DD' date.

        Recognised forms, checked in this order: "刚刚" (just now),
        "N分钟前" (N minutes ago), "N小时前" (N hours ago) and "N天前"
        (N days ago). The amount is the first run of digits found in the
        text after Chinese numerals have been converted by ``exchange_num``.

        Args:
            zh_time: Raw time text scraped from a page.
            now: Reference ``datetime`` the offset is subtracted from.
                Defaults to ``datetime.today()``; pass a fixed value to get
                reproducible results.

        Returns:
            The resulting date formatted as '%Y-%m-%d'. Text that matches
            none of the recognised forms is returned as-is (after numeral
            conversion). If an hour/day expression carries no usable number,
            a "转换失败 ..." message string is returned instead of raising.
        """
        zh_time = exchange_num(zh_time.strip())
        current = datetime.today() if now is None else now
        now_time = current.strftime('%Y-%m-%d')
        failure = f"转换失败 当前时间{now_time},网站中的时间{zh_time}"

        if "刚刚" in zh_time:
            return now_time

        # Suffix -> timedelta keyword, in the precedence the suffixes are tested.
        for suffix, unit in (("分钟前", "minutes"), ("小时前", "hours"), ("天前", "days")):
            if suffix not in zh_time:
                continue
            amount = re.search(r"(\d+)", zh_time)
            if amount is None:
                # "几分钟前" style text without a number still means "today";
                # for hours/days the date cannot be determined.
                return now_time if unit == "minutes" else failure
            try:
                # Real date arithmetic instead of comparing hour strings:
                # "5" <= "13" is False lexicographically, which used to push
                # "5小时前" at 13:00 onto the previous day.
                moment = current - timedelta(**{unit: int(amount.group(1))})
            except (OverflowError, ValueError):
                # Absurdly large amounts fall outside the datetime range.
                return failure
            return moment.strftime('%Y-%m-%d')

        return zh_time

完整代码如下

import operator
from datetime import datetime, timedelta
import re


class DealTime:
    """Convert relative Chinese time text ("刚刚", "N分钟前", "N小时前",
    "N天前") into a 'YYYY-MM-DD' date string.
    """

    # Suffix -> timedelta keyword, in the precedence the suffixes are tested.
    _UNITS = (("分钟前", "minutes"), ("小时前", "hours"), ("天前", "days"))

    def exchange_num(self, date):
        """Convert Chinese numerals in *date* to Arabic digits.

        Each of 零一二三四五六七八九 is mapped positionally to 0-9, so
        "二零二三" becomes "2023". The character 十 (ten) is expanded
        according to its neighbours so that numbers up to 99 read correctly:
        "十" -> "10", "十五" -> "15", "二十" -> "20", "二十三" -> "23".

        Args:
            date: Text that may contain Chinese numerals, e.g. "一天前".

        Returns:
            The text with those numerals replaced; every other character is
            left untouched.
        """
        units = '一二三四五六七八九'
        # Rewrite 十 into plain digit characters first. The order matters:
        # each step assumes the patterns handled by earlier steps are gone.
        date = re.sub(f'(?<=[{units}])十(?=[{units}])', '', date)  # 二十三 -> 二三
        date = re.sub(f'(?<=[{units}])十', '零', date)  # 二十 -> 二零
        date = re.sub(f'十(?=[{units}])', '一', date)  # 十五 -> 一五
        date = date.replace('十', '一零')  # lone 十 -> 一零
        # A single translate pass replaces the ten chained str.replace calls.
        return date.translate(str.maketrans('零一二三四五六七八九', '0123456789'))

    def deal_time(self, zh_time, now=None):
        """Convert a relative Chinese time expression to a 'YYYY-MM-DD' date.

        Recognised forms, checked in this order: "刚刚" (just now),
        "N分钟前" (N minutes ago), "N小时前" (N hours ago) and "N天前"
        (N days ago). The amount is the first run of digits found in the
        text after Chinese numerals have been converted.

        Args:
            zh_time: Raw time text scraped from a page.
            now: Reference ``datetime`` the offset is subtracted from.
                Defaults to ``datetime.today()``; pass a fixed value to get
                reproducible results.

        Returns:
            The resulting date formatted as '%Y-%m-%d'. Text that matches
            none of the recognised forms is returned as-is (after numeral
            conversion). If an hour/day expression carries no usable number,
            a "转换失败 ..." message string is returned instead of raising.
        """
        zh_time = self.exchange_num(zh_time.strip())
        current = datetime.today() if now is None else now
        now_time = current.strftime('%Y-%m-%d')
        failure = f"转换失败 当前时间{now_time},网站中的时间{zh_time}"

        if "刚刚" in zh_time:
            return now_time

        for suffix, unit in self._UNITS:
            if suffix not in zh_time:
                continue
            amount = re.search(r"(\d+)", zh_time)
            if amount is None:
                # "几分钟前" style text without a number still means "today";
                # for hours/days the date cannot be determined.
                return now_time if unit == "minutes" else failure
            try:
                # Real date arithmetic instead of comparing hour strings:
                # "5" <= "13" is False lexicographically, which used to push
                # "5小时前" at 13:00 onto the previous day.
                moment = current - timedelta(**{unit: int(amount.group(1))})
            except (OverflowError, ValueError):
                # Absurdly large amounts fall outside the datetime range.
                return failure
            return moment.strftime('%Y-%m-%d')

        return zh_time


if __name__ == '__main__':
    # Quick manual demo: print the converted date for a few sample inputs.
    for sample in ("刚刚", "一天前", "1天前", "5分钟前"):
        converted = DealTime().deal_time(sample)
        print(converted)

11-02 16:52