我有10个CSV文件,每个CSV文件都有相同数量的列,我从这些列中以pandas数据框的形式逐个读取数据。我希望这些数据以窗口或类似的表格形式显示。而且应该是每次数据进入新行时。有什么建议吗?
下面是我的CSV文件示例:
像这样,有10个或更多的CSV文件,我将从这些文件中逐个读取数据,并希望显示在GUI中。
我的申请简介
我有一台机器在一段时间后将CSV文件生成到一个文件夹中。我正在使用Watchdog库来监视生成CSV文件的文件夹。当我收到一个CSV文件时,我将其读入pandas数据帧。上面给出了CSV文件示例。
只要机器正在运行,它就会继续生成CSV文件。因此,如果我想看到我需要打开每个CSV文件的数据,相反,我想要一个视图,当有一个新的CSV文件生成时,数据会在其中更新。
所以从技术上讲,一个CSV文件被读取,转换成一个数据帧,然后插入到某种表格视图中。当生成一个新的CSV文件时,这个过程再次发生,但是现在数据应该保存在同一个表视图的下一行中。
这是我的主要文件:
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
import pandas as pd
from Append_Function import append_df_to_excel
import os.path
import sys
class Watcher:
def __init__(self, args):
self.watch_dir = os.getcwd()
print(args[0])
self.directory_to_watch = os.path.join(self.watch_dir, args[1])
self.observer = Observer()
self.event_handler = Handler(patterns=["*.CSV"], ignore_patterns=["*.tmp"], ignore_directories=True)
def run(self):
self.observer.schedule(self.event_handler, self.directory_to_watch, recursive=False)
self.observer.start()
try:
while True:
time.sleep(1)
except:
self.observer.stop()
print("Error")
self.observer.join()
class Handler(PatternMatchingEventHandler):
@staticmethod
def on_any_event(event):
if event.is_directory:
return None
elif event.event_type == 'created':
# Take any action here when a file is first created.
print("Received created event - %s." % event.src_path)
df = pd.read_csv(event.src_path, header=1, index_col=0)
append_df_to_excel(os.path.join(os.getcwd(), "myfile.xlsx"), df)
elif event.event_type == 'modified':
# Taken any actionc here when a file is modified.
df = pd.read_csv(event.src_path, header=0, index_col=0)
append_df_to_excel(os.path.join(os.getcwd(), "myfile.xlsx"), df)
print("Received modified event - %s." % event.src_path)
if __name__ == '__main__':
print(sys.argv)
w = Watcher(sys.argv)
w.run()
这是我的Append函数:
import pandas as pd
import openpyxl as ox
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl')
# Python 2.x: define [FileNotFoundError] exception if it doesn't exist
try:
FileNotFoundError
except NameError:
FileNotFoundError = IOError
try:
# try to open an existing workbook
writer.book = ox.load_workbook(filename,keep_vba=True)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title: ws for ws in writer.book.worksheets}
except FileNotFoundError:
# file does not exist yet, we will create it
pass
if startrow is None:
startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs, header=True)
# save the workbook
writer.save()
最佳答案
必须通过循环添加数据帧:
import pandas as pd
from PyQt5 import QtCore, QtWidgets
class DataFrameTableWidget(QtWidgets.QTableWidget):
def append_dataframe(self, df):
df = df.copy()
if df.columns.size > self.columnCount():
self.setColumnCount(df.columns.size)
r = self.rowCount()
self.insertRow(r)
for c, column in enumerate(df):
it = QtWidgets.QTableWidgetItem(column)
self.setItem(r, c, it)
i = self.rowCount()
for r, row in df.iterrows():
self.insertRow(self.rowCount())
for c, (column, value) in enumerate(row.iteritems()):
it = QtWidgets.QTableWidgetItem(str(value))
self.setItem(i+r , c, it)
if __name__ == '__main__':
import sys
app = QtWidgets.QApplication(sys.argv)
import numpy as np
w = DataFrameTableWidget()
df = pd.DataFrame(np.random.randint(0, 100,size=(4, 4)), columns=list('ABCD'))
w.append_dataframe(df)
def after_show():
df = pd.DataFrame(np.random.randint(0, 100,size=(4, 4)), columns=list('ABCD'))
w.append_dataframe(df)
QtCore.QTimer.singleShot(2*1000, after_show)
w.resize(640, 480)
w.show()
sys.exit(app.exec_())
更新:
观察器在另一个线程上运行,因此它无法从该线程更新GUI,因此必须使用信号来传输信息:
import os
import time
import pandas as pd
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
from PyQt5 import QtCore, QtWidgets
from Append_Function import append_df_to_excel
class Emitter(QtCore.QObject):
newDataFrameSignal = QtCore.pyqtSignal(pd.DataFrame)
class Watcher:
def __init__(self, filename):
self.watch_dir = os.getcwd()
self.directory_to_watch = os.path.join(self.watch_dir, filename)
self.emitter = Emitter()
self.observer = Observer()
self.event_handler = Handler(
emitter=self.emitter,
patterns=["*.CSV"],
ignore_patterns=["*.tmp"],
ignore_directories=True
)
def run(self):
self.observer.schedule(self.event_handler, self.directory_to_watch, recursive=False)
self.observer.start()
class Handler(PatternMatchingEventHandler):
def __init__(self, *args, emitter=None, **kwargs):
super(Handler, self).__init__(*args, **kwargs)
self._emitter = emitter
def on_any_event(self, event):
if event.is_directory:
return None
elif event.event_type == 'created':
# Take any action here when a file is first created.
print("Received created event - %s." % event.src_path)
df = pd.read_csv(event.src_path, header=1)
self._emitter.newDataFrameSignal.emit(df.copy())
df.set_index(df.columns.values.tolist()[0], inplace=True)
append_df_to_excel(os.path.join(os.getcwd(), "myfile.xlsx"), df)
elif event.event_type == 'modified':
# Taken any actionc here when a file is modified.
df = pd.read_csv(event.src_path, header=1)
self._emitter.newDataFrameSignal.emit(df.copy())
df.set_index(df.columns.values.tolist()[0], inplace=True)
append_df_to_excel(os.path.join(os.getcwd(), "myfile.xlsx"), df)
print("Received modified event - %s." % event.src_path)
class DataFrameTableWidget(QtWidgets.QTableWidget):
@QtCore.pyqtSlot(pd.DataFrame)
def append_dataframe(self, df):
df = df.copy()
if df.columns.size > self.columnCount():
self.setColumnCount(df.columns.size)
r = self.rowCount()
self.insertRow(r)
for c, column in enumerate(df):
it = QtWidgets.QTableWidgetItem(column)
self.setItem(r, c, it)
i = self.rowCount()
for r, row in df.iterrows():
self.insertRow(self.rowCount())
for c, (column, value) in enumerate(row.iteritems()):
it = QtWidgets.QTableWidgetItem(str(value))
self.setItem(i+r , c, it)
if __name__ == '__main__':
import sys
app = QtWidgets.QApplication(sys.argv)
w = DataFrameTableWidget()
w.resize(640, 480)
w.show()
watcher = Watcher(sys.argv[1])
watcher.run()
watcher.emitter.newDataFrameSignal.connect(w.append_dataframe)
sys.exit(app.exec_())
关于python - 用于打印 Pandas 数据框的GUI,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/55496704/