카테고리 없음
parquet 뷰어
유키공
2025. 4. 29. 14:58
pip install pyarrow pandas PyQt5==5.15.4 PyQt5-sip==12.8.1
pyinstaller --onefile --hidden-import=fastparquet --noconsole parquet_viewer.py
# Excerpt of what appears to be a PyInstaller .spec file (TODO confirm).
# `Analysis` and `block_cipher` are not defined in this excerpt; presumably the
# surrounding spec file provides them.
a = Analysis(
# Entry-point script(s) passed to Analysis.
['parquet_viewer.py'],
pathex=[],
binaries=[],
datas=[],
# Modules listed explicitly as hidden imports.
hiddenimports=[
'fastparquet',
'fastparquet.speedups', # C extension module of fastparquet
'pandas',
'pyarrow'
],
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=[],
win_no_prefer_redirects=False,
win_private_assemblies=False,
cipher=block_cipher,
noarchive=False,
)
import sys
import pandas as pd
from PyQt5.QtWidgets import (
QApplication, QMainWindow, QTableView, QFileDialog,
QVBoxLayout, QWidget, QPushButton, QLabel,
QStatusBar, QMessageBox
)
from PyQt5.QtCore import Qt, QAbstractTableModel
class PandasModel(QAbstractTableModel):
    """Read-only table model that exposes a pandas DataFrame to a QTableView.

    Every cell, column label and index label is rendered through ``str()``.
    """

    def __init__(self, data):
        """Keep a reference to the DataFrame to display.

        Args:
            data: The pandas DataFrame backing this model. It is stored by
                reference, not copied.
        """
        QAbstractTableModel.__init__(self)
        self._data = data

    def rowCount(self, parent=None):
        """Return the number of DataFrame rows.

        Args:
            parent: Parent model index, or None. A valid parent yields 0,
                because a flat table has no child rows.
        """
        if parent is not None and parent.isValid():
            return 0
        return self._data.shape[0]

    def columnCount(self, parent=None):
        """Return the number of DataFrame columns.

        Args:
            parent: Parent model index, or None. A valid parent yields 0,
                because a flat table has no child columns.
        """
        if parent is not None and parent.isValid():
            return 0
        return self._data.shape[1]

    def data(self, index, role=Qt.DisplayRole):
        """Return the text of the cell at ``index`` for the display role.

        Args:
            index: Model index of the requested cell.
            role: Requested Qt item data role.

        Returns:
            ``str()`` of the cell value for ``Qt.DisplayRole`` on a valid
            index; None otherwise.
        """
        if not index.isValid() or role != Qt.DisplayRole:
            return None
        # ``.iat`` is the scalar positional accessor; it returns the same
        # value as ``.iloc[row, col]`` without the general indexing overhead.
        return str(self._data.iat[index.row(), index.column()])

    def headerData(self, section, orientation, role=Qt.DisplayRole):
        """Return the header label for a column or a row.

        Args:
            section: Zero-based column position (horizontal) or row position
                (vertical).
            orientation: ``Qt.Horizontal`` for column labels, ``Qt.Vertical``
                for index labels.
            role: Requested Qt item data role. Defaults to ``Qt.DisplayRole``
                so the method can be called without it, like ``data()``.

        Returns:
            The label as a string for the display role; None otherwise.
        """
        if role != Qt.DisplayRole:
            return None
        if orientation == Qt.Horizontal:
            # Stringify so non-string labels render like the vertical header.
            return str(self._data.columns[section])
        if orientation == Qt.Vertical:
            return str(self._data.index[section])
        return None
class ParquetViewer(QMainWindow):
    """Main window that loads a Parquet file and shows it in a table view."""

    def __init__(self):
        """Build the window: open button, info label, table view, status bar."""
        super().__init__()
        self.setWindowTitle("Parquet File Viewer (PyArrow Only)")
        self.setGeometry(100, 100, 1000, 800)

        # Central widget and its vertical layout.
        self.main_widget = QWidget()
        self.setCentralWidget(self.main_widget)
        # NOTE(review): this attribute name appears to shadow the inherited
        # QWidget.layout() accessor; kept unchanged for backward compatibility.
        self.layout = QVBoxLayout(self.main_widget)

        # "Open file" button.
        self.open_button = QPushButton("Open Parquet File")
        self.open_button.clicked.connect(self.open_file)
        self.layout.addWidget(self.open_button)

        # Label summarising the loaded file.
        self.file_info_label = QLabel("No file loaded")
        self.file_info_label.setStyleSheet("font-weight: bold; color: #333;")
        self.layout.addWidget(self.file_info_label)

        # Table view that displays the DataFrame.
        self.table_view = QTableView()
        self.table_view.setStyleSheet("QTableView { font-size: 10pt; }")
        self.layout.addWidget(self.table_view)

        # Status bar.
        self.status_bar = QStatusBar()
        self.setStatusBar(self.status_bar)

        # Start with an empty DataFrame until a file is opened.
        self.df = pd.DataFrame()

    @staticmethod
    def _stringify_nested_columns(frame):
        """Convert columns holding dict/list values to strings, in place.

        Only object-dtype columns are scanned, and the scan stops at the
        first dict/list found in a column.

        Args:
            frame: DataFrame to normalise.

        Returns:
            The same DataFrame, for convenience.
        """
        for col in frame.columns:
            series = frame[col]
            if not pd.api.types.is_object_dtype(series.dtype):
                continue
            if any(isinstance(value, (dict, list)) for value in series):
                frame[col] = series.astype(str)
        return frame

    def open_file(self):
        """Ask for a Parquet file and load it into the table view.

        Does nothing if the dialog is cancelled. On failure a critical
        message box is shown and the previously loaded data is kept. The
        "install pyarrow" hint is shown only when the failure is an
        ImportError.
        """
        options = QFileDialog.Options()
        file_name, _ = QFileDialog.getOpenFileName(
            self, "Open Parquet File", "",
            "Parquet Files (*.parquet);;All Files (*)",
            options=options)
        if not file_name:
            return

        try:
            # Explicitly select the PyArrow engine.
            frame = pd.read_parquet(file_name, engine='pyarrow')
            # Normalise dict/list-valued columns to strings.
            frame = self._stringify_nested_columns(frame)

            # Publish the new frame only after loading and normalisation
            # succeeded, so self.df stays in sync with the displayed model.
            self.df = frame
            model = PandasModel(self.df)
            self.table_view.setModel(model)
            self.table_view.resizeColumnsToContents()

            # Update the file summary. The name is taken after the last '/';
            # presumably the dialog returns '/'-separated paths (TODO confirm).
            self.file_info_label.setText(
                f"File: {file_name.split('/')[-1]} | "
                f"Rows: {len(self.df):,} | "
                f"Columns: {len(self.df.columns)} | "
                f"Engine: PyArrow")
            self.status_bar.showMessage("File loaded successfully", 3000)
        except ImportError as e:
            # The Parquet engine (or one of its dependencies) is missing.
            error_msg = f"Error: {str(e)}\n\nRequired: pip install pyarrow"
            QMessageBox.critical(self, "Load Error", error_msg)
            self.status_bar.showMessage("Error: Install pyarrow first", 5000)
        except Exception as e:
            # Any other failure (unreadable/corrupt file, etc.): report the
            # real cause instead of suggesting a reinstall.
            QMessageBox.critical(self, "Load Error", f"Error: {str(e)}")
            self.status_bar.showMessage("Error: Failed to load file", 5000)
if __name__ == "__main__":
    # Script entry point: create the application, apply the look and feel,
    # show the viewer window and run the event loop until it exits.
    application = QApplication(sys.argv)
    application.setStyle('Fusion')

    # Use a 10pt application-wide font.
    default_font = application.font()
    default_font.setPointSize(10)
    application.setFont(default_font)

    window = ParquetViewer()
    window.show()

    exit_code = application.exec_()
    sys.exit(exit_code)