카테고리 없음

parquet 뷰어

유키공 2025. 4. 29. 14:58
pip install pyarrow pandas PyQt5==5.15.4 PyQt5-sip==12.8.1
pyinstaller --onefile --hidden-import=fastparquet --noconsole parquet_viewer.py
# PyInstaller spec-file fragment describing what to analyse and bundle for the
# viewer script. `Analysis` and `block_cipher` are not defined in this snippet;
# presumably they come from the rest of the generated .spec file -- confirm
# before running this fragment on its own.
a = Analysis(
    ['parquet_viewer.py'],  # entry-point script to analyse
    pathex=[],
    binaries=[],
    datas=[],
    # Modules named explicitly as hidden imports.
    hiddenimports=[
        'fastparquet',
        'fastparquet.speedups',  # fastparquet's C extension module
        'pandas',
        'pyarrow'
    ],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)
import sys
import pandas as pd
from PyQt5.QtWidgets import (
    QApplication, QMainWindow, QTableView, QFileDialog,
    QVBoxLayout, QWidget, QPushButton, QLabel,
    QStatusBar, QMessageBox
)
from PyQt5.QtCore import Qt, QAbstractTableModel

class PandasModel(QAbstractTableModel):
    """Read-only Qt table model that exposes a pandas DataFrame to a view.

    Every cell and every header label is rendered through ``str()`` so that
    arbitrary Python/NumPy objects stored in the frame (or used as column or
    index labels) are always displayable.

    Args:
        data: The DataFrame to display. The model keeps a reference to it and
            does not copy it.
        parent: Optional Qt parent object, forwarded to the base class.
    """

    def __init__(self, data, parent=None):
        super().__init__(parent)
        self._data = data

    def rowCount(self, parent=None):
        """Return the number of DataFrame rows (0 for a valid parent index)."""
        # A table model is flat: Qt's guidance is to report no children for
        # any valid (i.e. non-root) parent index.
        if parent is not None and parent.isValid():
            return 0
        return self._data.shape[0]

    def columnCount(self, parent=None):
        """Return the number of DataFrame columns (0 for a valid parent index)."""
        if parent is not None and parent.isValid():
            return 0
        return self._data.shape[1]

    def data(self, index, role=Qt.DisplayRole):
        """Return the cell at ``index`` as a string for ``Qt.DisplayRole``.

        Returns ``None`` for invalid indexes and for every other role.
        """
        if not index.isValid() or role != Qt.DisplayRole:
            return None
        # `.iat` is the scalar positional accessor; it yields the same value
        # as `.iloc[row, col]` with less per-call overhead.
        return str(self._data.iat[index.row(), index.column()])

    def headerData(self, section, orientation, role=Qt.DisplayRole):
        """Return the column or index label for ``section`` as a string.

        ``role`` defaults to ``Qt.DisplayRole`` to match the Qt signature;
        any other role yields ``None``.
        """
        if role != Qt.DisplayRole:
            return None
        if orientation == Qt.Horizontal:
            # Stringify so non-string column labels (ints, tuples, ...) are
            # shown too, consistent with the vertical header below.
            return str(self._data.columns[section])
        if orientation == Qt.Vertical:
            return str(self._data.index[section])
        return None

class ParquetViewer(QMainWindow):
    """Main window that loads a Parquet file and shows it in a table view.

    The window contains an "open" button, a one-line summary label, the table
    itself and a status bar. The most recently loaded frame is kept in
    ``self.df`` (an empty DataFrame until a file has been loaded).
    """

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Parquet File Viewer (PyArrow Only)")
        self.setGeometry(100, 100, 1000, 800)

        # Central widget and its vertical layout.
        self.main_widget = QWidget()
        self.setCentralWidget(self.main_widget)
        # Deliberately not stored as `self.layout`: that attribute name would
        # shadow the inherited QWidget.layout() method on this window.
        self.main_layout = QVBoxLayout(self.main_widget)

        # "Open file" button.
        self.open_button = QPushButton("Open Parquet File")
        self.open_button.clicked.connect(self.open_file)
        self.main_layout.addWidget(self.open_button)

        # Label summarising the loaded file.
        self.file_info_label = QLabel("No file loaded")
        self.file_info_label.setStyleSheet("font-weight: bold; color: #333;")
        self.main_layout.addWidget(self.file_info_label)

        # Table view that displays the DataFrame.
        self.table_view = QTableView()
        self.table_view.setStyleSheet("QTableView { font-size: 10pt; }")
        self.main_layout.addWidget(self.table_view)

        # Status bar for transient messages.
        self.status_bar = QStatusBar()
        self.setStatusBar(self.status_bar)

        # Initial (empty) data.
        self.df = pd.DataFrame()

    @staticmethod
    def _stringify_nested_columns(df):
        """Convert, in place, columns holding dict/list values to strings."""
        for col in df.columns:
            column = df[col]
            # dict/list values can only live in object-dtype columns, so the
            # per-element Python scan is skipped for every other dtype.
            if column.dtype != object:
                continue
            if column.apply(lambda x: isinstance(x, (dict, list))).any():
                df[col] = column.astype(str)

    def open_file(self):
        """Ask the user for a Parquet file and display its contents.

        Does nothing if the dialog is cancelled. On failure an error dialog is
        shown and the previously displayed data (and ``self.df``) is left
        untouched.
        """
        options = QFileDialog.Options()
        file_name, _ = QFileDialog.getOpenFileName(
            self, "Open Parquet File", "",
            "Parquet Files (*.parquet);;All Files (*)",
            options=options)

        if not file_name:
            return

        try:
            # Explicitly request the PyArrow engine.
            df = pd.read_parquet(file_name, engine='pyarrow')
            self._stringify_nested_columns(df)
        except ImportError as e:
            # pandas raises ImportError when the requested engine is missing;
            # only in this case is the "install pyarrow" hint accurate.
            error_msg = f"Error: {str(e)}\n\nRequired: pip install pyarrow"
            QMessageBox.critical(self, "Load Error", error_msg)
            self.status_bar.showMessage("Error: Install pyarrow first", 5000)
            return
        except Exception as e:
            # UI boundary: report any other failure (corrupt file, permission
            # problem, ...) as-is instead of blaming a missing dependency.
            QMessageBox.critical(self, "Load Error", f"Error: {e}")
            self.status_bar.showMessage("Error: Could not load file", 5000)
            return

        # Commit the new frame only after loading succeeded, so `self.df`
        # always matches what the table shows.
        self.df = df
        model = PandasModel(self.df)
        self.table_view.setModel(model)
        self.table_view.resizeColumnsToContents()

        # Update the file summary. The path returned by the Qt dialog is
        # split on '/' to obtain the bare file name.
        self.file_info_label.setText(
            f"File: {file_name.split('/')[-1]} | "
            f"Rows: {len(self.df):,} | "
            f"Columns: {len(self.df.columns)} | "
            f"Engine: PyArrow")

        self.status_bar.showMessage("File loaded successfully", 3000)

if __name__ == "__main__":
    # Script entry point: create the Qt application, apply the look and feel,
    # show the viewer window and hand control to the event loop.
    qt_app = QApplication(sys.argv)
    qt_app.setStyle('Fusion')

    # Use a 10pt variant of the default application font.
    base_font = qt_app.font()
    base_font.setPointSize(10)
    qt_app.setFont(base_font)

    window = ParquetViewer()
    window.show()

    # Propagate the event loop's return code as the process exit status.
    exit_code = qt_app.exec_()
    sys.exit(exit_code)