Notice
Recent Posts
Recent Comments
Link
아미(아름다운미소)
csv reader 본문
import json
import os
import sys
import traceback

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from PyQt5.QtCore import (
    Qt, QAbstractTableModel, QSortFilterProxyModel,
    QThread, pyqtSignal, QObject
)
from PyQt5.QtGui import QColor
from PyQt5.QtWidgets import (
    QApplication, QMainWindow, QTableView, QFileDialog,
    QVBoxLayout, QWidget, QPushButton, QLabel,
    QStatusBar, QMessageBox, QLineEdit, QHBoxLayout,
    QComboBox, QHeaderView, QProgressDialog
)
class LoadWorker(QThread):
    """Worker thread that loads a Parquet or CSV file into a DataFrame.

    Every column is normalised to strings (dict/list values become JSON
    text) and missing values become empty strings, so the emitted frame
    can be rendered directly in a table view.

    Signals:
        progress (int): load progress, 0-100.
        finished (pd.DataFrame): the fully loaded frame.
        error (str): human-readable message plus traceback on failure.

    The loops poll ``isInterruptionRequested()`` between chunks, so a
    caller may stop the load cooperatively with ``requestInterruption()``;
    in that case neither ``finished`` nor ``error`` is emitted.
    """

    # Rows per chunk when streaming a CSV file.
    CSV_CHUNK_SIZE = 100000

    progress = pyqtSignal(int)
    # NOTE: this declaration shadows QThread's built-in no-argument
    # ``finished`` signal on this class; callers receive the DataFrame.
    finished = pyqtSignal(pd.DataFrame)
    error = pyqtSignal(str)

    def __init__(self, file_path):
        """Remember the path of the file to load.

        Args:
            file_path: path to a ``.parquet`` file; anything else is
                read as CSV.
        """
        super().__init__()
        self.file_path = file_path

    def safe_json_dumps(self, obj):
        """Convert a dict/list to a JSON string, anything else via str().

        Returns:
            The converted text, or ``"[Conversion Error]"`` when the value
            cannot be serialised.
        """
        try:
            if isinstance(obj, (dict, list)):
                return json.dumps(obj, ensure_ascii=False)
            return str(obj)
        except Exception:
            # Deliberate best effort: one unserialisable cell must not
            # abort the whole load.
            return "[Conversion Error]"

    def convert_complex_types(self, df):
        """Convert every column of *df* to strings, in place.

        Columns holding dict/list values are serialised as JSON; other
        non-string columns are converted with ``astype(str)``. Missing
        values become empty strings rather than the literal text
        ``"nan"``/``"None"``.

        Args:
            df: the frame to normalise (modified in place).

        Returns:
            The same frame, for call chaining.
        """
        for col in df.columns:
            series = df[col]
            try:
                present = series.notna()
                # Use the first non-null value for type detection so a
                # leading null does not hide a dict/list column.
                sample = series[present].iloc[0] if present.any() else None
                if isinstance(sample, (dict, list)):
                    converted = series.apply(self.safe_json_dumps)
                elif pd.api.types.is_string_dtype(series):
                    # Already text; remaining nulls are blanked in run().
                    continue
                else:
                    converted = series.astype(str)
                # Restore nulls as "" — astype(str) would otherwise have
                # frozen them as the text "nan".
                df[col] = converted.where(present, "")
            except Exception as e:
                print(f"컬럼 {col} 처리 오류: {e}")
                df[col] = series.astype(str)
        return df

    def _count_csv_rows(self):
        """Return the number of physical lines after the header line.

        Used only to scale the progress bar; undecodable bytes are
        replaced so counting cannot fail on encoding problems.
        """
        with open(self.file_path, 'r', encoding='utf-8',
                  errors='replace') as handle:
            return sum(1 for _ in handle) - 1

    def _load_parquet(self):
        """Read the Parquet file one row group at a time.

        Returns:
            A list of converted DataFrames, or None if interrupted.
        """
        parquet_file = pq.ParquetFile(self.file_path)
        num_row_groups = parquet_file.num_row_groups
        chunks = []
        for i in range(num_row_groups):
            if self.isInterruptionRequested():
                return None
            self.progress.emit(10 + int((i + 1) / num_row_groups * 70))
            table = parquet_file.read_row_group(i)
            chunks.append(self.convert_complex_types(table.to_pandas()))
        return chunks

    def _load_csv(self):
        """Read the CSV file in chunks of ``CSV_CHUNK_SIZE`` rows.

        Returns:
            A list of converted DataFrames, or None if interrupted.
        """
        # Header-only or empty files give 0 / -1 here; clamp to 1 so the
        # progress computation cannot divide by zero.
        total_rows = max(self._count_csv_rows(), 1)
        processed_rows = 0
        chunks = []
        reader = pd.read_csv(self.file_path, chunksize=self.CSV_CHUNK_SIZE)
        try:
            for chunk in reader:
                if self.isInterruptionRequested():
                    return None
                fraction = min(processed_rows / total_rows, 1.0)
                self.progress.emit(10 + int(fraction * 70))
                chunks.append(self.convert_complex_types(chunk))
                processed_rows += len(chunk)
        finally:
            # Release the file handle even on early exit.
            reader.close()
        return chunks

    def run(self):
        """Thread entry point: load the file and emit the result.

        Emits ``finished`` with the frame on success, ``error`` with a
        message and traceback on failure, and nothing when interrupted.
        """
        try:
            self.progress.emit(5)
            if self.file_path.lower().endswith('.parquet'):
                chunks = self._load_parquet()
            else:
                chunks = self._load_csv()
            if chunks is None:
                # Interrupted by the caller: emit nothing.
                return
            self.progress.emit(90)
            # pd.concat raises on an empty list (file with no data).
            if chunks:
                result_df = pd.concat(chunks, ignore_index=True)
            else:
                result_df = pd.DataFrame()
            self.progress.emit(95)
            result_df = result_df.fillna("")  # blank out remaining nulls
            self.progress.emit(100)
            self.finished.emit(result_df)
        except Exception as e:
            error_msg = f"로딩 실패: {str(e)}\n\n{traceback.format_exc()}"
            self.error.emit(error_msg)
class DataFrameModel(QAbstractTableModel):
    """Read-only Qt table model backed by a pandas DataFrame.

    Cells are shown as text; missing values are shown as empty strings.
    Cells holding a dict or list get a light-blue background, all other
    cells a white one.
    """

    def __init__(self, data):
        """Wrap *data*, the DataFrame to expose to Qt views."""
        super().__init__()
        self._data = data

    def rowCount(self, parent=None):
        """Return the number of rows (0 for any valid parent index)."""
        # A table model has no child items, so a valid parent has no rows.
        if parent is not None and parent.isValid():
            return 0
        return len(self._data)

    def columnCount(self, parent=None):
        """Return the number of columns (0 for any valid parent index)."""
        if parent is not None and parent.isValid():
            return 0
        return len(self._data.columns)

    @staticmethod
    def _is_missing(value):
        """Return True when *value* is a scalar null (None, NaN, NaT).

        Containers are never treated as missing: ``pd.isna`` would return
        an element-wise array for them, whose truth value is ambiguous.
        """
        if isinstance(value, (dict, list, tuple, set)):
            return False
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            # Array-like cell values yield an array result; not missing.
            return False

    def data(self, index, role=Qt.DisplayRole):
        """Return the display text or background colour for *index*.

        Returns None for invalid indexes and for any other role.
        """
        if not index.isValid():
            return None
        # Views query many roles per cell; skip the DataFrame lookup for
        # the roles this model does not serve.
        if role not in (Qt.DisplayRole, Qt.BackgroundRole):
            return None
        value = self._data.iat[index.row(), index.column()]
        if role == Qt.DisplayRole:
            return "" if self._is_missing(value) else str(value)
        if isinstance(value, (dict, list)):
            return QColor(240, 248, 255)  # background for complex types
        return QColor(255, 255, 255)

    def headerData(self, section, orientation, role=Qt.DisplayRole):
        """Return the column name or row index label for *section*.

        Returns None for non-display roles and out-of-range sections.
        """
        if role != Qt.DisplayRole:
            return None
        if orientation == Qt.Horizontal:
            labels = self._data.columns
        else:
            labels = self._data.index
        if 0 <= section < len(labels):
            return str(labels[section])
        return None
class DataViewer(QMainWindow):
    """Main window: opens a Parquet/CSV file and shows it in a table.

    Loading runs on a ``LoadWorker`` thread while a modal progress
    dialog is shown. The search box and column selector drive the
    filter of the sort/filter proxy model placed in front of the data.
    """

    def __init__(self):
        """Create the window and build its widgets."""
        super().__init__()
        # Worker of the load currently in progress (or last completed).
        self.load_worker = None
        # Cancelled workers that may still be running; kept referenced so
        # the thread objects are not destroyed while their threads run.
        self._retired_workers = []
        # Source model currently shown; held so it outlives the call
        # that created it.
        self.model = None
        self.setWindowTitle("Data Viewer with Dict Handling")
        self.setGeometry(100, 100, 1200, 800)
        self.setup_ui()

    def setup_ui(self):
        """Build the control panel, table view, status bar and dialog."""
        self.central_widget = QWidget()
        self.setCentralWidget(self.central_widget)
        layout = QVBoxLayout(self.central_widget)

        # Control panel
        control_panel = QWidget()
        control_layout = QHBoxLayout(control_panel)
        self.btn_open = QPushButton("파일 열기")
        self.btn_open.clicked.connect(self.open_file)
        control_layout.addWidget(self.btn_open)
        self.search_input = QLineEdit()
        self.search_input.setPlaceholderText("검색어 입력")
        # Filter on Enter rather than per keystroke: re-filtering a large
        # table on every character would stall the UI.
        self.search_input.returnPressed.connect(self.apply_filter)
        control_layout.addWidget(self.search_input)
        self.column_combo = QComboBox()
        self.column_combo.addItem("모든 컬럼")
        self.column_combo.currentIndexChanged.connect(self.apply_filter)
        control_layout.addWidget(self.column_combo)
        layout.addWidget(control_panel)

        # Table view
        self.table_view = QTableView()
        self.table_view.setSortingEnabled(True)
        self.proxy_model = QSortFilterProxyModel()
        self.proxy_model.setFilterCaseSensitivity(Qt.CaseInsensitive)
        self.table_view.setModel(self.proxy_model)
        layout.addWidget(self.table_view)

        # Status bar
        self.status_bar = QStatusBar()
        self.setStatusBar(self.status_bar)

        # Loading dialog
        self.progress_dialog = QProgressDialog(
            "파일을 로드 중입니다...", "취소", 0, 100, self)
        self.progress_dialog.setWindowModality(Qt.WindowModal)
        self.progress_dialog.canceled.connect(self.cancel_loading)
        # A freshly constructed QProgressDialog arms a timer that shows
        # it after its minimum duration; reset() stops that timer so the
        # dialog only appears when a load actually starts.
        self.progress_dialog.reset()

    def apply_filter(self, *_args):
        """Apply the search text to the selected column (or all columns).

        Accepts and ignores any signal arguments so it can be connected
        to both ``returnPressed`` and ``currentIndexChanged``.
        """
        combo_index = self.column_combo.currentIndex()
        # Combo entry 0 is the "all columns" item; the proxy model uses
        # key column -1 to mean "match against every column".
        key_column = combo_index - 1 if combo_index > 0 else -1
        self.proxy_model.setFilterKeyColumn(key_column)
        self.proxy_model.setFilterFixedString(self.search_input.text())

    def open_file(self):
        """Ask the user for a file and start loading it."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "파일 열기", "",
            "데이터 파일 (*.parquet *.csv);;모든 파일 (*)")
        if file_path:
            self.load_data(file_path)

    def _retire_worker(self, worker):
        """Ask *worker* to stop and keep it alive until its thread ends."""
        worker.requestInterruption()
        self._retired_workers.append(worker)

    def _is_stale_signal(self):
        """Return True when the current slot call came from an old worker.

        Direct (non-signal) calls have no sender and are never stale.
        """
        sender = self.sender()
        return sender is not None and sender is not self.load_worker

    def load_data(self, file_path):
        """Start loading *file_path* in the background with progress."""
        # Forget cancelled workers whose threads have ended.
        self._retired_workers = [
            w for w in self._retired_workers if w.isRunning()
        ]
        if self.load_worker is not None and self.load_worker.isRunning():
            self._retire_worker(self.load_worker)

        self.progress_dialog.reset()
        self.progress_dialog.show()
        self.btn_open.setEnabled(False)
        self.load_worker = LoadWorker(file_path)
        self.load_worker.progress.connect(self.update_progress)
        self.load_worker.finished.connect(self.data_load_complete)
        self.load_worker.error.connect(self.data_load_error)
        self.load_worker.start()

    def update_progress(self, value):
        """Show *value* (0-100) in the progress dialog."""
        if self._is_stale_signal():
            return
        self.progress_dialog.setValue(value)

    def data_load_complete(self, df):
        """Display the loaded frame and refresh the controls."""
        if self._is_stale_signal():
            # Result of a load the user already cancelled.
            return
        self.progress_dialog.reset()
        self.btn_open.setEnabled(True)

        # Install the data model.
        self.model = DataFrameModel(df)
        self.proxy_model.setSourceModel(self.model)

        # Refresh the column list without firing the filter slot for
        # every intermediate combo state.
        self.column_combo.blockSignals(True)
        self.column_combo.clear()
        self.column_combo.addItem("모든 컬럼")
        # Column labels are not guaranteed to be strings.
        self.column_combo.addItems([str(col) for col in df.columns])
        self.column_combo.blockSignals(False)
        self.apply_filter()

        # Update the status bar; size is reported in MB.
        try:
            file_size = (
                os.path.getsize(self.load_worker.file_path) / (1024 * 1024)
            )
        except (AttributeError, OSError):
            # No worker (direct call) or the file vanished meanwhile.
            file_size = 0.0
        self.status_bar.showMessage(
            f"로드 완료: {len(df):,}행 | {len(df.columns)}열 | {file_size:.2f}MB | "
            f"Dict/List 컬럼: {self.count_complex_columns(df)}개"
        )

    def count_complex_columns(self, df):
        """Count columns whose first-row value is a dict or list.

        NOTE(review): LoadWorker in this file converts dict/list values
        to strings before emitting, so for frames from that worker this
        presumably always reports 0 — confirm the intended behaviour.
        """
        if len(df) == 0:
            return 0
        return sum(
            1 for col in df.columns
            if isinstance(df[col].iloc[0], (dict, list))
        )

    def data_load_error(self, error_msg):
        """Report a failed load to the user."""
        if self._is_stale_signal():
            return
        self.progress_dialog.reset()
        self.btn_open.setEnabled(True)
        QMessageBox.critical(self, "로드 오류", error_msg)
        self.status_bar.showMessage("로드 실패")

    def cancel_loading(self):
        """Cancel the load in progress, if any.

        The worker is asked to stop cooperatively rather than being
        terminated: forcibly killing a thread can interrupt it at any
        point and leave shared state inconsistent. Anything it still
        emits is ignored via the stale-signal check.
        """
        worker = self.load_worker
        if worker is not None and worker.isRunning():
            self._retire_worker(worker)
            self.load_worker = None
        self.progress_dialog.reset()
        self.btn_open.setEnabled(True)
        self.status_bar.showMessage("로딩 취소됨")
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the viewer
    # window, and exit with the event loop's return code.
    qt_app = QApplication(sys.argv)
    qt_app.setStyle('Fusion')
    main_window = DataViewer()
    main_window.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
Comments