메모리 사용량을 상세히 분석

랭귀지/pandas
메모리 사용량을 상세히 분석

유키공 2025. 3. 28. 10:21
def analyze_memory_usage(df, warning_threshold=20):
    """Report per-column memory usage of a DataFrame and print tuning hints.

    Three sections are printed to stdout: a warning listing every column
    whose share of the total deep memory is at or above
    ``warning_threshold``, the full analysis table, and a dtype-based
    optimization hint per column.

    Args:
        df: The pandas DataFrame to analyze.
        warning_threshold: Percentage of total memory (0-100). Columns whose
            rounded share is greater than or equal to this value are listed
            in the warning section.

    Returns:
        A DataFrame indexed like ``df.memory_usage(deep=True)`` (the
        ``'Index'`` entry followed by one row per column) with the columns
        ``'Memory (MB)'``, ``'Percentage (%)'``, ``'Dtype'`` and
        ``'Unique Values'``. The ``'Index'`` row has NaN for the last two,
        since an index has no entry in ``df.dtypes`` / ``df.nunique()``.
        Returns ``None`` if the analysis fails for any reason; the failure
        is printed rather than raised.
    """
    # Imported locally so the function is usable even when the surrounding
    # module or notebook has not bound the name ``pd``.
    import pandas as pd

    try:
        # 1. Deep memory usage per column (plus the index entry).
        mem_usage = df.memory_usage(deep=True)
        total_memory = mem_usage.sum()

        # Computed once and reused by both the table and the hints below.
        dtypes = df.dtypes
        unique_counts = df.nunique()

        # 2. Build the analysis table; Series are aligned on mem_usage.index.
        mem_analysis = pd.DataFrame({
            'Memory (MB)': (mem_usage / (1024**2)).round(2),
            'Percentage (%)': (mem_usage / total_memory * 100).round(2),
            'Dtype': dtypes,
            'Unique Values': unique_counts,
        }, index=mem_usage.index)

        # 3. Columns at or above the threshold. Iterating df.columns skips
        #    the 'Index' row; ">=" matches the "이상" wording of the message.
        percentages = mem_analysis['Percentage (%)']
        high_mem_cols = [
            col for col in df.columns
            if percentages.loc[col] >= warning_threshold
        ]

        if high_mem_cols:
            print(f"⚠️ [경고] 다음 컬럼이 전체 메모리의 {warning_threshold}% 이상 사용:")
            for col in high_mem_cols:
                print(f"  - {col}: {percentages.loc[col]}% (타입: {dtypes.loc[col]})")

        # 4. Print the table (explicit column selection instead of drop()).
        print("\n🔍 메모리 사용량 분석:")
        display_cols = ['Memory (MB)', 'Percentage (%)', 'Dtype', 'Unique Values']
        print(mem_analysis[display_cols])

        # 5. Optimization hints. Dtype predicates are used instead of
        #    substring tests on the dtype name so that e.g. 'interval[...]'
        #    is not mistaken for an integer column and nullable
        #    Int64/Float64 columns are recognized.
        print("\n💡 최적화 권장:")
        for col, col_dtype, nunique in zip(df.columns, dtypes, unique_counts):
            if pd.api.types.is_object_dtype(col_dtype):
                print(f"  - '{col}': 범주형 변환 (고유값 {nunique}개)")
            elif pd.api.types.is_integer_dtype(col_dtype):
                print(f"  - '{col}': 정수형 다운캐스트 (현재: {col_dtype})")
            elif pd.api.types.is_float_dtype(col_dtype):
                print(f"  - '{col}': 실수형 다운캐스트 (현재: {col_dtype})")

        return mem_analysis

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and return
        # None instead of propagating.
        print(f"❌ 분석 실패: {e}")
        # The diagnostics must not raise themselves, so only print what the
        # input actually provides (None, a Series, etc. lack these).
        index = getattr(df, 'index', None)
        columns = getattr(df, 'columns', None)
        if isinstance(index, pd.Index):
            print(f"인덱스 샘플: {index[:5].tolist()}")
        if isinstance(columns, pd.Index):
            print(f"컬럼 목록: {columns.tolist()}")
        return None
저작자표시 (새창열림)