# Source code for timeseries_compute.export_util

import pandas as pd
import datetime
import inspect
import os
import re
import json
import numpy as np
from typing import Any, Optional, Tuple

# Module-level state shared by every export_data() call.

# Master switch: export_data() returns its input without writing anything
# while this is False. Defaults to False; can be changed at runtime.
export_data_mode: bool = False  # Default to False, can be changed at runtime

# Number of exports performed so far; export_data() increments it once per
# saving call and embeds it in the filename as a zero-padded 3-digit step.
_DATA_STEP_COUNTER: int = 0
# Timestamp string ("%Y%m%d_%H%M%S") captured by export_data() on its first
# saving call; stays None until then.
_STATIC_TIMESTAMP: Optional[str] = None  # set on first call


def _sanitize_filename_part(text: Any) -> str:
    """Return ``text`` reduced to characters that are safe inside a filename."""
    if text is None:
        return "unknown"
    cleaned = re.sub(r"[^\w.\-]+", "_", str(text)).strip("_")
    return cleaned or "unknown"


def _detect_file_format(data: Any) -> str:
    """Return the file extension (without dot) used to store ``data``."""
    if isinstance(data, pd.DataFrame):
        return "csv"
    if isinstance(data, (dict, list)):
        return "json"
    if isinstance(data, np.ndarray):
        return "npy"
    if isinstance(data, pd.Series):
        return "json"
    if callable(getattr(data, "tolist", None)):
        return "json"
    return "txt"


def _jsonable_keys(obj: Any) -> Any:
    """Return a copy of ``obj`` whose dict keys are all acceptable to ``json``."""
    if isinstance(obj, dict):
        return {
            (k if k is None or isinstance(k, (str, int, float, bool)) else str(k)):
                _jsonable_keys(v)
            for k, v in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [_jsonable_keys(item) for item in obj]
    return obj


def _to_json_text(data: Any) -> str:
    """Serialize ``data`` to a JSON string, degrading to ``str(data)`` on failure."""
    try:
        if data is None or isinstance(data, (dict, list, int, float, str, bool)):
            payload = data
        elif callable(getattr(data, "to_dict", None)):
            # pandas objects (e.g. Series) and other dict-convertible types.
            payload = data.to_dict()
        elif callable(getattr(data, "tolist", None)):
            payload = data.tolist()
        else:
            payload = str(data)
        try:
            return json.dumps(payload, indent=2, default=str)
        except TypeError:
            # json rejects keys that are not str/int/float/bool/None
            # (tuples, timestamps, ...); retry with those keys stringified.
            return json.dumps(_jsonable_keys(payload), indent=2, default=str)
    except Exception:
        # Best-effort debug dump: conversion hooks on arbitrary objects may
        # raise anything, so fall back to the plain string representation.
        return json.dumps(str(data), indent=2)


def export_data(data: Any, folder: str = "outputs", name: Optional[str] = None) -> Any:
    """
    Save ``data`` to a sequentially numbered file and return it unchanged.

    Nothing is written unless the module-level ``export_data_mode`` flag is
    True. When it is, the file is named::

        <timestamp>--<step>--<script>#<line>--var=<label>.<ext>

    where ``timestamp`` is captured once, on the first saving call, and reused
    for every later export in the same process, ``step`` is a zero-padded
    3-digit counter, ``script``/``line`` come from ``get_caller_info()`` and
    ``label`` is ``name`` when given, otherwise the variable name reported by
    ``get_caller_info()``. Script and label are sanitized so the result is a
    valid filename.

    The format depends on the data type: DataFrame -> csv, dict/list/Series and
    objects with a ``tolist`` method -> json, ndarray -> npy, anything else ->
    txt (``str(data)``).

    Args:
        data: The data to save (DataFrame, dict, list, string, etc.)
        folder: Directory to save files in; created if missing.
        name: Optional explicit label used instead of the auto-detected
            variable name.

    Returns:
        The original data (for chaining).

    Raises:
        OSError: If ``folder`` cannot be created. Errors while writing the
            file itself are reported with ``print`` and not raised.
    """
    global _DATA_STEP_COUNTER
    global _STATIC_TIMESTAMP

    # If debugging mode is off, just return the data without saving
    if not export_data_mode:
        return data

    # Generate the timestamp only on the first call so that all files of one
    # run share a prefix and sort by step number.
    if _STATIC_TIMESTAMP is None:
        _STATIC_TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    _DATA_STEP_COUNTER += 1

    # Create output folder if it doesn't exist
    os.makedirs(folder, exist_ok=True)

    script_name, line_number, variable_name = get_caller_info()
    label = name if name else variable_name
    file_format = _detect_file_format(data)

    # e.g. 20250502_203805--004--example_multivariate_garch#116--var=correlation_matrix.csv
    filename = (
        f"{_STATIC_TIMESTAMP}"
        f"--{_DATA_STEP_COUNTER:03d}"
        f"--{_sanitize_filename_part(script_name)}"
        f"#{line_number}"
        f"--var={_sanitize_filename_part(label)}"
        f".{file_format}"
    )
    full_path = os.path.join(folder, filename)

    try:
        if file_format == "csv":
            data.to_csv(full_path)
        elif file_format == "npy":
            np.save(full_path, data)
        else:
            # Build the full text first so a serialization failure can never
            # leave a half-written file behind.
            text = _to_json_text(data) if file_format == "json" else str(data)
            with open(full_path, "w", encoding="utf-8") as f:
                f.write(text)
        print(f"Saved: {full_path}")
    except Exception as e:
        # Exporting is a debugging aid; it must not break the caller's pipeline.
        print(f"Error saving data: {e}")

    # Return the original data to allow for piping
    return data
# Dotted name directly in front of ".export_data(" (the method-call receiver).
_RECEIVER_BEFORE_CALL = re.compile(r"([A-Za-z_][\w.]*)\.\s*$")
# Plain (possibly dotted) name passed as the first argument of export_data(...).
_FIRST_ARGUMENT = re.compile(r"export_data\(\s*([A-Za-z_][\w.]*)\s*[,)]")
# A complete plain or dotted identifier.
_NAME_PATH = re.compile(r"[A-Za-z_][\w.]*")


def _first_external_frame(frame: Any) -> Any:
    """Return the nearest calling frame whose code lives in another file."""
    own_file = frame.f_code.co_filename
    candidate = frame.f_back
    while candidate is not None and candidate.f_code.co_filename == own_file:
        candidate = candidate.f_back
    # If every frame belongs to this file, fall back to the direct caller.
    return candidate if candidate is not None else frame.f_back


def _infer_variable_name(source_line: str) -> Optional[str]:
    """Guess the variable associated with an ``export_data(`` call on a line."""
    line = source_line.strip()
    prefix, marker, _ = line.partition("export_data(")
    if not marker:
        return None

    candidates = []
    if "=" in prefix:
        # Assignment target; drop an optional "name: annotation" suffix.
        target = prefix.split("=", 1)[0]
        candidates.append(target.split(":", 1)[0])
    receiver = _RECEIVER_BEFORE_CALL.search(prefix)
    if receiver:
        candidates.append(receiver.group(1))
    first_argument = _FIRST_ARGUMENT.search(line)
    if first_argument:
        candidates.append(first_argument.group(1))

    for candidate in candidates:
        candidate = candidate.strip()
        if _NAME_PATH.fullmatch(candidate):
            return candidate
    return None


def get_caller_info() -> Tuple[str, int, Optional[str]]:
    """
    Extract information about the calling context.

    Frames that belong to the same source file as this function are skipped,
    so when the call arrives through a helper defined next to it the reported
    location is the code that invoked that helper. A direct call from another
    file reports that caller.

    The variable name is inferred from the caller's source line when it
    contains ``export_data(``, trying in order: the assignment target
    (``x = export_data(...)``), the method-call receiver
    (``df.export_data(...)``), and a plain name passed as first argument
    (``export_data(df)``). Only plain or dotted names are accepted.

    Returns:
        Tuple of (script_name, line_number, variable_name). ``script_name`` is
        the caller's file name without a trailing ``.py``; ``variable_name``
        is None when it cannot be determined. ``("unknown_script", 0, None)``
        is returned when no caller frame is available.
    """
    frame = inspect.currentframe()
    if frame is None:
        return "unknown_script", 0, None

    try:
        caller_frame = _first_external_frame(frame)
        if caller_frame is None:
            return "unknown_script", 0, None

        file_name = os.path.basename(caller_frame.f_code.co_filename)
        # Strip only a trailing ".py", not any ".py" inside the name.
        script_name = file_name[:-3] if file_name.endswith(".py") else file_name
        line_number = caller_frame.f_lineno

        variable_name = None
        try:
            context_lines = inspect.getframeinfo(caller_frame).code_context
            if context_lines:
                variable_name = _infer_variable_name(context_lines[0])
        except Exception:
            # Source lookup is best-effort (interactive sessions, frozen apps,
            # missing files); continue without a variable name.
            variable_name = None

        return script_name, line_number, variable_name
    finally:
        # Drop frame references promptly to avoid reference cycles.
        del frame
# Attach export_data to pandas DataFrames as a method so it can be used in
# method chains (df.export_data()).
def _export_data_method(self, folder: str = "outputs", name: Optional[str] = None) -> pd.DataFrame:
    """Forward this DataFrame to export_data and return whatever it returns."""
    result = export_data(self, folder, name)
    return result


# Registered through setattr (rather than plain attribute assignment) to keep
# Pylance from warning about an unknown attribute on pd.DataFrame.
setattr(pd.DataFrame, "export_data", _export_data_method)