Source code for tests.test_data_processor

#!/usr/bin/env python3
# tests/test_data_processor.py

import pytest
import logging
import numpy as np
import pandas as pd
import warnings
from timeseries_compute.data_processor import (
    MissingDataHandler,
    MissingDataHandlerFactory,
    test_stationarity,
    fill_data,
)
from unittest.mock import patch, MagicMock
from statsmodels.tsa.stattools import adfuller


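# Fixture named `df` for the `test_stationarity` function imported from the module.
# NOTE(review): presumably needed because pytest collects that imported function as a
# test (its name matches `test_*`) and must resolve its argument — confirm.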
@pytest.fixture
def df():
    """Provide a small, gap-free, two-column float DataFrame.

    Columns ``A`` and ``B`` each hold five consecutive float values.

    NOTE(review): presumably this fixture exists so that pytest can supply an
    argument to the imported ``test_stationarity`` function when it collects
    that function as a test -- confirm against the module's signature.
    """
    columns = {
        "A": [1.0, 2.0, 3.0, 4.0, 5.0],
        "B": [5.0, 6.0, 7.0, 8.0, 9.0],
    }
    return pd.DataFrame(columns)
@pytest.fixture
def sample_data():
    """Provide a three-column DataFrame whose columns all contain gaps.

    Missing entries are written as ``None``; every column has at least one.
    """
    return pd.DataFrame(
        {
            "A": [1.0, 2.0, None, 4.0, 5.0],
            "B": [None, 2.0, 3.0, None, 5.0],
            "C": [1.0, None, None, 4.0, 5.0],
        }
    )
def test_create_handler_drop():
    """The 'drop' strategy must resolve to a callable named ``drop_na``."""
    drop_handler = MissingDataHandlerFactory.create_handler("drop")

    assert callable(drop_handler)
    assert drop_handler.__name__ == "drop_na"
def test_create_handler_forward_fill():
    """The 'forward_fill' strategy must resolve to a callable named ``forward_fill``."""
    fill_handler = MissingDataHandlerFactory.create_handler("forward_fill")

    assert callable(fill_handler)
    assert fill_handler.__name__ == "forward_fill"
def test_create_handler_invalid_strategy():
    """Requesting an unknown strategy from the factory must raise ``ValueError``."""
    unknown_strategy = "invalid_strategy"
    with pytest.raises(ValueError):
        MissingDataHandlerFactory.create_handler(unknown_strategy)
@pytest.fixture
def sample_df():
    """Provide a five-row DataFrame with NaN gaps in ``A`` and ``B``.

    Column ``C`` is complete, ``A`` is missing row 2, and ``B`` is missing
    rows 0 and 3.
    """
    nan = np.nan
    columns = {
        "A": [1.0, 2.0, nan, 4.0, 5.0],
        "B": [nan, 2.0, 3.0, nan, 5.0],
        "C": [1.0, 2.0, 3.0, 4.0, 5.0],
    }
    return pd.DataFrame(columns)
def test_drop_na():
    """``drop_na`` must discard every row that contains a NaN."""
    # Rows 1 and 2 each contain one NaN; rows 0 and 3 are complete.
    frame = pd.DataFrame(
        {
            "A": [1.0, 2.0, np.nan, 4.0],
            "B": [5.0, np.nan, 7.0, 8.0],
        }
    )

    cleaned = MissingDataHandler().drop_na(frame)

    # Surviving rows are expected to keep their original index labels.
    survivors = pd.DataFrame({"A": [1.0, 4.0], "B": [5.0, 8.0]}, index=[0, 3])
    pd.testing.assert_frame_equal(cleaned, survivors)
def test_forward_fill(sample_df):
    """``forward_fill`` must carry the last seen value into each gap."""
    filler = MissingDataHandler()

    # FutureWarning is silenced only for the duration of the call under test.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        filled = filler.forward_fill(sample_df)

    # The leading NaN in "B" has no earlier value, so it is expected to remain.
    wanted = pd.DataFrame(
        {
            "A": [1.0, 2.0, 2.0, 4.0, 5.0],
            "B": [np.nan, 2.0, 3.0, 3.0, 5.0],
            "C": [1.0, 2.0, 3.0, 4.0, 5.0],
        }
    )
    pd.testing.assert_frame_equal(filled, wanted)
def test_fill_data(sample_df):
    """Exercise ``fill_data`` with 'forward_fill', 'drop', and an invalid strategy."""
    # --- forward fill -----------------------------------------------------
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        filled = fill_data(sample_df, strategy="forward_fill")

    # The leading NaN in "B" has nothing before it, so it is expected to stay.
    wanted_filled = pd.DataFrame(
        {
            "A": [1.0, 2.0, 2.0, 4.0, 5.0],
            "B": [np.nan, 2.0, 3.0, 3.0, 5.0],
            "C": [1.0, 2.0, 3.0, 4.0, 5.0],
        }
    )
    pd.testing.assert_frame_equal(filled, wanted_filled)

    # --- drop -------------------------------------------------------------
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        dropped = fill_data(sample_df, strategy="drop")

    # Rows 0, 2 and 3 each hold a NaN, leaving only rows 1 and 4.
    wanted_dropped = pd.DataFrame(
        {"A": [2.0, 5.0], "B": [2.0, 5.0], "C": [2.0, 5.0]},
        index=[1, 4],
    )
    pd.testing.assert_frame_equal(dropped, wanted_dropped)

    # --- unknown strategy -------------------------------------------------
    with pytest.raises(ValueError):
        fill_data(sample_df, strategy="invalid_strategy")
@patch("timeseries_compute.data_processor.adfuller")
def test_ts_stationarity(mock_adfuller):
    """``test_stationarity`` must report the values produced by ``adfuller``.

    ``adfuller`` is replaced by a mock inside the module under test, so the
    assertions pin how its return tuple is mapped into the result dict.
    """
    # Mocked tuple: (test statistic, p-value, lags, nobs, critical values, icbest)
    critical_values = {"1%": -3.5, "5%": -2.9, "10%": -2.6}
    mock_adfuller.return_value = (-3.5, 0.01, 1, 100, critical_values, 1)

    # The function is given a single-column DataFrame rather than a Series.
    frame = pd.DataFrame({"test_col": [1.0, 2.0, 3.0, 4.0, 5.0]})
    outcome = test_stationarity(frame)

    # Only the call count is verified here; the arguments are not inspected.
    mock_adfuller.assert_called_once()

    assert isinstance(outcome, dict)
    assert "test_col" in outcome

    column_outcome = outcome["test_col"]
    assert "ADF Statistic" in column_outcome
    assert "p-value" in column_outcome
    assert column_outcome["ADF Statistic"] == -3.5
    assert column_outcome["p-value"] == 0.01
def test_stationarity_integration():
    """Run ``test_stationarity`` end to end with the real ``adfuller``.

    Only the structure and value types of the result are checked, not the
    statistical verdict for either series.
    """
    np.random.seed(42)

    # Draw order matters for reproducibility: noise series first, then the walk.
    noise = np.random.randn(100) * 10 + 5  # scaled and shifted random data
    walk = np.cumsum(np.random.randn(100) * 2)  # scaled random walk

    # The function under test is given a DataFrame, one column per series.
    frame = pd.DataFrame({"stationary": noise, "non_stationary": walk})

    # RuntimeWarning is silenced while the real test runs.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        adf_results = test_stationarity(frame)

    column_names = ("stationary", "non_stationary")

    assert isinstance(adf_results, dict)
    for column in column_names:
        assert column in adf_results

    for column in column_names:
        for field in ("ADF Statistic", "p-value"):
            assert isinstance(adf_results[column][field], float)