5-DB_Growth_Prediction.r
# -----------------------------------------------------------------------------------# File Name : 5-DB_Growth_Prediction.r# Author : Pierre Montbleau# Description : Database Growth Prediction and Forecasting Script# Purpose : Extends database sizing analysis with predictive capabilities # using time series forecasting and ARIMA modeling# Call Syntax : source("F:\\DBA\\Scripts\\R\\5-DB_Growth_Prediction.r")# Parameters : days_back (default=90) - Historical data period in days# forecast_days (default=30) - Future prediction period in days# output_file_name (optional) - Custom output PDF filename# Last Modified: 2024-01-15# -----------------------------------------------------------------------------------
# Database Size Prediction Script# Extends the existing DB_Sizing_Report.r to add predictive capabilities
# -----------------------------------------------------------------------------------# Load necessary libraries (adding forecast package)# -----------------------------------------------------------------------------------library(ggplot2)library(dplyr)library(DBI)library(odbc)library(forecast) # For time series forecastinglibrary(gridExtra) # For arranging multiple plots
# -----------------------------------------------------------------------------------# Function to get database sizing data (same as original)# -----------------------------------------------------------------------------------get_database_sizing_data <- function(days_back) { # Connect to the database conn <- dbConnect(odbc::odbc(), dsn = "[DB_NAME]", uid = "[USERNAME]", pwd = "[PASSWORD]") # Build and execute the query query <- paste0(" SELECT REPORT_ID, DB_NAME, DB_SIZE, CHECK_DATE FROM ATTSTATS.DATABASE_SIZING WHERE trunc(check_date) > trunc(sysdate-", days_back, ") ORDER BY CHECK_DATE") # Execute the query and fetch results result <- dbGetQuery(conn, query) # Disconnect from the database dbDisconnect(conn) return(result)}
# -----------------------------------------------------------------------------------# Function to create predictive visualization# -----------------------------------------------------------------------------------create_prediction_view <- function(days_back = 90, forecast_days = 30, output_file_name = "F:/DBA/Scripts/R/Reports/database_prediction_report.pdf") { # Get the historical data data <- get_database_sizing_data(days_back) # Convert CHECK_DATE to datetime format data$CHECK_DATE <- as.POSIXct(data$CHECK_DATE, format = "%Y/%m/%d %H:%M:%S") # Create daily summary data_summary <- data %>% group_by(date = as.Date(CHECK_DATE)) %>% summarize( avg_db_size = mean(DB_SIZE, na.rm = TRUE) ) %>% arrange(date) # Create time series object db_ts <- ts(data_summary$avg_db_size, frequency = 1) # Fit forecasting model (using auto.arima for automatic model selection) fit <- auto.arima(db_ts) # Forecast future values forecast_values <- forecast(fit, h = forecast_days) # Create data frame for plotting forecast_df <- data.frame( date = seq(max(data_summary$date) + 1, by = "day", length.out = forecast_days), forecast = as.numeric(forecast_values$mean), lower_80 = as.numeric(forecast_values$lower[,1]), upper_80 = as.numeric(forecast_values$upper[,1]), lower_95 = as.numeric(forecast_values$lower[,2]), upper_95 = as.numeric(forecast_values$upper[,2]) ) # Combine historical and forecast data plot_data <- data_summary %>% mutate(type = "Historical") %>% bind_rows( forecast_df %>% select(date, avg_db_size = forecast) %>% mutate(type = "Forecast") ) # Calculate growth rate statistics growth_rates <- diff(data_summary$avg_db_size) / data_summary$avg_db_size[-nrow(data_summary)] avg_growth_rate <- mean(growth_rates, na.rm = TRUE) max_growth_rate <- max(growth_rates, na.rm = TRUE) min_growth_rate <- min(growth_rates, na.rm = TRUE) # Plot 1: Historical and Forecasted Database Size p1 <- ggplot() + geom_line(data = data_summary, aes(x = date, y = avg_db_size, color = "Historical")) + geom_line(data = forecast_df, aes(x = date, y = forecast, color = "Forecast")) + geom_ribbon(data = forecast_df, aes(x = date, ymin = lower_80, ymax = upper_80), fill = "blue", alpha = 0.2) + geom_ribbon(data = forecast_df, aes(x = date, ymin = lower_95, ymax = upper_95), fill = "blue", alpha = 0.1) + labs(title = paste("Database Size Forecast for Next", forecast_days, "Days"), x = "Date", y = "Database Size (MB)", color = "Data Type") + scale_color_manual(values = c("Historical" = "blue", "Forecast" = "red")) + theme_minimal() + theme(legend.position = "bottom") # Plot 2: Growth Rate Analysis p2 <- ggplot(data.frame(growth_rate = growth_rates), aes(x = growth_rate)) + geom_histogram(fill = "steelblue", bins = 15, alpha = 0.7) + geom_vline(xintercept = avg_growth_rate, color = "red", linetype = "dashed") + annotate("text", x = avg_growth_rate, y = Inf, label = paste("Avg:", round(avg_growth_rate*100, 2), "%"), vjust = 2, hjust = -0.1, color = "red") + labs(title = "Historical Daily Growth Rate Distribution", x = "Daily Growth Rate", y = "Frequency") + theme_minimal() # Create summary text summary_text <- paste( "Database Growth Prediction Summary\n", "---------------------------------\n", "Historical Data Period: ", min(data_summary$date), " to ", max(data_summary$date), "\n", "Forecast Period: ", min(forecast_df$date), " to ", max(forecast_df$date), "\n", "Current Database Size: ", round(last(data_summary$avg_db_size), 2), " MB\n", "Predicted Size in ", forecast_days, " days: ", round(last(forecast_df$forecast), 2), " MB\n", "Predicted Growth: ", round(last(forecast_df$forecast) - last(data_summary$avg_db_size), 2), " MB\n", "Average Daily Growth Rate: ", round(avg_growth_rate * 100, 2), "%\n", "Maximum Daily Growth Rate: ", round(max_growth_rate * 100, 2), "%\n", "Minimum Daily Growth Rate: ", round(min_growth_rate * 100, 2), "%\n" ) # Save to PDF pdf(file = output_file_name, width = 10, height = 8) # Add summary text grid::grid.text(summary_text, x = 0.05, y = 0.9, just = "left", gp = grid::gpar(fontsize = 10, fontfamily = "mono")) # Arrange plots grid.arrange(p1, p2, ncol = 1) # Close PDF device dev.off() # Return results return(list( historical_data = data_summary, forecast_data = forecast_df, growth_stats = c( avg_growth_rate = avg_growth_rate, max_growth_rate = max_growth_rate, min_growth_rate = min_growth_rate ), plot_file = output_file_name ))}
# -----------------------------------------------------------------------------------# Main execution# -----------------------------------------------------------------------------------
# Create prediction view for next 30 days based on last 90 days of dataprediction_result <- create_prediction_view(days_back = 90, forecast_days = 30)
# Print where the report was savedcat("Prediction report saved to: ", prediction_result$plot_file, "\n")
# Open the PDF fileif (.Platform$OS.type == "windows") { shell.exec(prediction_result$plot_file) # For Windows} else { system(paste("open", shQuote(prediction_result$plot_file))) # For macOS # For Linux, use xdg-open # system(paste("xdg-open", shQuote(prediction_result$plot_file)))}
# -----------------------------------------------------------------------------------# End of Script# -----------------------------------------------------------------------------------message("Database growth prediction script execution completed successfully.")# -----------------------------------------------------------------------------------
# Database Size Prediction Script# Extends the existing DB_Sizing_Report.r to add predictive capabilities
# -----------------------------------------------------------------------------------# Load necessary libraries (adding forecast package)# -----------------------------------------------------------------------------------library(ggplot2)library(dplyr)library(DBI)library(odbc)library(forecast) # For time series forecastinglibrary(gridExtra) # For arranging multiple plots
# -----------------------------------------------------------------------------------# Function to get database sizing data (same as original)# -----------------------------------------------------------------------------------get_database_sizing_data <- function(days_back) { # Connect to the database conn <- dbConnect(odbc::odbc(), dsn = "[DB_NAME]", uid = "[USERNAME]", pwd = "[PASSWORD]") # Build and execute the query query <- paste0(" SELECT REPORT_ID, DB_NAME, DB_SIZE, CHECK_DATE FROM ATTSTATS.DATABASE_SIZING WHERE trunc(check_date) > trunc(sysdate-", days_back, ") ORDER BY CHECK_DATE") # Execute the query and fetch results result <- dbGetQuery(conn, query) # Disconnect from the database dbDisconnect(conn) return(result)}
# -----------------------------------------------------------------------------------# Function to create predictive visualization# -----------------------------------------------------------------------------------create_prediction_view <- function(days_back = 90, forecast_days = 30, output_file_name = "F:/DBA/Scripts/R/Reports/database_prediction_report.pdf") { # Get the historical data data <- get_database_sizing_data(days_back) # Convert CHECK_DATE to datetime format data$CHECK_DATE <- as.POSIXct(data$CHECK_DATE, format = "%Y/%m/%d %H:%M:%S") # Create daily summary data_summary <- data %>% group_by(date = as.Date(CHECK_DATE)) %>% summarize( avg_db_size = mean(DB_SIZE, na.rm = TRUE) ) %>% arrange(date) # Create time series object db_ts <- ts(data_summary$avg_db_size, frequency = 1) # Fit forecasting model (using auto.arima for automatic model selection) fit <- auto.arima(db_ts) # Forecast future values forecast_values <- forecast(fit, h = forecast_days) # Create data frame for plotting forecast_df <- data.frame( date = seq(max(data_summary$date) + 1, by = "day", length.out = forecast_days), forecast = as.numeric(forecast_values$mean), lower_80 = as.numeric(forecast_values$lower[,1]), upper_80 = as.numeric(forecast_values$upper[,1]), lower_95 = as.numeric(forecast_values$lower[,2]), upper_95 = as.numeric(forecast_values$upper[,2]) ) # Combine historical and forecast data plot_data <- data_summary %>% mutate(type = "Historical") %>% bind_rows( forecast_df %>% select(date, avg_db_size = forecast) %>% mutate(type = "Forecast") ) # Calculate growth rate statistics growth_rates <- diff(data_summary$avg_db_size) / data_summary$avg_db_size[-nrow(data_summary)] avg_growth_rate <- mean(growth_rates, na.rm = TRUE) max_growth_rate <- max(growth_rates, na.rm = TRUE) min_growth_rate <- min(growth_rates, na.rm = TRUE) # Plot 1: Historical and Forecasted Database Size p1 <- ggplot() + geom_line(data = data_summary, aes(x = date, y = avg_db_size, color = "Historical")) + geom_line(data = forecast_df, aes(x = date, y = forecast, color = "Forecast")) + geom_ribbon(data = forecast_df, aes(x = date, ymin = lower_80, ymax = upper_80), fill = "blue", alpha = 0.2) + geom_ribbon(data = forecast_df, aes(x = date, ymin = lower_95, ymax = upper_95), fill = "blue", alpha = 0.1) + labs(title = paste("Database Size Forecast for Next", forecast_days, "Days"), x = "Date", y = "Database Size (MB)", color = "Data Type") + scale_color_manual(values = c("Historical" = "blue", "Forecast" = "red")) + theme_minimal() + theme(legend.position = "bottom") # Plot 2: Growth Rate Analysis p2 <- ggplot(data.frame(growth_rate = growth_rates), aes(x = growth_rate)) + geom_histogram(fill = "steelblue", bins = 15, alpha = 0.7) + geom_vline(xintercept = avg_growth_rate, color = "red", linetype = "dashed") + annotate("text", x = avg_growth_rate, y = Inf, label = paste("Avg:", round(avg_growth_rate*100, 2), "%"), vjust = 2, hjust = -0.1, color = "red") + labs(title = "Historical Daily Growth Rate Distribution", x = "Daily Growth Rate", y = "Frequency") + theme_minimal() # Create summary text summary_text <- paste( "Database Growth Prediction Summary\n", "---------------------------------\n", "Historical Data Period: ", min(data_summary$date), " to ", max(data_summary$date), "\n", "Forecast Period: ", min(forecast_df$date), " to ", max(forecast_df$date), "\n", "Current Database Size: ", round(last(data_summary$avg_db_size), 2), " MB\n", "Predicted Size in ", forecast_days, " days: ", round(last(forecast_df$forecast), 2), " MB\n", "Predicted Growth: ", round(last(forecast_df$forecast) - last(data_summary$avg_db_size), 2), " MB\n", "Average Daily Growth Rate: ", round(avg_growth_rate * 100, 2), "%\n", "Maximum Daily Growth Rate: ", round(max_growth_rate * 100, 2), "%\n", "Minimum Daily Growth Rate: ", round(min_growth_rate * 100, 2), "%\n" ) # Save to PDF pdf(file = output_file_name, width = 10, height = 8) # Add summary text grid::grid.text(summary_text, x = 0.05, y = 0.9, just = "left", gp = grid::gpar(fontsize = 10, fontfamily = "mono")) # Arrange plots grid.arrange(p1, p2, ncol = 1) # Close PDF device dev.off() # Return results return(list( historical_data = data_summary, forecast_data = forecast_df, growth_stats = c( avg_growth_rate = avg_growth_rate, max_growth_rate = max_growth_rate, min_growth_rate = min_growth_rate ), plot_file = output_file_name ))}
# -----------------------------------------------------------------------------------# Main execution# -----------------------------------------------------------------------------------
# Create prediction view for next 30 days based on last 90 days of dataprediction_result <- create_prediction_view(days_back = 90, forecast_days = 30)
# Print where the report was savedcat("Prediction report saved to: ", prediction_result$plot_file, "\n")
# Open the PDF fileif (.Platform$OS.type == "windows") { shell.exec(prediction_result$plot_file) # For Windows} else { system(paste("open", shQuote(prediction_result$plot_file))) # For macOS # For Linux, use xdg-open # system(paste("xdg-open", shQuote(prediction_result$plot_file)))}
# -----------------------------------------------------------------------------------# End of Script# -----------------------------------------------------------------------------------message("Database growth prediction script execution completed successfully.")# -----------------------------------------------------------------------------------