3-DB_Tablespace_Growth_PDF-Report.r
# -----------------------------------------------------------------------------------# File Name : 3-DB_Tablespace_Growth_PDF-Report.r# Author : Pierre Montbleau# Description : Tablespace Growth Analysis and Forecasting with PDF Reporting# Purpose : Monitor tablespace usage, forecast capacity issues, # identify critical tablespaces, and generate PDF reports# Call Syntax : source("F:\\DBA\\Scripts\\R\\3-DB_Tablespace_Growth_PDF-Report.r")# Parameters : None - all parameters configured internally# Last Modified: 2024-01-15# -----------------------------------------------------------------------------------
# 3. Predicting Tablespace Growth and Capacity Issues - Enhanced Version
# -----------------------------------------------------------------------------------# Load required packages with error handling# -----------------------------------------------------------------------------------required_packages <- c("DBI", "odbc", "dplyr", "lubridate", "ggplot2", "forecast", "zoo", "tibble", "purrr")invisible(lapply(required_packages, function(pkg) { if (!require(pkg, character.only = TRUE)) { stop(paste("Package", pkg, "is required but not installed")) }}))
# -----------------------------------------------------------------------------------# Configuration# -----------------------------------------------------------------------------------output_dir <- "F:/DBA/Scripts/R/Reports"report_title <- "Tablespace Growth Analysis Report"forecast_days <- 365warning_threshold <- 0.9 # 90% capacity threshold for warnings
# -----------------------------------------------------------------------------------# Utility Functions# -----------------------------------------------------------------------------------
#' Create a timestamped filename with directory#' @param dir Directory path#' @param prefix File prefix#' @param extension File extension#' @return Full file pathcreate_output_filename <- function(dir, prefix, extension) { timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S") filename <- paste0(prefix, "_", timestamp, extension) file.path(dir, filename)}
# -----------------------------------------------------------------------------------#' Safely establish database connection#' @return Database connection objectestablish_connection <- function() { tryCatch({ conn <- dbConnect(odbc::odbc(), dsn = "[DB_NAME]", uid = "[USERNAME]", pwd = "[PASSWORD]") message("Database connection established successfully") return(conn) }, error = function(e) { stop(paste("Database connection failed:", e$message)) })}
# -----------------------------------------------------------------------------------#' Fetch tablespace usage data with validation#' @param conn Database connection#' @return Data frame with tablespace datafetch_tablespace_data <- function(conn) { tryCatch({ query <- " SELECT DB_NAME, TABLESPACE_NAME, USED_SIZE, ALLOCATED_SIZE, MAX_SIZE, PERCENT_USED, CHECK_DATE FROM ATTSTATS.TABLESPACE_USAGE ORDER BY DB_NAME, TABLESPACE_NAME, CHECK_DATE " data <- dbGetQuery(conn, query) if (nrow(data) == 0) { stop("No data returned from TABLESPACE_USAGE table") } # Validate required columns required_cols <- c("DB_NAME", "TABLESPACE_NAME", "USED_SIZE", "MAX_SIZE", "CHECK_DATE") missing_cols <- setdiff(required_cols, colnames(data)) if (length(missing_cols) > 0) { stop(paste("Missing required columns:", paste(missing_cols, collapse=", "))) } message(paste("Retrieved", nrow(data), "records of tablespace data")) return(data) }, error = function(e) { stop(paste("Data retrieval failed:", e$message)) })}
# -----------------------------------------------------------------------------------#' Clean and prepare tablespace data for analysis#' @param raw_data Raw tablespace data#' @return Processed data framepreprocess_data <- function(raw_data) { tryCatch({ processed <- raw_data %>% mutate( CHECK_DATE = as.Date(CHECK_DATE), FREE_SPACE = MAX_SIZE - USED_SIZE ) %>% group_by(DB_NAME, TABLESPACE_NAME) %>% arrange(CHECK_DATE) %>% mutate( GROWTH_RATE = (USED_SIZE - lag(USED_SIZE)) / as.numeric(CHECK_DATE - lag(CHECK_DATE)), DAYS_TO_FULL = ifelse(GROWTH_RATE > 0, FREE_SPACE / GROWTH_RATE, Inf) ) %>% ungroup() return(processed) }, error = function(e) { stop(paste("Data preprocessing failed:", e$message)) })}
# -----------------------------------------------------------------------------------#' Generate growth forecast for a tablespace#' @param data Tablespace data#' @return Forecast resultsforecast_tablespace <- function(data) { # Return empty tibble if insufficient data if (nrow(data) < 3) { return(tibble( DB_NAME = character(), TABLESPACE_NAME = character(), LAST_DATE = as.Date(character()), LAST_SIZE = numeric(), FORECAST_DATE = as.Date(character()), FORECAST_SIZE = numeric(), UPPER_95 = numeric(), LOWER_95 = numeric() )) } tryCatch({ ts_data <- ts(data$USED_SIZE, frequency = 7) # Weekly seasonality fit <- tryCatch({ auto.arima(ts_data) }, error = function(e) { message(paste("ARIMA failed for", unique(data$TABLESPACE_NAME), "- using simple linear model")) lm(USED_SIZE ~ CHECK_DATE, data = data) }) if (inherits(fit, "lm")) { future_dates <- data.frame( CHECK_DATE = seq(max(data$CHECK_DATE), by = "day", length.out = forecast_days + 1)[-1] ) pred <- predict(fit, newdata = future_dates, interval = "confidence") forecast_values <- list( mean = pred[, "fit"], upper = cbind(NA, pred[, "upr"]), lower = cbind(NA, pred[, "lwr"]) ) } else { forecast_values <- forecast(fit, h = forecast_days) } tibble( DB_NAME = unique(data$DB_NAME), TABLESPACE_NAME = unique(data$TABLESPACE_NAME), LAST_DATE = max(data$CHECK_DATE), LAST_SIZE = last(data$USED_SIZE), FORECAST_DATE = max(data$CHECK_DATE) + days(1:forecast_days), FORECAST_SIZE = as.numeric(forecast_values$mean), UPPER_95 = as.numeric(ifelse(is.null(forecast_values$upper[,2]), NA, forecast_values$upper[,2])), LOWER_95 = as.numeric(ifelse(is.null(forecast_values$lower[,2]), NA, forecast_values$lower[,2])) ) }, error = function(e) { warning(paste("Forecasting failed for", unique(data$TABLESPACE_NAME), "with error:", e$message)) return(NULL) })}
# -----------------------------------------------------------------------------------#' Generate visualizations for critical tablespaces#' @param forecast_data Forecast results#' @param critical_data Critical tablespaces datagenerate_visualizations <- function(forecast_data, critical_data) { if (nrow(critical_data) == 0) return(NULL) # Ensure we have all required columns if (!all(c("DB_NAME", "TABLESPACE_NAME", "FORECAST_DATE", "FORECAST_SIZE", "LOWER_95", "UPPER_95", "MAX_SIZE") %in% names(critical_data))) { stop("Missing required columns in critical_data") } # Main forecast plot p1 <- ggplot(critical_data, aes(x = FORECAST_DATE, y = FORECAST_SIZE)) + geom_line(aes(color = TABLESPACE_NAME)) + geom_ribbon(aes(ymin = LOWER_95, ymax = UPPER_95, fill = TABLESPACE_NAME), alpha = 0.2) + geom_hline(aes(yintercept = MAX_SIZE), linetype = "dashed", color = "red") + facet_wrap(~DB_NAME, scales = "free_y") + labs(title = "Tablespace Growth Forecast vs Capacity", subtitle = "Red line indicates maximum capacity", x = "Date", y = "Size (MB)") + theme_minimal() + theme(legend.position = "bottom") # Prepare growth rate data properly growth_data <- critical_data %>% group_by(DB_NAME, TABLESPACE_NAME) %>% summarize( MAX_SIZE = first(MAX_SIZE), DAYS_TO_FULL = min(DAYS_TO_FULL, na.rm = TRUE), .groups = "drop" ) p2 <- ggplot(growth_data, aes(x = reorder(TABLESPACE_NAME, DAYS_TO_FULL), y = DAYS_TO_FULL)) + geom_bar(stat = "identity", aes(fill = DB_NAME)) + coord_flip() + labs(title = "Estimated Days Until Tablespace Full", x = "Tablespace", y = "Days") + theme_minimal() list(forecast_plot = p1, days_to_full_plot = p2)}
# -----------------------------------------------------------------------------------#' Generate the analysis report#' @param results Analysis resultsgenerate_report <- function(results) { # Create PDF file pdf_file <- create_output_filename(output_dir, "Tablespace_Growth_Report", ".pdf") pdf(pdf_file, width = 11, height = 8.5) on.exit(dev.off()) # Add title page grid::grid.newpage() grid::grid.text(report_title, gp = grid::gpar(fontsize = 20)) grid::grid.text(paste("Generated on:", Sys.time()), y = 0.9) # Add visualizations if available if (!is.null(results$plots)) { print(results$plots$forecast_plot) print(results$plots$days_to_full_plot) } # Add summary table if (!is.null(results$capacity_warnings)) { grid::grid.newpage() gridExtra::grid.table(results$capacity_warnings %>% select(DB_NAME, TABLESPACE_NAME, CURRENT_PERCENT, PROJECTED_FULL_DATE) %>% arrange(PROJECTED_FULL_DATE)) } message(paste("\nReport generated:", pdf_file)) return(pdf_file)}
# -----------------------------------------------------------------------------------# Main Analysis Function# -----------------------------------------------------------------------------------analyze_tablespace_growth <- function() { tryCatch({ # Establish connection conn <- establish_connection() on.exit(dbDisconnect(conn)) # Fetch and process data raw_data <- fetch_tablespace_data(conn) tablespace_data <- preprocess_data(raw_data) # Generate forecasts forecast_results <- tablespace_data %>% group_by(DB_NAME, TABLESPACE_NAME) %>% group_modify(~ forecast_tablespace(.x)) %>% ungroup() if (nrow(forecast_results) == 0) { stop("Insufficient data to generate forecasts") } # Join with max size data forecast_results <- forecast_results %>% left_join( tablespace_data %>% group_by(DB_NAME, TABLESPACE_NAME) %>% summarize(MAX_SIZE = first(MAX_SIZE), DAYS_TO_FULL = first(DAYS_TO_FULL), .groups = "drop"), by = c("DB_NAME", "TABLESPACE_NAME") ) # Identify critical tablespaces critical_tablespaces <- forecast_results %>% group_by(DB_NAME, TABLESPACE_NAME) %>% filter(any(FORECAST_SIZE > MAX_SIZE, na.rm = TRUE)) %>% ungroup() # Calculate capacity warnings capacity_warnings <- forecast_results %>% mutate(PERCENT_FULL = FORECAST_SIZE / MAX_SIZE * 100) %>% filter(PERCENT_FULL > warning_threshold * 100) %>% group_by(DB_NAME, TABLESPACE_NAME) %>% summarize( CURRENT_SIZE_MB = last(LAST_SIZE), MAX_SIZE_MB = first(MAX_SIZE), CURRENT_PERCENT = round(last(LAST_SIZE) / first(MAX_SIZE) * 100, 1), FIRST_WARNING_DATE = min(FORECAST_DATE), PROJECTED_FULL_DATE = FORECAST_DATE[which.min(abs(PERCENT_FULL - 100))], .groups = "drop" ) # Generate visualizations plots <- generate_visualizations(forecast_results, critical_tablespaces) # Generate report report_path <- generate_report(list( forecast_data = forecast_results, critical_tablespaces = if (nrow(critical_tablespaces) > 0) critical_tablespaces else NULL, capacity_warnings = if (nrow(capacity_warnings) > 0) capacity_warnings else NULL, plots = plots )) return(list( status = "Success", report_path = report_path, critical_count = nrow(capacity_warnings), warning_tablespaces = if (nrow(capacity_warnings) > 0) capacity_warnings else "None" )) }, error = function(e) { return(list( status = "Error", message = e$message )) })}
# -----------------------------------------------------------------------------------# Execute Analysis# -----------------------------------------------------------------------------------if (!dir.exists(output_dir)) { dir.create(output_dir, recursive = TRUE)}
analysis_results <- analyze_tablespace_growth()
# -----------------------------------------------------------------------------------# Print summary to console# -----------------------------------------------------------------------------------if (analysis_results$status == "Success") { message(paste("\nAnalysis completed successfully")) message(paste("Critical tablespaces found:", analysis_results$critical_count)) message(paste("Report saved to:", analysis_results$report_path))} else { message(paste("\nAnalysis failed:", analysis_results$message))}
# -----------------------------------------------------------------------------------# End of Script# -----------------------------------------------------------------------------------message("Tablespace growth analysis script execution completed successfully.")# -----------------------------------------------------------------------------------
# 3. Predicting Tablespace Growth and Capacity Issues - Enhanced Version
# -----------------------------------------------------------------------------------# Load required packages with error handling# -----------------------------------------------------------------------------------required_packages <- c("DBI", "odbc", "dplyr", "lubridate", "ggplot2", "forecast", "zoo", "tibble", "purrr")invisible(lapply(required_packages, function(pkg) { if (!require(pkg, character.only = TRUE)) { stop(paste("Package", pkg, "is required but not installed")) }}))
# -----------------------------------------------------------------------------------# Configuration# -----------------------------------------------------------------------------------output_dir <- "F:/DBA/Scripts/R/Reports"report_title <- "Tablespace Growth Analysis Report"forecast_days <- 365warning_threshold <- 0.9 # 90% capacity threshold for warnings
# -----------------------------------------------------------------------------------# Utility Functions# -----------------------------------------------------------------------------------
#' Create a timestamped filename with directory#' @param dir Directory path#' @param prefix File prefix#' @param extension File extension#' @return Full file pathcreate_output_filename <- function(dir, prefix, extension) { timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S") filename <- paste0(prefix, "_", timestamp, extension) file.path(dir, filename)}
# -----------------------------------------------------------------------------------#' Safely establish database connection#' @return Database connection objectestablish_connection <- function() { tryCatch({ conn <- dbConnect(odbc::odbc(), dsn = "[DB_NAME]", uid = "[USERNAME]", pwd = "[PASSWORD]") message("Database connection established successfully") return(conn) }, error = function(e) { stop(paste("Database connection failed:", e$message)) })}
# -----------------------------------------------------------------------------------#' Fetch tablespace usage data with validation#' @param conn Database connection#' @return Data frame with tablespace datafetch_tablespace_data <- function(conn) { tryCatch({ query <- " SELECT DB_NAME, TABLESPACE_NAME, USED_SIZE, ALLOCATED_SIZE, MAX_SIZE, PERCENT_USED, CHECK_DATE FROM ATTSTATS.TABLESPACE_USAGE ORDER BY DB_NAME, TABLESPACE_NAME, CHECK_DATE " data <- dbGetQuery(conn, query) if (nrow(data) == 0) { stop("No data returned from TABLESPACE_USAGE table") } # Validate required columns required_cols <- c("DB_NAME", "TABLESPACE_NAME", "USED_SIZE", "MAX_SIZE", "CHECK_DATE") missing_cols <- setdiff(required_cols, colnames(data)) if (length(missing_cols) > 0) { stop(paste("Missing required columns:", paste(missing_cols, collapse=", "))) } message(paste("Retrieved", nrow(data), "records of tablespace data")) return(data) }, error = function(e) { stop(paste("Data retrieval failed:", e$message)) })}
# -----------------------------------------------------------------------------------#' Clean and prepare tablespace data for analysis#' @param raw_data Raw tablespace data#' @return Processed data framepreprocess_data <- function(raw_data) { tryCatch({ processed <- raw_data %>% mutate( CHECK_DATE = as.Date(CHECK_DATE), FREE_SPACE = MAX_SIZE - USED_SIZE ) %>% group_by(DB_NAME, TABLESPACE_NAME) %>% arrange(CHECK_DATE) %>% mutate( GROWTH_RATE = (USED_SIZE - lag(USED_SIZE)) / as.numeric(CHECK_DATE - lag(CHECK_DATE)), DAYS_TO_FULL = ifelse(GROWTH_RATE > 0, FREE_SPACE / GROWTH_RATE, Inf) ) %>% ungroup() return(processed) }, error = function(e) { stop(paste("Data preprocessing failed:", e$message)) })}
# -----------------------------------------------------------------------------------#' Generate growth forecast for a tablespace#' @param data Tablespace data#' @return Forecast resultsforecast_tablespace <- function(data) { # Return empty tibble if insufficient data if (nrow(data) < 3) { return(tibble( DB_NAME = character(), TABLESPACE_NAME = character(), LAST_DATE = as.Date(character()), LAST_SIZE = numeric(), FORECAST_DATE = as.Date(character()), FORECAST_SIZE = numeric(), UPPER_95 = numeric(), LOWER_95 = numeric() )) } tryCatch({ ts_data <- ts(data$USED_SIZE, frequency = 7) # Weekly seasonality fit <- tryCatch({ auto.arima(ts_data) }, error = function(e) { message(paste("ARIMA failed for", unique(data$TABLESPACE_NAME), "- using simple linear model")) lm(USED_SIZE ~ CHECK_DATE, data = data) }) if (inherits(fit, "lm")) { future_dates <- data.frame( CHECK_DATE = seq(max(data$CHECK_DATE), by = "day", length.out = forecast_days + 1)[-1] ) pred <- predict(fit, newdata = future_dates, interval = "confidence") forecast_values <- list( mean = pred[, "fit"], upper = cbind(NA, pred[, "upr"]), lower = cbind(NA, pred[, "lwr"]) ) } else { forecast_values <- forecast(fit, h = forecast_days) } tibble( DB_NAME = unique(data$DB_NAME), TABLESPACE_NAME = unique(data$TABLESPACE_NAME), LAST_DATE = max(data$CHECK_DATE), LAST_SIZE = last(data$USED_SIZE), FORECAST_DATE = max(data$CHECK_DATE) + days(1:forecast_days), FORECAST_SIZE = as.numeric(forecast_values$mean), UPPER_95 = as.numeric(ifelse(is.null(forecast_values$upper[,2]), NA, forecast_values$upper[,2])), LOWER_95 = as.numeric(ifelse(is.null(forecast_values$lower[,2]), NA, forecast_values$lower[,2])) ) }, error = function(e) { warning(paste("Forecasting failed for", unique(data$TABLESPACE_NAME), "with error:", e$message)) return(NULL) })}
# -----------------------------------------------------------------------------------#' Generate visualizations for critical tablespaces#' @param forecast_data Forecast results#' @param critical_data Critical tablespaces datagenerate_visualizations <- function(forecast_data, critical_data) { if (nrow(critical_data) == 0) return(NULL) # Ensure we have all required columns if (!all(c("DB_NAME", "TABLESPACE_NAME", "FORECAST_DATE", "FORECAST_SIZE", "LOWER_95", "UPPER_95", "MAX_SIZE") %in% names(critical_data))) { stop("Missing required columns in critical_data") } # Main forecast plot p1 <- ggplot(critical_data, aes(x = FORECAST_DATE, y = FORECAST_SIZE)) + geom_line(aes(color = TABLESPACE_NAME)) + geom_ribbon(aes(ymin = LOWER_95, ymax = UPPER_95, fill = TABLESPACE_NAME), alpha = 0.2) + geom_hline(aes(yintercept = MAX_SIZE), linetype = "dashed", color = "red") + facet_wrap(~DB_NAME, scales = "free_y") + labs(title = "Tablespace Growth Forecast vs Capacity", subtitle = "Red line indicates maximum capacity", x = "Date", y = "Size (MB)") + theme_minimal() + theme(legend.position = "bottom") # Prepare growth rate data properly growth_data <- critical_data %>% group_by(DB_NAME, TABLESPACE_NAME) %>% summarize( MAX_SIZE = first(MAX_SIZE), DAYS_TO_FULL = min(DAYS_TO_FULL, na.rm = TRUE), .groups = "drop" ) p2 <- ggplot(growth_data, aes(x = reorder(TABLESPACE_NAME, DAYS_TO_FULL), y = DAYS_TO_FULL)) + geom_bar(stat = "identity", aes(fill = DB_NAME)) + coord_flip() + labs(title = "Estimated Days Until Tablespace Full", x = "Tablespace", y = "Days") + theme_minimal() list(forecast_plot = p1, days_to_full_plot = p2)}
# -----------------------------------------------------------------------------------#' Generate the analysis report#' @param results Analysis resultsgenerate_report <- function(results) { # Create PDF file pdf_file <- create_output_filename(output_dir, "Tablespace_Growth_Report", ".pdf") pdf(pdf_file, width = 11, height = 8.5) on.exit(dev.off()) # Add title page grid::grid.newpage() grid::grid.text(report_title, gp = grid::gpar(fontsize = 20)) grid::grid.text(paste("Generated on:", Sys.time()), y = 0.9) # Add visualizations if available if (!is.null(results$plots)) { print(results$plots$forecast_plot) print(results$plots$days_to_full_plot) } # Add summary table if (!is.null(results$capacity_warnings)) { grid::grid.newpage() gridExtra::grid.table(results$capacity_warnings %>% select(DB_NAME, TABLESPACE_NAME, CURRENT_PERCENT, PROJECTED_FULL_DATE) %>% arrange(PROJECTED_FULL_DATE)) } message(paste("\nReport generated:", pdf_file)) return(pdf_file)}
# -----------------------------------------------------------------------------------# Main Analysis Function# -----------------------------------------------------------------------------------analyze_tablespace_growth <- function() { tryCatch({ # Establish connection conn <- establish_connection() on.exit(dbDisconnect(conn)) # Fetch and process data raw_data <- fetch_tablespace_data(conn) tablespace_data <- preprocess_data(raw_data) # Generate forecasts forecast_results <- tablespace_data %>% group_by(DB_NAME, TABLESPACE_NAME) %>% group_modify(~ forecast_tablespace(.x)) %>% ungroup() if (nrow(forecast_results) == 0) { stop("Insufficient data to generate forecasts") } # Join with max size data forecast_results <- forecast_results %>% left_join( tablespace_data %>% group_by(DB_NAME, TABLESPACE_NAME) %>% summarize(MAX_SIZE = first(MAX_SIZE), DAYS_TO_FULL = first(DAYS_TO_FULL), .groups = "drop"), by = c("DB_NAME", "TABLESPACE_NAME") ) # Identify critical tablespaces critical_tablespaces <- forecast_results %>% group_by(DB_NAME, TABLESPACE_NAME) %>% filter(any(FORECAST_SIZE > MAX_SIZE, na.rm = TRUE)) %>% ungroup() # Calculate capacity warnings capacity_warnings <- forecast_results %>% mutate(PERCENT_FULL = FORECAST_SIZE / MAX_SIZE * 100) %>% filter(PERCENT_FULL > warning_threshold * 100) %>% group_by(DB_NAME, TABLESPACE_NAME) %>% summarize( CURRENT_SIZE_MB = last(LAST_SIZE), MAX_SIZE_MB = first(MAX_SIZE), CURRENT_PERCENT = round(last(LAST_SIZE) / first(MAX_SIZE) * 100, 1), FIRST_WARNING_DATE = min(FORECAST_DATE), PROJECTED_FULL_DATE = FORECAST_DATE[which.min(abs(PERCENT_FULL - 100))], .groups = "drop" ) # Generate visualizations plots <- generate_visualizations(forecast_results, critical_tablespaces) # Generate report report_path <- generate_report(list( forecast_data = forecast_results, critical_tablespaces = if (nrow(critical_tablespaces) > 0) critical_tablespaces else NULL, capacity_warnings = if (nrow(capacity_warnings) > 0) capacity_warnings else NULL, plots = plots )) return(list( status = "Success", report_path = report_path, critical_count = nrow(capacity_warnings), warning_tablespaces = if (nrow(capacity_warnings) > 0) capacity_warnings else "None" )) }, error = function(e) { return(list( status = "Error", message = e$message )) })}
# -----------------------------------------------------------------------------------# Execute Analysis# -----------------------------------------------------------------------------------if (!dir.exists(output_dir)) { dir.create(output_dir, recursive = TRUE)}
analysis_results <- analyze_tablespace_growth()
# -----------------------------------------------------------------------------------# Print summary to console# -----------------------------------------------------------------------------------if (analysis_results$status == "Success") { message(paste("\nAnalysis completed successfully")) message(paste("Critical tablespaces found:", analysis_results$critical_count)) message(paste("Report saved to:", analysis_results$report_path))} else { message(paste("\nAnalysis failed:", analysis_results$message))}
# -----------------------------------------------------------------------------------# End of Script# -----------------------------------------------------------------------------------message("Tablespace growth analysis script execution completed successfully.")# -----------------------------------------------------------------------------------