# 1-DB_Performance.r
# -----------------------------------------------------------------------------------
# File Name    : 1-DB_Performance.r
# Author       : Pierre Montbleau
# Description  : Oracle Database Performance Analysis Script
# Purpose      : Analyze database performance metrics (CPU, memory, wait events)
#                and generate PDF reports with predictive modeling
# Call Syntax  : source("F:\\DBA\\Scripts\\R\\1-DB_Performance.r")
# Parameters   : None - all parameters are configured within the script
# Last Modified: 2024-01-15
# -----------------------------------------------------------------------------------
# Oracle Database Performance Analysis with R
# Robust version with comprehensive error handling and PDF report
# -----------------------------------------------------------------------------------
# Load required packages
# -----------------------------------------------------------------------------------
library(DBI)          # generic database interface: dbConnect/dbGetQuery/dbDisconnect
library(odbc)         # ODBC driver back-end passed to dbConnect()
library(dplyr)        # data wrangling verbs (mutate/group_by/summarise/left_join)
library(lubridate)    # NOTE(review): loaded but no lubridate call is visible in this file - confirm
library(ggplot2)      # report charts (CPU trend, feature importance)
library(caret)        # createDataPartition(), confusionMatrix()
library(randomForest) # predictive model for performance issues
library(tidyr)        # pivot_wider() for the memory metrics
library(gridExtra)    # NOTE(review): loaded but no gridExtra call is visible in this file - confirm
library(knitr)        # NOTE(review): loaded but no knitr call is visible in this file - confirm
library(grid)         # grid.newpage()/grid.text()/gpar()/viewport() report layout
# -----------------------------------------------------------------------------------
# Configuration Section
# -----------------------------------------------------------------------------------
# Directory where generated report PDFs are written.
script_dir <- "F:\\DBA\\Scripts\\R\\Reports"
# FIX: the original guard was `is.null(script_dir)` immediately after assigning a
# string literal, so the getwd() fallback was unreachable dead code. Fall back to
# the working directory when the configured directory does not exist on this host.
if (!dir.exists(script_dir)) {
  script_dir <- getwd() # Fallback to working directory
}

# Generate a unique, timestamped report filename in the report directory.
report_filename <- file.path(
  script_dir,
  paste0("DB_Performance_Report_", format(Sys.time(), "%Y%m%d_%H%M%S"), ".pdf")
)
# -----------------------------------------------------------------------------------
# Database Connection Function
# -----------------------------------------------------------------------------------
safe_db_connect <- function() {
  # Open an ODBC connection to the monitoring database.
  #
  # Returns the DBI connection object on success, or NULL on failure (the
  # failure is reported via message() so the caller can decide how to proceed).
  #
  # FIX: credentials are no longer hard-coded in the script; they are read from
  # environment variables, with the original placeholder values kept as defaults
  # so existing deployments that edit the script in place keep working.
  tryCatch({
    conn <- dbConnect(
      odbc::odbc(),
      dsn = Sys.getenv("DB_PERF_DSN", "[DB_NAME]"),
      uid = Sys.getenv("DB_PERF_UID", "[USERNAME]"),
      pwd = Sys.getenv("DB_PERF_PWD", "[PASSWORD]")
    )
    message("Successfully connected to database")
    conn
  }, error = function(e) {
    message("Connection failed: ", e$message)
    NULL
  })
}
# -----------------------------------------------------------------------------------
# Query Execution with Robust Error Handling
# -----------------------------------------------------------------------------------
execute_safe_query <- function(conn, query, default = data.frame()) {
  # Run `query` on `conn` and return the result set.
  #
  # Robust wrapper around dbGetQuery(): if the query errors, or returns zero
  # rows, a warning is raised and `default` is returned instead, so callers
  # can continue with a well-typed (possibly empty) data frame.
  on_error <- function(e) {
    warning("Query failed: ", e$message, "\nAttempting to continue with default data")
    default
  }
  tryCatch({
    rows <- dbGetQuery(conn, query)
    if (nrow(rows) > 0) {
      rows
    } else {
      # Truncate the query text in the warning to keep logs readable.
      warning("Query returned empty results: ", substr(query, 1, 50), "...")
      default
    }
  }, error = on_error)
}
# -----------------------------------------------------------------------------------
# PDF Report Initialization
# -----------------------------------------------------------------------------------
init_report <- function(filename) {
  # Open the PDF graphics device at `filename` and draw the report title page.
  #
  # Returns TRUE on success and FALSE on failure.
  # FIX: the original unconditionally returned TRUE, so the caller's
  # `if (!init_report(...))` guard could never trigger; errors (e.g. an
  # unwritable path) now yield FALSE instead of an uncaught error.
  tryCatch({
    pdf(file = filename, width = 11, height = 8.5) # US-letter landscape
    # Title page
    grid.newpage()
    grid.text("Database Performance Analysis Report",
              gp = gpar(fontsize = 24, fontface = "bold"), y = 0.7)
    grid.text(paste("Generated:", Sys.time()),
              gp = gpar(fontsize = 16), y = 0.6)
    grid.text("Confidential - Internal Use Only",
              gp = gpar(fontsize = 12, col = "gray"), y = 0.2)
    TRUE
  }, error = function(e) {
    message("Failed to initialize PDF report '", filename, "': ", e$message)
    # Don't leak a half-open device if the failure happened after pdf() opened.
    if (names(dev.cur()) == "pdf") dev.off()
    FALSE
  })
}
# -----------------------------------------------------------------------------------
# Main Analysis Function
# -----------------------------------------------------------------------------------
analyze_database_performance <- function() {
  # Orchestrates the whole run: opens the PDF report, connects to the database,
  # pulls the ATTSTATS monitoring tables, derives per-database/per-day CPU and
  # memory metrics, renders the report pages, and - when enough labelled cases
  # exist - fits a random forest that predicts performance issues.
  # Reads the globals `report_filename` (path) and uses the helper functions
  # defined above. All graphical output goes to the PDF opened by init_report().

  # Initialize report
  if(!init_report(report_filename)) {
    stop("Failed to initialize PDF report")
  }

  # Establish connection
  conn <- safe_db_connect()
  if(is.null(conn)) {
    # Record the failure on a report page before aborting; the device must be
    # closed explicitly here because on.exit() below is not registered yet.
    grid.newpage()
    grid.text("Connection Failed", gp = gpar(fontsize = 18, col = "red"), y = 0.7)
    grid.text("Cannot proceed without database connection", gp = gpar(fontsize = 14), y = 0.6)
    dev.off()
    stop("Cannot proceed without database connection")
  }

  # From here on, guarantee the connection and the PDF device are released no
  # matter how the function exits (normal return or any stop() below).
  on.exit({
    message("Closing database connection")
    dbDisconnect(conn)
    dev.off()
    message("Report generated: ", report_filename)
  })

  # Query data with error handling
  message("\nQuerying database tables...")

  # CPU data - critical, fail if unavailable
  cpu_data <- execute_safe_query(
    conn,
    "SELECT DB_NAME, CPU_COUNT, TOTAL_CPU_USAGE, CHECK_DATE FROM ATTSTATS.CPU_USAGE"
  )
  if(nrow(cpu_data) == 0) {
    grid.newpage()
    grid.text("Analysis Failed", gp = gpar(fontsize = 18, col = "red"), y = 0.7)
    grid.text("No CPU data available - analysis cannot continue", gp = gpar(fontsize = 14), y = 0.6)
    stop("No CPU data available - analysis cannot continue")
  }

  # Other data sources - can proceed without these; the typed empty defaults
  # keep the downstream dplyr/tidyr code working when a query fails or is empty.
  memory_data <- execute_safe_query(
    conn,
    "SELECT DB_NAME, NAME, VALUE, CHECK_DATE FROM ATTSTATS.MEMORY_USAGE WHERE NAME IN ('total PGA allocated', 'maximum PGA allocated')",
    default = data.frame(DB_NAME=character(), NAME=character(), VALUE=numeric(), CHECK_DATE=character())
  )
  # NOTE(review): wait_events and db_load are queried but never used below -
  # presumably collected for a future report section; confirm before removing.
  wait_events <- execute_safe_query(
    conn,
    "SELECT DB_NAME, EVENT, WAIT_TIME, CHECK_DATE FROM ATTSTATS.CURRENT_SESSIONS_WAIT_EVENTS",
    default = data.frame(DB_NAME=character(), EVENT=character(), WAIT_TIME=numeric(), CHECK_DATE=character())
  )
  db_load <- execute_safe_query(
    conn,
    "SELECT DB_NAME, CPU_USAGE, ACTIVE_SESSIONS, WAIT_EVENT, EVENT_COUNT, LOG_TIME, ALERT_MESSAGE FROM ATTSTATS.DB_LOAD_MONITOR",
    default = data.frame(DB_NAME=character(), CPU_USAGE=numeric(), ACTIVE_SESSIONS=numeric(),
                         WAIT_EVENT=character(), EVENT_COUNT=numeric(), LOG_TIME=character(),
                         ALERT_MESSAGE=character())
  )

  # -----------------------------------------------------------------------------------
  # Data Quality Assessment
  # -----------------------------------------------------------------------------------
  grid.newpage()
  grid.text("Data Quality Assessment", gp = gpar(fontsize = 18, fontface = "bold"), y = 0.9)

  # Check CPU data ranges: flag implausibly large readings for the reader.
  quality_text <- c()
  if(any(cpu_data$TOTAL_CPU_USAGE > 500, na.rm = TRUE)) {
    quality_text <- c(quality_text, "WARNING: Unusually high CPU values detected (>500%). Verify data collection method.")
  }

  # Basic data summaries (captured as text so they can be drawn on the page)
  cpu_summary <- capture.output(summary(cpu_data$TOTAL_CPU_USAGE))
  quality_text <- c(quality_text, "CPU Data Summary:", cpu_summary)

  if(length(quality_text) > 0) {
    grid.text(paste(quality_text, collapse = "\n"),
              x = 0.1, y = 0.7, just = "left",
              gp = gpar(fontsize = 10, fontfamily = "mono"))
  }

  # -----------------------------------------------------------------------------------
  # Data Preparation
  # -----------------------------------------------------------------------------------
  message("\nPreparing data for analysis...")

  # Process CPU data with date handling.
  # NOTE(review): the tryCatch handler runs once for the whole column, so a
  # single unparseable value collapses EVERY CHECK_DATE to 1970-01-01 - confirm
  # whether per-row recovery was intended.
  cpu_usage_agg <- cpu_data %>%
    mutate(
      CHECK_DATE = tryCatch(as.Date(CHECK_DATE), error = function(e) {
        warning("Date conversion error, using minimum date")
        as.Date("1970-01-01")
      })
    ) %>%
    group_by(DB_NAME, CHECK_DATE) %>%
    summarise(
      AVG_CPU_USAGE = mean(TOTAL_CPU_USAGE, na.rm = TRUE),
      PEAK_CPU_USAGE = max(TOTAL_CPU_USAGE, na.rm = TRUE),
      CPU_VARIABILITY = sd(TOTAL_CPU_USAGE, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    mutate(
      # max() over an all-NA group yields -Inf and sd() of a single row yields
      # NA - normalise both so later arithmetic and plotting stay finite.
      PEAK_CPU_USAGE = ifelse(is.infinite(PEAK_CPU_USAGE), AVG_CPU_USAGE, PEAK_CPU_USAGE),
      CPU_VARIABILITY = ifelse(is.na(CPU_VARIABILITY), 0, CPU_VARIABILITY)
    )

  # Process memory data if available: pivot the NAME/VALUE pairs into one
  # column per metric and derive PGA utilisation.
  if(nrow(memory_data) > 0) {
    memory_usage_agg <- memory_data %>%
      mutate(
        CHECK_DATE = tryCatch(as.Date(CHECK_DATE), error = function(e) as.Date("1970-01-01")),
        VALUE = as.numeric(VALUE)
      ) %>%
      pivot_wider(
        names_from = NAME,
        values_from = VALUE,
        values_fn = mean  # average duplicate readings for the same DB/date
      ) %>%
      rename(
        Total_PGA = `total PGA allocated`,
        Maximum_PGA = `maximum PGA allocated`
      ) %>%
      mutate(
        # Guard against division by zero; NA means utilisation is unknown.
        PGA_Utilization = ifelse(Maximum_PGA > 0, Total_PGA/Maximum_PGA, NA)
      )
    performance_data <- cpu_usage_agg %>%
      left_join(memory_usage_agg, by = c("DB_NAME", "CHECK_DATE"))
  } else {
    # No memory data: carry NA columns so downstream code sees a fixed schema.
    performance_data <- cpu_usage_agg %>%
      mutate(
        Total_PGA = NA_real_,
        Maximum_PGA = NA_real_,
        PGA_Utilization = NA_real_
      )
  }

  # -----------------------------------------------------------------------------------
  # Threshold Determination
  # -----------------------------------------------------------------------------------
  message("\nCalculating performance thresholds...")

  # Robust threshold calculation: 95th percentile normally; median + 2*MAD when
  # the upper quantiles are nearly flat (< 5 points apart); fixed 80% fallback
  # when the statistics cannot be computed at all.
  cpu_threshold <- tryCatch({
    quantiles <- quantile(performance_data$AVG_CPU_USAGE, probs = c(0.75, 0.90, 0.95), na.rm = TRUE)
    if(diff(range(quantiles)) < 5) {
      # If quantiles are too close, use statistical approach
      threshold <- median(performance_data$AVG_CPU_USAGE) + mad(performance_data$AVG_CPU_USAGE, constant = 2)
      message("Using median + 2MAD threshold: ", round(threshold, 1))
      threshold
    } else {
      message("Using 95th percentile threshold: ", round(quantiles[3], 1))
      quantiles[3]
    }
  }, error = function(e) {
    warning("Threshold calculation failed, using fallback value")
    80 # Default threshold
  })

  # Create performance indicator: 1 = average CPU above threshold, 0 = normal.
  performance_data <- performance_data %>%
    mutate(
      PERFORMANCE_ISSUE = as.integer(AVG_CPU_USAGE > cpu_threshold)
    )

  # -----------------------------------------------------------------------------------
  # Analysis and Visualization
  # -----------------------------------------------------------------------------------
  message("\nPerforming analysis...")

  # CPU-over-time chart.
  # NOTE(review): both geom_line and geom_point map the `color` aesthetic, so
  # the issue-flag colouring of the points competes with the per-database line
  # colouring and scale_color_manual only defines "0"/"1" - confirm intent.
  p <- ggplot(performance_data, aes(x = CHECK_DATE, y = AVG_CPU_USAGE)) +
    geom_line(aes(group = DB_NAME, color = DB_NAME)) +
    geom_point(aes(color = as.factor(PERFORMANCE_ISSUE)), size = 2) +
    geom_hline(yintercept = cpu_threshold, linetype = "dashed", color = "red") +
    labs(title = "Database CPU Usage Over Time",
         subtitle = paste("Threshold:", round(cpu_threshold, 1), "%"),
         x = "Date", y = "CPU Usage (%)",
         color = "Performance Issue") +
    scale_color_manual(values = c("0" = "darkgreen", "1" = "red"),
                       labels = c("Normal", "Issue")) +
    theme_minimal() +
    theme(plot.margin = unit(c(1,1,1,1), "cm"))

  grid.newpage()
  grid.text("CPU Usage Analysis", gp = gpar(fontsize = 18, fontface = "bold"), y = 0.95)
  print(p, vp = viewport(y = 0.5, height = 0.8))

  # Summary statistics page
  grid.newpage()
  grid.text("Performance Data Summary", gp = gpar(fontsize = 18, fontface = "bold"), y = 0.9)

  summary_text <- c(
    paste("Analysis performed on:", Sys.time()),
    paste("Number of databases analyzed:", length(unique(performance_data$DB_NAME))),
    paste("Time period covered:", min(performance_data$CHECK_DATE), "to", max(performance_data$CHECK_DATE)),
    paste("CPU threshold for performance issues:", round(cpu_threshold, 1), "%"),
    paste("Number of performance issues detected:", sum(performance_data$PERFORMANCE_ISSUE)),
    "\nKey Statistics:",
    capture.output(summary(performance_data %>% select(AVG_CPU_USAGE, PEAK_CPU_USAGE, CPU_VARIABILITY)))
  )

  if(nrow(memory_data) > 0) {
    summary_text <- c(summary_text,
                      "\nMemory Statistics:",
                      capture.output(summary(performance_data %>% select(Total_PGA, Maximum_PGA, PGA_Utilization))))
  }

  grid.text(paste(summary_text, collapse = "\n"),
            x = 0.1, y = 0.7, just = "left",
            gp = gpar(fontsize = 10, fontfamily = "mono"))

  # Only attempt modeling if we have sufficient cases on both sides of the
  # threshold (at least 5 "issue" rows and 5 "normal" rows).
  if(sum(performance_data$PERFORMANCE_ISSUE, na.rm = TRUE) >= 5 &&
     sum(!performance_data$PERFORMANCE_ISSUE, na.rm = TRUE) >= 5) {

    message("\nBuilding predictive model...")

    # Prepare modeling data: drop identifier columns, make the target a factor,
    # and remove incomplete rows.
    model_data <- performance_data %>%
      select(-DB_NAME, -CHECK_DATE) %>%
      mutate(PERFORMANCE_ISSUE = as.factor(PERFORMANCE_ISSUE)) %>%
      na.omit()

    # Train-test split (stratified 80/20; fixed seed for reproducibility)
    set.seed(123)
    train_index <- createDataPartition(model_data$PERFORMANCE_ISSUE, p = 0.8, list = FALSE)
    train_data <- model_data[train_index, ]
    test_data <- model_data[-train_index, ]

    # Train model (stratified sampling on the class label)
    model <- tryCatch({
      randomForest(
        PERFORMANCE_ISSUE ~ .,
        data = train_data,
        ntree = 200,
        importance = TRUE,
        strata = train_data$PERFORMANCE_ISSUE
      )
    }, error = function(e) {
      warning("Model training failed: ", e$message)
      return(NULL)
    })

    # Evaluate if model was trained
    if(!is.null(model)) {
      predictions <- predict(model, test_data)
      cm <- confusionMatrix(predictions, test_data$PERFORMANCE_ISSUE)

      # Model evaluation page
      grid.newpage()
      grid.text("Predictive Model Results", gp = gpar(fontsize = 18, fontface = "bold"), y = 0.9)

      model_text <- c(
        "Random Forest Model for Performance Issues",
        paste("Number of trees:", model$ntree),
        "\nConfusion Matrix:",
        capture.output(print(cm)),
        "\nOverall Accuracy:",
        paste(round(cm$overall['Accuracy'], 3)))

      grid.text(paste(model_text, collapse = "\n"),
                x = 0.1, y = 0.7, just = "left",
                gp = gpar(fontsize = 10, fontfamily = "mono"))

      # Feature importance plot (Gini-based importance from the forest)
      imp <- importance(model)
      imp_df <- data.frame(
        Feature = rownames(imp),
        Importance = imp[, "MeanDecreaseGini"]
      )

      imp_plot <- ggplot(imp_df, aes(x = reorder(Feature, Importance), y = Importance)) +
        geom_bar(stat = "identity", fill = "steelblue") +
        coord_flip() +
        labs(title = "Feature Importance",
             x = "Feature", y = "Mean Decrease in Gini") +
        theme_minimal()

      grid.newpage()
      print(imp_plot, vp = viewport(y = 0.5, height = 0.8))
    }
  } else {
    # Not enough labelled cases - record that in the report instead of modeling.
    grid.newpage()
    grid.text("Predictive Modeling Not Performed", gp = gpar(fontsize = 18, fontface = "bold"), y = 0.7)
    grid.text(paste("Insufficient cases for modeling (", sum(performance_data$PERFORMANCE_ISSUE), " issues found)"),
              gp = gpar(fontsize = 14), y = 0.6)
  }

  # Add conclusion page
  grid.newpage()
  grid.text("Analysis Complete", gp = gpar(fontsize = 18, fontface = "bold"), y = 0.7)
  grid.text(paste("Report generated:", Sys.time()), gp = gpar(fontsize = 14), y = 0.6)
  grid.text(paste("File saved as:", report_filename), gp = gpar(fontsize = 12), y = 0.5)

  message("\nAnalysis complete")
}
# -----------------------------------------------------------------------------------
# Execute the analysis with top-level error handling
# -----------------------------------------------------------------------------------
run_succeeded <- tryCatch({
  analyze_database_performance()
  message("Report saved to: ", normalizePath(report_filename))
  TRUE
}, error = function(e) {
  message("Fatal error in analysis: ", e$message)
  # Remove a partially written report so a broken PDF is not left behind.
  if(exists("report_filename") && file.exists(report_filename)) {
    file.remove(report_filename)
  }
  FALSE
})

# -----------------------------------------------------------------------------------
# End of Script
# -----------------------------------------------------------------------------------
# FIX: the original printed "completed successfully" unconditionally, even after
# a fatal error above; report the actual outcome instead.
if (run_succeeded) {
  message("Script execution completed successfully.")
} else {
  message("Script execution finished with errors.")
}
# Oracle Database Performance Analysis with R# Robust version with comprehensive error handling and PDF report
# -----------------------------------------------------------------------------------# Load required packages# -----------------------------------------------------------------------------------library(DBI)library(odbc)library(dplyr)library(lubridate)library(ggplot2)library(caret)library(randomForest)library(tidyr)library(gridExtra)library(knitr)library(grid)
# -----------------------------------------------------------------------------------# Configuration Section# -----------------------------------------------------------------------------------
# Get the directory where the script is locatedscript_dir <- "F:\\DBA\\Scripts\\R\\Reports"if (is.null(script_dir)) { script_dir <- getwd() # Fallback to working directory if not running from source}
# Generate unique filename for report in the script directoryreport_filename <- file.path(script_dir, paste0("DB_Performance_Report_", format(Sys.time(), "%Y%m%d_%H%M%S"), ".pdf"))
# -----------------------------------------------------------------------------------# Database Connection Function# -----------------------------------------------------------------------------------
safe_db_connect <- function() { tryCatch({ conn <- dbConnect(odbc::odbc(), dsn = "[DB_NAME]", uid = "[USERNAME]", pwd = "[PASSWORD]") message("Successfully connected to database") return(conn) }, error = function(e) { message("Connection failed: ", e$message) return(NULL) })}
# -----------------------------------------------------------------------------------# Query Execution with Robust Error Handling# -----------------------------------------------------------------------------------
execute_safe_query <- function(conn, query, default = data.frame()) { tryCatch({ result <- dbGetQuery(conn, query) if(nrow(result) == 0) { warning("Query returned empty results: ", substr(query, 1, 50), "...") return(default) } return(result) }, error = function(e) { warning("Query failed: ", e$message, "\nAttempting to continue with default data") return(default) })}
# -----------------------------------------------------------------------------------# PDF Report Initialization# -----------------------------------------------------------------------------------
init_report <- function(filename) { pdf(file = filename, width = 11, height = 8.5) # Add title page grid.newpage() grid.text("Database Performance Analysis Report", gp = gpar(fontsize = 24, fontface = "bold"), y = 0.7) grid.text(paste("Generated:", Sys.time()), gp = gpar(fontsize = 16), y = 0.6) grid.text("Confidential - Internal Use Only", gp = gpar(fontsize = 12, col = "gray"), y = 0.2) return(TRUE)}
# -----------------------------------------------------------------------------------# Main Analysis Function# -----------------------------------------------------------------------------------
analyze_database_performance <- function() { # Initialize report if(!init_report(report_filename)) { stop("Failed to initialize PDF report") } # Establish connection conn <- safe_db_connect() if(is.null(conn)) { grid.newpage() grid.text("Connection Failed", gp = gpar(fontsize = 18, col = "red"), y = 0.7) grid.text("Cannot proceed without database connection", gp = gpar(fontsize = 14), y = 0.6) dev.off() stop("Cannot proceed without database connection") } on.exit({ message("Closing database connection") dbDisconnect(conn) dev.off() message("Report generated: ", report_filename) }) # Query data with error handling message("\nQuerying database tables...") # CPU data - critical, fail if unavailable cpu_data <- execute_safe_query( conn, "SELECT DB_NAME, CPU_COUNT, TOTAL_CPU_USAGE, CHECK_DATE FROM ATTSTATS.CPU_USAGE" ) if(nrow(cpu_data) == 0) { grid.newpage() grid.text("Analysis Failed", gp = gpar(fontsize = 18, col = "red"), y = 0.7) grid.text("No CPU data available - analysis cannot continue", gp = gpar(fontsize = 14), y = 0.6) stop("No CPU data available - analysis cannot continue") } # Other data sources - can proceed without these memory_data <- execute_safe_query( conn, "SELECT DB_NAME, NAME, VALUE, CHECK_DATE FROM ATTSTATS.MEMORY_USAGE WHERE NAME IN ('total PGA allocated', 'maximum PGA allocated')", default = data.frame(DB_NAME=character(), NAME=character(), VALUE=numeric(), CHECK_DATE=character()) ) wait_events <- execute_safe_query( conn, "SELECT DB_NAME, EVENT, WAIT_TIME, CHECK_DATE FROM ATTSTATS.CURRENT_SESSIONS_WAIT_EVENTS", default = data.frame(DB_NAME=character(), EVENT=character(), WAIT_TIME=numeric(), CHECK_DATE=character()) ) db_load <- execute_safe_query( conn, "SELECT DB_NAME, CPU_USAGE, ACTIVE_SESSIONS, WAIT_EVENT, EVENT_COUNT, LOG_TIME, ALERT_MESSAGE FROM ATTSTATS.DB_LOAD_MONITOR", default = data.frame(DB_NAME=character(), CPU_USAGE=numeric(), ACTIVE_SESSIONS=numeric(), WAIT_EVENT=character(), EVENT_COUNT=numeric(), LOG_TIME=character(), 
ALERT_MESSAGE=character()) ) # ----------------------------------------------------------------------------------- # Data Quality Assessment # ----------------------------------------------------------------------------------- grid.newpage() grid.text("Data Quality Assessment", gp = gpar(fontsize = 18, fontface = "bold"), y = 0.9) # Check CPU data ranges quality_text <- c() if(any(cpu_data$TOTAL_CPU_USAGE > 500, na.rm = TRUE)) { quality_text <- c(quality_text, "WARNING: Unusually high CPU values detected (>500%). Verify data collection method.") } # Basic data summaries cpu_summary <- capture.output(summary(cpu_data$TOTAL_CPU_USAGE)) quality_text <- c(quality_text, "CPU Data Summary:", cpu_summary) if(length(quality_text) > 0) { grid.text(paste(quality_text, collapse = "\n"), x = 0.1, y = 0.7, just = "left", gp = gpar(fontsize = 10, fontfamily = "mono")) } # ----------------------------------------------------------------------------------- # Data Preparation # ----------------------------------------------------------------------------------- message("\nPreparing data for analysis...") # Process CPU data with date handling cpu_usage_agg <- cpu_data %>% mutate( CHECK_DATE = tryCatch(as.Date(CHECK_DATE), error = function(e) { warning("Date conversion error, using minimum date") as.Date("1970-01-01") }) ) %>% group_by(DB_NAME, CHECK_DATE) %>% summarise( AVG_CPU_USAGE = mean(TOTAL_CPU_USAGE, na.rm = TRUE), PEAK_CPU_USAGE = max(TOTAL_CPU_USAGE, na.rm = TRUE), CPU_VARIABILITY = sd(TOTAL_CPU_USAGE, na.rm = TRUE), .groups = "drop" ) %>% mutate( PEAK_CPU_USAGE = ifelse(is.infinite(PEAK_CPU_USAGE), AVG_CPU_USAGE, PEAK_CPU_USAGE), CPU_VARIABILITY = ifelse(is.na(CPU_VARIABILITY), 0, CPU_VARIABILITY) ) # Process memory data if available if(nrow(memory_data) > 0) { memory_usage_agg <- memory_data %>% mutate( CHECK_DATE = tryCatch(as.Date(CHECK_DATE), error = function(e) as.Date("1970-01-01")), VALUE = as.numeric(VALUE) ) %>% pivot_wider( names_from = NAME, values_from = VALUE, 
values_fn = mean ) %>% rename( Total_PGA = `total PGA allocated`, Maximum_PGA = `maximum PGA allocated` ) %>% mutate( PGA_Utilization = ifelse(Maximum_PGA > 0, Total_PGA/Maximum_PGA, NA) ) performance_data <- cpu_usage_agg %>% left_join(memory_usage_agg, by = c("DB_NAME", "CHECK_DATE")) } else { performance_data <- cpu_usage_agg %>% mutate( Total_PGA = NA_real_, Maximum_PGA = NA_real_, PGA_Utilization = NA_real_ ) } # ----------------------------------------------------------------------------------- # Threshold Determination # ----------------------------------------------------------------------------------- message("\nCalculating performance thresholds...") # Robust threshold calculation cpu_threshold <- tryCatch({ quantiles <- quantile(performance_data$AVG_CPU_USAGE, probs = c(0.75, 0.90, 0.95), na.rm = TRUE) if(diff(range(quantiles)) < 5) { # If quantiles are too close, use statistical approach threshold <- median(performance_data$AVG_CPU_USAGE) + mad(performance_data$AVG_CPU_USAGE, constant = 2) message("Using median + 2MAD threshold: ", round(threshold, 1)) threshold } else { message("Using 95th percentile threshold: ", round(quantiles[3], 1)) quantiles[3] } }, error = function(e) { warning("Threshold calculation failed, using fallback value") 80 # Default threshold }) # Create performance indicator performance_data <- performance_data %>% mutate( PERFORMANCE_ISSUE = as.integer(AVG_CPU_USAGE > cpu_threshold) ) # ----------------------------------------------------------------------------------- # Analysis and Visualization # ----------------------------------------------------------------------------------- message("\nPerforming analysis...") # Basic visualization p <- ggplot(performance_data, aes(x = CHECK_DATE, y = AVG_CPU_USAGE)) + geom_line(aes(group = DB_NAME, color = DB_NAME)) + geom_point(aes(color = as.factor(PERFORMANCE_ISSUE)), size = 2) + geom_hline(yintercept = cpu_threshold, linetype = "dashed", color = "red") + labs(title = "Database CPU Usage 
Over Time", subtitle = paste("Threshold:", round(cpu_threshold, 1), "%"), x = "Date", y = "CPU Usage (%)", color = "Performance Issue") + scale_color_manual(values = c("0" = "darkgreen", "1" = "red"), labels = c("Normal", "Issue")) + theme_minimal() + theme(plot.margin = unit(c(1,1,1,1), "cm")) grid.newpage() grid.text("CPU Usage Analysis", gp = gpar(fontsize = 18, fontface = "bold"), y = 0.95) print(p, vp = viewport(y = 0.5, height = 0.8)) # Summary statistics page grid.newpage() grid.text("Performance Data Summary", gp = gpar(fontsize = 18, fontface = "bold"), y = 0.9) summary_text <- c( paste("Analysis performed on:", Sys.time()), paste("Number of databases analyzed:", length(unique(performance_data$DB_NAME))), paste("Time period covered:", min(performance_data$CHECK_DATE), "to", max(performance_data$CHECK_DATE)), paste("CPU threshold for performance issues:", round(cpu_threshold, 1), "%"), paste("Number of performance issues detected:", sum(performance_data$PERFORMANCE_ISSUE)), "\nKey Statistics:", capture.output(summary(performance_data %>% select(AVG_CPU_USAGE, PEAK_CPU_USAGE, CPU_VARIABILITY))) ) if(nrow(memory_data) > 0) { summary_text <- c(summary_text, "\nMemory Statistics:", capture.output(summary(performance_data %>% select(Total_PGA, Maximum_PGA, PGA_Utilization)))) } grid.text(paste(summary_text, collapse = "\n"), x = 0.1, y = 0.7, just = "left", gp = gpar(fontsize = 10, fontfamily = "mono")) # Only attempt modeling if we have sufficient cases if(sum(performance_data$PERFORMANCE_ISSUE, na.rm = TRUE) >= 5 && sum(!performance_data$PERFORMANCE_ISSUE, na.rm = TRUE) >= 5) { message("\nBuilding predictive model...") # Prepare modeling data model_data <- performance_data %>% select(-DB_NAME, -CHECK_DATE) %>% mutate(PERFORMANCE_ISSUE = as.factor(PERFORMANCE_ISSUE)) %>% na.omit() # Train-test split set.seed(123) train_index <- createDataPartition(model_data$PERFORMANCE_ISSUE, p = 0.8, list = FALSE) train_data <- model_data[train_index, ] test_data <- 
model_data[-train_index, ] # Train model model <- tryCatch({ randomForest( PERFORMANCE_ISSUE ~ ., data = train_data, ntree = 200, importance = TRUE, strata = train_data$PERFORMANCE_ISSUE ) }, error = function(e) { warning("Model training failed: ", e$message) return(NULL) }) # Evaluate if model was trained if(!is.null(model)) { predictions <- predict(model, test_data) cm <- confusionMatrix(predictions, test_data$PERFORMANCE_ISSUE) # Model evaluation page grid.newpage() grid.text("Predictive Model Results", gp = gpar(fontsize = 18, fontface = "bold"), y = 0.9) model_text <- c( "Random Forest Model for Performance Issues", paste("Number of trees:", model$ntree), "\nConfusion Matrix:", capture.output(print(cm)), "\nOverall Accuracy:", paste(round(cm$overall['Accuracy'], 3))) grid.text(paste(model_text, collapse = "\n"), x = 0.1, y = 0.7, just = "left", gp = gpar(fontsize = 10, fontfamily = "mono")) # Feature importance plot imp <- importance(model) imp_df <- data.frame( Feature = rownames(imp), Importance = imp[, "MeanDecreaseGini"] ) imp_plot <- ggplot(imp_df, aes(x = reorder(Feature, Importance), y = Importance)) + geom_bar(stat = "identity", fill = "steelblue") + coord_flip() + labs(title = "Feature Importance", x = "Feature", y = "Mean Decrease in Gini") + theme_minimal() grid.newpage() print(imp_plot, vp = viewport(y = 0.5, height = 0.8)) } } else { grid.newpage() grid.text("Predictive Modeling Not Performed", gp = gpar(fontsize = 18, fontface = "bold"), y = 0.7) grid.text(paste("Insufficient cases for modeling (", sum(performance_data$PERFORMANCE_ISSUE), " issues found)"), gp = gpar(fontsize = 14), y = 0.6) } # Add conclusion page grid.newpage() grid.text("Analysis Complete", gp = gpar(fontsize = 18, fontface = "bold"), y = 0.7) grid.text(paste("Report generated:", Sys.time()), gp = gpar(fontsize = 14), y = 0.6) grid.text(paste("File saved as:", report_filename), gp = gpar(fontsize = 12), y = 0.5) message("\nAnalysis complete")}
# -----------------------------------------------------------------------------------# Execute the analysis with top-level error handling# -----------------------------------------------------------------------------------tryCatch({ analyze_database_performance() message("Report saved to: ", normalizePath(report_filename))}, error = function(e) { message("Fatal error in analysis: ", e$message) if(exists("report_filename") && file.exists(report_filename)) { file.remove(report_filename) }})
# -----------------------------------------------------------------------------------# End of Script# -----------------------------------------------------------------------------------message("Script execution completed successfully.")# -----------------------------------------------------------------------------------