# 11-Process_Monitoring_Analysis.r
# -----------------------------------------------------------------------------------
# File Name    : 11-Process_Monitoring_Analysis.r
# Author       : Pierre Montbleau
# Description  : Comprehensive process monitoring analysis with anomaly detection
# Purpose      : Monitor database processes, detect anomalies, and generate reports
# Call Syntax  : source("F:\\DBA\\Scripts\\R\\11-Process_Monitoring_Analysis.r")
# Parameters   : days_back - Number of days to analyze (default: 30)
# Last Modified: 2024-12-19
# -----------------------------------------------------------------------------------
# Load required librarieslibrary(odbc)library(DBI)library(ggplot2)library(dplyr)library(anomalize)library(lubridate)library(plotly)library(kableExtra)library(corrplot)library(tibble)
# -----------------------------------------------------------------------------------# Database Connection Function# -----------------------------------------------------------------------------------
# Open a connection to the monitoring database.
#
# Returns a DBI connection object on success, or NULL when the connection
# attempt fails; a status message is emitted in either case so callers can
# simply test the result with is.null().
safe_db_connect <- function() {
  tryCatch(
    {
      db_conn <- dbConnect(
        odbc::odbc(),
        dsn = "[DB_NAME]",
        uid = "[USERNAME]",
        pwd = "[PASSWORD]"
      )
      message("Successfully connected to database")
      db_conn
    },
    error = function(err) {
      message("Connection failed: ", err$message)
      NULL
    }
  )
}
# -----------------------------------------------------------------------------------# Data Retrieval Functions# -----------------------------------------------------------------------------------
# Retrieve process monitoring history from Oracle.
#
# conn      : an open DBI connection (see safe_db_connect()).
# days_back : number of days of history to pull (default 30).
#
# Returns a data.frame of ATTSTATS.PROCESS_MONITORING rows ordered by
# CHECK_TIMESTAMP, and reports the row count via message().
#
# days_back is interpolated into the query text, so it is validated up
# front: it must be a single finite positive number. This guards against
# NA producing broken SQL and against injection via a non-numeric value.
get_process_monitoring_data <- function(conn, days_back = 30) {
  stopifnot(
    is.numeric(days_back),
    length(days_back) == 1,
    is.finite(days_back),
    days_back > 0
  )

  query <- paste(
    "SELECT MONITOR_ID, CHECK_TIMESTAMP, CURRENT_PROCESSES, MAX_PROCESSES,",
    "       PROCESS_USAGE_PCT, ALERT_LEVEL, ACTION_TAKEN, SESSION_COUNT,",
    "       ACTIVE_SESSIONS",
    "  FROM ATTSTATS.PROCESS_MONITORING",
    " WHERE CHECK_TIMESTAMP >= SYSDATE -", days_back,
    " ORDER BY CHECK_TIMESTAMP"
  )

  data <- dbGetQuery(conn, query)
  message(paste("Retrieved", nrow(data), "records from database"))
  data
}
# -----------------------------------------------------------------------------------# Data Preprocessing Functions# -----------------------------------------------------------------------------------
# Enrich the raw monitoring extract with derived analysis columns.
#
# Adds calendar fields (date, hour, day_of_week) plus utilisation metrics
# (session_utilization, process_capacity_remaining, capacity_utilization),
# converting CHECK_TIMESTAMP to POSIXct first. Emits a short summary of
# the time range covered and returns the enriched data frame.
preprocess_data <- function(data) {
  data <- data %>%
    mutate(
      # Conversion happens first so every derived column below sees POSIXct.
      CHECK_TIMESTAMP = as.POSIXct(CHECK_TIMESTAMP),
      date = as.Date(CHECK_TIMESTAMP),
      hour = hour(CHECK_TIMESTAMP),
      day_of_week = wday(CHECK_TIMESTAMP, label = TRUE),
      # Guard the divisions: a zero denominator yields 0%, not NaN/Inf.
      session_utilization = ifelse(
        SESSION_COUNT > 0, ACTIVE_SESSIONS / SESSION_COUNT * 100, 0
      ),
      process_capacity_remaining = MAX_PROCESSES - CURRENT_PROCESSES,
      capacity_utilization = ifelse(
        MAX_PROCESSES > 0, CURRENT_PROCESSES / MAX_PROCESSES * 100, 0
      )
    )

  message("Data preprocessing completed:")
  message(paste("- Time range:", min(data$CHECK_TIMESTAMP), "to", max(data$CHECK_TIMESTAMP)))
  message(paste("- Unique days:", length(unique(data$date))))

  data
}
# -----------------------------------------------------------------------------------# Anomaly Detection Functions# -----------------------------------------------------------------------------------
# Flag anomalous monitoring rows using three complementary methods:
#   1. STL time-series decomposition + GESD (skipped when < 3 days of data),
#   2. z-score outliers (|z| > 2.5) on usage, process count, and sessions,
#   3. fixed business-rule thresholds (usage > 90%, >= 95% of max processes,
#      session utilisation > 80%, capacity exceeded).
# A row is an anomaly when at least two methods agree (overall_anomaly_score
# >= 2); anomaly_type labels the most severe matching condition.
# Returns the input data with the flag/score/type columns appended.
detect_anomalies <- function(data) {
  message("Starting anomaly detection...")
  # Ensure data is sorted by timestamp (required for decomposition).
  data <- data %>% arrange(CHECK_TIMESTAMP)
  # Span of the data in days, used to decide whether decomposition is viable.
  data_days <- as.numeric(difftime(max(data$CHECK_TIMESTAMP), min(data$CHECK_TIMESTAMP), units = "days"))
  # Skip time series decomposition for small datasets.
  if (data_days < 3) {
    message("Small dataset detected: Using statistical and business rules only")
    ts_anomaly <- "No"
  } else {
    # Use time series decomposition for larger datasets; any failure is
    # downgraded to "no time-series signal" rather than aborting.
    ts_data <- tryCatch({
      data %>%
        as_tibble() %>%
        time_decompose(PROCESS_USAGE_PCT, method = "stl", frequency = "1 day", trend = "1 week") %>%
        anomalize(remainder, method = "gesd", alpha = 0.05, max_anoms = 0.2) %>%
        time_recompose()
    }, error = function(e) {
      message("Time series decomposition failed: ", e$message)
      NULL
    })
    if (!is.null(ts_data)) {
      # NOTE(review): assumes time_decompose() returns one row per input row
      # in the same order, so ts_data$anomaly aligns with `data` — confirm;
      # a length mismatch would make the mutate() below recycle or error.
      ts_anomaly <- ts_data$anomaly
    } else {
      ts_anomaly <- "No"
    }
  }
  # Method 2: Statistical outliers for key metrics (z-score > 2.5).
  statistical_anomalies <- data %>%
    mutate(
      usage_zscore = scale(PROCESS_USAGE_PCT)[,1],
      process_zscore = scale(CURRENT_PROCESSES)[,1],
      session_zscore = scale(ACTIVE_SESSIONS)[,1],
      is_usage_anomaly = abs(usage_zscore) > 2.5 & !is.na(usage_zscore),
      is_process_anomaly = abs(process_zscore) > 2.5 & !is.na(process_zscore),
      is_session_anomaly = abs(session_zscore) > 2.5 & !is.na(session_zscore)
    )
  # Method 3: Business rule anomalies (fixed operational thresholds).
  business_anomalies <- data %>%
    mutate(
      is_critical_usage = PROCESS_USAGE_PCT > 90,
      is_process_limit_near = CURRENT_PROCESSES >= MAX_PROCESSES * 0.95,
      is_high_session_utilization = session_utilization > 80 & !is.na(session_utilization),
      is_capacity_exceeded = CURRENT_PROCESSES > MAX_PROCESSES,
      is_business_anomaly = is_critical_usage | is_process_limit_near | is_high_session_utilization | is_capacity_exceeded
    )
  # Combine all anomaly detection methods.
  # NOTE(review): joining by CHECK_TIMESTAMP assumes timestamps are unique;
  # duplicate timestamps would fan rows out in the joins — verify against
  # the MONITOR_ID granularity of the source table.
  final_data <- data %>%
    mutate(ts_anomaly = ts_anomaly) %>%
    left_join(statistical_anomalies %>% select(CHECK_TIMESTAMP, is_usage_anomaly, is_process_anomaly, is_session_anomaly), by = "CHECK_TIMESTAMP") %>%
    left_join(business_anomalies %>% select(CHECK_TIMESTAMP, is_business_anomaly, is_capacity_exceeded, is_critical_usage, is_process_limit_near, is_high_session_utilization), by = "CHECK_TIMESTAMP") %>%
    mutate(
      # Score = number of agreeing detectors; NA flags propagate NA scores,
      # which is why downstream sums use na.rm = TRUE.
      overall_anomaly_score = as.numeric(ts_anomaly == "Yes") + as.numeric(is_usage_anomaly) + as.numeric(is_process_anomaly) + as.numeric(is_session_anomaly) + as.numeric(is_business_anomaly),
      is_anomaly = overall_anomaly_score >= 2,
      # case_when evaluates top-down, so labels are ordered most → least severe.
      anomaly_type = case_when(
        is_capacity_exceeded ~ "Capacity Exceeded",
        is_critical_usage & is_process_limit_near ~ "Critical Overload",
        is_critical_usage ~ "High Usage",
        is_process_limit_near ~ "Near Capacity",
        is_high_session_utilization ~ "High Session Usage",
        is_usage_anomaly | is_process_anomaly | is_session_anomaly ~ "Statistical Anomaly",
        ts_anomaly == "Yes" ~ "Time Series Anomaly",
        TRUE ~ "No Anomaly"
      )
    )
  anomaly_count <- sum(final_data$is_anomaly, na.rm = TRUE)
  message(paste("Anomalies detected:", anomaly_count, "(", round(anomaly_count/nrow(final_data)*100, 2), "%)"))
  return(final_data)
}
# -----------------------------------------------------------------------------------# Visualization Functions# -----------------------------------------------------------------------------------
# Build the standard set of monitoring plots from the annotated data.
#
# data_with_anomalies : output of detect_anomalies() (needs is_anomaly,
#                       anomaly_type, overall_anomaly_score, hour columns).
#
# Returns a named list of ggplot objects: usage_timeseries,
# process_capacity, alert_distribution, hourly_pattern, anomaly_scores.
# Nothing is printed or saved here; main() handles persistence.
create_visualizations <- function(data_with_anomalies) {
  message("Creating visualizations...")
  plots <- list()
  # Filter out NA values for plotting (usage is the y-axis of most plots).
  plot_data <- data_with_anomalies %>% filter(!is.na(PROCESS_USAGE_PCT))
  # 1. Process Usage Over Time with Anomalies.
  #    Dashed lines mark the 90% (warning) and 95% (critical) thresholds.
  #    NOTE(review): geom_line/geom_hline `size =` is the pre-3.4 ggplot2
  #    spelling of `linewidth =` — still works, emits a deprecation warning
  #    on newer ggplot2.
  plots$usage_timeseries <- ggplot(plot_data, aes(x = CHECK_TIMESTAMP, y = PROCESS_USAGE_PCT)) +
    geom_line(color = "steelblue", alpha = 0.7) +
    geom_point(data = filter(plot_data, is_anomaly), aes(color = anomaly_type), size = 2, alpha = 0.8) +
    geom_hline(yintercept = 90, linetype = "dashed", color = "orange", size = 0.8) +
    geom_hline(yintercept = 95, linetype = "dashed", color = "red", size = 0.8) +
    scale_color_brewer(palette = "Set1") +
    labs(title = "Process Usage Percentage Over Time", subtitle = "Colored points indicate different types of anomalies", x = "Timestamp", y = "Process Usage %", color = "Anomaly Type") +
    theme_minimal() +
    theme(legend.position = "bottom")
  # 2. Current Processes vs Max Capacity (ribbon shades available headroom).
  plots$process_capacity <- ggplot(plot_data, aes(x = CHECK_TIMESTAMP)) +
    geom_ribbon(aes(ymin = 0, ymax = MAX_PROCESSES), fill = "grey90", alpha = 0.5) +
    geom_line(aes(y = CURRENT_PROCESSES, color = "Current Processes"), alpha = 0.8) +
    geom_line(aes(y = MAX_PROCESSES, color = "Max Capacity"), linetype = "dashed", size = 1) +
    geom_point(data = filter(plot_data, is_anomaly), aes(y = CURRENT_PROCESSES, shape = "Anomaly"), color = "red", size = 2) +
    scale_color_manual(values = c("Current Processes" = "blue", "Max Capacity" = "darkred")) +
    scale_shape_manual(values = c("Anomaly" = 17)) +
    labs(title = "Current Processes vs Maximum Capacity", x = "Timestamp", y = "Number of Processes", color = "", shape = "") +
    theme_minimal()
  # 3. Alert Level Distribution (horizontal bar chart of row counts).
  plots$alert_distribution <- data_with_anomalies %>%
    count(ALERT_LEVEL) %>%
    ggplot(aes(x = reorder(ALERT_LEVEL, n), y = n, fill = ALERT_LEVEL)) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = n), hjust = -0.2) +
    coord_flip() +
    labs(title = "Alert Level Distribution", x = "Alert Level", y = "Count") +
    theme_minimal() +
    theme(legend.position = "none")
  # 4. Hourly Pattern Analysis: average usage (bars, left axis) overlaid
  #    with anomaly counts (line, right axis, scaled by a fixed factor of 5).
  hourly_summary <- data_with_anomalies %>%
    group_by(hour) %>%
    summarise(
      avg_usage = mean(PROCESS_USAGE_PCT, na.rm = TRUE),
      anomaly_count = sum(is_anomaly, na.rm = TRUE),
      .groups = 'drop'
    )
  plots$hourly_pattern <- ggplot(hourly_summary, aes(x = hour)) +
    geom_col(aes(y = avg_usage, fill = "Average Usage"), alpha = 0.7) +
    geom_line(aes(y = anomaly_count * 5, color = "Anomaly Count"), size = 1) +
    geom_point(aes(y = anomaly_count * 5, color = "Anomaly Count"), size = 2) +
    scale_y_continuous(
      name = "Average Usage %",
      # Secondary axis undoes the *5 scaling applied to the counts above.
      sec.axis = sec_axis(~./5, name = "Anomaly Count")
    ) +
    scale_fill_manual(values = c("Average Usage" = "steelblue")) +
    scale_color_manual(values = c("Anomaly Count" = "red")) +
    labs(title = "Hourly Pattern: Usage and Anomalies", x = "Hour of Day") +
    theme_minimal()
  # 5. Anomaly Score Distribution (how many detectors agreed per row).
  plots$anomaly_scores <- data_with_anomalies %>%
    filter(!is.na(overall_anomaly_score)) %>%
    ggplot(aes(x = overall_anomaly_score)) +
    geom_histogram(binwidth = 1, fill = "steelblue", alpha = 0.7) +
    labs(title = "Distribution of Anomaly Scores", x = "Anomaly Score", y = "Count") +
    theme_minimal()
  return(plots)
}
# -----------------------------------------------------------------------------------# Reporting Functions# -----------------------------------------------------------------------------------
# Assemble the report object consumed by main().
#
# data_with_anomalies : output of detect_anomalies().
#
# Returns a four-element list:
#   summary           - one-row data frame of overall statistics,
#   anomalies         - flagged rows ordered by severity,
#   patterns          - anomaly rate / usage by hour and weekday,
#   anomaly_breakdown - count of flagged rows per anomaly_type.
generate_report <- function(data_with_anomalies) {
  message("Generating analysis report...")

  # Flagged rows are used twice below, so subset once.
  flagged <- data_with_anomalies %>% filter(is_anomaly)

  overall_summary <- data_with_anomalies %>%
    summarise(
      total_records = n(),
      anomaly_count = sum(is_anomaly, na.rm = TRUE),
      anomaly_percentage = round(mean(is_anomaly, na.rm = TRUE) * 100, 2),
      avg_process_usage = round(mean(PROCESS_USAGE_PCT, na.rm = TRUE), 2),
      max_process_usage = max(PROCESS_USAGE_PCT, na.rm = TRUE),
      avg_active_sessions = round(mean(ACTIVE_SESSIONS, na.rm = TRUE), 2),
      critical_alerts = sum(ALERT_LEVEL == "CRITICAL", na.rm = TRUE),
      warning_alerts = sum(ALERT_LEVEL == "WARNING", na.rm = TRUE),
      capacity_exceeded_count = sum(is_capacity_exceeded, na.rm = TRUE)
    )

  # Most severe anomalies first: score, then usage, descending.
  flagged_detail <- flagged %>%
    select(
      CHECK_TIMESTAMP, CURRENT_PROCESSES, MAX_PROCESSES, PROCESS_USAGE_PCT,
      ALERT_LEVEL, anomaly_type, overall_anomaly_score
    ) %>%
    arrange(desc(overall_anomaly_score), desc(PROCESS_USAGE_PCT))

  type_counts <- flagged %>%
    count(anomaly_type) %>%
    arrange(desc(n))

  # Anomaly concentration by hour-of-day and weekday, riskiest slots first.
  period_patterns <- data_with_anomalies %>%
    group_by(hour, day_of_week) %>%
    summarise(
      total_observations = n(),
      anomaly_rate = round(mean(is_anomaly, na.rm = TRUE) * 100, 2),
      avg_usage = round(mean(PROCESS_USAGE_PCT, na.rm = TRUE), 2),
      max_usage = round(max(PROCESS_USAGE_PCT, na.rm = TRUE), 2),
      .groups = "drop"
    ) %>%
    arrange(desc(anomaly_rate))

  list(
    summary = overall_summary,
    anomalies = flagged_detail,
    patterns = period_patterns,
    anomaly_breakdown = type_counts
  )
}
# -----------------------------------------------------------------------------------# Main Execution Function# -----------------------------------------------------------------------------------
# --- Private helpers for main() ----------------------------------------------

# Print the console summary of a completed analysis run.
print_analysis_summary <- function(report, days_back) {
  cat("\n=== PROCESS MONITORING ANALYSIS SUMMARY ===\n")
  cat("Time Period: Last", days_back, "days\n")
  cat("Total records analyzed:", report$summary$total_records, "\n")
  cat("Anomalies detected:", report$summary$anomaly_count, "\n")
  cat("Anomaly rate:", report$summary$anomaly_percentage, "%\n")
  cat("Average process usage:", report$summary$avg_process_usage, "%\n")
  cat("Maximum process usage:", report$summary$max_process_usage, "%\n")
  cat("Critical alerts:", report$summary$critical_alerts, "\n")
  cat("Warning alerts:", report$summary$warning_alerts, "\n")
  cat("Capacity exceeded events:", report$summary$capacity_exceeded_count, "\n")

  cat("\n=== ANOMALY TYPE BREAKDOWN ===\n")
  print(report$anomaly_breakdown)

  cat("\n=== TOP 10 ANOMALIES ===\n")
  print(head(report$anomalies, 10))

  cat("\n=== HIGHEST ANOMALY RATES BY TIME ===\n")
  print(head(report$patterns, 10))
}

# Persist plots (PNG), the annotated data (CSV), and the text report
# to output_dir, creating the directory if needed.
save_analysis_outputs <- function(plots, data_with_anomalies, report, output_dir) {
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
    cat("Created directory:", output_dir, "\n")
  }

  cat("\nSaving plots...\n")
  for (plot_name in names(plots)) {
    filename <- file.path(output_dir, paste0("process_monitoring_", plot_name, ".png"))
    tryCatch({
      ggsave(filename, plots[[plot_name]], width = 12, height = 8, dpi = 300)
      cat("Saved:", filename, "\n")
    }, error = function(e) {
      cat("Failed to save", filename, ":", e$message, "\n")
    })
  }

  output_filename <- file.path(output_dir, paste0("process_monitoring_analysis_", Sys.Date(), ".csv"))
  write.csv(data_with_anomalies, output_filename, row.names = FALSE)
  cat("Saved detailed data:", output_filename, "\n")

  # capture.output() instead of sink(): the old sink() call was never
  # unwound on error, so a failing print() left the console diverted for
  # the rest of the session. Also, cat() on a bare Sys.Date() printed the
  # Date's underlying day number, not the date — hence format().
  report_filename <- file.path(output_dir, paste0("process_monitoring_report_", Sys.Date(), ".txt"))
  report_lines <- capture.output({
    cat("PROCESS MONITORING ANALYSIS REPORT\n")
    cat("Generated on:", format(Sys.Date()), "\n\n")
    cat("SUMMARY STATISTICS:\n")
    print(report$summary)
    cat("\nANOMALY BREAKDOWN:\n")
    print(report$anomaly_breakdown)
    cat("\nTOP 15 ANOMALIES:\n")
    print(head(report$anomalies, 15))
  })
  writeLines(report_lines, report_filename)
  cat("Saved report:", report_filename, "\n")
}

# Run the full pipeline: connect, fetch, preprocess, detect anomalies,
# plot, report, print the summary, and save all outputs.
#
# days_back : number of days of history to analyze (default 30).
#
# Returns a list(data = annotated data frame, plots = ggplot list,
# report = generate_report() output), or NULL when no rows were found.
# Stops with an error if the database connection cannot be established.
main <- function(days_back = 30) {
  cat("Starting Process Monitoring Analysis...\n")
  cat("=========================================\n")

  conn <- safe_db_connect()
  if (is.null(conn)) {
    stop("Failed to connect to database. Analysis aborted.")
  }
  # Guarantee the connection is released on every exit path.
  on.exit(dbDisconnect(conn), add = TRUE)

  raw_data <- get_process_monitoring_data(conn, days_back)
  if (nrow(raw_data) == 0) {
    cat("No data found for the specified period.\n")
    return(NULL)
  }

  processed_data <- preprocess_data(raw_data)
  data_with_anomalies <- detect_anomalies(processed_data)
  plots <- create_visualizations(data_with_anomalies)
  report <- generate_report(data_with_anomalies)

  print_analysis_summary(report, days_back)
  save_analysis_outputs(
    plots, data_with_anomalies, report,
    output_dir = "F:/DBA/Scripts/R/Reports"
  )

  list(
    data = data_with_anomalies,
    plots = plots,
    report = report
  )
}
# -----------------------------------------------------------------------------------# Quick Analysis Function# -----------------------------------------------------------------------------------
# Lightweight interactive run: fetch the last `days_back` days, run the
# same preprocessing/anomaly pipeline, draw one usage plot, and print a
# short summary. Nothing is written to disk.
#
# days_back : number of days of history to analyze (default 7).
#
# Returns the annotated data frame, or NULL when the connection fails or
# no rows are found.
quick_analysis <- function(days_back = 7) {
  conn <- safe_db_connect()
  if (is.null(conn)) return(NULL)
  on.exit(dbDisconnect(conn), add = TRUE)

  data <- get_process_monitoring_data(conn, days_back)
  if (nrow(data) == 0) return(NULL)

  processed_data <- preprocess_data(data)
  anomaly_data <- detect_anomalies(processed_data)

  # Quick overview plot: usage line with anomalies highlighted by type.
  p <- ggplot(anomaly_data, aes(x = CHECK_TIMESTAMP, y = PROCESS_USAGE_PCT)) +
    geom_line(color = "steelblue") +
    geom_point(data = filter(anomaly_data, is_anomaly), aes(color = anomaly_type), size = 2) +
    labs(title = paste("Quick Analysis - Last", days_back, "Days"), x = "Time", y = "Process Usage %") +
    theme_minimal()
  print(p)

  cat("Quick Summary:\n")
  cat("- Total records:", nrow(anomaly_data), "\n")
  # na.rm = TRUE: is_anomaly can contain NA (scores built from NA-able
  # flags), and sum() without it would report the whole count as NA.
  # This matches every other anomaly sum in the script.
  cat("- Anomalies:", sum(anomaly_data$is_anomaly, na.rm = TRUE), "\n")
  cat("- Max usage:", max(anomaly_data$PROCESS_USAGE_PCT, na.rm = TRUE), "%\n")
  return(anomaly_data)
}
# -----------------------------------------------------------------------------------# Execute Main Analysis# -----------------------------------------------------------------------------------
# Kick off the full analysis. Errors are trapped so a failed run still
# reaches the completion banner instead of aborting the sourced script.
tryCatch(
  {
    results <- main(days_back = 30)
    cat("\nProcess Monitoring Analysis completed successfully!\n")
  },
  error = function(e) {
    cat("Error during analysis:", e$message, "\n")
  }
)

# Uncomment to run quick analysis
# quick_results <- quick_analysis(7)

# -----------------------------------------------------------------------------------
# End of Script
# -----------------------------------------------------------------------------------
cat("Script execution completed: 11-Process_Monitoring_Analysis.r\n")
# NOTE(review): this region was a byte-for-byte accidental duplicate of the
# entire script above — the library() calls, every function definition
# (safe_db_connect, get_process_monitoring_data, preprocess_data,
# detect_anomalies, create_visualizations, generate_report, main,
# quick_analysis), AND a second top-level tryCatch(main(days_back = 30)).
# The duplicated definitions silently shadowed the originals, and the
# duplicated execution ran the whole analysis — including every file write
# under F:/DBA/Scripts/R/Reports — a second time per source(). The duplicate
# section has been removed; the single set of definitions and the single
# execution above are authoritative.