• HOME
  • SERVICES
    • DATABASE SERVICES
    • CYBERSECURITY
    • INTERNET OF THINGS
  • PUBLISHING
    • PORTFOLIO
    • ORACLE SCRIPT LIBRARY
    • R CODE SCRIPT LIBRARY
  • ABOUT
    • ABOUT MONTBLEAU.CA
    • About Pierre Montbleau
  • Blog
  • Academic Press
    • Bookshelf

11-Process_Monitoring_Analysis.r

# -----------------------------------------------------------------------------------# File Name : 11-Process_Monitoring_Analysis.r# Author : Pierre Montbleau# Description : Comprehensive process monitoring analysis with anomaly detection# Purpose : Monitor database processes, detect anomalies, and generate reports# Call Syntax : source("F:\\DBA\\Scripts\\R\\11-Process_Monitoring_Analysis.r")# Parameters : days_back - Number of days to analyze (default: 30)# Last Modified: 2024-12-19# -----------------------------------------------------------------------------------
# Load required librarieslibrary(odbc)library(DBI)library(ggplot2)library(dplyr)library(anomalize)library(lubridate)library(plotly)library(kableExtra)library(corrplot)library(tibble)
# -----------------------------------------------------------------------------------# Database Connection Function# -----------------------------------------------------------------------------------
# Open an ODBC connection to the monitoring database.
#
# Returns a DBI connection object on success. On failure, emits a
# message with the error text and returns NULL so callers can bail out
# gracefully instead of crashing.
safe_db_connect <- function() {
  tryCatch(
    {
      db_handle <- dbConnect(
        odbc::odbc(),
        dsn = "[DB_NAME]",
        uid = "[USERNAME]",
        pwd = "[PASSWORD]"
      )
      message("Successfully connected to database")
      db_handle
    },
    error = function(err) {
      message("Connection failed: ", err$message)
      NULL
    }
  )
}
# -----------------------------------------------------------------------------------# Data Retrieval Functions# -----------------------------------------------------------------------------------
# Retrieve data from Oracle
#
# Pull raw process-monitoring rows for the trailing `days_back` days.
#
# conn      - an open DBI connection (e.g. from safe_db_connect()).
# days_back - number of days of history to fetch (default 30).
#
# Returns a data.frame ordered by CHECK_TIMESTAMP; emits a message with
# the row count retrieved.
get_process_monitoring_data <- function(conn, days_back = 30) {
  # BUG FIX: days_back is concatenated directly into the SQL text below.
  # Validate it so a non-numeric or vector value cannot inject arbitrary
  # SQL or silently produce a malformed query.
  stopifnot(is.numeric(days_back), length(days_back) == 1, days_back > 0)
  query <- paste(" SELECT MONITOR_ID, CHECK_TIMESTAMP, CURRENT_PROCESSES, MAX_PROCESSES, PROCESS_USAGE_PCT, ALERT_LEVEL, ACTION_TAKEN, SESSION_COUNT, ACTIVE_SESSIONS FROM ATTSTATS.PROCESS_MONITORING WHERE CHECK_TIMESTAMP >= SYSDATE - ", days_back, " ORDER BY CHECK_TIMESTAMP ")
  data <- dbGetQuery(conn, query)
  message(paste("Retrieved", nrow(data), "records from database"))
  return(data)
}
# -----------------------------------------------------------------------------------# Data Preprocessing Functions# -----------------------------------------------------------------------------------
# Preprocess and analyze data
#
# Enrich the raw monitoring rows with derived time and utilization
# columns used by the anomaly-detection and reporting steps.
#
# data - data.frame from get_process_monitoring_data().
#
# Returns the same frame with columns appended (in this order):
# date, hour, day_of_week, session_utilization,
# process_capacity_remaining, capacity_utilization.
preprocess_data <- function(data) {
  # Normalize the timestamp column to POSIXct before deriving anything.
  data$CHECK_TIMESTAMP <- as.POSIXct(data$CHECK_TIMESTAMP)

  # Calendar breakdowns of each observation.
  data$date <- as.Date(data$CHECK_TIMESTAMP)
  data$hour <- hour(data$CHECK_TIMESTAMP)
  data$day_of_week <- wday(data$CHECK_TIMESTAMP, label = TRUE)

  # Utilization ratios; guarded against division by zero (0 when the
  # denominator is not positive).
  data$session_utilization <- ifelse(
    data$SESSION_COUNT > 0,
    data$ACTIVE_SESSIONS / data$SESSION_COUNT * 100,
    0
  )
  data$process_capacity_remaining <- data$MAX_PROCESSES - data$CURRENT_PROCESSES
  data$capacity_utilization <- ifelse(
    data$MAX_PROCESSES > 0,
    data$CURRENT_PROCESSES / data$MAX_PROCESSES * 100,
    0
  )

  # Summary of preprocessing
  message("Data preprocessing completed:")
  message(paste("- Time range:", min(data$CHECK_TIMESTAMP), "to", max(data$CHECK_TIMESTAMP)))
  message(paste("- Unique days:", length(unique(data$date))))
  data
}
# -----------------------------------------------------------------------------------# Anomaly Detection Functions# -----------------------------------------------------------------------------------
# Simplified version that skips time series for small datasets
#
# Flag anomalous monitoring rows by combining three methods:
#   1. STL time-series decomposition + GESD test (skipped when the data
#      spans < 3 days, or when decomposition errors out)
#   2. Statistical outliers: |z-score| > 2.5 on usage/process/session counts
#   3. Business rules: hard thresholds on usage and capacity
# A row is an anomaly when at least 2 of the per-method flags fire.
#
# data - preprocessed frame from preprocess_data() (needs
#        session_utilization among the derived columns).
#
# Returns the input frame with anomaly flag columns, an
# overall_anomaly_score, is_anomaly, and a human-readable anomaly_type.
detect_anomalies <- function(data) {
  message("Starting anomaly detection...")
  # Ensure data is sorted by timestamp
  data <- data %>% arrange(CHECK_TIMESTAMP)
  # Calculate data duration in days
  data_days <- as.numeric(difftime(max(data$CHECK_TIMESTAMP), min(data$CHECK_TIMESTAMP), units = "days"))
  # Skip time series decomposition for small datasets
  if (data_days < 3) {
    message("Small dataset detected: Using statistical and business rules only")
    # Scalar "No" is recycled across all rows in the mutate() below.
    ts_anomaly <- "No"
  } else {
    # Use time series decomposition for larger datasets
    ts_data <- tryCatch({
      data %>%
        as_tibble() %>%
        time_decompose(PROCESS_USAGE_PCT, method = "stl", frequency = "1 day", trend = "1 week") %>%
        anomalize(remainder, method = "gesd", alpha = 0.05, max_anoms = 0.2) %>%
        time_recompose()
    }, error = function(e) {
      message("Time series decomposition failed: ", e$message)
      NULL
    })
    if (!is.null(ts_data)) {
      # NOTE(review): assumes time_decompose() returns one row per input
      # row so ts_data$anomaly lines up with `data` — verify; it can
      # aggregate to a coarser period depending on timestamp spacing.
      ts_anomaly <- ts_data$anomaly
    } else {
      ts_anomaly <- "No"
    }
  }
  # Method 2: Statistical outliers for key metrics
  # scale() returns a matrix; [,1] extracts the z-score vector. The
  # `& !is.na(...)` guards turn NA z-scores into FALSE flags.
  statistical_anomalies <- data %>% mutate(
    usage_zscore = scale(PROCESS_USAGE_PCT)[,1],
    process_zscore = scale(CURRENT_PROCESSES)[,1],
    session_zscore = scale(ACTIVE_SESSIONS)[,1],
    is_usage_anomaly = abs(usage_zscore) > 2.5 & !is.na(usage_zscore),
    is_process_anomaly = abs(process_zscore) > 2.5 & !is.na(process_zscore),
    is_session_anomaly = abs(session_zscore) > 2.5 & !is.na(session_zscore)
  )
  # Method 3: Business rule anomalies (fixed operational thresholds)
  business_anomalies <- data %>% mutate(
    is_critical_usage = PROCESS_USAGE_PCT > 90,
    is_process_limit_near = CURRENT_PROCESSES >= MAX_PROCESSES * 0.95,
    is_high_session_utilization = session_utilization > 80 & !is.na(session_utilization),
    is_capacity_exceeded = CURRENT_PROCESSES > MAX_PROCESSES,
    is_business_anomaly = is_critical_usage | is_process_limit_near | is_high_session_utilization | is_capacity_exceeded
  )
  # Combine all anomaly detection methods.
  # NOTE(review): the joins key on CHECK_TIMESTAMP alone — duplicate
  # timestamps would fan out rows; confirm the table guarantees
  # uniqueness per check.
  final_data <- data %>%
    mutate(ts_anomaly = ts_anomaly) %>%
    left_join(statistical_anomalies %>% select(CHECK_TIMESTAMP, is_usage_anomaly, is_process_anomaly, is_session_anomaly), by = "CHECK_TIMESTAMP") %>%
    left_join(business_anomalies %>% select(CHECK_TIMESTAMP, is_business_anomaly, is_capacity_exceeded, is_critical_usage, is_process_limit_near, is_high_session_utilization), by = "CHECK_TIMESTAMP") %>%
    mutate(
      # Each method contributes 0/1; is_anomaly requires agreement of >= 2.
      overall_anomaly_score = as.numeric(ts_anomaly == "Yes") + as.numeric(is_usage_anomaly) + as.numeric(is_process_anomaly) + as.numeric(is_session_anomaly) + as.numeric(is_business_anomaly),
      is_anomaly = overall_anomaly_score >= 2,
      # First matching rule wins: capacity breaches outrank usage, which
      # outrank the purely statistical/time-series labels.
      anomaly_type = case_when(
        is_capacity_exceeded ~ "Capacity Exceeded",
        is_critical_usage & is_process_limit_near ~ "Critical Overload",
        is_critical_usage ~ "High Usage",
        is_process_limit_near ~ "Near Capacity",
        is_high_session_utilization ~ "High Session Usage",
        is_usage_anomaly | is_process_anomaly | is_session_anomaly ~ "Statistical Anomaly",
        ts_anomaly == "Yes" ~ "Time Series Anomaly",
        TRUE ~ "No Anomaly"
      )
    )
  anomaly_count <- sum(final_data$is_anomaly, na.rm = TRUE)
  message(paste("Anomalies detected:", anomaly_count, "(", round(anomaly_count/nrow(final_data)*100, 2), "%)"))
  return(final_data)
}
# -----------------------------------------------------------------------------------# Visualization Functions# -----------------------------------------------------------------------------------
# Create comprehensive visualizations
#
# Build the five standard monitoring plots from the anomaly-annotated
# data. Nothing is rendered or saved here; the caller decides.
#
# data_with_anomalies - output of detect_anomalies().
#
# Returns a named list of ggplot objects: usage_timeseries,
# process_capacity, alert_distribution, hourly_pattern, anomaly_scores.
#
# NOTE(review): geom_line/geom_hline use `size =` for line width, which
# is deprecated in favour of `linewidth =` since ggplot2 3.4 — still
# works, but emits warnings on newer versions; confirm installed version
# before changing.
create_visualizations <- function(data_with_anomalies) {
  message("Creating visualizations...")
  plots <- list()
  # Filter out NA values for plotting
  plot_data <- data_with_anomalies %>% filter(!is.na(PROCESS_USAGE_PCT))
  # 1. Process Usage Over Time with Anomalies
  # Dashed reference lines mark the 90% (warning) and 95% (critical)
  # usage thresholds; anomalous rows are overplotted as colored points.
  plots$usage_timeseries <- ggplot(plot_data, aes(x = CHECK_TIMESTAMP, y = PROCESS_USAGE_PCT)) +
    geom_line(color = "steelblue", alpha = 0.7) +
    geom_point(data = filter(plot_data, is_anomaly), aes(color = anomaly_type), size = 2, alpha = 0.8) +
    geom_hline(yintercept = 90, linetype = "dashed", color = "orange", size = 0.8) +
    geom_hline(yintercept = 95, linetype = "dashed", color = "red", size = 0.8) +
    scale_color_brewer(palette = "Set1") +
    labs(title = "Process Usage Percentage Over Time", subtitle = "Colored points indicate different types of anomalies", x = "Timestamp", y = "Process Usage %", color = "Anomaly Type") +
    theme_minimal() +
    theme(legend.position = "bottom")
  # 2. Current Processes vs Max Capacity
  # Grey ribbon shades the available capacity band; red triangles (shape
  # 17) mark anomalies.
  plots$process_capacity <- ggplot(plot_data, aes(x = CHECK_TIMESTAMP)) +
    geom_ribbon(aes(ymin = 0, ymax = MAX_PROCESSES), fill = "grey90", alpha = 0.5) +
    geom_line(aes(y = CURRENT_PROCESSES, color = "Current Processes"), alpha = 0.8) +
    geom_line(aes(y = MAX_PROCESSES, color = "Max Capacity"), linetype = "dashed", size = 1) +
    geom_point(data = filter(plot_data, is_anomaly), aes(y = CURRENT_PROCESSES, shape = "Anomaly"), color = "red", size = 2) +
    scale_color_manual(values = c("Current Processes" = "blue", "Max Capacity" = "darkred")) +
    scale_shape_manual(values = c("Anomaly" = 17)) +
    labs(title = "Current Processes vs Maximum Capacity", x = "Timestamp", y = "Number of Processes", color = "", shape = "") +
    theme_minimal()
  # 3. Alert Level Distribution (horizontal bars, labelled with counts)
  plots$alert_distribution <- data_with_anomalies %>%
    count(ALERT_LEVEL) %>%
    ggplot(aes(x = reorder(ALERT_LEVEL, n), y = n, fill = ALERT_LEVEL)) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = n), hjust = -0.2) +
    coord_flip() +
    labs(title = "Alert Level Distribution", x = "Alert Level", y = "Count") +
    theme_minimal() +
    theme(legend.position = "none")
  # 4. Hourly Pattern Analysis
  hourly_summary <- data_with_anomalies %>%
    group_by(hour) %>%
    summarise(
      avg_usage = mean(PROCESS_USAGE_PCT, na.rm = TRUE),
      anomaly_count = sum(is_anomaly, na.rm = TRUE),
      .groups = 'drop'
    )
  # Dual-axis plot: anomaly counts are scaled x5 onto the usage axis and
  # the secondary axis divides by 5 to show the true count.
  plots$hourly_pattern <- ggplot(hourly_summary, aes(x = hour)) +
    geom_col(aes(y = avg_usage, fill = "Average Usage"), alpha = 0.7) +
    geom_line(aes(y = anomaly_count * 5, color = "Anomaly Count"), size = 1) +
    geom_point(aes(y = anomaly_count * 5, color = "Anomaly Count"), size = 2) +
    scale_y_continuous(
      name = "Average Usage %",
      sec.axis = sec_axis(~./5, name = "Anomaly Count")
    ) +
    scale_fill_manual(values = c("Average Usage" = "steelblue")) +
    scale_color_manual(values = c("Anomaly Count" = "red")) +
    labs(title = "Hourly Pattern: Usage and Anomalies", x = "Hour of Day") +
    theme_minimal()
  # 5. Anomaly Score Distribution (integer scores, so binwidth = 1)
  plots$anomaly_scores <- data_with_anomalies %>%
    filter(!is.na(overall_anomaly_score)) %>%
    ggplot(aes(x = overall_anomaly_score)) +
    geom_histogram(binwidth = 1, fill = "steelblue", alpha = 0.7) +
    labs(title = "Distribution of Anomaly Scores", x = "Anomaly Score", y = "Count") +
    theme_minimal()
  return(plots)
}
# -----------------------------------------------------------------------------------# Reporting Functions# -----------------------------------------------------------------------------------
# Generate comprehensive report
#
# Summarize the anomaly-annotated data into the tables consumed by
# main(): overall statistics, the flagged rows, time-of-day patterns,
# and a tally of anomaly types.
#
# data_with_anomalies - output of detect_anomalies().
#
# Returns list(summary, anomalies, patterns, anomaly_breakdown).
generate_report <- function(data_with_anomalies) {
  message("Generating analysis report...")

  # One-row table of headline statistics for the whole period.
  overall_stats <- data_with_anomalies %>% summarise(
    total_records = n(),
    anomaly_count = sum(is_anomaly, na.rm = TRUE),
    anomaly_percentage = round(mean(is_anomaly, na.rm = TRUE) * 100, 2),
    avg_process_usage = round(mean(PROCESS_USAGE_PCT, na.rm = TRUE), 2),
    max_process_usage = max(PROCESS_USAGE_PCT, na.rm = TRUE),
    avg_active_sessions = round(mean(ACTIVE_SESSIONS, na.rm = TRUE), 2),
    critical_alerts = sum(ALERT_LEVEL == "CRITICAL", na.rm = TRUE),
    warning_alerts = sum(ALERT_LEVEL == "WARNING", na.rm = TRUE),
    capacity_exceeded_count = sum(is_capacity_exceeded, na.rm = TRUE)
  )

  # Count of each anomaly_type among flagged rows, most frequent first.
  type_tally <- data_with_anomalies %>%
    filter(is_anomaly) %>%
    count(anomaly_type) %>%
    arrange(desc(n))

  # Flagged rows, worst first (highest score, then highest usage).
  flagged_records <- data_with_anomalies %>%
    filter(is_anomaly) %>%
    select(CHECK_TIMESTAMP, CURRENT_PROCESSES, MAX_PROCESSES, PROCESS_USAGE_PCT, ALERT_LEVEL, anomaly_type, overall_anomaly_score) %>%
    arrange(desc(overall_anomaly_score), desc(PROCESS_USAGE_PCT))

  # Anomaly rate and usage by (hour, weekday) cell, riskiest slots first.
  hour_dow_patterns <- data_with_anomalies %>%
    group_by(hour, day_of_week) %>%
    summarise(
      total_observations = n(),
      anomaly_rate = round(mean(is_anomaly, na.rm = TRUE) * 100, 2),
      avg_usage = round(mean(PROCESS_USAGE_PCT, na.rm = TRUE), 2),
      max_usage = round(max(PROCESS_USAGE_PCT, na.rm = TRUE), 2),
      .groups = 'drop'
    ) %>%
    arrange(desc(anomaly_rate))

  list(
    summary = overall_stats,
    anomalies = flagged_records,
    patterns = hour_dow_patterns,
    anomaly_breakdown = type_tally
  )
}
# -----------------------------------------------------------------------------------# Main Execution Function# -----------------------------------------------------------------------------------
# Run the full monitoring pipeline: connect, fetch, preprocess, detect
# anomalies, plot, print a console summary, and persist plots/CSV/report
# under F:/DBA/Scripts/R/Reports.
#
# days_back - number of days of history to analyze (default 30).
#
# Returns list(data, plots, report), or NULL when no rows were found.
# Stops (error) if the database connection cannot be established.
main <- function(days_back = 30) {
  cat("Starting Process Monitoring Analysis...\n")
  cat("=========================================\n")
  # Connect to database; abort early if unavailable.
  conn <- safe_db_connect()
  if (is.null(conn)) {
    stop("Failed to connect to database. Analysis aborted.")
  }
  on.exit(dbDisconnect(conn), add = TRUE)
  # Retrieve data
  raw_data <- get_process_monitoring_data(conn, days_back)
  if (nrow(raw_data) == 0) {
    cat("No data found for the specified period.\n")
    return(NULL)
  }
  # Preprocess data
  processed_data <- preprocess_data(raw_data)
  # Detect anomalies
  data_with_anomalies <- detect_anomalies(processed_data)
  # Create visualizations
  plots <- create_visualizations(data_with_anomalies)
  # Generate report
  report <- generate_report(data_with_anomalies)
  # Print comprehensive summary
  cat("\n=== PROCESS MONITORING ANALYSIS SUMMARY ===\n")
  cat("Time Period: Last", days_back, "days\n")
  cat("Total records analyzed:", report$summary$total_records, "\n")
  cat("Anomalies detected:", report$summary$anomaly_count, "\n")
  cat("Anomaly rate:", report$summary$anomaly_percentage, "%\n")
  cat("Average process usage:", report$summary$avg_process_usage, "%\n")
  cat("Maximum process usage:", report$summary$max_process_usage, "%\n")
  cat("Critical alerts:", report$summary$critical_alerts, "\n")
  cat("Warning alerts:", report$summary$warning_alerts, "\n")
  cat("Capacity exceeded events:", report$summary$capacity_exceeded_count, "\n")
  # Display anomaly breakdown
  cat("\n=== ANOMALY TYPE BREAKDOWN ===\n")
  print(report$anomaly_breakdown)
  # Display top anomalies
  cat("\n=== TOP 10 ANOMALIES ===\n")
  print(head(report$anomalies, 10))
  # Display highest risk time periods
  cat("\n=== HIGHEST ANOMALY RATES BY TIME ===\n")
  print(head(report$patterns, 10))
  # Define output directory and create it if it doesn't exist.
  output_dir <- "F:/DBA/Scripts/R/Reports"
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
    cat("Created directory:", output_dir, "\n")
  }
  # Save plots; a failure on one plot must not abort the rest.
  cat("\nSaving plots...\n")
  for (plot_name in names(plots)) {
    filename <- file.path(output_dir, paste0("process_monitoring_", plot_name, ".png"))
    tryCatch({
      ggsave(filename, plots[[plot_name]], width = 12, height = 8, dpi = 300)
      cat("Saved:", filename, "\n")
    }, error = function(e) {
      cat("Failed to save", filename, ":", e$message, "\n")
    })
  }
  # Save detailed data
  output_filename <- file.path(output_dir, paste0("process_monitoring_analysis_", Sys.Date(), ".csv"))
  write.csv(data_with_anomalies, output_filename, row.names = FALSE)
  cat("Saved detailed data:", output_filename, "\n")
  # Save report summary.
  # BUG FIX 1: the original called sink(report_filename) ... sink() with
  # no cleanup guarantee — if any print() in between errored, all console
  # output stayed silently redirected to the file. tryCatch/finally now
  # guarantees the diversion is always closed.
  # BUG FIX 2: cat() coerces a Date to its underlying number of days
  # since 1970-01-01 (e.g. "20079"), so the generation date is now
  # formatted explicitly with format().
  report_filename <- file.path(output_dir, paste0("process_monitoring_report_", Sys.Date(), ".txt"))
  sink(report_filename)
  tryCatch({
    cat("PROCESS MONITORING ANALYSIS REPORT\n")
    cat("Generated on:", format(Sys.Date()), "\n\n")
    cat("SUMMARY STATISTICS:\n")
    print(report$summary)
    cat("\nANOMALY BREAKDOWN:\n")
    print(report$anomaly_breakdown)
    cat("\nTOP 15 ANOMALIES:\n")
    print(head(report$anomalies, 15))
  }, finally = sink())
  cat("Saved report:", report_filename, "\n")
  return(list(
    data = data_with_anomalies,
    plots = plots,
    report = report
  ))
}
# -----------------------------------------------------------------------------------# Quick Analysis Function# -----------------------------------------------------------------------------------
# For quick analysis without saving files
#
# Interactive helper: fetch, preprocess, and anomaly-flag the last
# `days_back` days, print a quick plot plus a three-line summary, and
# return the annotated data. Returns NULL when the connection fails or
# no rows exist; nothing is written to disk.
quick_analysis <- function(days_back = 7) {
  conn <- safe_db_connect()
  if (is.null(conn)) return(NULL)
  on.exit(dbDisconnect(conn), add = TRUE)
  data <- get_process_monitoring_data(conn, days_back)
  if (nrow(data) == 0) return(NULL)
  processed_data <- preprocess_data(data)
  anomaly_data <- detect_anomalies(processed_data)
  # Create quick plot
  p <- ggplot(anomaly_data, aes(x = CHECK_TIMESTAMP, y = PROCESS_USAGE_PCT)) +
    geom_line(color = "steelblue") +
    geom_point(data = filter(anomaly_data, is_anomaly), aes(color = anomaly_type), size = 2) +
    labs(title = paste("Quick Analysis - Last", days_back, "Days"), x = "Time", y = "Process Usage %") +
    theme_minimal()
  print(p)
  cat("Quick Summary:\n")
  cat("- Total records:", nrow(anomaly_data), "\n")
  # BUG FIX: is_anomaly can contain NA (e.g. rows where PROCESS_USAGE_PCT
  # is NA propagate NA through the business rules), so a bare sum()
  # printed "Anomalies: NA". na.rm = TRUE matches main()/generate_report().
  cat("- Anomalies:", sum(anomaly_data$is_anomaly, na.rm = TRUE), "\n")
  cat("- Max usage:", max(anomaly_data$PROCESS_USAGE_PCT, na.rm = TRUE), "%\n")
  return(anomaly_data)
}
# -----------------------------------------------------------------------------------# Execute Main Analysis# -----------------------------------------------------------------------------------
# Execute the analysis with error handling: any failure inside main()
# is caught and reported on the console instead of killing the script.
tryCatch(
  expr = {
    results <- main(days_back = 30)
    cat("\nProcess Monitoring Analysis completed successfully!\n")
  },
  error = function(err) cat("Error during analysis:", err$message, "\n")
)
# Uncomment to run quick analysis (no files written; see quick_analysis)
# quick_results <- quick_analysis(7)
# -----------------------------------------------------------------------------------
# End of Script
# -----------------------------------------------------------------------------------
cat("Script execution completed: 11-Process_Monitoring_Analysis.r\n")
montbleau.ca Academic press
address
110 Rue Reid, Vaudreuil-Dorion, QC, Canada J7V 0G1
Mon-Sun 9 am - 7 pm
contact us
montbleau@hotmail.com
5149497697
Follow us
Copyright © 2024. All rights reserved. Powered by Domain.com.

We use cookies to enable essential functionality on our website, and analyze website traffic. By clicking Accept you consent to our use of cookies. Read about how we use cookies.

Your Cookie Settings

We use cookies to enable essential functionality on our website, and analyze website traffic. Read about how we use cookies.

Cookie Categories
Essential

These cookies are strictly necessary to provide you with services available through our websites. You cannot refuse these cookies without impacting how our websites function. You can block or delete them by changing your browser settings, as described under the heading "Managing cookies" in the Privacy and Cookies Policy.

Analytics

These cookies collect information that is used in aggregate form to help us understand how our websites are being used or how effective our marketing campaigns are.