6-DB_Session_Longevity.r
# -----------------------------------------------------------------------------------# File Name : 6-DB_Session_Longevity.r# Author : Pierre Montbleau# Description : Oracle Database Session Longevity and Survival Analysis Script# Purpose : Analyze database session patterns, perform survival analysis,# identify long-running sessions, and generate PDF reports# Call Syntax : source("F:\\DBA\\Scripts\\R\\6-DB_Session_Longevity.r")# Parameters : days_back (default=10) - Historical data period in days# Output directory configurable in config list# Last Modified: 2024-01-15# -----------------------------------------------------------------------------------
# Oracle Session Longevity Analysis# Comprehensive analysis of database session patterns with PDF reporting
## install.packages("kableExtra")
# -----------------------------------------------------------------------------------# Load required libraries# -----------------------------------------------------------------------------------library(DBI)library(odbc)library(dplyr)library(lubridate)library(ggplot2)library(survival)library(ggsurvfit)library(survminer)library(gridExtra)library(kableExtra)library(grid)
# -----------------------------------------------------------------------------------# Configuration# -----------------------------------------------------------------------------------config <- list( db_connection = list( dsn = "[DB_NAME]", uid = "[USERNAME]", pwd = "[PASSWORD]" ), analysis = list( days_back = 10, output_dir = "F:/DBA/Scripts/R/Reports" ))
# -----------------------------------------------------------------------------------# Helper Functions# -----------------------------------------------------------------------------------
safe_connect <- function() { tryCatch({ conn <- dbConnect(odbc::odbc(), dsn = config$db_connection$dsn, uid = config$db_connection$uid, pwd = config$db_connection$pwd) message("Successfully connected to database") return(conn) }, error = function(e) { stop("Connection failed: ", e$message) })}
fetch_data <- function(conn) { queries <- list( long_sessions = paste0( "SELECT MACHINE, OSUSER, USERNAME, LOGON_TIME, STATUS, NO_SESSIONS, CHECK_DATE FROM ATTSTATS.LONG_CONNECTED_SESSIONS WHERE CHECK_DATE >= TO_DATE('", Sys.Date() - config$analysis$days_back, "', 'YYYY-MM-DD')" ), current_sessions = paste0( "SELECT DB_NAME, SID, SERIAL#, USERNAME, STATUS, EVENT, WAIT_TIME, CHECK_DATE FROM ATTSTATS.CURRENT_SESSIONS_WAIT_EVENTS WHERE CHECK_DATE >= TO_DATE('", Sys.Date() - config$analysis$days_back, "', 'YYYY-MM-DD')" ), user_activity = paste0( "SELECT USERNAME, LOGIN_TIME, CLIENT_IP, CLIENT_HOSTNAME, ACTION FROM ATTSTATS.USER_ACTIVITY_LOG WHERE LOGIN_TIME >= TO_DATE('", Sys.Date() - config$analysis$days_back, "', 'YYYY-MM-DD')" ) ) lapply(queries, function(q) { tryCatch({ dbGetQuery(conn, q) }, error = function(e) { warning("Query failed: ", e$message) return(NULL) }) })}
preprocess_data <- function(raw_data) { processed <- list() # Process long sessions if (!is.null(raw_data$long_sessions)) { processed$session_data <- raw_data$long_sessions %>% mutate( LOGON_TIME = as.POSIXct(LOGON_TIME), CHECK_DATE = as.POSIXct(CHECK_DATE), SESSION_DURATION = as.numeric(difftime(CHECK_DATE, LOGON_TIME, units = "hours")), SESSION_ENDED = ifelse(STATUS == "ACTIVE", 0, 1) ) %>% filter(!is.na(SESSION_DURATION)) } else { processed$session_data <- data.frame() } # Process current sessions if (!is.null(raw_data$current_sessions)) { processed$current_active <- raw_data$current_sessions %>% mutate(CHECK_DATE = as.POSIXct(CHECK_DATE)) %>% filter(STATUS == "ACTIVE") if (!is.null(raw_data$user_activity)) { processed$current_active <- processed$current_active %>% left_join( raw_data$user_activity %>% select(USERNAME, CLIENT_IP, CLIENT_HOSTNAME) %>% distinct(), by = "USERNAME" ) } } else { processed$current_active <- data.frame() } return(processed)}
# -----------------------------------------------------------------------------------# Analysis Functions# -----------------------------------------------------------------------------------
analyze_survival <- function(data) { if (nrow(data) == 0) return(NULL) tryCatch({ if (length(unique(data$USERNAME)) > 1) { survfit(Surv(SESSION_DURATION, SESSION_ENDED) ~ USERNAME, data = data) } else { survfit(Surv(SESSION_DURATION, SESSION_ENDED) ~ 1, data = data) } }, error = function(e) { warning("Survival analysis failed: ", e$message) return(NULL) })}
build_model <- function(data) { if (nrow(data) < 5) return(NULL) tryCatch({ if (length(unique(data$USERNAME)) > 1) { coxph(Surv(SESSION_DURATION, SESSION_ENDED) ~ USERNAME, data = data) } else { coxph(Surv(SESSION_DURATION, SESSION_ENDED) ~ 1, data = data) } }, error = function(e) { warning("Model building failed: ", e$message) return(NULL) })}
identify_long_runners <- function(data) { if (nrow(data) == 0) return(data.frame()) tryCatch({ data %>% filter(SESSION_DURATION > quantile(SESSION_DURATION, 0.9, na.rm = TRUE)) %>% group_by(USERNAME, MACHINE, OSUSER) %>% summarise( AVG_DURATION = round(mean(SESSION_DURATION), 2), COUNT = n(), .groups = "drop" ) %>% arrange(desc(AVG_DURATION)) }, error = function(e) { warning("Long runner identification failed: ", e$message) return(data.frame()) })}
# -----------------------------------------------------------------------------------# Visualization Functions# -----------------------------------------------------------------------------------
plot_survival <- function(fit, data) { if (is.null(fit)) { grid.text("Insufficient data for survival analysis", gp = gpar(fontsize = 12, col = "red")) return() } ggsurvplot(fit, data = data, risk.table = TRUE, conf.int = TRUE, xlab = "Hours", ylab = "Survival Probability", title = paste("Session Survival (Last", config$analysis$days_back, "Days)"), ggtheme = theme_minimal())}
plot_duration_distribution <- function(data) { if (nrow(data) == 0) { grid.text("No session duration data available", gp = gpar(fontsize = 12, col = "red")) return() } ggplot(data, aes(x = SESSION_DURATION)) + geom_histogram(bins = 30, fill = "steelblue", alpha = 0.7) + labs(title = "Session Duration Distribution", x = "Duration (hours)", y = "Count") + theme_minimal()}
plot_long_sessions <- function(data) { if (nrow(data) == 0) { grid.text("No long-running sessions identified", gp = gpar(fontsize = 12, col = "red")) return() } ggplot(data, aes(x = reorder(MACHINE, AVG_DURATION), y = AVG_DURATION, fill = USERNAME)) + geom_col() + coord_flip() + labs(title = "Top Long-Running Sessions", x = "Machine", y = "Average Duration (hours)") + theme_minimal() + theme(legend.position = "bottom")}
# -----------------------------------------------------------------------------------# Report Generation# -----------------------------------------------------------------------------------
generate_report <- function(processed_data, surv_fit, model) { # Create output directory if needed if (!dir.exists(config$analysis$output_dir)) { dir.create(config$analysis$output_dir, recursive = TRUE) } report_file <- file.path( config$analysis$output_dir, paste0("Session_Analysis_", format(Sys.time(), "%Y%m%d_%H%M%S"), ".pdf") ) pdf(report_file, width = 11, height = 8.5) # Title page grid.newpage() grid.arrange( textGrob("Oracle Session Longevity Analysis", gp = gpar(fontsize = 24, fontface = "bold")), textGrob(paste("Analysis Period:", Sys.Date() - config$analysis$days_back, "to", Sys.Date()), gp = gpar(fontsize = 16)), textGrob(paste("Generated:", format(Sys.time(), "%Y-%m-%d %H:%M:%S")), gp = gpar(fontsize = 12)), ncol = 1, heights = c(1, 0.5, 0.3) ) # Session duration distribution grid.newpage() print(plot_duration_distribution(processed_data$session_data)) # Survival analysis if (!is.null(surv_fit)) { grid.newpage() print(plot_survival(surv_fit, processed_data$session_data)) } # Model summary if (!is.null(model)) { grid.newpage() model_summary <- as.data.frame(summary(model)$coefficients) %>% mutate(across(where(is.numeric), ~ round(., 4))) grid.table(model_summary) grid.text("Cox Proportional Hazards Model", x = 0.5, y = 0.95, gp = gpar(fontsize = 16, fontface = "bold")) } # Long-running sessions long_runners <- identify_long_runners(processed_data$session_data) grid.newpage() print(plot_long_sessions(long_runners)) # Current session predictions if (nrow(processed_data$current_active) > 0) { if (!is.null(model)) { processed_data$current_active$PREDICTED_DURATION <- predict(model, newdata = processed_data$current_active, type = "risk") } else { processed_data$current_active$PREDICTED_DURATION <- median(processed_data$session_data$SESSION_DURATION, na.rm = TRUE) } potential_issues <- processed_data$current_active %>% mutate(PREDICTED_DURATION = round(PREDICTED_DURATION, 2)) %>% filter(PREDICTED_DURATION > quantile(processed_data$session_data$SESSION_DURATION, 0.75, na.rm = TRUE)) %>% select(USERNAME, DB_NAME, EVENT, WAIT_TIME, PREDICTED_DURATION) %>% arrange(desc(PREDICTED_DURATION)) if (nrow(potential_issues) > 0) { grid.newpage() grid.table(potential_issues) grid.text("Potential Long-Running Sessions", x = 0.5, y = 0.95, gp = gpar(fontsize = 16, fontface = "bold")) } } dev.off() message("Report generated: ", report_file)}
# -----------------------------------------------------------------------------------# Main Execution# -----------------------------------------------------------------------------------
main <- function() { message("Starting Oracle session analysis...") # Establish database connection conn <- tryCatch({ safe_connect() }, error = function(e) { stop("Analysis aborted: ", e$message) }) on.exit(dbDisconnect(conn)) # Fetch and process data raw_data <- fetch_data(conn) processed_data <- preprocess_data(raw_data) # Perform analyses surv_fit <- analyze_survival(processed_data$session_data) model <- build_model(processed_data$session_data) # Generate report generate_report(processed_data, surv_fit, model) message("Analysis completed successfully")}
# -----------------------------------------------------------------------------------# Execute the analysis# -----------------------------------------------------------------------------------main()
# -----------------------------------------------------------------------------------# End of Script# -----------------------------------------------------------------------------------message("Oracle session longevity analysis script execution completed successfully.")# -----------------------------------------------------------------------------------
# Oracle Session Longevity Analysis# Comprehensive analysis of database session patterns with PDF reporting
## install.packages("kableExtra")
# -----------------------------------------------------------------------------------# Load required libraries# -----------------------------------------------------------------------------------library(DBI)library(odbc)library(dplyr)library(lubridate)library(ggplot2)library(survival)library(ggsurvfit)library(survminer)library(gridExtra)library(kableExtra)library(grid)
# -----------------------------------------------------------------------------------# Configuration# -----------------------------------------------------------------------------------config <- list( db_connection = list( dsn = "[DB_NAME]", uid = "[USERNAME]", pwd = "[PASSWORD]" ), analysis = list( days_back = 10, output_dir = "F:/DBA/Scripts/R/Reports" ))
# -----------------------------------------------------------------------------------# Helper Functions# -----------------------------------------------------------------------------------
safe_connect <- function() { tryCatch({ conn <- dbConnect(odbc::odbc(), dsn = config$db_connection$dsn, uid = config$db_connection$uid, pwd = config$db_connection$pwd) message("Successfully connected to database") return(conn) }, error = function(e) { stop("Connection failed: ", e$message) })}
fetch_data <- function(conn) { queries <- list( long_sessions = paste0( "SELECT MACHINE, OSUSER, USERNAME, LOGON_TIME, STATUS, NO_SESSIONS, CHECK_DATE FROM ATTSTATS.LONG_CONNECTED_SESSIONS WHERE CHECK_DATE >= TO_DATE('", Sys.Date() - config$analysis$days_back, "', 'YYYY-MM-DD')" ), current_sessions = paste0( "SELECT DB_NAME, SID, SERIAL#, USERNAME, STATUS, EVENT, WAIT_TIME, CHECK_DATE FROM ATTSTATS.CURRENT_SESSIONS_WAIT_EVENTS WHERE CHECK_DATE >= TO_DATE('", Sys.Date() - config$analysis$days_back, "', 'YYYY-MM-DD')" ), user_activity = paste0( "SELECT USERNAME, LOGIN_TIME, CLIENT_IP, CLIENT_HOSTNAME, ACTION FROM ATTSTATS.USER_ACTIVITY_LOG WHERE LOGIN_TIME >= TO_DATE('", Sys.Date() - config$analysis$days_back, "', 'YYYY-MM-DD')" ) ) lapply(queries, function(q) { tryCatch({ dbGetQuery(conn, q) }, error = function(e) { warning("Query failed: ", e$message) return(NULL) }) })}
preprocess_data <- function(raw_data) { processed <- list() # Process long sessions if (!is.null(raw_data$long_sessions)) { processed$session_data <- raw_data$long_sessions %>% mutate( LOGON_TIME = as.POSIXct(LOGON_TIME), CHECK_DATE = as.POSIXct(CHECK_DATE), SESSION_DURATION = as.numeric(difftime(CHECK_DATE, LOGON_TIME, units = "hours")), SESSION_ENDED = ifelse(STATUS == "ACTIVE", 0, 1) ) %>% filter(!is.na(SESSION_DURATION)) } else { processed$session_data <- data.frame() } # Process current sessions if (!is.null(raw_data$current_sessions)) { processed$current_active <- raw_data$current_sessions %>% mutate(CHECK_DATE = as.POSIXct(CHECK_DATE)) %>% filter(STATUS == "ACTIVE") if (!is.null(raw_data$user_activity)) { processed$current_active <- processed$current_active %>% left_join( raw_data$user_activity %>% select(USERNAME, CLIENT_IP, CLIENT_HOSTNAME) %>% distinct(), by = "USERNAME" ) } } else { processed$current_active <- data.frame() } return(processed)}
# -----------------------------------------------------------------------------------# Analysis Functions# -----------------------------------------------------------------------------------
analyze_survival <- function(data) { if (nrow(data) == 0) return(NULL) tryCatch({ if (length(unique(data$USERNAME)) > 1) { survfit(Surv(SESSION_DURATION, SESSION_ENDED) ~ USERNAME, data = data) } else { survfit(Surv(SESSION_DURATION, SESSION_ENDED) ~ 1, data = data) } }, error = function(e) { warning("Survival analysis failed: ", e$message) return(NULL) })}
build_model <- function(data) { if (nrow(data) < 5) return(NULL) tryCatch({ if (length(unique(data$USERNAME)) > 1) { coxph(Surv(SESSION_DURATION, SESSION_ENDED) ~ USERNAME, data = data) } else { coxph(Surv(SESSION_DURATION, SESSION_ENDED) ~ 1, data = data) } }, error = function(e) { warning("Model building failed: ", e$message) return(NULL) })}
identify_long_runners <- function(data) { if (nrow(data) == 0) return(data.frame()) tryCatch({ data %>% filter(SESSION_DURATION > quantile(SESSION_DURATION, 0.9, na.rm = TRUE)) %>% group_by(USERNAME, MACHINE, OSUSER) %>% summarise( AVG_DURATION = round(mean(SESSION_DURATION), 2), COUNT = n(), .groups = "drop" ) %>% arrange(desc(AVG_DURATION)) }, error = function(e) { warning("Long runner identification failed: ", e$message) return(data.frame()) })}
# -----------------------------------------------------------------------------------# Visualization Functions# -----------------------------------------------------------------------------------
plot_survival <- function(fit, data) { if (is.null(fit)) { grid.text("Insufficient data for survival analysis", gp = gpar(fontsize = 12, col = "red")) return() } ggsurvplot(fit, data = data, risk.table = TRUE, conf.int = TRUE, xlab = "Hours", ylab = "Survival Probability", title = paste("Session Survival (Last", config$analysis$days_back, "Days)"), ggtheme = theme_minimal())}
plot_duration_distribution <- function(data) { if (nrow(data) == 0) { grid.text("No session duration data available", gp = gpar(fontsize = 12, col = "red")) return() } ggplot(data, aes(x = SESSION_DURATION)) + geom_histogram(bins = 30, fill = "steelblue", alpha = 0.7) + labs(title = "Session Duration Distribution", x = "Duration (hours)", y = "Count") + theme_minimal()}
plot_long_sessions <- function(data) { if (nrow(data) == 0) { grid.text("No long-running sessions identified", gp = gpar(fontsize = 12, col = "red")) return() } ggplot(data, aes(x = reorder(MACHINE, AVG_DURATION), y = AVG_DURATION, fill = USERNAME)) + geom_col() + coord_flip() + labs(title = "Top Long-Running Sessions", x = "Machine", y = "Average Duration (hours)") + theme_minimal() + theme(legend.position = "bottom")}
# -----------------------------------------------------------------------------------# Report Generation# -----------------------------------------------------------------------------------
generate_report <- function(processed_data, surv_fit, model) { # Create output directory if needed if (!dir.exists(config$analysis$output_dir)) { dir.create(config$analysis$output_dir, recursive = TRUE) } report_file <- file.path( config$analysis$output_dir, paste0("Session_Analysis_", format(Sys.time(), "%Y%m%d_%H%M%S"), ".pdf") ) pdf(report_file, width = 11, height = 8.5) # Title page grid.newpage() grid.arrange( textGrob("Oracle Session Longevity Analysis", gp = gpar(fontsize = 24, fontface = "bold")), textGrob(paste("Analysis Period:", Sys.Date() - config$analysis$days_back, "to", Sys.Date()), gp = gpar(fontsize = 16)), textGrob(paste("Generated:", format(Sys.time(), "%Y-%m-%d %H:%M:%S")), gp = gpar(fontsize = 12)), ncol = 1, heights = c(1, 0.5, 0.3) ) # Session duration distribution grid.newpage() print(plot_duration_distribution(processed_data$session_data)) # Survival analysis if (!is.null(surv_fit)) { grid.newpage() print(plot_survival(surv_fit, processed_data$session_data)) } # Model summary if (!is.null(model)) { grid.newpage() model_summary <- as.data.frame(summary(model)$coefficients) %>% mutate(across(where(is.numeric), ~ round(., 4))) grid.table(model_summary) grid.text("Cox Proportional Hazards Model", x = 0.5, y = 0.95, gp = gpar(fontsize = 16, fontface = "bold")) } # Long-running sessions long_runners <- identify_long_runners(processed_data$session_data) grid.newpage() print(plot_long_sessions(long_runners)) # Current session predictions if (nrow(processed_data$current_active) > 0) { if (!is.null(model)) { processed_data$current_active$PREDICTED_DURATION <- predict(model, newdata = processed_data$current_active, type = "risk") } else { processed_data$current_active$PREDICTED_DURATION <- median(processed_data$session_data$SESSION_DURATION, na.rm = TRUE) } potential_issues <- processed_data$current_active %>% mutate(PREDICTED_DURATION = round(PREDICTED_DURATION, 2)) %>% filter(PREDICTED_DURATION > quantile(processed_data$session_data$SESSION_DURATION, 0.75, na.rm = TRUE)) %>% select(USERNAME, DB_NAME, EVENT, WAIT_TIME, PREDICTED_DURATION) %>% arrange(desc(PREDICTED_DURATION)) if (nrow(potential_issues) > 0) { grid.newpage() grid.table(potential_issues) grid.text("Potential Long-Running Sessions", x = 0.5, y = 0.95, gp = gpar(fontsize = 16, fontface = "bold")) } } dev.off() message("Report generated: ", report_file)}
# -----------------------------------------------------------------------------------# Main Execution# -----------------------------------------------------------------------------------
main <- function() { message("Starting Oracle session analysis...") # Establish database connection conn <- tryCatch({ safe_connect() }, error = function(e) { stop("Analysis aborted: ", e$message) }) on.exit(dbDisconnect(conn)) # Fetch and process data raw_data <- fetch_data(conn) processed_data <- preprocess_data(raw_data) # Perform analyses surv_fit <- analyze_survival(processed_data$session_data) model <- build_model(processed_data$session_data) # Generate report generate_report(processed_data, surv_fit, model) message("Analysis completed successfully")}
# -----------------------------------------------------------------------------------# Execute the analysis# -----------------------------------------------------------------------------------main()
# -----------------------------------------------------------------------------------# End of Script# -----------------------------------------------------------------------------------message("Oracle session longevity analysis script execution completed successfully.")# -----------------------------------------------------------------------------------