• HOME
  • SERVICES
    • DATABASE SERVICES
    • CYBERSECURITY
    • INTERNET OF THINGS
  • PUBLISHING
    • PORTFOLIO
    • ORACLE SCRIPT LIBRARY
    • R CODE SCRIPT LIBRARY
  • ABOUT
    • ABOUT MONTBLEAU.CA
    • About Pierre Montbleau
  • Blog
  • Academic Press
    • Bookshelf

7_1-predict_all_export_timing.r

# -----------------------------------------------------------------------------------# File Name : 7_1-Predict_All_export_timing.r# Author : Pierre Montbleau# Description : Oracle Database Export Time Prediction for All Schemas# Purpose : Analyze all database schemas, predict export times with 16 parallel # processes, generate PDF reports and Data Pump command templates# Call Syntax : source("F:\\DBA\\Scripts\\R\\7_1-Predict_All_export_timing.r")# Parameters : model_type (default="basic_16par") - Prediction model to use# Output directory configured internally (F:/DBA/Scripts/R/Reports)# Last Modified: 2024-01-15# -----------------------------------------------------------------------------------
# Load required librarieslibrary(DBI)library(odbc)library(dplyr)library(ggplot2)library(caret)library(lubridate) # For time formattinglibrary(gridExtra)library(grid)library(knitr)library(rmarkdown)library(scales)
# -----------------------------------------------------------------------------
# Database connection
# -----------------------------------------------------------------------------
# Open an ODBC connection to the target Oracle database via a configured DSN.
# Placeholders must be replaced with real credentials before running.
conn <- dbConnect(
  odbc::odbc(),
  dsn = "[DB_NAME]",
  uid = "[USERNAME]",
  pwd = "[PASSWORD]"
)
# -----------------------------------------------------------------------------
# Function to get ALL schemas from the database
# -----------------------------------------------------------------------------
#' Retrieve every non-system schema name from the database.
#'
#' Tries the DBA_, ALL_ and USER_ data dictionary views in order of
#' decreasing privilege; the first view the session can query wins.
#'
#' @param conn An open DBI connection to an Oracle database.
#' @return Character vector of schema (username) names, sorted ascending.
#'         Stops with an error if no dictionary view is accessible.
get_all_schemas <- function(conn) {
  # Try different privilege levels, most privileged first.
  view_attempts <- list(
    list(name = "DBA", prefix = "dba_", table = "users"),
    list(name = "ALL", prefix = "all_", table = "users"),
    list(name = "USER", prefix = "user_", table = "users")
  )
  # Oracle-maintained accounts that must never be exported.
  excluded <- paste0(
    "'ANONYMOUS', 'APEX_PUBLIC_USER', 'APPQOSSYS', 'AUDSYS', 'CTXSYS', ",
    "'DBSNMP', 'DIP', 'DVF', 'DVSYS', 'EXFSYS', 'FLOWS_FILES', ",
    "'GSMADMIN_INTERNAL', 'GSMCATUSER', 'GSMUSER', 'LBACSYS', 'MDDATA', ",
    "'MDSYS', 'MGMT_VIEW', 'OLAPSYS', 'ORACLE_OCM', 'ORDDATA', ",
    "'ORDPLUGINS', 'ORDSYS', 'OUTLN', 'OWBSYS', 'REMOTE_SCHEDULER_AGENT', ",
    "'SI_INFORMTN_SCHEMA', 'SPATIAL_CSW_ADMIN_USR', 'SPATIAL_WFS_ADMIN_USR', ",
    "'SYS', 'SYSTEM', 'WMSYS', 'XDB', 'XS$NULL'"
  )
  for (attempt in view_attempts) {
    query <- paste0(
      "SELECT username as schema_name FROM ", attempt$prefix, attempt$table,
      " WHERE username NOT IN (", excluded, ")",
      " ORDER BY username"
    )
    tryCatch({
      schemas <- dbGetQuery(conn, query)
      # FIX: normalize column-name case; some ODBC drivers return
      # lowercase names, which made schemas$SCHEMA_NAME come back NULL.
      names(schemas) <- toupper(names(schemas))
      message("Successfully retrieved schemas using ", attempt$name, "_ views")
      return(schemas$SCHEMA_NAME)
    }, error = function(e) {
      message("Attempt with ", attempt$name, "_ views failed: ",
              conditionMessage(e))
    })
  }
  stop("All attempts to retrieve schemas failed.")
}
# -----------------------------------------------------------------------------
# Function to get schema statistics with robust error handling
# -----------------------------------------------------------------------------
#' Collect per-object-type counts and sizes for one schema.
#'
#' Attempts DBA_, ALL_ then USER_ views until one query succeeds.
#' NOTE(review): user_objects / user_segments have no OWNER column, so the
#' USER-level fallback query as written will likely fail — confirm against
#' the target database before relying on that tier.
#'
#' @param conn An open DBI connection.
#' @param schema_name Schema to inspect (case-insensitive; upper-cased here).
#' @return Data frame with lowercase columns object_type, object_count,
#'         size_mb, ordered by size descending; NULL if every attempt fails
#'         (a warning is raised in that case).
get_schema_stats <- function(conn, schema_name) {
  # Privilege tiers, most capable first.
  view_attempts <- list(
    list(name = "DBA", prefix = "dba_"),
    list(name = "ALL", prefix = "all_"),
    list(name = "USER", prefix = "user_")
  )
  for (attempt in view_attempts) {
    query <- paste0(
      "SELECT object_type, COUNT(*) as object_count, ",
      "NVL(SUM(bytes)/1024/1024, 0) as size_mb ",
      "FROM ", attempt$prefix, "objects o ",
      "LEFT JOIN ", attempt$prefix, "segments s ",
      "ON (o.owner = s.owner AND o.object_name = s.segment_name ",
      "AND o.object_type = s.segment_type) ",
      "WHERE o.owner = '", toupper(schema_name), "' ",
      "GROUP BY object_type ",
      "ORDER BY size_mb DESC NULLS LAST"
    )
    tryCatch({
      stats <- dbGetQuery(conn, query)
      # Normalize column names to lowercase for downstream consumers.
      names(stats) <- tolower(names(stats))
      return(stats)
    }, error = function(e) {
      message("Attempt with ", attempt$name, "_ views for schema ",
              schema_name, " failed: ", conditionMessage(e))
    })
  }
  warning("All attempts to retrieve statistics for schema '",
          schema_name, "' failed.")
  return(NULL)
}
# -----------------------------------------------------------------------------
# Function to convert minutes to HH:MM:SS format
# -----------------------------------------------------------------------------
#' Format a duration in minutes as an "HH:MM:SS" string.
#'
#' @param total_minutes Single numeric duration in minutes. NULL, NA,
#'   zero and negative values all yield "00:00:00".
#' @return Character scalar in zero-padded HH:MM:SS form.
format_time_hhmmss <- function(total_minutes) {
  # FIX: guard length-0 input (NULL / numeric(0)); `||` on a length-0
  # condition is an error in modern R, and the old code crashed there.
  if (length(total_minutes) == 0 || is.na(total_minutes) ||
      total_minutes <= 0) {
    return("00:00:00")
  }
  total_seconds <- round(total_minutes * 60)
  hours <- floor(total_seconds / 3600)
  minutes <- floor((total_seconds %% 3600) / 60)
  seconds <- total_seconds %% 60
  sprintf("%02d:%02d:%02d", hours, minutes, seconds)
}
# -----------------------------------------------------------------------------
# Function to estimate export time with 16 parallel processes
# -----------------------------------------------------------------------------
#' Estimate Data Pump export time for a schema using 16 parallel processes.
#'
#' @param schema_stats Data frame with columns size_mb and object_count
#'   (one row per object type), as returned by get_schema_stats(); may be
#'   NULL or empty when statistics could not be collected.
#' @param model_type Prediction model; only "basic_16par" is implemented.
#' @return Named list with estimated_minutes, formatted_time, size_mb,
#'   object_count, model_used, parallel_processes, parallel_efficiency,
#'   base_time_minutes, complexity and estimated_dump_size_gb.
estimate_export_time_16par <- function(schema_stats, model_type = "basic_16par") {
  # No statistics available: return an explicit "FAILED" placeholder so the
  # caller can still aggregate results without special-casing NULL.
  if (is.null(schema_stats) || nrow(schema_stats) == 0) {
    return(list(
      estimated_minutes = 0,
      formatted_time = "00:00:00",
      size_mb = 0,
      object_count = 0,
      model_used = "No data available",
      warning = "No schema statistics were returned",
      parallel_processes = 16,
      parallel_efficiency = 0.75,
      base_time_minutes = 0,
      complexity = "FAILED",
      estimated_dump_size_gb = 0
    ))
  }

  if (model_type == "basic_16par") {
    # Enhanced model for 16 parallel processes
    total_size_mb <- sum(schema_stats$size_mb, na.rm = TRUE)
    total_objects <- sum(schema_stats$object_count, na.rm = TRUE)

    # Base factors (optimized for parallel processing)
    size_factor <- 0.5          # minutes per GB (reduced due to parallelism)
    object_factor <- 0.008      # minutes per 100 objects (reduced)
    parallel_efficiency <- 0.75 # 75% efficiency for 16 processes

    # Calculate base (sequential-equivalent) time
    size_time <- (total_size_mb / 1024) * size_factor
    object_time <- (total_objects / 100) * object_factor
    base_time <- size_time + object_time

    # Apply parallel efficiency (16 processes)
    parallel_time <- base_time / (16 * parallel_efficiency)

    # Minimum time for very small schemas (startup overhead floor)
    parallel_time <- max(parallel_time, 0.5)

    # Complexity classification for parallel processing
    if (parallel_time < 1) {
      complexity <- "VERY SIMPLE"
    } else if (parallel_time < 5) {
      complexity <- "SIMPLE"
    } else if (parallel_time < 15) {
      complexity <- "MODERATE"
    } else if (parallel_time < 30) {
      complexity <- "COMPLEX"
    } else {
      complexity <- "VERY COMPLEX"
    }

    return(list(
      estimated_minutes = round(parallel_time, 2),
      formatted_time = format_time_hhmmss(parallel_time),
      size_mb = round(total_size_mb, 1),
      object_count = total_objects,
      model_used = "16 Parallel Processes Model",
      parallel_processes = 16,
      parallel_efficiency = parallel_efficiency,
      base_time_minutes = round(base_time, 2),
      complexity = complexity,
      estimated_dump_size_gb = round(total_size_mb / 1024 * 0.7, 3) # 70% compression
    ))
  } else if (model_type == "regression") {
    stop("Regression model not implemented in this example")
  } else {
    # FIX: previously fell off the end and returned NULL silently for any
    # other model name; fail loudly instead.
    stop("Unknown model_type: ", model_type)
  }
}
# -----------------------------------------------------------------------------
# Function to generate data pump command for individual schema with 16 parallel
# -----------------------------------------------------------------------------
#' Build an expdp command template for a single schema.
#'
#' @param schema_name Schema to export.
#' @param estimation Estimation list from estimate_export_time_16par()
#'   (uses size_mb and estimated_dump_size_gb).
#' @param db_name Database name (currently informational only).
#' @return List with command (character), filesize_gb, num_files and
#'   actual_parallel (parallelism capped by the number of dump files).
generate_schema_datapump_16par <- function(schema_name, estimation, db_name = "ORCL") {
  timestamp <- format(Sys.time(), "%Y%m%d_%H%M")

  # Determine optimal dump file size for the schema's total size.
  if (estimation$size_mb > 102400) {        # > 100GB
    filesize_gb <- 5
  } else if (estimation$size_mb > 10240) {  # > 10GB
    filesize_gb <- 2
  } else if (estimation$size_mb > 1024) {   # > 1GB
    filesize_gb <- 1
  } else {
    filesize_gb <- 0.5                      # 500MB
  }

  # FIX: Data Pump FILESIZE takes an integer plus a unit; "0.5G" is invalid
  # syntax, so express sub-GB sizes in megabytes instead.
  filesize_spec <- if (filesize_gb < 1) "500M" else paste0(filesize_gb, "G")

  # Number of dump files needed (minimum 2 to enable parallelism, max 8).
  num_files_needed <- ceiling(estimation$estimated_dump_size_gb / filesize_gb)
  num_files_needed <- max(2, min(num_files_needed, 8))

  command <- paste0(
    "expdp system/<password> \\\n",
    "  SCHEMAS=", schema_name, " \\\n",
    "  DIRECTORY=DATA_PUMP_DIR \\\n",
    "  DUMPFILE=", schema_name, "_", timestamp, "_%U.dmp \\\n",
    "  FILESIZE=", filesize_spec, " \\\n",
    "  PARALLEL=", min(16, num_files_needed), " \\\n",
    "  LOGFILE=", schema_name, "_export_", timestamp, ".log \\\n",
    "  COMPRESSION=ALL \\\n",
    "  EXCLUDE=STATISTICS \\\n",
    "  JOB_NAME=", schema_name, "_EXP_", timestamp
  )

  return(list(
    command = command,
    filesize_gb = filesize_gb,
    num_files = num_files_needed,
    actual_parallel = min(16, num_files_needed)
  ))
}
# -----------------------------------------------------------------------------
# Function to generate PDF report for ALL schemas with 16 parallel focus
# -----------------------------------------------------------------------------
#' Render a PDF (or HTML fallback) report summarising all schema estimates.
#'
#' Builds an R Markdown document in a temp file and renders it. The Rmd
#' chunks reference this function's local variables (summary_data,
#' complexity_dist, totals) — rmarkdown::render evaluates chunks in the
#' calling environment, which is why this works.
#'
#' @param all_results Named list of per-schema results (each with
#'   $estimation and $stats) from the main prediction loop.
#' @param output_dir Directory for the generated report (created if absent).
#' @return Path to the generated report file, or NULL when there were no
#'   valid schema results.
generate_comprehensive_report_16par <- function(all_results,
                                                output_dir = "F:/DBA/Scripts/R/Reports") {
  # Create output directory if it doesn't exist
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  # Unique filename with timestamp
  timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
  report_file <- file.path(output_dir,
    paste0("All_Schemas_Export_16PAR_Estimation_", timestamp, ".pdf"))

  # Temporary Rmd source; FIX: clean up via on.exit so the temp file is
  # removed even if the HTML-fallback render below errors out.
  temp_rmd <- tempfile(fileext = ".Rmd")
  on.exit(unlink(temp_rmd), add = TRUE)

  # Keep only schemas whose statistics were actually retrieved
  valid_results <- all_results[sapply(all_results, function(x) !is.null(x$stats))]
  if (length(valid_results) == 0) {
    warning("No valid schema results to generate report")
    return(NULL)
  }

  # Flatten per-schema estimations into one summary table
  summary_data <- data.frame(
    Schema = names(valid_results),
    Size_MB = sapply(valid_results, function(x) x$estimation$size_mb),
    Objects = sapply(valid_results, function(x) x$estimation$object_count),
    Estimated_Minutes = sapply(valid_results, function(x) x$estimation$estimated_minutes),
    Formatted_Time = sapply(valid_results, function(x) x$estimation$formatted_time),
    Complexity = sapply(valid_results, function(x) x$estimation$complexity),
    Base_Time_Minutes = sapply(valid_results, function(x) x$estimation$base_time_minutes),
    Est_Dump_Size_GB = sapply(valid_results, function(x) x$estimation$estimated_dump_size_gb),
    stringsAsFactors = FALSE
  )

  # Order by estimated time (descending)
  summary_data <- summary_data[order(-summary_data$Estimated_Minutes), ]

  # Grand totals used throughout the report
  total_size <- sum(summary_data$Size_MB, na.rm = TRUE)
  total_objects <- sum(summary_data$Objects, na.rm = TRUE)
  total_time_parallel <- sum(summary_data$Estimated_Minutes, na.rm = TRUE)
  total_time_base <- sum(summary_data$Base_Time_Minutes, na.rm = TRUE)

  # Percentage saved by parallelism vs the sequential baseline
  time_savings_percent <- if (total_time_base > 0) {
    round((1 - total_time_parallel / total_time_base) * 100, 1)
  } else {
    0
  }

  # How many schemas fall into each complexity class
  complexity_dist <- table(summary_data$Complexity)

  # Write the Rmd document (chunks resolve this function's locals)
  writeLines(c(
    "---",
    "title: 'Oracle ALL Schemas Export Time Estimation Report (16 Parallel Processes)'",
    "author: 'Automated DBA Report'",
    "date: '`r Sys.Date()`'",
    "output: pdf_document",
    "---",
    "",
    "```{r setup, include=FALSE}",
    "knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)",
    "library(ggplot2)",
    "library(dplyr)",
    "library(scales)",
    "```",
    "",
    "# Oracle ALL Schemas Export Time Estimation - 16 Parallel Processes",
    "",
    "## Report Summary",
    paste0("- **Total Schemas Analyzed:** ", nrow(summary_data)),
    paste0("- **Total Size:** ", round(total_size, 1), " MB (", round(total_size/1024, 1), " GB)"),
    paste0("- **Total Objects:** ", format(total_objects, big.mark = ",")),
    paste0("- **Total Estimated Export Time (16 PAR):** ", format_time_hhmmss(total_time_parallel)),
    paste0("- **Base Time (Sequential):** ", format_time_hhmmss(total_time_base)),
    paste0("- **Time Savings:** ", time_savings_percent, "% faster with 16 parallel processes"),
    paste0("- **Parallel Efficiency Factor:** 75%"),
    paste0("- **Report Generated:** ", Sys.time()),
    "",
    "## Performance Overview",
    "",
    "### Complexity Distribution",
    "```{r complexity-dist}",
    "complexity_df <- as.data.frame(complexity_dist)",
    "names(complexity_df) <- c('Complexity', 'Count')",
    "knitr::kable(complexity_df, caption = 'Schema Complexity Distribution')",
    "```",
    "",
    "### Time Savings Analysis",
    "```{r time-savings, fig.height=6, fig.width=10}",
    "savings_data <- data.frame(",
    "  Method = c('Sequential', '16 Parallel'),",
    "  Time_Hours = c(total_time_base/60, total_time_parallel/60)",
    ")",
    "",
    "ggplot(savings_data, aes(x = Method, y = Time_Hours, fill = Method)) +",
    "  geom_bar(stat = 'identity') +",
    "  labs(title = 'Total Export Time Comparison',",
    "       x = 'Export Method',",
    "       y = 'Time (Hours)') +",
    "  theme_minimal() +",
    "  geom_text(aes(label = paste0(round(Time_Hours, 1), ' hours')), vjust = -0.5) +",
    "  scale_fill_manual(values = c('Sequential' = 'red', '16 Parallel' = 'green'))",
    "```",
    "",
    "## Export Time Summary for All Schemas (16 Parallel)",
    "",
    "```{r summary-table}",
    "display_data <- summary_data %>%",
    "  select(Schema, Size_MB, Objects, Estimated_Minutes, Formatted_Time, Complexity)",
    "knitr::kable(display_data, caption = 'Export Time Estimation with 16 Parallel Processes')",
    "```",
    "",
    "## Visualizations",
    "",
    "### Top 10 Schemas by Estimated Export Time (16 PAR)",
    "```{r top10-plot, fig.height=6, fig.width=10}",
    "top_10 <- summary_data %>%",
    "  arrange(desc(Estimated_Minutes)) %>%",
    "  head(10)",
    "",
    "ggplot(top_10, aes(x = reorder(Schema, Estimated_Minutes), y = Estimated_Minutes, fill = Complexity)) +",
    "  geom_bar(stat = 'identity') +",
    "  coord_flip() +",
    "  labs(title = 'Top 10 Schemas by Estimated Export Time (16 Parallel)',",
    "       x = 'Schema',",
    "       y = 'Estimated Time (minutes)') +",
    "  theme_minimal() +",
    "  theme(legend.position = 'bottom') +",
    "  scale_y_continuous(labels = scales::comma)",
    "```",
    "",
    "### Size vs Objects Scatter Plot with Complexity",
    "```{r scatter-plot, fig.height=6, fig.width=10}",
    "ggplot(summary_data, aes(x = Size_MB, y = Objects, size = Estimated_Minutes, color = Complexity)) +",
    "  geom_point(alpha = 0.7) +",
    "  geom_text(aes(label = ifelse(Estimated_Minutes > 10, Schema, '')), ",
    "            size = 3, hjust = 0.5, vjust = -0.5, check_overlap = TRUE) +",
    "  labs(title = 'Schema Size vs Number of Objects (16 Parallel)',",
    "       x = 'Size (MB)',",
    "       y = 'Number of Objects',",
    "       size = 'Est. Time (min)',",
    "       color = 'Complexity') +",
    "  theme_minimal() +",
    "  scale_x_log10(labels = scales::comma) +",
    "  scale_y_log10(labels = scales::comma)",
    "```",
    "",
    "### Distribution of Estimated Times (16 PAR)",
    "```{r distribution-plot, fig.height=6, fig.width=10}",
    "ggplot(summary_data, aes(x = Estimated_Minutes, fill = Complexity)) +",
    "  geom_histogram(binwidth = 1, alpha = 0.7) +",
    "  labs(title = 'Distribution of Estimated Export Times (16 Parallel)',",
    "       x = 'Estimated Time (minutes)',",
    "       y = 'Number of Schemas') +",
    "  theme_minimal() +",
    "  theme(legend.position = 'bottom') +",
    "  scale_x_continuous(labels = scales::comma)",
    "```",
    "",
    "## Data Pump Command Templates for Top Schemas",
    "",
    "### Recommended Parallel Export Strategy",
    "For optimal performance with 16 parallel processes:",
    "",
    "1. **Group small schemas** (Estimated time < 1 minute) into batches",
    "2. **Export medium schemas** (1-5 minutes) individually with reduced parallelism",
    "3. **Large schemas** (>5 minutes) benefit most from full 16 parallel processes",
    "",
    "```{r datapump-examples, results='asis'}",
    "cat('### Example Data Pump Commands for Different Schema Sizes\\n\\n')",
    "",
    "# Get top 3 largest schemas",
    "top_3 <- head(summary_data, 3)",
    "",
    "for(i in 1:min(3, nrow(summary_data))) {",
    "  schema <- top_3$Schema[i]",
    "  est <- valid_results[[schema]]$estimation",
    "  dp_info <- generate_schema_datapump_16par(schema, est)",
    "  ",
    "  cat('#### ', schema, ' (', est$complexity, ')\\n')",
    "  cat('- **Size:** ', est$size_mb, ' MB\\n')",
    "  cat('- **Estimated Time:** ', est$formatted_time, '\\n')",
    "  cat('- **Recommended Parallelism:** ', dp_info$actual_parallel, '\\n')",
    "  cat('- **File Size:** ', dp_info$filesize_gb, ' GB\\n')",
    "  cat('- **Number of Files:** ', dp_info$num_files, '\\n')",
    "  cat('```bash\\n', dp_info$command, '\\n```\\n\\n')",
    "}",
    "```",
    "",
    "## Batch Export Recommendations",
    "",
    "Based on the analysis, schemas can be grouped for optimal parallel export:",
    "",
    "```{r batch-recommendations}",
    "# Create batch groups",
    "batch_groups <- summary_data %>%",
    "  mutate(Batch_Group = case_when(",
    "    Estimated_Minutes < 1 ~ 'Batch 1 (Very Fast)',",
    "    Estimated_Minutes < 5 ~ 'Batch 2 (Fast)',",
    "    Estimated_Minutes < 15 ~ 'Batch 3 (Medium)',",
    "    Estimated_Minutes < 30 ~ 'Batch 4 (Large)',",
    "    TRUE ~ 'Batch 5 (Very Large)'",
    "  )) %>%",
    "  group_by(Batch_Group) %>%",
    "  summarise(",
    "    Count = n(),",
    "    Total_Size_MB = sum(Size_MB),",
    "    Total_Time_Min = sum(Estimated_Minutes),",
    "    Avg_Time_Min = mean(Estimated_Minutes)",
    "  ) %>%",
    "  arrange(Total_Time_Min)",
    "",
    "knitr::kable(batch_groups, caption = 'Recommended Batch Groups for Parallel Export')",
    "```",
    "",
    "## Performance Considerations for 16 Parallel Exports",
    "",
    "### Advantages:",
    "1. **Maximum throughput** for large schemas",
    "2. **Reduced wall-clock time** for batch exports",
    "3. **Better resource utilization** of modern multi-core servers",
    "",
    "### Requirements:",
    "1. **Sufficient I/O bandwidth** (minimum 200 MB/s per parallel process)",
    "2. **Adequate PGA memory** for parallel query slaves",
    "3. **Fast storage subsystem** (SSD recommended)",
    "4. **Properly configured TEMP tablespace**",
    "",
    "### Monitoring During Export:",
    "```sql",
    "-- Monitor parallel processes",
    "SELECT * FROM v$pq_sesstat;",
    "",
    "-- Monitor active Data Pump jobs",
    "SELECT * FROM dba_datapump_jobs;",
    "",
    "-- Check I/O statistics",
    "SELECT * FROM v$iostat_file;",
    "```",
    "",
    "## Risk Assessment for Parallel Exports",
    "",
    "### High Risk Schemas:",
    "- Schemas with **LOB columns** may not benefit fully from parallelism",
    "- Schemas with **many constraints** may experience serialization",
    "- **Very small schemas** (< 100MB) may have overhead > benefit",
    "",
    "### Recommended Actions:",
    "1. **Pre-export testing** for complex schemas",
    "2. **Monitor PGA usage** during first few exports",
    "3. **Adjust parallelism** based on actual performance",
    "4. **Schedule exports** during off-peak hours",
    "",
    "## Storage Requirements for Parallel Exports",
    "",
    "| Requirement | Calculation | Notes |",
    "|-------------|-------------|-------|",
    paste0("| Dump Files | ", round(sum(summary_data$Est_Dump_Size_GB), 1), " GB | Total compressed size |"),
    paste0("| Temp Space | ", round(total_size/1024 * 0.1, 1), " GB | 10% of total size for sorting |"),
    "| Log Files | 1-3 GB | Per parallel export job |",
    paste0("| **Total Required** | **", round(sum(summary_data$Est_Dump_Size_GB) + total_size/1024 * 0.1 + 3, 1), " GB** | Minimum storage requirement |"),
    "",
    "## Schedule Optimization",
    "",
    paste0("Based on total estimated time of ", format_time_hhmmss(total_time_parallel), ":"),
    "",
    "- **Recommended batch size:** 4-6 schemas concurrently",
    "- **Optimal concurrency level:** 75% of available CPU cores",
    "- **Memory requirement:** 2GB PGA per parallel process",
    "- **Monitoring frequency:** Every 15 minutes during exports",
    "",
    "## Notes",
    "- This report provides estimates for **16 parallel process exports**",
    "- System schemas are excluded from analysis",
    "- Estimated times include **75% parallel efficiency factor**",
    "- Actual export times may vary based on:",
    "  - I/O subsystem performance",
    "  - Database version and configuration",
    "  - System load during export",
    "  - Network bandwidth to target",
    "- **Parallel exports require careful monitoring** to avoid resource contention",
    "- **Test exports recommended** for production-critical schemas",
    "- Report generated by automated DBA script (16 Parallel Edition)"
  ), temp_rmd)

  # Render to PDF; fall back to HTML if the PDF toolchain is unavailable
  tryCatch({
    rmarkdown::render(temp_rmd, output_file = report_file, quiet = TRUE)
    message("Comprehensive report saved to: ", report_file)
  }, error = function(e) {
    warning("Failed to render PDF report: ", conditionMessage(e))
    report_file_html <- file.path(output_dir,
      paste0("All_Schemas_Export_16PAR_Estimation_", timestamp, ".html"))
    writeLines(gsub("output: pdf_document", "output: html_document",
                    readLines(temp_rmd)), temp_rmd)
    rmarkdown::render(temp_rmd, output_file = report_file_html, quiet = TRUE)
    report_file <<- report_file_html
    message("HTML report saved to: ", report_file)
  })

  return(report_file)
}
# -----------------------------------------------------------------------------
# Main function to predict export time for ALL schemas with 16 parallel
# -----------------------------------------------------------------------------
#' Analyze every schema, estimate 16-parallel export times, render a report.
#'
#' @param conn An open DBI connection to the Oracle database.
#' @param model_type Prediction model passed to estimate_export_time_16par().
#' @return List with all_results (per-schema lists), report_path,
#'   total_schemas, processed_schemas and summary_stats.
predict_all_schemas_export_time_16par <- function(conn, model_type = "basic_16par") {
  message("Retrieving all schemas from database...")
  all_schemas <- get_all_schemas(conn)
  message("Found ", length(all_schemas), " schemas to analyze")

  results <- list()
  progress <- 0
  for (schema in all_schemas) {
    progress <- progress + 1
    message(sprintf("[%03d/%03d] Processing schema: %s",
                    progress, length(all_schemas), schema))

    # FIX: the old code assigned results[[schema]] <- ... INSIDE the error
    # handler, which only created a local copy in the handler's frame, so
    # failed schemas were silently dropped from `results`. Assigning the
    # value of tryCatch() itself records both outcomes correctly.
    results[[schema]] <- tryCatch({
      schema_stats <- get_schema_stats(conn, schema)
      estimation <- estimate_export_time_16par(schema_stats, model_type)
      datapump_info <- generate_schema_datapump_16par(schema, estimation)

      # FIX: %d errors on doubles; object counts arrive from the DB driver
      # as numeric, so cast explicitly before formatting.
      message(sprintf("  - Size: %7.1f MB, Objects: %6d, Time: %s (%s)",
                      estimation$size_mb, as.integer(estimation$object_count),
                      estimation$formatted_time, estimation$complexity))

      list(schema_name = schema,
           estimation = estimation,
           datapump_info = datapump_info,
           stats = schema_stats)
    }, error = function(e) {
      warning("Failed to process schema ", schema, ": ", conditionMessage(e))
      # Placeholder entry so aggregation still sees every schema
      list(schema_name = schema,
           estimation = list(
             estimated_minutes = 0,
             formatted_time = "00:00:00",
             size_mb = 0,
             object_count = 0,
             model_used = "Failed to analyze",
             parallel_processes = 16,
             parallel_efficiency = 0.75,
             base_time_minutes = 0,
             complexity = "FAILED",
             estimated_dump_size_gb = 0,
             warning = conditionMessage(e)
           ),
           datapump_info = NULL,
           stats = NULL)
    })

    # Small delay to avoid overwhelming the database
    Sys.sleep(0.05)
  }

  # Generate the comprehensive PDF report
  message("\nGenerating comprehensive report for 16 parallel processes...")
  report_path <- generate_comprehensive_report_16par(results)

  # Aggregate statistics over all processed schemas
  successful_schemas <- sum(sapply(results, function(x) !is.null(x$stats)))
  total_parallel_time <- sum(sapply(results, function(x) x$estimation$estimated_minutes))
  total_base_time <- sum(sapply(results, function(x) x$estimation$base_time_minutes))

  return(list(
    all_results = results,
    report_path = report_path,
    total_schemas = length(all_schemas),
    processed_schemas = successful_schemas,
    summary_stats = list(
      total_parallel_time_minutes = total_parallel_time,
      total_base_time_minutes = total_base_time,
      time_savings_percent = if (total_base_time > 0) {
        round((1 - total_parallel_time / total_base_time) * 100, 1)
      } else {
        0
      },
      avg_parallel_time_per_schema = if (successful_schemas > 0) {
        round(total_parallel_time / successful_schemas, 2)
      } else {
        0
      }
    )
  ))
}
# -----------------------------------------------------------------------------
# Execute the analysis for ALL schemas with 16 parallel processes
# -----------------------------------------------------------------------------
cat(paste0("\n", strrep("=", 80), "\n"))
cat("STARTING EXPORT TIME ESTIMATION FOR ALL DATABASE SCHEMAS\n")
cat("OPTIMIZED FOR 16 PARALLEL PROCESSES\n")
cat(paste0(strrep("=", 80), "\n"))

# Run the full analysis; a top-level failure is reported and yields NULL so
# the summary section below can skip gracefully.
all_results <- tryCatch({
  predict_all_schemas_export_time_16par(conn, model_type = "basic_16par")
}, error = function(e) {
  cat("Analysis failed: ", conditionMessage(e), "\n")
  NULL
})

# Close connection when done
dbDisconnect(conn)
# -----------------------------------------------------------------------------
# Output results and summary
# -----------------------------------------------------------------------------
if (!is.null(all_results)) {
  # Headline summary
  cat(paste0("\n", strrep("=", 80), "\n"))
  cat("ANALYSIS COMPLETE - 16 PARALLEL PROCESSES\n")
  cat(paste0(strrep("=", 80), "\n"))
  cat("Total schemas analyzed:", all_results$total_schemas, "\n")
  cat("Successfully processed:", all_results$processed_schemas, "\n")
  cat("Total estimated time (16 PAR):",
      format_time_hhmmss(all_results$summary_stats$total_parallel_time_minutes), "\n")
  cat("Base time (sequential):",
      format_time_hhmmss(all_results$summary_stats$total_base_time_minutes), "\n")
  cat("Time savings:", all_results$summary_stats$time_savings_percent, "% faster\n")
  cat("Average time per schema:",
      round(all_results$summary_stats$avg_parallel_time_per_schema, 2), "minutes\n")
  cat("Comprehensive report saved to:", all_results$report_path, "\n\n")

  if (length(all_results$all_results) > 0) {
    # Filter out failed schemas for display
    valid_display <- all_results$all_results[
      sapply(all_results$all_results, function(x) !is.null(x$stats))]

    if (length(valid_display) > 0) {
      # Extract one summary row per schema
      summary_list <- lapply(names(valid_display), function(schema) {
        est <- valid_display[[schema]]$estimation
        c(Schema = schema,
          Size_MB = est$size_mb,
          Objects = est$object_count,
          Minutes = est$estimated_minutes,
          Time = est$formatted_time,
          Complexity = est$complexity,
          Parallel_Saving = if (est$base_time_minutes > 0 && est$estimated_minutes > 0) {
            round((1 - est$estimated_minutes / est$base_time_minutes) * 100, 1)
          } else {
            0
          })
      })
      summary_df <- as.data.frame(do.call(rbind, summary_list))
      summary_df$Size_MB <- as.numeric(summary_df$Size_MB)
      # FIX: sprintf("%d") errors on doubles — Objects must be integer.
      summary_df$Objects <- as.integer(summary_df$Objects)
      summary_df$Minutes <- as.numeric(summary_df$Minutes)
      summary_df$Parallel_Saving <- as.numeric(summary_df$Parallel_Saving)

      # Order by estimated time, longest first
      summary_df <- summary_df[order(-summary_df$Minutes), ]

      cat("TOP 10 SCHEMAS BY ESTIMATED EXPORT TIME (16 PARALLEL):\n")
      cat(paste0(strrep("-", 100), "\n"))
      cat(sprintf("%-25s %10s %10s %10s %12s %15s %12s\n",
                  "SCHEMA", "SIZE_MB", "OBJECTS", "MINUTES", "TIME",
                  "COMPLEXITY", "PAR_SAVING"))
      cat(paste0(strrep("-", 100), "\n"))
      for (i in seq_len(min(10, nrow(summary_df)))) {
        cat(sprintf("%-25s %10.1f %10d %10.2f %12s %15s %11.1f%%\n",
                    summary_df$Schema[i], summary_df$Size_MB[i],
                    summary_df$Objects[i], summary_df$Minutes[i],
                    summary_df$Time[i], summary_df$Complexity[i],
                    summary_df$Parallel_Saving[i]))
      }

      # Batch recommendations grouped by estimated duration
      cat(paste0("\n", strrep("=", 80), "\n"))
      cat("BATCH EXPORT RECOMMENDATIONS FOR 16 PARALLEL PROCESSES\n")
      cat(paste0(strrep("=", 80), "\n"))

      fast_schemas <- summary_df[summary_df$Minutes < 1, ]
      medium_schemas <- summary_df[summary_df$Minutes >= 1 & summary_df$Minutes < 5, ]
      large_schemas <- summary_df[summary_df$Minutes >= 5 & summary_df$Minutes < 15, ]
      xlarge_schemas <- summary_df[summary_df$Minutes >= 15, ]

      cat(sprintf("Fast schemas (<1 min): %d schemas, total time: %.1f min\n",
                  nrow(fast_schemas), sum(fast_schemas$Minutes, na.rm = TRUE)))
      cat("  Recommended: Export in batches of 8-10 schemas concurrently\n\n")
      cat(sprintf("Medium schemas (1-5 min): %d schemas, total time: %.1f min\n",
                  nrow(medium_schemas), sum(medium_schemas$Minutes, na.rm = TRUE)))
      cat("  Recommended: Export in batches of 4-6 schemas concurrently\n\n")
      cat(sprintf("Large schemas (5-15 min): %d schemas, total time: %.1f min\n",
                  nrow(large_schemas), sum(large_schemas$Minutes, na.rm = TRUE)))
      cat("  Recommended: Export individually or 2-3 concurrently\n\n")
      cat(sprintf("Extra Large schemas (>15 min): %d schemas, total time: %.1f min\n",
                  nrow(xlarge_schemas), sum(xlarge_schemas$Minutes, na.rm = TRUE)))
      cat("  Recommended: Export individually with full 16 parallelism\n\n")

      # Optimized wall-clock estimate assuming the recommended concurrency
      cat("Total estimated batch time (optimized):",
          format_time_hhmmss(sum(fast_schemas$Minutes) / 10 +
                             sum(medium_schemas$Minutes) / 5 +
                             sum(large_schemas$Minutes) / 2 +
                             sum(xlarge_schemas$Minutes)), "\n")
    }
  }

  # Persist the full result object for later inspection
  rds_file <- paste0("all_schemas_16par_analysis_",
                     format(Sys.time(), "%Y%m%d_%H%M"), ".rds")
  saveRDS(all_results, file = rds_file)
  cat("\nDetailed results saved to RDS file:", rds_file, "\n")
}
# -----------------------------------------------------------------------------
# End of Script
# -----------------------------------------------------------------------------
cat(paste0("\n", strrep("=", 80), "\n"))
# FIX: cat() prints POSIXct as raw numeric seconds; format() yields a
# human-readable timestamp.
cat("ANALYSIS COMPLETED AT:", format(Sys.time()), "\n")
cat(paste0(strrep("=", 80), "\n"))
message("All schemas export prediction script execution completed successfully.")
# -----------------------------------------------------------------------------
montbleau.ca Academic press
address
110 Rue Reid, Vaudreuil-Dorion, QC, Canada J7V 0G1
Mon-Sun 9 am - 7 pm
contact us
montbleau@hotmail.com
5149497697
follow us
Copyright © 2024. All rights reserved. Powered by Domain.com.

We use cookies to enable essential functionality on our website, and analyze website traffic. By clicking Accept you consent to our use of cookies. Read about how we use cookies.

Your Cookie Settings

We use cookies to enable essential functionality on our website, and analyze website traffic. Read about how we use cookies.

Cookie Categories
Essential

These cookies are strictly necessary to provide you with services available through our websites. You cannot refuse these cookies without impacting how our websites function. You can block or delete them by changing your browser settings, as described under the heading "Managing cookies" in the Privacy and Cookies Policy.

Analytics

These cookies collect information that is used in aggregate form to help us understand how our websites are being used or how effective our marketing campaigns are.