# 7_2-Predict_Full_export_timing.r
# -----------------------------------------------------------------------------------# File Name : 7_2-Predict_Full_export_timing.r# Author : Pierre Montbleau# Description : Analyzes Oracle database schemas and estimates full database export timing # with 16 parallel processes using Data Pump.# Purpose : To provide detailed estimation, reporting, and command generation for # full Oracle database exports with optimized parallel processing.# Call Syntax : source("F:\\DBA\\Scripts\\R\\7_2-Predict_Full_export_timing.r")# Parameters : None (connects to predefined Oracle database via ODBC)# Last Modified: 2025-12-17# -----------------------------------------------------------------------------------
# -----------------------------------------------------------------------------------# Load required libraries# -----------------------------------------------------------------------------------library(DBI)library(odbc)library(dplyr)library(ggplot2)library(lubridate)library(knitr)library(rmarkdown)library(scales)
# -----------------------------------------------------------------------------------
# Database connection
# DSN and credentials are placeholders to be substituted at deployment time.
# -----------------------------------------------------------------------------------
conn <- dbConnect(
  odbc::odbc(),
  dsn = "[DB_NAME]",
  uid = "[USERNAME]",
  pwd = "[PASSWORD]"
)
# -----------------------------------------------------------------------------------
# Function: get_database_info
# Description: Retrieves database metadata: instance details, selected init
#              parameters, tablespace sizes, and CPU count.
# Parameters:
#   - conn: DBI database connection object
# Returns: Named list of data.frames. A query that fails is reported via
#          warning() and its entry is simply absent from the result.
# -----------------------------------------------------------------------------------
get_database_info <- function(conn) {
  queries <- list(
    db_name     = "SELECT name, dbid, created, log_mode FROM v$database",
    instance    = "SELECT instance_name, version, startup_time FROM v$instance",
    parameters  = "SELECT name, value FROM v$parameter WHERE name IN ('db_block_size', 'compatible')",
    tablespaces = "SELECT tablespace_name, sum(bytes)/1024/1024 as size_mb FROM dba_data_files GROUP BY tablespace_name",
    cpu_count   = "SELECT value FROM v$parameter WHERE name = 'cpu_count'"
  )
  info <- list()
  for (name in names(queries)) {
    # Assign the tryCatch() result directly. The original code assigned to
    # info[[name]] inside the error handler, which only created a binding in
    # the handler's local scope and silently did nothing.
    info[[name]] <- tryCatch(
      dbGetQuery(conn, queries[[name]]),
      error = function(e) {
        warning("Failed to get ", name, ": ", conditionMessage(e), call. = FALSE)
        NULL
      }
    )
  }
  info
}
# -----------------------------------------------------------------------------------
# Function: get_all_schemas_full
# Description: Retrieves every database schema (including system schemas),
#              excluding a couple of internal Oracle users.
# Parameters:
#   - conn: DBI database connection object
# Returns: Data frame of schema names, creation dates, and account statuses.
#          Stops with a descriptive error if the catalog query fails.
# -----------------------------------------------------------------------------------
get_all_schemas_full <- function(conn) {
  sql <- " SELECT u.username as schema_name, u.created, u.account_status FROM dba_users u WHERE u.username NOT IN ('XS$NULL', 'REMOTE_SCHEDULER_AGENT') ORDER BY username"
  tryCatch(
    dbGetQuery(conn, sql),
    error = function(e) {
      stop("Failed to retrieve schemas: ", conditionMessage(e))
    }
  )
}
# -----------------------------------------------------------------------------------
# Function: get_schema_stats_simple
# Description: Retrieves per-object-type counts and segment sizes for one schema.
#              Uses a single grouped query to avoid ORA-00937 errors.
# Parameters:
#   - conn: DBI database connection object
#   - schema_name: Name of the schema to analyze (case-insensitive)
# Returns: Data frame with lower-cased columns object_type / object_count /
#          size_mb, or NULL if the query fails (failure is reported via message()).
# -----------------------------------------------------------------------------------
get_schema_stats_simple <- function(conn, schema_name) {
  # The owner name is interpolated into the SQL text. Proper bind parameters
  # would be preferable, but at minimum escape embedded single quotes so a
  # hostile or odd schema name cannot break out of the literal.
  owner <- gsub("'", "''", toupper(schema_name), fixed = TRUE)
  query <- paste0(" SELECT object_type, COUNT(*) as object_count, NVL(SUM(bytes)/1024/1024, 0) as size_mb FROM dba_objects o LEFT JOIN dba_segments s ON (o.owner = s.owner AND o.object_name = s.segment_name) WHERE o.owner = '", owner, "' GROUP BY object_type ORDER BY size_mb DESC NULLS LAST")
  tryCatch({
    stats <- dbGetQuery(conn, query)
    # Normalise Oracle's upper-case column names for downstream code.
    names(stats) <- tolower(names(stats))
    stats
  }, error = function(e) {
    message("Failed to get statistics for schema ", schema_name, ": ", conditionMessage(e))
    NULL
  })
}
# -----------------------------------------------------------------------------------
# Function: format_duration
# Description: Converts a duration in minutes to "HH:MM:00" or
#              "D days, HH:MM:00". NA and non-positive inputs yield "00:00:00".
# Parameters:
#   - total_minutes: Time duration in minutes (may be fractional)
# Returns: Formatted time string
# -----------------------------------------------------------------------------------
format_duration <- function(total_minutes) {
  if (is.na(total_minutes) || total_minutes <= 0) {
    return("00:00:00")
  }
  # Round to whole minutes FIRST. The original rounded only the minutes
  # component, so e.g. 59.6 minutes rendered as "00:60:00".
  total <- round(total_minutes)
  days <- total %/% (24 * 60)
  hours <- (total %% (24 * 60)) %/% 60
  minutes <- total %% 60
  if (days > 0) {
    sprintf("%d days, %02d:%02d:00", days, hours, minutes)
  } else {
    sprintf("%02d:%02d:00", hours, minutes)
  }
}
# -----------------------------------------------------------------------------------
# Function: estimate_full_export_time_16par
# Description: Estimation model for a full database export with 16 parallel
#              Data Pump workers. Considers total size, object count, schema
#              count, system-schema serialisation, and parallel efficiency.
# Parameters:
#   - all_stats: Named list (by schema) of lists with total_size / total_objects
#   - db_info: Database metadata list from get_database_info()
#   - export_type: Type of export (default: "FULL")
# Returns: List of estimation results (times, sizes, file recommendations).
# -----------------------------------------------------------------------------------
estimate_full_export_time_16par <- function(all_stats, db_info, export_type = "FULL") {
  # Aggregate totals over every analysed schema.
  total_size_mb <- sum(sapply(all_stats, function(x) x$total_size), na.rm = TRUE)
  total_objects <- sum(sapply(all_stats, function(x) x$total_objects), na.rm = TRUE)
  total_schemas <- length(all_stats)

  # db_block_size in bytes; fall back to the common 8K default when the
  # parameter query failed or returned no matching row (the original
  # as.numeric(...) could yield numeric(0) here).
  block_size <- 8192
  if (!is.null(db_info$parameters)) {
    bs <- as.numeric(db_info$parameters$VALUE[db_info$parameters$NAME == 'db_block_size'])
    if (length(bs) == 1 && !is.na(bs)) {
      block_size <- bs
    }
  }
  cpu_count <- 16
  if (!is.null(db_info$cpu_count)) {
    cc <- as.numeric(db_info$cpu_count$VALUE[1])
    if (length(cc) == 1 && !is.na(cc)) {
      cpu_count <- cc
    }
  }

  # Empirical model coefficients (minutes).
  size_factor <- 0.4          # minutes per GB of segment data
  object_factor <- 0.005      # minutes per 100 objects
  schema_factor <- 0.3        # per-schema metadata overhead
  system_factor <- 20         # fixed setup/teardown overhead
  parallel_efficiency <- 0.75 # 16 workers never scale perfectly

  size_time <- (total_size_mb / 1024) * size_factor
  object_time <- (total_objects / 100) * object_factor
  schema_time <- total_schemas * schema_factor
  base_time <- size_time + object_time + schema_time + system_factor
  parallel_time <- base_time / (16 * parallel_efficiency)

  # SYS/SYSTEM/SYSAUX export largely serially; penalise 20% per schema present.
  system_schemas <- c('SYS', 'SYSTEM', 'SYSAUX')
  system_schema_count <- sum(names(all_stats) %in% system_schemas)
  if (system_schema_count > 0) {
    parallel_time <- parallel_time * (1 + (system_schema_count * 0.2))
  }
  parallel_time <- max(parallel_time, 5)  # never report less than 5 minutes

  # Complexity label ("VERY COMPLEX" literal restored: the source had a stray
  # line break inside the string).
  if (parallel_time < 15) {
    complexity <- "VERY SIMPLE"
  } else if (parallel_time < 60) {
    complexity <- "SIMPLE"
  } else if (parallel_time < 240) {
    complexity <- "MODERATE"
  } else if (parallel_time < 480) {
    complexity <- "COMPLEX"
  } else {
    complexity <- "VERY COMPLEX"
  }

  # Assume ~30% reduction from COMPRESSION=ALL.
  estimated_dump_size_gb <- total_size_mb / 1024 * 0.7
  if (estimated_dump_size_gb > 500) {
    recommended_filesize <- 20
  } else if (estimated_dump_size_gb > 100) {
    recommended_filesize <- 10
  } else {
    recommended_filesize <- 5
  }
  # At least one dump file per parallel worker.
  num_files_needed <- max(16, ceiling(estimated_dump_size_gb / recommended_filesize))

  list(
    estimated_minutes = round(parallel_time, 1),
    formatted_time = format_duration(parallel_time),
    total_size_gb = round(total_size_mb / 1024, 2),
    total_size_mb = round(total_size_mb, 1),
    total_objects = total_objects,
    total_schemas = total_schemas,
    block_size_kb = round(block_size / 1024, 1),
    cpu_count = cpu_count,
    export_type = export_type,
    complexity = complexity,
    recommended_parallelism = 16,
    parallel_efficiency = parallel_efficiency,
    estimated_dump_size_gb = round(estimated_dump_size_gb, 2),
    recommended_filesize_gb = recommended_filesize,
    recommended_num_files = num_files_needed
  )
}
# -----------------------------------------------------------------------------------
# Function: generate_datapump_command_16par
# Description: Builds an expdp command template for a full export using 16
#              parallel workers. For databases over 100 GB, METRICS and a
#              BASIC compression algorithm are appended.
# Parameters:
#   - estimation: Export estimation list (recommended_filesize_gb, total_size_gb)
#   - db_info: Database metadata list (db_name used for file naming)
# Returns: Multi-line shell command string (backslash-continued lines).
# -----------------------------------------------------------------------------------
generate_datapump_command_16par <- function(estimation, db_info) {
  db_name <- if (!is.null(db_info$db_name)) db_info$db_name$NAME[1] else "ORCL"
  run_stamp <- format(Sys.time(), "%Y%m%d_%H%M")
  # Assemble one option per element, then join with shell line continuations.
  parts <- c(
    "expdp system/<password>",
    " FULL=Y",
    " DIRECTORY=DATA_PUMP_DIR",
    paste0(" DUMPFILE=", db_name, "_full_%U.dmp"),
    paste0(" FILESIZE=", estimation$recommended_filesize_gb, "G"),
    " PARALLEL=16",
    paste0(" LOGFILE=", db_name, "_full_", run_stamp, ".log"),
    " COMPRESSION=ALL",
    " EXCLUDE=STATISTICS",
    " CLUSTER=N",
    paste0(" JOB_NAME=FULL_EXPORT_16PAR_", run_stamp)
  )
  command <- paste(parts, collapse = " \\\n")
  if (estimation$total_size_gb > 100) {
    command <- paste0(command, " \\\n METRICS=Y \\\n COMPRESSION_ALGORITHM=BASIC")
  }
  command
}
# -----------------------------------------------------------------------------------
# Function: generate_html_report_16par
# Description: Builds a temporary R Markdown document summarising the analysis
#              (tables, ggplot charts, Data Pump command, recommendations) and
#              renders it to HTML. Chunk code references the local variables
#              (estimation, db_info, summary_data, ...) — this works because
#              rmarkdown::render() evaluates chunks in the caller's environment
#              by default.
# Parameters:
#   - all_stats: Named list (by schema) of per-schema statistic lists
#   - estimation: Export estimation results
#   - db_info: Database metadata
#   - output_dir: Directory for the report (default: "F:/DBA/Scripts/R/Reports")
# Returns: Path to the generated HTML report. Stops if rendering fails.
# Fixes:    Five string literals corrupted by stray line breaks restored;
#           temp .Rmd is now removed via on.exit() even when render() fails
#           (the original unlink() was unreachable after stop()).
# -----------------------------------------------------------------------------------
generate_html_report_16par <- function(all_stats, estimation, db_info, output_dir = "F:/DBA/Scripts/R/Reports") {
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }
  timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
  report_file <- file.path(output_dir, paste0("Full_DB_Export_16PAR_Analysis_", timestamp, ".html"))
  temp_rmd <- tempfile(fileext = ".Rmd")
  # Guarantee cleanup of the temporary Rmd on every exit path.
  on.exit(unlink(temp_rmd), add = TRUE)

  # Flatten per-schema stats into a data frame for tables/plots.
  summary_data <- data.frame(
    Schema = names(all_stats),
    Size_MB = sapply(all_stats, function(x) x$total_size),
    Objects = sapply(all_stats, function(x) x$total_objects),
    Status = sapply(all_stats, function(x) x$account_status),
    Created = sapply(all_stats, function(x) as.character(x$created)),
    stringsAsFactors = FALSE
  )
  summary_data <- summary_data[order(-summary_data$Size_MB), ]
  summary_data$Category <- ifelse(
    summary_data$Schema %in% c('SYS', 'SYSTEM', 'SYSAUX'), 'SYSTEM',
    ifelse(summary_data$Size_MB > 1024, 'LARGE',
           ifelse(summary_data$Size_MB > 100, 'MEDIUM', 'SMALL'))
  )
  top_20 <- head(summary_data, 20)
  category_totals <- summary_data %>%
    group_by(Category) %>%
    summarise(
      Count = n(),
      Total_Size_MB = sum(Size_MB, na.rm = TRUE),
      Total_Objects = sum(Objects, na.rm = TRUE),
      .groups = 'drop'
    )

  # Write the Rmd source; each element below is one line of the document.
  writeLines(c(
    "---",
    "title: 'Oracle Full Database Export Analysis Report (16 Parallel Processes)'",
    "author: 'Database Administration Team'",
    paste0("date: '", Sys.Date(), "'"),
    "output: html_document",
    "---",
    "",
    "```{r setup, include=FALSE}",
    "knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, fig.align = 'center')",
    "library(ggplot2)",
    "library(dplyr)",
    "library(scales)",
    "```",
    "",
    "# FULL DATABASE EXPORT ANALYSIS - 16 PARALLEL PROCESSES",
    "",
    paste0("**Database:** ", if (!is.null(db_info$db_name)) db_info$db_name$NAME[1] else "Unknown"),
    paste0("**Report Generated:** ", Sys.time()),
    paste0("**Analysis Date:** ", Sys.Date()),
    "",
    "## Executive Summary",
    "",
    "```{r exec-summary}",
    "cat('**Total Database Size:** ', estimation$total_size_gb, ' GB\\n\\n')",
    "cat('**Total Objects:** ', format(estimation$total_objects, big.mark = ','), '\\n\\n')",
    "cat('**Total Schemas:** ', estimation$total_schemas, '\\n\\n')",
    "cat('**CPU Count:** ', estimation$cpu_count, '\\n\\n')",
    "cat('**Estimated Export Time (16 PAR):** ', estimation$formatted_time, '\\n\\n')",
    "cat('**Export Complexity:** ', estimation$complexity, '\\n\\n')",
    "cat('**Estimated Dump Size:** ', estimation$estimated_dump_size_gb, ' GB (compressed)\\n\\n')",
    "cat('**Parallel Efficiency Factor:** ', estimation$parallel_efficiency * 100, '%\\n\\n')",
    "```",
    "",
    "## Recommended Export Strategy (16 Parallel Processes)",
    "",
    "```{r strategy}",
    "cat('**Parallel Processes:** 16 (FIXED)\\n\\n')",
    "cat('**File Size per Dumpfile:** ', estimation$recommended_filesize_gb, ' GB\\n\\n')",
    "cat('**Total Files Needed:** ', estimation$recommended_num_files, ' (minimum 16 for optimal parallelism)\\n\\n')",
    "cat('**Compression:** ALL (recommended)\\n\\n')",
    "cat('**Files per Parallel Process:** ', ceiling(estimation$recommended_num_files / 16), '\\n\\n')",
    "```",
    "",
    "## Database Information",
    "",
    "```{r db-info}",
    "if (!is.null(db_info$db_name)) {",
    " cat('**Database Name:** ', db_info$db_name$NAME[1], '\\n\\n')",
    " cat('**DBID:** ', db_info$db_name$DBID[1], '\\n\\n')",
    " cat('**Created:** ', db_info$db_name$CREATED[1], '\\n\\n')",
    " cat('**Log Mode:** ', db_info$db_name$LOG_MODE[1], '\\n\\n')",
    "}",
    "if (!is.null(db_info$instance)) {",
    " cat('**Instance:** ', db_info$instance$INSTANCE_NAME[1], '\\n\\n')",
    " cat('**Version:** ', db_info$instance$VERSION[1], '\\n\\n')",
    "}",
    "if (!is.null(db_info$cpu_count)) {",
    " cat('**CPU Count:** ', db_info$cpu_count$VALUE[1], '\\n\\n')",
    "}",
    "```",
    "",
    "## Performance Considerations for 16 Parallel Processes",
    "",
    "### Advantages:",
    "1. **Maximum Throughput:** Utilizes maximum available I/O bandwidth",
    "2. **Reduced Export Time:** Significantly faster than sequential export",
    "3. **Load Distribution:** Even distribution across system resources",
    "",
    "### Requirements:",
    "1. **Sufficient I/O Bandwidth:** Minimum 500 MB/s sustained write speed",
    "2. **Memory:** Additional PGA memory for parallel processes",
    "3. **CPU:** Ensure no CPU overcommitment on production system",
    "4. **Storage:** Fast disk subsystem (SSD recommended)",
    "",
    "### Monitoring Commands During Export:",
    "```sql",
    "-- Monitor parallel processes",
    "SELECT * FROM v$pq_sesstat;",
    "",
    "-- Monitor Data Pump job",
    "SELECT * FROM dba_datapump_jobs;",
    "",
    "-- Monitor I/O",
    "SELECT * FROM v$iostat_file;",
    "```",
    "",
    "## Schema Analysis",
    "",
    "### Schema Categories",
    "```{r category-table}",
    "knitr::kable(category_totals, caption = 'Schema Categories Summary')",
    "```",
    "",
    "### Top 20 Largest Schemas",
    "```{r top20-table}",
    "top_20_display <- top_20 %>%",
    " select(Schema, Size_MB, Objects, Category, Status)",
    "knitr::kable(top_20_display, caption = 'Top 20 Largest Schemas')",
    "```",
    "",
    "### Schema Size Distribution",
    "```{r size-distribution, fig.height=6, fig.width=10}",
    "ggplot(summary_data, aes(x = Size_MB, fill = Category)) +",
    " geom_histogram(bins = 30, alpha = 0.7) +",
    " scale_x_log10(labels = scales::comma) +",
    " labs(title = 'Schema Size Distribution (log scale)',",
    " x = 'Size (MB)',",
    " y = 'Number of Schemas') +",
    " theme_minimal() +",
    " theme(legend.position = 'bottom')",
    "```",
    "",
    "### Size vs Objects Scatter",
    "```{r size-vs-objects, fig.height=6, fig.width=10}",
    "ggplot(summary_data, aes(x = Size_MB, y = Objects, color = Category, size = Objects)) +",
    " geom_point(alpha = 0.6) +",
    " scale_x_log10(labels = scales::comma) +",
    " scale_y_log10(labels = scales::comma) +",
    " labs(title = 'Schema Size vs Number of Objects',",
    " x = 'Size (MB)',",
    " y = 'Number of Objects') +",
    " theme_minimal() +",
    " theme(legend.position = 'bottom')",
    "```",
    "",
    "## Data Pump Command Template (16 Parallel)",
    "",
    "```{r datapump-cmd}",
    "datapump_cmd <- generate_datapump_command_16par(estimation, db_info)",
    "cat('```bash')",
    "cat('\\n')",
    "cat(datapump_cmd)",
    "cat('\\n```')",
    "```",
    "",
    "### Command Parameters Explanation for 16 Parallel",
    "",
    "1. **FULL=Y** - Export entire database",
    "2. **DIRECTORY=DATA_PUMP_DIR** - Oracle directory object for dump files",
    "3. **DUMPFILE** - Naming pattern with %U for parallel file creation (will create 16+ files)",
    "4. **FILESIZE** - Size limit per dump file (adjusted based on total size)",
    "5. **PARALLEL=16** - Fixed 16 parallel processes",
    "6. **COMPRESSION=ALL** - Compress metadata and data",
    "7. **EXCLUDE=STATISTICS** - Exclude statistics (can be regenerated)",
    "8. **CLUSTER=N** - Disable RAC cluster export (if single instance)",
    "9. **JOB_NAME** - Unique job identifier with 16PAR prefix",
    "",
    "## Risk Assessment & Recommendations for 16 Parallel",
    "",
    "### High Risk Factors",
    "- **I/O Saturation:** 16 parallel processes can saturate storage I/O",
    "- **Memory Pressure:** Increased PGA memory usage",
    "- **CPU Contention:** May affect other applications if not scheduled properly",
    "",
    "### Recommended Actions",
    "1. **Pre-export checklist:**",
    " - Verify I/O subsystem can handle 16 parallel writes",
    " - Check PGA_AGGREGATE_TARGET setting",
    " - Monitor system load before starting",
    " - Ensure sufficient disk space (4x estimated dump size)",
    "2. **During export:**",
    " - Monitor `V$PQ_SESSTAT` for parallel process statistics",
    " - Watch for I/O wait events in `V$SESSION_WAIT`",
    " - Monitor system CPU and memory usage",
    " - Check alert log for parallel query errors",
    "3. **Post-export:**",
    " - Verify all 16+ dump files created",
    " - Check export log for any parallel process errors",
    " - Validate dump file sizes and counts",
    "",
    "## Storage Requirements for 16 Parallel",
    "",
    "| Component | Estimated Size | Notes |",
    "|-----------|----------------|-------|",
    paste0("| Dump Files | ", estimation$estimated_dump_size_gb, " GB | Compressed size, distributed across ", estimation$recommended_num_files, " files |"),
    "| Log Files | 2-5 GB | Larger logs due to 16 parallel processes |",
    paste0("| Temp Space | ", round(estimation$total_size_gb * 0.15, 1), " GB | Increased for parallel processing |"),
    paste0("| **Total Required** | **", round(estimation$estimated_dump_size_gb + estimation$total_size_gb * 0.15 + 5, 1), " GB** | Minimum requirement for 16 parallel |"),
    "",
    "## Schedule Recommendations for 16 Parallel",
    "",
    paste0("Based on estimated time of ", estimation$formatted_time, " with 16 parallel processes:"),
    "",
    "- **Maintenance Window Required:** Yes (critical)",
    "- **Recommended Start Time:** Friday 20:00 or Saturday 00:00 (off-peak)",
    paste0("- **Buffer Time:** Add 50% buffer (", round(estimation$estimated_minutes * 1.5, 1), " minutes) for parallel process coordination"),
    "- **Notification:** Inform stakeholders 72 hours in advance",
    "- **Fallback Plan:** Have script ready to reduce parallelism if issues occur",
    "",
    "## Performance Optimization Tips",
    "",
    "1. **I/O Optimization:**",
    " - Use multiple mount points for dump files",
    " - Consider ASM diskgroups for better I/O distribution",
    " - Ensure filesystem has sufficient inodes",
    "",
    "2. **Database Optimization:**",
    " ```sql",
    " -- Increase parallel process memory",
    " ALTER SYSTEM SET PGA_AGGREGATE_TARGET = 8G SCOPE=BOTH;",
    " ",
    " -- Set parallel degree policy",
    " ALTER SYSTEM SET PARALLEL_DEGREE_POLICY = AUTO;",
    " ",
    " -- Enable parallel DML",
    " ALTER SESSION ENABLE PARALLEL DML;",
    " ```",
    "",
    "3. **Monitoring Script:**",
    " ```bash",
    " # Monitor export progress",
    " while true; do",
    " echo '=== Export Progress ==='",
    " date",
    " du -sh /u01/app/oracle/admin/dpdump/*.dmp 2>/dev/null | tail -5",
    " ps -ef | grep -c 'ora_p.*EXP'",
    " sleep 60",
    " done",
    " ```",
    "",
    "---",
    "*Report generated automatically by Full Database Export Analysis Tool (16 Parallel)*",
    "*For questions, contact DBA Team*"
  ), temp_rmd)

  tryCatch({
    rmarkdown::render(temp_rmd, output_file = report_file, quiet = TRUE)
    message("HTML report saved to: ", report_file)
  }, error = function(e) {
    stop("Failed to render HTML report: ", conditionMessage(e))
  })
  return(report_file)
}
# -----------------------------------------------------------------------------------
# Function: analyze_full_database_export_16par
# Description: Orchestrates the whole analysis: gather DB metadata, enumerate
#              schemas, collect per-schema statistics, estimate export time,
#              generate the Data Pump command, and render the HTML report.
# Parameters:
#   - conn: DBI database connection object
#   - export_type: Type of export (default: "FULL")
# Returns: List with database_info, schemas, schema_stats, estimation,
#          datapump_command, report_path, and a compact summary list.
# -----------------------------------------------------------------------------------
analyze_full_database_export_16par <- function(conn, export_type = "FULL") {
  cat("\n")
  cat(paste0(rep("=", 70), collapse = ""), "\n")
  cat("FULL DATABASE EXPORT ANALYSIS WITH 16 PARALLEL PROCESSES\n")
  cat(paste0(rep("=", 70), collapse = ""), "\n\n")

  cat("1. Gathering database information (including CPU count)...\n")
  db_info <- get_database_info(conn)

  cat("2. Retrieving all database schemas...\n")
  all_schemas <- get_all_schemas_full(conn)
  cat(" Found ", nrow(all_schemas), " schemas in the database\n")

  # Message restored: the source had a stray line break inside "3. Analyzing...".
  cat("3. Analyzing schema statistics...\n")
  all_stats <- list()
  # seq_len() is safe when the schema query returns zero rows (1:nrow would
  # have iterated over c(1, 0)).
  for (i in seq_len(nrow(all_schemas))) {
    schema <- all_schemas$SCHEMA_NAME[i]
    if (i %% 10 == 0 || i == nrow(all_schemas)) {
      cat(" Processed ", i, "/", nrow(all_schemas), " schemas\n")
    }
    # Assign the tryCatch() result. The original assigned to all_stats[[schema]]
    # inside the error handler, which only modified the handler's local scope,
    # so failed schemas were silently dropped from the result.
    all_stats[[schema]] <- tryCatch({
      schema_stats <- get_schema_stats_simple(conn, schema)
      total_size <- 0
      total_objects <- 0
      if (!is.null(schema_stats) && nrow(schema_stats) > 0) {
        total_size <- sum(schema_stats$size_mb, na.rm = TRUE)
        total_objects <- sum(schema_stats$object_count, na.rm = TRUE)
      }
      list(
        schema_name = schema,
        total_size = total_size,
        total_objects = total_objects,
        account_status = all_schemas$ACCOUNT_STATUS[i],
        created = all_schemas$CREATED[i],
        details = schema_stats
      )
    }, error = function(e) {
      cat("Warning: Failed to analyze schema ", schema, ": ", conditionMessage(e), "\n")
      # Placeholder entry so the schema still appears in the report.
      list(
        schema_name = schema,
        total_size = 0,
        total_objects = 0,
        account_status = all_schemas$ACCOUNT_STATUS[i],
        created = all_schemas$CREATED[i],
        details = NULL
      )
    })
  }

  cat("4. Calculating export time estimation for 16 parallel processes...\n")
  estimation <- estimate_full_export_time_16par(all_stats, db_info, export_type)

  cat("5. Generating Data Pump command template for 16 parallel processes...\n")
  datapump_cmd <- generate_datapump_command_16par(estimation, db_info)

  cat("6. Generating HTML report for 16 parallel processes...\n")
  report_path <- generate_html_report_16par(all_stats, estimation, db_info)

  return(list(
    database_info = db_info,
    schemas = all_schemas,
    schema_stats = all_stats,
    estimation = estimation,
    datapump_command = datapump_cmd,
    report_path = report_path,
    summary = list(
      total_schemas = nrow(all_schemas),
      total_size_gb = estimation$total_size_gb,
      total_objects = estimation$total_objects,
      cpu_count = estimation$cpu_count,
      estimated_time = estimation$formatted_time,
      complexity = estimation$complexity,
      parallel_processes = 16
    )
  ))
}
# -----------------------------------------------------------------------------------
# MAIN EXECUTION BLOCK — opening banner for the analysis run
# -----------------------------------------------------------------------------------
cat("\n")
cat(strrep("=", 70), "\n")
cat("STARTING FULL DATABASE EXPORT ANALYSIS WITH 16 PARALLEL PROCESSES\n")
cat(strrep("=", 70), "\n\n")
# Run the full analysis; on any failure, close the connection and report.
# Fixes applied: sprintf format string restored (the source had a stray line
# break inside the template), sink() is now guaranteed to be restored via a
# finally handler, and seq_along() replaces 1:length().
tryCatch({
  results <- analyze_full_database_export_16par(conn, export_type = "FULL")
  dbDisconnect(conn)

  cat("\n")
  cat(paste0(rep("=", 70), collapse = ""), "\n")
  cat("ANALYSIS COMPLETE - 16 PARALLEL PROCESSES SUMMARY\n")
  cat(paste0(rep("=", 70), collapse = ""), "\n\n")
  cat("DATABASE SUMMARY:\n")
  cat(" Database Name:", if (!is.null(results$database_info$db_name)) results$database_info$db_name$NAME[1] else "Unknown", "\n")
  cat(" CPU Count:", results$estimation$cpu_count, "\n")
  cat(" Total Schemas:", results$summary$total_schemas, "\n")
  cat(" Total Size:", results$summary$total_size_gb, "GB\n")
  cat(" Total Objects:", format(results$summary$total_objects, big.mark = ","), "\n\n")
  cat("EXPORT ESTIMATION (16 PARALLEL PROCESSES):\n")
  cat(" Estimated Time:", results$estimation$formatted_time, "\n")
  cat(" Complexity Level:", results$estimation$complexity, "\n")
  cat(" Fixed Parallelism:", results$estimation$recommended_parallelism, "\n")
  cat(" Parallel Efficiency:", results$estimation$parallel_efficiency * 100, "%\n")
  cat(" Estimated Dump Size:", results$estimation$estimated_dump_size_gb, "GB\n")
  cat(" Recommended File Size:", results$estimation$recommended_filesize_gb, "GB\n")
  cat(" Number of Files:", results$estimation$recommended_num_files, "\n\n")
  cat("PERFORMANCE CONSIDERATIONS:\n")
  cat(" Minimum I/O Bandwidth Required: 500 MB/s\n")
  cat(" Recommended Start: Off-peak hours (Friday 20:00 or later)\n")
  cat(" Buffer Time: +50% (", round(results$estimation$estimated_minutes * 1.5, 1), " minutes total)\n\n")
  cat("REPORT GENERATED:\n")
  cat(" ", results$report_path, "\n\n")

  # Console top-5 list of largest schemas by segment size.
  if (length(results$schema_stats) > 0) {
    sizes <- sapply(results$schema_stats, function(x) x$total_size)
    sizes <- sizes[!is.na(sizes)]
    if (length(sizes) > 0) {
      top_5 <- head(sort(sizes, decreasing = TRUE), 5)
      cat("TOP 5 LARGEST SCHEMAS:\n")
      for (i in seq_along(top_5)) {
        schema_name <- names(top_5)[i]
        size_gb <- round(top_5[i] / 1024, 2)
        objects <- results$schema_stats[[schema_name]]$total_objects
        cat(sprintf(" %d. %-30s %8.2f GB %8d objects\n", i, schema_name, size_gb, objects))
      }
      cat("\n")
    }
  }

  # Persist the full result object for later inspection.
  rds_file <- paste0("full_db_export_16par_analysis_", format(Sys.time(), "%Y%m%d_%H%M"), ".rds")
  saveRDS(results, file = rds_file)
  cat("Analysis results saved to RDS file:", rds_file, "\n")

  # Plain-text summary; the finally handler guarantees sink() is restored
  # even if one of the cat() calls fails while output is redirected.
  txt_file <- paste0("full_db_export_16par_summary_", format(Sys.time(), "%Y%m%d_%H%M"), ".txt")
  sink(txt_file)
  tryCatch({
    cat("FULL DATABASE EXPORT ANALYSIS SUMMARY - 16 PARALLEL PROCESSES\n")
    cat("=============================================================\n\n")
    cat("Generated: ", format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\n")
    cat("Database: ", if (!is.null(results$database_info$db_name)) results$database_info$db_name$NAME[1] else "Unknown", "\n")
    cat("CPU Count: ", results$estimation$cpu_count, "\n")
    cat("Total Schemas: ", results$summary$total_schemas, "\n")
    cat("Total Size: ", results$summary$total_size_gb, " GB\n")
    cat("Total Objects: ", format(results$summary$total_objects, big.mark = ","), "\n")
    cat("Estimated Export Time (16 PAR): ", results$estimation$formatted_time, "\n")
    cat("Complexity: ", results$estimation$complexity, "\n")
    cat("Parallel Processes: ", results$estimation$recommended_parallelism, "\n")
    cat("Parallel Efficiency: ", results$estimation$parallel_efficiency * 100, "%\n")
    cat("Estimated Dump Size: ", results$estimation$estimated_dump_size_gb, " GB\n")
    cat("Recommended File Size: ", results$estimation$recommended_filesize_gb, " GB\n")
    cat("Number of Files: ", results$estimation$recommended_num_files, "\n\n")
    cat("Data Pump Command (16 Parallel):\n")
    cat(results$datapump_command, "\n\n")
    cat("PERFORMANCE REQUIREMENTS:\n")
    cat("- Minimum I/O Bandwidth: 500 MB/s\n")
    cat("- Minimum Memory: 8GB PGA_AGGREGATE_TARGET\n")
    cat("- Storage Space: ", round(results$estimation$estimated_dump_size_gb + results$estimation$total_size_gb * 0.15 + 5, 1), " GB\n")
    cat("- Recommended Schedule: Off-peak hours with 50% buffer\n")
  }, finally = sink())
  cat("Text summary saved to:", txt_file, "\n")
}, error = function(e) {
  # NOTE(review): if the failure happened after the successful-path
  # dbDisconnect(), this second disconnect may itself error — confirm
  # desired behaviour with the DBA team.
  dbDisconnect(conn)
  cat("Analysis failed: ", conditionMessage(e), "\n")
})
# -----------------------------------------------------------------------------------
# END MESSAGE
# -----------------------------------------------------------------------------------
cat("\n")
cat(paste0(rep("=", 70), collapse = ""), "\n")
# The original source line was truncated mid-call (unbalanced parentheses);
# completed with a trailing newline so the script parses and ends cleanly.
cat("ANALYSIS FINISHED AT: ", format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\n")
# -----------------------------------------------------------------------------------# Load required libraries# -----------------------------------------------------------------------------------library(DBI)library(odbc)library(dplyr)library(ggplot2)library(lubridate)library(knitr)library(rmarkdown)library(scales)
# -----------------------------------------------------------------------------------
# Database connection
# DSN and credentials are placeholders to be substituted at deployment time.
# -----------------------------------------------------------------------------------
conn <- dbConnect(
  odbc::odbc(),
  dsn = "[DB_NAME]",
  uid = "[USERNAME]",
  pwd = "[PASSWORD]"
)
# -----------------------------------------------------------------------------------
# Function: get_database_info
# Description: Retrieves database metadata: instance details, selected init
#              parameters, tablespace sizes, and CPU count.
# Parameters:
#   - conn: DBI database connection object
# Returns: Named list of data.frames. A query that fails is reported via
#          warning() and its entry is simply absent from the result.
# -----------------------------------------------------------------------------------
get_database_info <- function(conn) {
  queries <- list(
    db_name     = "SELECT name, dbid, created, log_mode FROM v$database",
    instance    = "SELECT instance_name, version, startup_time FROM v$instance",
    parameters  = "SELECT name, value FROM v$parameter WHERE name IN ('db_block_size', 'compatible')",
    tablespaces = "SELECT tablespace_name, sum(bytes)/1024/1024 as size_mb FROM dba_data_files GROUP BY tablespace_name",
    cpu_count   = "SELECT value FROM v$parameter WHERE name = 'cpu_count'"
  )
  info <- list()
  for (name in names(queries)) {
    # Assign the tryCatch() result directly. The original code assigned to
    # info[[name]] inside the error handler, which only created a binding in
    # the handler's local scope and silently did nothing.
    info[[name]] <- tryCatch(
      dbGetQuery(conn, queries[[name]]),
      error = function(e) {
        warning("Failed to get ", name, ": ", conditionMessage(e), call. = FALSE)
        NULL
      }
    )
  }
  info
}
# -----------------------------------------------------------------------------------
# Function: get_all_schemas_full
# Description: Retrieves every database schema (including system schemas),
#              excluding a couple of internal Oracle users.
# Parameters:
#   - conn: DBI database connection object
# Returns: Data frame of schema names, creation dates, and account statuses.
#          Stops with a descriptive error if the catalog query fails.
# -----------------------------------------------------------------------------------
get_all_schemas_full <- function(conn) {
  sql <- " SELECT u.username as schema_name, u.created, u.account_status FROM dba_users u WHERE u.username NOT IN ('XS$NULL', 'REMOTE_SCHEDULER_AGENT') ORDER BY username"
  tryCatch(
    dbGetQuery(conn, sql),
    error = function(e) {
      stop("Failed to retrieve schemas: ", conditionMessage(e))
    }
  )
}
# -----------------------------------------------------------------------------------
# Function: get_schema_stats_simple
# Description: Retrieves per-object-type counts and segment sizes for one schema.
#              Uses a single grouped query to avoid ORA-00937 errors.
# Parameters:
#   - conn: DBI database connection object
#   - schema_name: Name of the schema to analyze (case-insensitive)
# Returns: Data frame with lower-cased columns object_type / object_count /
#          size_mb, or NULL if the query fails (failure is reported via message()).
# -----------------------------------------------------------------------------------
get_schema_stats_simple <- function(conn, schema_name) {
  # The owner name is interpolated into the SQL text. Proper bind parameters
  # would be preferable, but at minimum escape embedded single quotes so a
  # hostile or odd schema name cannot break out of the literal.
  owner <- gsub("'", "''", toupper(schema_name), fixed = TRUE)
  query <- paste0(" SELECT object_type, COUNT(*) as object_count, NVL(SUM(bytes)/1024/1024, 0) as size_mb FROM dba_objects o LEFT JOIN dba_segments s ON (o.owner = s.owner AND o.object_name = s.segment_name) WHERE o.owner = '", owner, "' GROUP BY object_type ORDER BY size_mb DESC NULLS LAST")
  tryCatch({
    stats <- dbGetQuery(conn, query)
    # Normalise Oracle's upper-case column names for downstream code.
    names(stats) <- tolower(names(stats))
    stats
  }, error = function(e) {
    message("Failed to get statistics for schema ", schema_name, ": ", conditionMessage(e))
    NULL
  })
}
# -----------------------------------------------------------------------------------
# Function: format_duration
# Description: Converts a duration in minutes to "HH:MM:00" or
#              "D days, HH:MM:00". NA and non-positive inputs yield "00:00:00".
# Parameters:
#   - total_minutes: Time duration in minutes (may be fractional)
# Returns: Formatted time string
# -----------------------------------------------------------------------------------
format_duration <- function(total_minutes) {
  if (is.na(total_minutes) || total_minutes <= 0) {
    return("00:00:00")
  }
  # Round to whole minutes FIRST. The original rounded only the minutes
  # component, so e.g. 59.6 minutes rendered as "00:60:00".
  total <- round(total_minutes)
  days <- total %/% (24 * 60)
  hours <- (total %% (24 * 60)) %/% 60
  minutes <- total %% 60
  if (days > 0) {
    sprintf("%d days, %02d:%02d:00", days, hours, minutes)
  } else {
    sprintf("%02d:%02d:00", hours, minutes)
  }
}
# -----------------------------------------------------------------------------------
# Function: estimate_full_export_time_16par
# Description: Enhanced estimation model for full database export using 16 parallel
#              processes. Considers size, object count, schema complexity, and
#              parallel efficiency.
# Parameters:
#   - all_stats: Named list of per-schema stats (each with total_size, total_objects)
#   - db_info: Database metadata list from get_database_info()
#   - export_type: Type of export (default: "FULL")
# Returns: List containing detailed estimation results (times, sizes, complexity,
#          recommended file layout).
# -----------------------------------------------------------------------------------
estimate_full_export_time_16par <- function(all_stats, db_info, export_type = "FULL") {
  # Aggregate totals across all analyzed schemas.
  total_size_mb <- sum(sapply(all_stats, function(x) x$total_size), na.rm = TRUE)
  total_objects <- sum(sapply(all_stats, function(x) x$total_objects), na.rm = TRUE)
  total_schemas <- length(all_stats)

  # FIX: guard against zero-length / NA lookups. If the v$parameter result is
  # present but lacks the requested row, the original indexing produced
  # numeric(0), which silently propagated empty values into the report.
  block_size <- 8192
  if (!is.null(db_info$parameters)) {
    bs <- as.numeric(db_info$parameters$VALUE[db_info$parameters$NAME == 'db_block_size'])
    if (length(bs) == 1 && !is.na(bs)) {
      block_size <- bs
    }
  }
  cpu_count <- 16
  if (!is.null(db_info$cpu_count) && nrow(db_info$cpu_count) > 0) {
    cc <- as.numeric(db_info$cpu_count$VALUE[1])
    if (length(cc) == 1 && !is.na(cc)) {
      cpu_count <- cc
    }
  }

  # Heuristic model coefficients (minutes-based; tuned empirically).
  size_factor <- 0.4          # minutes per GB of data
  object_factor <- 0.005      # minutes per 100 objects (metadata overhead)
  schema_factor <- 0.3        # minutes per schema (per-schema setup cost)
  system_factor <- 20         # fixed overhead (job startup, master table, etc.)
  parallel_efficiency <- 0.75 # 16 workers never scale perfectly linearly

  size_time <- (total_size_mb / 1024) * size_factor
  object_time <- (total_objects / 100) * object_factor
  schema_time <- total_schemas * schema_factor
  base_time <- size_time + object_time + schema_time + system_factor
  parallel_time <- base_time / (16 * parallel_efficiency)

  # System schemas export more slowly; add 20% per system schema present.
  system_schemas <- c('SYS', 'SYSTEM', 'SYSAUX')
  system_schema_count <- sum(names(all_stats) %in% system_schemas)
  if (system_schema_count > 0) {
    parallel_time <- parallel_time * (1 + (system_schema_count * 0.2))
  }
  # Floor of 5 minutes: even a tiny database has fixed Data Pump overhead.
  parallel_time <- max(parallel_time, 5)

  # Complexity label for the report, keyed on the estimated minutes.
  if (parallel_time < 15) {
    complexity <- "VERY SIMPLE"
  } else if (parallel_time < 60) {
    complexity <- "SIMPLE"
  } else if (parallel_time < 240) {
    complexity <- "MODERATE"
  } else if (parallel_time < 480) {
    complexity <- "COMPLEX"
  } else {
    complexity <- "VERY COMPLEX"
  }

  # Assume ~30% compression savings for the dump size estimate.
  estimated_dump_size_gb <- total_size_mb / 1024 * 0.7
  if (estimated_dump_size_gb > 500) {
    recommended_filesize <- 20
  } else if (estimated_dump_size_gb > 100) {
    recommended_filesize <- 10
  } else {
    recommended_filesize <- 5
  }
  # At least 16 files so each parallel worker has a file to write.
  num_files_needed <- max(16, ceiling(estimated_dump_size_gb / recommended_filesize))

  return(list(
    estimated_minutes = round(parallel_time, 1),
    formatted_time = format_duration(parallel_time),
    total_size_gb = round(total_size_mb / 1024, 2),
    total_size_mb = round(total_size_mb, 1),
    total_objects = total_objects,
    total_schemas = total_schemas,
    block_size_kb = round(block_size / 1024, 1),
    cpu_count = cpu_count,
    export_type = export_type,
    complexity = complexity,
    recommended_parallelism = 16,
    parallel_efficiency = parallel_efficiency,
    estimated_dump_size_gb = round(estimated_dump_size_gb, 2),
    recommended_filesize_gb = recommended_filesize,
    recommended_num_files = num_files_needed
  ))
}
# -----------------------------------------------------------------------------------
# Function: generate_datapump_command_16par
# Description: Generates an Oracle Data Pump command template optimized for 16
#              parallel processes with appropriate parameters.
# Parameters:
#   - estimation: Export estimation results (uses recommended_filesize_gb,
#                 total_size_gb)
#   - db_info: Database metadata (uses db_name$NAME, falls back to "ORCL")
# Returns: Formatted multi-line Data Pump command string.
# -----------------------------------------------------------------------------------
generate_datapump_command_16par <- function(estimation, db_info) {
  target_db <- if (!is.null(db_info$db_name)) db_info$db_name$NAME[1] else "ORCL"
  run_stamp <- format(Sys.time(), "%Y%m%d_%H%M")

  # Assemble the command as a vector of continuation lines, then collapse.
  cmd_parts <- c(
    "expdp system/<password> \\\n",
    "    FULL=Y \\\n",
    "    DIRECTORY=DATA_PUMP_DIR \\\n",
    paste0("    DUMPFILE=", target_db, "_full_%U.dmp \\\n"),
    paste0("    FILESIZE=", estimation$recommended_filesize_gb, "G \\\n"),
    "    PARALLEL=16 \\\n",
    paste0("    LOGFILE=", target_db, "_full_", run_stamp, ".log \\\n"),
    "    COMPRESSION=ALL \\\n",
    "    EXCLUDE=STATISTICS \\\n",
    "    CLUSTER=N \\\n",
    paste0("    JOB_NAME=FULL_EXPORT_16PAR_", run_stamp)
  )
  command <- paste(cmd_parts, collapse = "")

  # Large databases get extra metrics and the faster BASIC compression algorithm.
  if (estimation$total_size_gb > 100) {
    command <- paste0(command,
                      " \\\n    METRICS=Y \\\n    COMPRESSION_ALGORITHM=BASIC")
  }
  command
}
# -----------------------------------------------------------------------------------
# Function: generate_html_report_16par
# Description: Generates a comprehensive HTML report with analysis results,
#              visualizations, and recommendations for 16 parallel processes.
# Parameters:
#   - all_stats: Named list of per-schema stats (total_size, total_objects,
#                account_status, created)
#   - estimation: Export estimation results
#   - db_info: Database metadata
#   - output_dir: Directory to save the report (default: "F:/DBA/Scripts/R/Reports")
# Returns: Path to the generated HTML report. Stops on render failure.
# NOTE(review): the Rmd chunks reference summary_data / top_20 / category_totals /
# estimation / db_info defined in this function; this relies on
# rmarkdown::render() evaluating chunks in the calling environment (its default
# envir = parent.frame()) — confirm if render options are ever changed.
# -----------------------------------------------------------------------------------
generate_html_report_16par <- function(all_stats, estimation, db_info,
                                       output_dir = "F:/DBA/Scripts/R/Reports") {
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }
  timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
  report_file <- file.path(output_dir,
                           paste0("Full_DB_Export_16PAR_Analysis_", timestamp, ".html"))
  temp_rmd <- tempfile(fileext = ".Rmd")
  # FIX: clean up the temporary Rmd even when rendering fails. The original
  # called unlink() after the tryCatch, but stop() inside the error handler
  # aborted the function before that line ran, leaking temp files.
  on.exit(unlink(temp_rmd), add = TRUE)

  # Flatten the per-schema stats into a data frame for tables and plots.
  summary_data <- data.frame(
    Schema = names(all_stats),
    Size_MB = sapply(all_stats, function(x) x$total_size),
    Objects = sapply(all_stats, function(x) x$total_objects),
    Status = sapply(all_stats, function(x) x$account_status),
    Created = sapply(all_stats, function(x) as.character(x$created)),
    stringsAsFactors = FALSE
  )
  summary_data <- summary_data[order(-summary_data$Size_MB), ]
  # Categorize: system schemas first, then by size thresholds (MB).
  summary_data$Category <- ifelse(
    summary_data$Schema %in% c('SYS', 'SYSTEM', 'SYSAUX'), 'SYSTEM',
    ifelse(summary_data$Size_MB > 1024, 'LARGE',
           ifelse(summary_data$Size_MB > 100, 'MEDIUM', 'SMALL'))
  )
  top_20 <- head(summary_data, 20)
  category_totals <- summary_data %>%
    group_by(Category) %>%
    summarise(
      Count = n(),
      Total_Size_MB = sum(Size_MB, na.rm = TRUE),
      Total_Objects = sum(Objects, na.rm = TRUE),
      .groups = 'drop'
    )

  # Write the report source as an R Markdown document.
  writeLines(c(
    "---",
    "title: 'Oracle Full Database Export Analysis Report (16 Parallel Processes)'",
    "author: 'Database Administration Team'",
    paste0("date: '", Sys.Date(), "'"),
    "output: html_document",
    "---",
    "",
    "```{r setup, include=FALSE}",
    "knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, fig.align = 'center')",
    "library(ggplot2)",
    "library(dplyr)",
    "library(scales)",
    "```",
    "",
    "# FULL DATABASE EXPORT ANALYSIS - 16 PARALLEL PROCESSES",
    "",
    paste0("**Database:** ", if (!is.null(db_info$db_name)) db_info$db_name$NAME[1] else "Unknown"),
    paste0("**Report Generated:** ", Sys.time()),
    paste0("**Analysis Date:** ", Sys.Date()),
    "",
    "## Executive Summary",
    "",
    "```{r exec-summary}",
    "cat('**Total Database Size:** ', estimation$total_size_gb, ' GB\\n\\n')",
    "cat('**Total Objects:** ', format(estimation$total_objects, big.mark = ','), '\\n\\n')",
    "cat('**Total Schemas:** ', estimation$total_schemas, '\\n\\n')",
    "cat('**CPU Count:** ', estimation$cpu_count, '\\n\\n')",
    "cat('**Estimated Export Time (16 PAR):** ', estimation$formatted_time, '\\n\\n')",
    "cat('**Export Complexity:** ', estimation$complexity, '\\n\\n')",
    "cat('**Estimated Dump Size:** ', estimation$estimated_dump_size_gb, ' GB (compressed)\\n\\n')",
    "cat('**Parallel Efficiency Factor:** ', estimation$parallel_efficiency * 100, '%\\n\\n')",
    "```",
    "",
    "## Recommended Export Strategy (16 Parallel Processes)",
    "",
    "```{r strategy}",
    "cat('**Parallel Processes:** 16 (FIXED)\\n\\n')",
    "cat('**File Size per Dumpfile:** ', estimation$recommended_filesize_gb, ' GB\\n\\n')",
    "cat('**Total Files Needed:** ', estimation$recommended_num_files, ' (minimum 16 for optimal parallelism)\\n\\n')",
    "cat('**Compression:** ALL (recommended)\\n\\n')",
    "cat('**Files per Parallel Process:** ', ceiling(estimation$recommended_num_files / 16), '\\n\\n')",
    "```",
    "",
    "## Database Information",
    "",
    "```{r db-info}",
    "if (!is.null(db_info$db_name)) {",
    "  cat('**Database Name:** ', db_info$db_name$NAME[1], '\\n\\n')",
    "  cat('**DBID:** ', db_info$db_name$DBID[1], '\\n\\n')",
    "  cat('**Created:** ', db_info$db_name$CREATED[1], '\\n\\n')",
    "  cat('**Log Mode:** ', db_info$db_name$LOG_MODE[1], '\\n\\n')",
    "}",
    "if (!is.null(db_info$instance)) {",
    "  cat('**Instance:** ', db_info$instance$INSTANCE_NAME[1], '\\n\\n')",
    "  cat('**Version:** ', db_info$instance$VERSION[1], '\\n\\n')",
    "}",
    "if (!is.null(db_info$cpu_count)) {",
    "  cat('**CPU Count:** ', db_info$cpu_count$VALUE[1], '\\n\\n')",
    "}",
    "```",
    "",
    "## Performance Considerations for 16 Parallel Processes",
    "",
    "### Advantages:",
    "1. **Maximum Throughput:** Utilizes maximum available I/O bandwidth",
    "2. **Reduced Export Time:** Significantly faster than sequential export",
    "3. **Load Distribution:** Even distribution across system resources",
    "",
    "### Requirements:",
    "1. **Sufficient I/O Bandwidth:** Minimum 500 MB/s sustained write speed",
    "2. **Memory:** Additional PGA memory for parallel processes",
    "3. **CPU:** Ensure no CPU overcommitment on production system",
    "4. **Storage:** Fast disk subsystem (SSD recommended)",
    "",
    "### Monitoring Commands During Export:",
    "```sql",
    "-- Monitor parallel processes",
    "SELECT * FROM v$pq_sesstat;",
    "",
    "-- Monitor Data Pump job",
    "SELECT * FROM dba_datapump_jobs;",
    "",
    "-- Monitor I/O",
    "SELECT * FROM v$iostat_file;",
    "```",
    "",
    "## Schema Analysis",
    "",
    "### Schema Categories",
    "```{r category-table}",
    "knitr::kable(category_totals, caption = 'Schema Categories Summary')",
    "```",
    "",
    "### Top 20 Largest Schemas",
    "```{r top20-table}",
    "top_20_display <- top_20 %>%",
    "  select(Schema, Size_MB, Objects, Category, Status)",
    "knitr::kable(top_20_display, caption = 'Top 20 Largest Schemas')",
    "```",
    "",
    "### Schema Size Distribution",
    "```{r size-distribution, fig.height=6, fig.width=10}",
    "ggplot(summary_data, aes(x = Size_MB, fill = Category)) +",
    "  geom_histogram(bins = 30, alpha = 0.7) +",
    "  scale_x_log10(labels = scales::comma) +",
    "  labs(title = 'Schema Size Distribution (log scale)',",
    "       x = 'Size (MB)',",
    "       y = 'Number of Schemas') +",
    "  theme_minimal() +",
    "  theme(legend.position = 'bottom')",
    "```",
    "",
    "### Size vs Objects Scatter",
    "```{r size-vs-objects, fig.height=6, fig.width=10}",
    "ggplot(summary_data, aes(x = Size_MB, y = Objects, color = Category, size = Objects)) +",
    "  geom_point(alpha = 0.6) +",
    "  scale_x_log10(labels = scales::comma) +",
    "  scale_y_log10(labels = scales::comma) +",
    "  labs(title = 'Schema Size vs Number of Objects',",
    "       x = 'Size (MB)',",
    "       y = 'Number of Objects') +",
    "  theme_minimal() +",
    "  theme(legend.position = 'bottom')",
    "```",
    "",
    "## Data Pump Command Template (16 Parallel)",
    "",
    "```{r datapump-cmd}",
    "datapump_cmd <- generate_datapump_command_16par(estimation, db_info)",
    "cat('```bash')",
    "cat('\\n')",
    "cat(datapump_cmd)",
    "cat('\\n```')",
    "```",
    "",
    "### Command Parameters Explanation for 16 Parallel",
    "",
    "1. **FULL=Y** - Export entire database",
    "2. **DIRECTORY=DATA_PUMP_DIR** - Oracle directory object for dump files",
    "3. **DUMPFILE** - Naming pattern with %U for parallel file creation (will create 16+ files)",
    "4. **FILESIZE** - Size limit per dump file (adjusted based on total size)",
    "5. **PARALLEL=16** - Fixed 16 parallel processes",
    "6. **COMPRESSION=ALL** - Compress metadata and data",
    "7. **EXCLUDE=STATISTICS** - Exclude statistics (can be regenerated)",
    "8. **CLUSTER=N** - Disable RAC cluster export (if single instance)",
    "9. **JOB_NAME** - Unique job identifier with 16PAR prefix",
    "",
    "## Risk Assessment & Recommendations for 16 Parallel",
    "",
    "### High Risk Factors",
    "- **I/O Saturation:** 16 parallel processes can saturate storage I/O",
    "- **Memory Pressure:** Increased PGA memory usage",
    "- **CPU Contention:** May affect other applications if not scheduled properly",
    "",
    "### Recommended Actions",
    "1. **Pre-export checklist:**",
    "   - Verify I/O subsystem can handle 16 parallel writes",
    "   - Check PGA_AGGREGATE_TARGET setting",
    "   - Monitor system load before starting",
    "   - Ensure sufficient disk space (4x estimated dump size)",
    "2. **During export:**",
    "   - Monitor `V$PQ_SESSTAT` for parallel process statistics",
    "   - Watch for I/O wait events in `V$SESSION_WAIT`",
    "   - Monitor system CPU and memory usage",
    "   - Check alert log for parallel query errors",
    "3. **Post-export:**",
    "   - Verify all 16+ dump files created",
    "   - Check export log for any parallel process errors",
    "   - Validate dump file sizes and counts",
    "",
    "## Storage Requirements for 16 Parallel",
    "",
    "| Component | Estimated Size | Notes |",
    "|-----------|----------------|-------|",
    paste0("| Dump Files | ", estimation$estimated_dump_size_gb,
           " GB | Compressed size, distributed across ",
           estimation$recommended_num_files, " files |"),
    "| Log Files | 2-5 GB | Larger logs due to 16 parallel processes |",
    paste0("| Temp Space | ", round(estimation$total_size_gb * 0.15, 1),
           " GB | Increased for parallel processing |"),
    paste0("| **Total Required** | **",
           round(estimation$estimated_dump_size_gb + estimation$total_size_gb * 0.15 + 5, 1),
           " GB** | Minimum requirement for 16 parallel |"),
    "",
    "## Schedule Recommendations for 16 Parallel",
    "",
    paste0("Based on estimated time of ", estimation$formatted_time,
           " with 16 parallel processes:"),
    "",
    "- **Maintenance Window Required:** Yes (critical)",
    "- **Recommended Start Time:** Friday 20:00 or Saturday 00:00 (off-peak)",
    paste0("- **Buffer Time:** Add 50% buffer (",
           round(estimation$estimated_minutes * 1.5, 1),
           " minutes) for parallel process coordination"),
    "- **Notification:** Inform stakeholders 72 hours in advance",
    "- **Fallback Plan:** Have script ready to reduce parallelism if issues occur",
    "",
    "## Performance Optimization Tips",
    "",
    "1. **I/O Optimization:**",
    "   - Use multiple mount points for dump files",
    "   - Consider ASM diskgroups for better I/O distribution",
    "   - Ensure filesystem has sufficient inodes",
    "",
    "2. **Database Optimization:**",
    "   ```sql",
    "   -- Increase parallel process memory",
    "   ALTER SYSTEM SET PGA_AGGREGATE_TARGET = 8G SCOPE=BOTH;",
    "   ",
    "   -- Set parallel degree policy",
    "   ALTER SYSTEM SET PARALLEL_DEGREE_POLICY = AUTO;",
    "   ",
    "   -- Enable parallel DML",
    "   ALTER SESSION ENABLE PARALLEL DML;",
    "   ```",
    "",
    "3. **Monitoring Script:**",
    "   ```bash",
    "   # Monitor export progress",
    "   while true; do",
    "   echo '=== Export Progress ==='",
    "   date",
    "   du -sh /u01/app/oracle/admin/dpdump/*.dmp 2>/dev/null | tail -5",
    "   ps -ef | grep -c 'ora_p.*EXP'",
    "   sleep 60",
    "   done",
    "   ```",
    "",
    "---",
    "*Report generated automatically by Full Database Export Analysis Tool (16 Parallel)*",
    "*For questions, contact DBA Team*"
  ), temp_rmd)

  tryCatch({
    rmarkdown::render(temp_rmd, output_file = report_file, quiet = TRUE)
    message("HTML report saved to: ", report_file)
  }, error = function(e) {
    stop("Failed to render HTML report: ", conditionMessage(e))
  })
  # temp_rmd is removed by on.exit() above, on both success and failure.
  return(report_file)
}
# -----------------------------------------------------------------------------------
# Function: analyze_full_database_export_16par
# Description: Main orchestration function that executes the complete analysis
#              workflow for full database export with 16 parallel processes.
# Parameters:
#   - conn: Database connection object
#   - export_type: Type of export (default: "FULL")
# Returns: Comprehensive results list including statistics, estimations, commands,
#          and report path.
# -----------------------------------------------------------------------------------
analyze_full_database_export_16par <- function(conn, export_type = "FULL") {
  cat("\n")
  cat(paste0(rep("=", 70), collapse = ""), "\n")
  cat("FULL DATABASE EXPORT ANALYSIS WITH 16 PARALLEL PROCESSES\n")
  cat(paste0(rep("=", 70), collapse = ""), "\n\n")

  cat("1. Gathering database information (including CPU count)...\n")
  db_info <- get_database_info(conn)

  cat("2. Retrieving all database schemas...\n")
  all_schemas <- get_all_schemas_full(conn)
  cat(" Found ", nrow(all_schemas), " schemas in the database\n")

  cat("3. Analyzing schema statistics...\n")
  all_stats <- list()
  progress <- 0
  # FIX: seq_len() instead of 1:nrow() — behaves correctly if zero schemas
  # are returned (1:0 would iterate over c(1, 0)).
  for (i in seq_len(nrow(all_schemas))) {
    schema <- all_schemas$SCHEMA_NAME[i]
    progress <- progress + 1
    if (progress %% 10 == 0 || progress == nrow(all_schemas)) {
      cat(" Processed ", progress, "/", nrow(all_schemas), " schemas\n")
    }
    # FIX: the original assigned the fallback entry with `<-` INSIDE the
    # tryCatch error handler, which created a handler-local copy of
    # `all_stats` and silently discarded the entry. Returning the per-schema
    # list from tryCatch and assigning outside avoids the scoping bug.
    all_stats[[schema]] <- tryCatch({
      schema_stats <- get_schema_stats_simple(conn, schema)
      total_size <- 0
      total_objects <- 0
      if (!is.null(schema_stats) && nrow(schema_stats) > 0) {
        total_size <- sum(schema_stats$size_mb, na.rm = TRUE)
        total_objects <- sum(schema_stats$object_count, na.rm = TRUE)
      }
      list(
        schema_name = schema,
        total_size = total_size,
        total_objects = total_objects,
        account_status = all_schemas$ACCOUNT_STATUS[i],
        created = all_schemas$CREATED[i],
        details = schema_stats
      )
    }, error = function(e) {
      cat("Warning: Failed to analyze schema ", schema, ": ",
          conditionMessage(e), "\n")
      # Zeroed fallback entry so the schema still appears in the report.
      list(
        schema_name = schema,
        total_size = 0,
        total_objects = 0,
        account_status = all_schemas$ACCOUNT_STATUS[i],
        created = all_schemas$CREATED[i],
        details = NULL
      )
    })
  }

  cat("4. Calculating export time estimation for 16 parallel processes...\n")
  estimation <- estimate_full_export_time_16par(all_stats, db_info, export_type)

  cat("5. Generating Data Pump command template for 16 parallel processes...\n")
  datapump_cmd <- generate_datapump_command_16par(estimation, db_info)

  cat("6. Generating HTML report for 16 parallel processes...\n")
  report_path <- generate_html_report_16par(all_stats, estimation, db_info)

  return(list(
    database_info = db_info,
    schemas = all_schemas,
    schema_stats = all_stats,
    estimation = estimation,
    datapump_command = datapump_cmd,
    report_path = report_path,
    summary = list(
      total_schemas = nrow(all_schemas),
      total_size_gb = estimation$total_size_gb,
      total_objects = estimation$total_objects,
      cpu_count = estimation$cpu_count,
      estimated_time = estimation$formatted_time,
      complexity = estimation$complexity,
      parallel_processes = 16
    )
  ))
}
# -----------------------------------------------------------------------------------
# MAIN EXECUTION BLOCK
# -----------------------------------------------------------------------------------
# Print the start banner (identical output; strrep() replaces paste0(rep(...))).
cat("\n")
cat(strrep("=", 70), "\n")
cat("STARTING FULL DATABASE EXPORT ANALYSIS WITH 16 PARALLEL PROCESSES\n")
cat(strrep("=", 70), "\n\n")
# Run the full analysis, print a console summary, and persist results.
# On any error: disconnect and report the failure instead of aborting the script.
tryCatch({
  results <- analyze_full_database_export_16par(conn, export_type = "FULL")
  # Analysis done; release the database connection before reporting.
  dbDisconnect(conn)
  cat("\n")
  cat(paste0(rep("=", 70), collapse = ""), "\n")
  cat("ANALYSIS COMPLETE - 16 PARALLEL PROCESSES SUMMARY\n")
  cat(paste0(rep("=", 70), collapse = ""), "\n\n")
  # --- Console summary: database-level figures ---
  cat("DATABASE SUMMARY:\n")
  cat(" Database Name:", if (!is.null(results$database_info$db_name)) results$database_info$db_name$NAME[1] else "Unknown", "\n")
  cat(" CPU Count:", results$estimation$cpu_count, "\n")
  cat(" Total Schemas:", results$summary$total_schemas, "\n")
  cat(" Total Size:", results$summary$total_size_gb, "GB\n")
  cat(" Total Objects:", format(results$summary$total_objects, big.mark = ","), "\n\n")
  # --- Console summary: export-time estimation ---
  cat("EXPORT ESTIMATION (16 PARALLEL PROCESSES):\n")
  cat(" Estimated Time:", results$estimation$formatted_time, "\n")
  cat(" Complexity Level:", results$estimation$complexity, "\n")
  cat(" Fixed Parallelism:", results$estimation$recommended_parallelism, "\n")
  cat(" Parallel Efficiency:", results$estimation$parallel_efficiency * 100, "%\n")
  cat(" Estimated Dump Size:", results$estimation$estimated_dump_size_gb, "GB\n")
  cat(" Recommended File Size:", results$estimation$recommended_filesize_gb, "GB\n")
  cat(" Number of Files:", results$estimation$recommended_num_files, "\n\n")
  cat("PERFORMANCE CONSIDERATIONS:\n")
  cat(" Minimum I/O Bandwidth Required: 500 MB/s\n")
  cat(" Recommended Start: Off-peak hours (Friday 20:00 or later)\n")
  cat(" Buffer Time: +50% (", round(results$estimation$estimated_minutes * 1.5, 1), " minutes total)\n\n")
  cat("REPORT GENERATED:\n")
  cat(" ", results$report_path, "\n\n")
  # --- Top-5 schemas by size (skipped if no stats were collected) ---
  if (length(results$schema_stats) > 0) {
    sizes <- sapply(results$schema_stats, function(x) x$total_size)
    sizes <- sizes[!is.na(sizes)]
    if (length(sizes) > 0) {
      top_5 <- head(sort(sizes, decreasing = TRUE), 5)
      cat("TOP 5 LARGEST SCHEMAS:\n")
      # top_5 is guaranteed non-empty here, so 1:length() is safe.
      for (i in 1:length(top_5)) {
        schema_name <- names(top_5)[i]
        size_gb <- round(top_5[i] / 1024, 2)
        objects <- results$schema_stats[[schema_name]]$total_objects
        cat(sprintf(" %d. %-30s %8.2f GB %8d objects\n", i, schema_name, size_gb, objects))
      }
      cat("\n")
    }
  }
  # Persist the full results object for later reloading with readRDS().
  rds_file <- paste0("full_db_export_16par_analysis_", format(Sys.time(), "%Y%m%d_%H%M"), ".rds")
  saveRDS(results, file = rds_file)
  cat("Analysis results saved to RDS file:", rds_file, "\n")
  # Write a plain-text summary; sink() redirects all cat() output to the file
  # until the matching sink() below restores the console.
  txt_file <- paste0("full_db_export_16par_summary_", format(Sys.time(), "%Y%m%d_%H%M"), ".txt")
  sink(txt_file)
  cat("FULL DATABASE EXPORT ANALYSIS SUMMARY - 16 PARALLEL PROCESSES\n")
  cat("=============================================================\n\n")
  cat("Generated: ", format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\n")
  cat("Database: ", if (!is.null(results$database_info$db_name)) results$database_info$db_name$NAME[1] else "Unknown", "\n")
  cat("CPU Count: ", results$estimation$cpu_count, "\n")
  cat("Total Schemas: ", results$summary$total_schemas, "\n")
  cat("Total Size: ", results$summary$total_size_gb, " GB\n")
  cat("Total Objects: ", format(results$summary$total_objects, big.mark = ","), "\n")
  cat("Estimated Export Time (16 PAR): ", results$estimation$formatted_time, "\n")
  cat("Complexity: ", results$estimation$complexity, "\n")
  cat("Parallel Processes: ", results$estimation$recommended_parallelism, "\n")
  cat("Parallel Efficiency: ", results$estimation$parallel_efficiency * 100, "%\n")
  cat("Estimated Dump Size: ", results$estimation$estimated_dump_size_gb, " GB\n")
  cat("Recommended File Size: ", results$estimation$recommended_filesize_gb, " GB\n")
  cat("Number of Files: ", results$estimation$recommended_num_files, "\n\n")
  cat("Data Pump Command (16 Parallel):\n")
  cat(results$datapump_command, "\n\n")
  cat("PERFORMANCE REQUIREMENTS:\n")
  cat("- Minimum I/O Bandwidth: 500 MB/s\n")
  cat("- Minimum Memory: 8GB PGA_AGGREGATE_TARGET\n")
  cat("- Storage Space: ", round(results$estimation$estimated_dump_size_gb + results$estimation$total_size_gb * 0.15 + 5, 1), " GB\n")
  cat("- Recommended Schedule: Off-peak hours with 50% buffer\n")
  sink()
  cat("Text summary saved to:", txt_file, "\n")
}, error = function(e) {
  # NOTE(review): if the failure occurred AFTER the dbDisconnect() above,
  # this second disconnect may itself error — confirm desired behavior.
  dbDisconnect(conn)
  cat("Analysis failed: ", conditionMessage(e), "\n")
})
# -----------------------------------------------------------------------------------# END MESSAGE# -----------------------------------------------------------------------------------cat("\n")cat(paste0(rep("=", 70), collapse = ""), "\n")cat("ANALYSIS FINISHED AT: ", format(Sys.time(), "%Y-%m-%d %H:%M:%S"),