using Aspose.Gis;
using Aspose.Gis.Formats.Gml;
using GisConverter.Lib.Models;
using GisConverter.Lib.Logging;
using SharpCompress.Archives;
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
namespace GisConverter.Lib.Converters
{
/// <summary>
/// Utility helpers used by individual converter implementations.
/// </summary>
/// <remarks>
/// <para>
/// Purpose
/// - ConverterUtils centralizes reusable, testable helper logic required by converters and the
/// orchestration layer (for example UniversalGisConverter). Responsibilities include:
/// - Validation of input/output/temp paths.
/// - Creation and writability probes of directories.
/// - Archive inspection and safe extraction (with zip-slip protection).
/// - Mapping canonical conversion option keys to Aspose drivers.
/// - Construction of driver-specific <see cref="ConversionOptions"/>, deterministic output filenames,
/// and lightweight cleanup helpers.
/// </para>
/// <para>
/// Design principles
/// - Methods are intentionally conservative and best-effort: validation helpers return
/// <see cref="ConversionResult"/> objects for expected error conditions rather than throwing. Callers
/// should treat non-null results as fatal to the current conversion attempt.
/// - Helpers prefer to log diagnostic details (via the Log façade) and return concise messages suitable
/// for display in UIs or assertions in tests.
/// - Keep the surface stateless and easy to unit test. File system side effects are limited to methods that
/// explicitly create, probe or delete directories/files.
/// </para>
/// <para>
/// Thread-safety
/// - Most methods are stateless and safe to call concurrently provided the caller supplies distinct paths
/// for concurrent runs. The helper uses the shared Log facility for diagnostics; ensure the configured
/// logger is thread-safe when running tests in parallel.
/// </para>
/// <para>
/// Error handling policy
/// - Expected, user-facing errors (invalid args, missing files, permission problems, archive extraction failures)
/// are surfaced as <see cref="ConversionResult"/> values, or as null to indicate success,
/// per the method contract. Unexpected exceptions are caught, logged, and translated to failure results
/// where reasonable.
/// - Helpers avoid throwing for common failures to simplify caller code and to allow callers (or tests) to assert
/// on stable error tokens instead of handling exceptions.
/// </para>
/// <para>
/// Logging guidance
/// - Use log tokens that are stable and compact (for example "PrepareSourceFile: ...", "BuildOutputPath: ...")
/// so tests and maintainers can reliably assert on presence/absence of important lifecycle events.
/// - Helpers emit appropriate levels:
/// - Debug for internal traces and successful checkpoints.
/// - Info for significant lifecycle events (computed output path, extracted source path).
/// - Warn for recoverable but noteworthy conditions (fallbacks).
/// - Error for validation or extraction failures.
/// </para>
/// <para>
/// Archive handling &amp; safety
/// - Archive inspection uses SharpCompress for broad format support with a ZipArchive fallback.
/// - Extraction implements a zip-slip guard by comparing normalized absolute paths against the intended
/// extraction root. Entries that would extract outside the temp folder are skipped and reported.
/// - After extraction the helpers verify required companion files for multi-file formats (for example shapefile
/// components: .shp/.shx/.dbf). If required components are missing the extraction is treated as failed.
/// </para>
/// <para>
/// Detection &amp; driver mapping
/// - <see cref="IsArchiveFile"/> is a heuristic based on common archive suffixes. Callers must still handle
/// failures opening archives.
/// - <see cref="ConversionOptionToDriver"/> maps canonical option keys to Aspose drivers. Extend this mapping
/// when adding new supported formats; tests should assert the factory and mapping behavior.
/// </para>
/// <para>
/// Testing guidance
/// - Keep unit tests small and deterministic:
/// - Inject small temporary folders (GUID-based roots) and remove them in test teardown.
/// - Use <see cref="TryListArchiveEntries"/> and <see cref="ArchiveContainsAllRequiredExtensions"/> with
/// small test archives created via ZipArchive or ZipFile.CreateFromDirectory.
/// - For error assertions prefer case-insensitive substring checks on returned <see cref="ConversionResult"/>
/// messages and on captured log tokens (via TestLogger and LogAssert helpers).
/// - When adding or changing detection heuristics, add unit tests exercising the boundary conditions (BOMs,
/// truncated JSON, minified JSON, NDJSON lines, archive voting).
/// </para>
/// <para>
/// Performance considerations
/// - Helpers perform a streaming copy when extracting large entries; where possible prefer bounded reading rather
/// than loading entire files into memory.
/// - The writability probe creates and deletes a tiny file; keep that probe cheap to avoid costs in high-volume scenarios.
/// </para>
/// <para>
/// Security considerations
/// - Treat untrusted archives conservatively: validate entry target paths and do not write files outside the intended
/// extraction root. Avoid executing or opening unknown file types beyond what is needed for detection.
/// - Sanitize names used to create files and limit resource usage to avoid DoS risks on shared CI agents.
/// </para>
/// <para>
/// Extensibility &amp; maintenance
/// - Add new driver mappings in <see cref="ConversionOptionToDriver"/> and update tests and documentation.
/// - If new formats require additional required companion files, update <see cref="GetRequiredFileExtensions"/>.
/// - Keep messages concise and stable; if wording must change, update tests to assert on stable tokens rather
/// than full messages.
/// </para>
/// <para>
/// Example patterns
/// - Use <see cref="ValidateInputs"/> before calling <see cref="PreparePaths"/> and <see cref="PrepareSourceFile"/>.
/// - After extraction, verify that every component listed by <see cref="GetRequiredFileExtensions"/> exists.
/// </para>
/// </remarks>
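/// <example>
/// A minimal orchestration sketch (caller code and paths are hypothetical placeholders):
/// <code>
/// var error = ConverterUtils.ValidateInputs(inputPath, outputDir, tempDir);
/// if (error == null)
///     error = ConverterUtils.PreparePaths(outputDir, tempDir);
/// if (error != null)
///     return error; // fatal for this conversion attempt
/// // ... run the conversion, then clean up intermediates:
/// ConverterUtils.TryCleanupTempFolder(tempDir);
/// </code>
/// </example>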
public static class ConverterUtils
{
/// <summary>
/// Validate that the input, output and temporary paths are sensible for a conversion run.
/// </summary>
/// <param name="gisInputFilePath">Path to the GIS input. May be a file or an archive.</param>
/// <param name="outputFilePath">Target output folder path where produced files will be written.</param>
/// <param name="tempFolderPath">Temporary working folder used for archive extraction and intermediates.</param>
/// <returns>
/// Returns a <see cref="ConversionResult"/> describing the error when validation fails; returns
/// null when validation succeeds. Callers should treat non-null returns as fatal for the
/// current conversion attempt and surface the contained message to the caller.
/// </returns>
/// <remarks>
/// <para>
/// The method performs lightweight checks:
/// - Ensures non-empty strings for required paths.
/// - Verifies that the input path exists as a file or directory.
/// - Ensures the output and temp paths are not files (i.e., they must be directories or not yet exist).
/// </para>
/// <para>
/// This method intentionally does not attempt to create the output or temp folders; callers should
/// invoke <see cref="PreparePaths"/> when they intend to create and verify
/// write permissions for those folders.
/// </para>
/// <para>
/// The method logs at Error level for failing conditions and at Debug on success.
/// </para>
/// </remarks>
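/// <example>
/// A usage sketch; <c>inputPath</c>, <c>outputDir</c> and <c>tempDir</c> are hypothetical:
/// <code>
/// var result = ConverterUtils.ValidateInputs(inputPath, outputDir, tempDir);
/// if (result != null)
/// {
///     // Non-null indicates a fatal validation error; surface result.Message to the user.
///     return result;
/// }
/// </code>
/// </example>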
public static ConversionResult ValidateInputs(string gisInputFilePath, string outputFilePath, string tempFolderPath)
{
// Validate input path
if (string.IsNullOrWhiteSpace(gisInputFilePath))
{
Log.Error("ValidateInputs: Input path is null or empty.");
return ConversionResult.Failure("Input path is required.");
}
if (!File.Exists(gisInputFilePath) && !Directory.Exists(gisInputFilePath))
{
Log.Error($"ValidateInputs: Input path does not exist: '{gisInputFilePath}'.");
return ConversionResult.Failure($"Input path does not exist: '{gisInputFilePath}'.");
}
// Validate output path
if (string.IsNullOrWhiteSpace(outputFilePath))
{
Log.Error("ValidateInputs: Output path is null or empty.");
return ConversionResult.Failure("Output path is required.");
}
// If an output path exists and is a file -> error
if (File.Exists(outputFilePath) && !Directory.Exists(outputFilePath))
{
Log.Error($"ValidateInputs: Output path '{outputFilePath}' is a file, not a directory.");
return ConversionResult.Failure($"Cannot use output path '{outputFilePath}' because is a file, not a directory.");
}
// Validate temp folder path
if (string.IsNullOrWhiteSpace(tempFolderPath))
{
Log.Error("ValidateInputs: Temporary folder path is null or empty.");
return ConversionResult.Failure("Temporary folder path is required.");
}
if (File.Exists(tempFolderPath) && !Directory.Exists(tempFolderPath))
{
Log.Error($"ValidateInputs: Temp path '{tempFolderPath}' is a file, not a directory.");
return ConversionResult.Failure($"Cannot use temp path '{tempFolderPath}' because is a file, not a directory.");
}
Log.Debug("ValidateInputs: Path validations succeeded.");
return null;
}
/// <summary>
/// Ensure output and temp directories exist and are writable.
/// </summary>
/// <param name="outputFilePath">Desired output folder path.</param>
/// <param name="tempFolderPath">Desired temporary folder path.</param>
/// <returns>Null on success or a <see cref="ConversionResult"/> describing the failure.</returns>
/// <remarks>
/// This helper attempts to create each directory if it does not already exist and performs a
/// lightweight writability probe by creating and deleting a small temporary file. Any failure
/// during creation or the writability probe is returned as a failure result so callers can abort
/// early and present a meaningful message to the user.
/// </remarks>
public static ConversionResult PreparePaths(string outputFilePath, string tempFolderPath)
{
Log.Debug($"PreparePaths: ensuring output folder exists and is writable: '{outputFilePath}'");
// Prepare output path
var outPrep = TryCreateAndVerifyDirectory(outputFilePath);
if (outPrep != null)
{
Log.Warn($"PreparePaths: failed to prepare output folder '{outputFilePath}': {outPrep.Message}");
return outPrep;
}
// Prepare temp folder path
var tempPrep = TryCreateAndVerifyDirectory(tempFolderPath);
if (tempPrep != null) return tempPrep;
Log.Debug("PreparePaths: Preparation paths succeeded.");
return null;
}
/// <summary>
/// Heuristic: returns true when the path looks like a common archive file by extension.
/// </summary>
/// <param name="path">Path or file name to inspect.</param>
/// <returns>True when the path ends with a recognized archive extension (case-insensitive).</returns>
/// <remarks>
/// The method uses a combined list of composite suffixes (e.g. ".tar.gz") and common single
/// extensions (e.g. ".zip", ".7z"). This is only a heuristic: files without an archive
/// extension may still be archives, and archives with unusual extensions will not be detected.
/// Callers should handle failures from archive opening operations regardless of this method's
/// result.
/// </remarks>
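/// <example>
/// The check is extension-based only, so a positive result still requires callers to handle
/// open failures (file names below are illustrative):
/// <code>
/// ConverterUtils.IsArchiveFile("parcels.tar.gz"); // true (composite suffix)
/// ConverterUtils.IsArchiveFile("parcels.kmz");    // true
/// ConverterUtils.IsArchiveFile("parcels.shp");    // false
/// </code>
/// </example>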
public static bool IsArchiveFile(string path)
{
if (string.IsNullOrWhiteSpace(path)) return false;
var lower = path.Trim().ToLowerInvariant();
// Composite extensions - check first
var compositeSuffixes = new[] { ".tar.gz", ".tar.bz2", ".tar.xz" };
foreach (var suf in compositeSuffixes)
{
if (lower.EndsWith(suf, StringComparison.OrdinalIgnoreCase)) return true;
}
var extensions = new[]
{
".zip", ".kmz", ".tar", ".tgz", ".gz", ".bz2", ".xz", ".7z", ".rar"
};
var ext = Path.GetExtension(lower);
if (string.IsNullOrEmpty(ext)) return false;
return extensions.Contains(ext, StringComparer.OrdinalIgnoreCase);
}
/// <summary>
/// Read entry names from an archive without extracting.
/// Returns null when the archive cannot be inspected.
/// </summary>
/// <param name="archivePath">Path to the archive file.</param>
/// <returns>A list of entry names (paths inside the archive) or null when the archive could not be read.</returns>
/// <remarks>
/// The method prefers SharpCompress for broad format support and falls back to
/// <see cref="ZipArchive"/> when SharpCompress fails. Any failures are logged. This method
/// intentionally swallows exceptions and returns null to indicate the caller should treat
/// the archive as unreadable.
/// </remarks>
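/// <example>
/// A sketch of inspecting entries before committing to extraction (archivePath is hypothetical):
/// <code>
/// var entries = ConverterUtils.TryListArchiveEntries(archivePath);
/// if (entries == null)
///     return ConversionResult.Failure($"Could not inspect archive '{archivePath}'.");
/// bool hasShp = entries.Any(e => e.EndsWith(".shp", StringComparison.OrdinalIgnoreCase));
/// </code>
/// </example>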
public static IEnumerable<string> TryListArchiveEntries(string archivePath)
{
if (string.IsNullOrWhiteSpace(archivePath))
{
Log.Warn("TryListArchiveEntries: archivePath is null or empty.");
return null;
}
if (!File.Exists(archivePath))
{
Log.Warn($"TryListArchiveEntries: archive file not found: '{archivePath}'.");
return null;
}
// Prefer SharpCompress for broad archive support.
try
{
using (var archive = ArchiveFactory.Open(archivePath))
{
var list = new List<string>();
foreach (var entry in archive.Entries)
{
try
{
if (entry == null) continue;
if (entry.IsDirectory) continue;
var name = entry.Key;
if (!string.IsNullOrEmpty(name))
list.Add(name);
}
catch (Exception inner)
{
Log.Debug($"TryListArchiveEntries: skipped entry due to error: {inner.Message}");
}
}
return list;
}
}
catch (Exception ex)
{
Log.Debug($"TryListArchiveEntries: SharpCompress failed to open '{archivePath}': {ex.Message}. Attempting Zip fallback.");
try
{
using (var stream = File.OpenRead(archivePath))
using (var za = new ZipArchive(stream, ZipArchiveMode.Read, leaveOpen: false))
{
return za.Entries.Select(e => e.FullName).ToList();
}
}
catch (Exception zipEx)
{
Log.Warn($"TryListArchiveEntries: failed to read archive '{archivePath}' with SharpCompress and ZipArchive: {zipEx.Message}");
return null;
}
}
}
/// <summary>
/// Check whether the archive contains at least one entry with each of the provided extensions (case-insensitive).
/// The check also inspects path segments for directory markers like "Name.gdb" so zipped FileGDB folders are detected.
/// </summary>
/// <param name="archivePath">Path to the archive file.</param>
/// <param name="requiredExtensions">Sequence of required extensions including the leading dot (e.g. ".shp").</param>
/// <returns>True when the archive contains entries covering all required extensions; false otherwise.</returns>
/// <remarks>
/// The method normalizes discovered entry extensions by inspecting both the extension of the last path segment and
/// any extension-like suffixes on intermediate path segments (useful for detecting zipped folder markers such as
/// "MyGdb.gdb"). The behavior is conservative: when the archive cannot be inspected (e.g. unreadable) the method
/// returns false.
/// </remarks>
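/// <example>
/// Pre-validating a zipped shapefile by combining this check with
/// <see cref="GetRequiredFileExtensions"/> (zipPath is hypothetical):
/// <code>
/// var required = ConverterUtils.GetRequiredFileExtensions("Shapefile"); // .shp, .shx, .dbf
/// bool complete = ConverterUtils.ArchiveContainsAllRequiredExtensions(zipPath, required);
/// </code>
/// </example>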
public static bool ArchiveContainsAllRequiredExtensions(string archivePath, IEnumerable<string> requiredExtensions)
{
if (string.IsNullOrWhiteSpace(archivePath)) return false;
if (requiredExtensions == null) return false;
var entries = TryListArchiveEntries(archivePath);
if (entries == null) return false;
// Build a normalized set of discovered "extensions" including:
// - extension of the entry name (last segment)
// - any suffixes found on intermediate path segments (e.g. "MyGdb.gdb")
var normalized = entries
.SelectMany(e =>
{
var found = new List<string>();
try
{
var ext = Path.GetExtension(e);
if (!string.IsNullOrEmpty(ext))
found.Add(ext.ToLowerInvariant());
var segments = e.Split(new[] { '/', '\\' }, StringSplitOptions.RemoveEmptyEntries);
foreach (var seg in segments)
{
var idx = seg.LastIndexOf('.');
if (idx > 0 && idx < seg.Length - 1)
{
var segExt = seg.Substring(idx).ToLowerInvariant();
found.Add(segExt);
}
}
}
catch { }
return found;
})
.Where(x => !string.IsNullOrEmpty(x))
.Distinct(StringComparer.OrdinalIgnoreCase)
.ToList();
foreach (var req in requiredExtensions)
{
if (string.IsNullOrWhiteSpace(req)) continue;
var r = req.ToLowerInvariant();
if (!normalized.Any(n => n == r)) return false;
}
return true;
}
/// <summary>
/// Map a canonical conversion option name to an Aspose <see cref="Driver"/> instance.
/// Returns null when unknown.
/// </summary>
/// <param name="option">Canonical option string (case-insensitive) such as "Shapefile", "GeoJson".</param>
/// <returns>An Aspose <see cref="Driver"/> or null when the option is not recognized.</returns>
/// <remarks>
/// The mapping intentionally uses simple string comparisons. If the set of supported options changes,
/// extend this method accordingly. Several option aliases (e.g. "kml" and "kmz") map to the same
/// Aspose driver when appropriate.
/// </remarks>
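/// <example>
/// A quick sketch of the recognized and unrecognized cases:
/// <code>
/// var driver = ConverterUtils.ConversionOptionToDriver("GeoJson"); // Drivers.GeoJson
/// var none = ConverterUtils.ConversionOptionToDriver("dwg");       // null: unsupported option
/// </code>
/// </example>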
public static Driver ConversionOptionToDriver(string option)
{
if (string.IsNullOrWhiteSpace(option)) return null;
switch (option.Trim().ToLowerInvariant())
{
case "geojson": return Drivers.GeoJson;
case "geojsonseq": return Drivers.GeoJsonSeq;
case "esrijson": return Drivers.EsriJson;
case "gdb": return Drivers.FileGdb;
case "kml": return Drivers.Kml;
case "kmz": return Drivers.Kml;
case "shapefile": return Drivers.Shapefile;
case "topojson": return Drivers.TopoJson;
case "osm": return Drivers.OsmXml;
case "gpx": return Drivers.Gpx;
case "gml": return Drivers.Gml;
case "mapinfointerchange": return Drivers.MapInfoInterchange;
case "mapinfotab": return Drivers.MapInfoTab;
case "csv": return Drivers.Csv;
case "geopackage": return Drivers.GeoPackage;
default: return null;
}
}
/// <summary>
/// Best-effort cleanup of the temp folder. This deletes the directory recursively.
/// Swallows exceptions and logs warnings.
/// </summary>
/// <param name="tempFolderPath">Path to the temporary folder to remove.</param>
/// <remarks>
/// This helper is idempotent and safe to call when the directory does not exist. It logs the
/// action at Debug and Info levels on success and uses Warn level when cleanup
/// fails. The method avoids throwing to simplify cleanup calls in finally blocks.
/// </remarks>
public static void TryCleanupTempFolder(string tempFolderPath)
{
if (string.IsNullOrWhiteSpace(tempFolderPath)) return;
try
{
if (Directory.Exists(tempFolderPath))
{
Log.Debug($"Cleaning up temp folder '{tempFolderPath}'.");
Directory.Delete(tempFolderPath, recursive: true);
Log.Info($"Temp folder '{tempFolderPath}' deleted.");
}
}
catch (Exception ex)
{
Log.Warn($"Failed to clean up temp folder '{tempFolderPath}': {ex.Message}");
}
}
/// <summary>
/// Best-effort delete of a set of extracted/intermediate files.
/// </summary>
/// <param name="files">Enumerable of file paths to delete.</param>
/// <remarks>
/// The helper iterates each provided path and attempts deletion. Failures for individual files are
/// logged as warnings and do not abort the overall cleanup process. Use this helper to remove
/// intermediate artifacts without propagating exceptions.
/// </remarks>
public static void CleanupExtractedFiles(IEnumerable<string> files)
{
if (files == null) return;
foreach (var f in files)
{
if (string.IsNullOrWhiteSpace(f)) continue;
try
{
if (File.Exists(f))
{
File.Delete(f);
Log.Debug($"CleanupExtractedFiles: deleted '{f}'.");
}
}
catch (Exception ex)
{
Log.Warn($"CleanupExtractedFiles: failed to delete '{f}': {ex.Message}");
}
}
}
/// <summary>
/// Format a UTC timestamp for human presentation.
/// Includes both the local time and the canonical UTC value.
/// </summary>
/// <param name="utcTimestamp">A <see cref="DateTime"/> value that will be normalized to UTC for display.</param>
/// <returns>Formatted string containing local and UTC ISO-8601 timestamps.</returns>
/// <remarks>
/// The method ensures the provided <paramref name="utcTimestamp"/> is treated as UTC. It returns
/// a string containing the local-time representation (in ISO-8601 round-trip format) and the
/// canonical UTC value so log messages are unambiguous across time zones.
/// </remarks>
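/// <example>
/// A sketch of the output shape (the local offset shown depends on the machine's time zone):
/// <code>
/// var display = ConverterUtils.FormatTimestampForDisplay(DateTime.UtcNow);
/// // e.g. "2024-06-01T14:30:00.0000000+02:00 (Local) | 2024-06-01T12:30:00.0000000Z (UTC)"
/// </code>
/// </example>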
public static string FormatTimestampForDisplay(DateTime utcTimestamp)
{
if (utcTimestamp.Kind != DateTimeKind.Utc)
utcTimestamp = DateTime.SpecifyKind(utcTimestamp, DateTimeKind.Utc);
var local = utcTimestamp.ToLocalTime();
return $"{local:o} (Local) | {utcTimestamp:o} (UTC)";
}
// --- internal helpers ------------------------------------------------
/// <summary>
/// Attempt to create the directory and verify it is writable by creating a short probe file.
/// Returns a ConversionResult.Failure on error, or null on success.
/// </summary>
/// <param name="folderPath">Directory path to create and probe.</param>
/// <returns>Null on success; a <see cref="ConversionResult"/> describing the failure otherwise.</returns>
/// <remarks>
/// The helper performs three responsibilities:
/// 1. Create the directory if it does not exist.
/// 2. Probe writability by creating and deleting a tiny temporary file.
/// 3. Translate common exceptions (unauthorized access, IO errors) into user-friendly
/// return values while logging details.
/// </remarks>
private static ConversionResult TryCreateAndVerifyDirectory(string folderPath)
{
try
{
if (!Directory.Exists(folderPath))
{
Log.Debug($"TryCreateAndVerifyDirectory: Creating folder '{folderPath}'.");
Directory.CreateDirectory(folderPath);
}
// Quick writability probe: create and delete a small temp file.
var probeFile = Path.Combine(folderPath, Path.GetRandomFileName());
try
{
using (var fs = File.Create(probeFile)) { /* zero-byte */ }
File.Delete(probeFile);
}
catch (UnauthorizedAccessException uex)
{
Log.Error($"TryCreateAndVerifyDirectory: access denied for '{folderPath}': {uex.Message}", uex);
return ConversionResult.Failure($"Access denied for folder '{folderPath}': {uex.Message}");
}
catch (IOException ioex)
{
Log.Error($"TryCreateAndVerifyDirectory: I/O error for '{folderPath}': {ioex.Message}", ioex);
return ConversionResult.Failure($"I/O error for folder '{folderPath}': {ioex.Message}");
}
catch (Exception ex)
{
Log.Error($"TryCreateAndVerifyDirectory: unexpected error for '{folderPath}': {ex.Message}", ex);
return ConversionResult.Failure($"Unable to prepare folder '{folderPath}': {ex.Message}");
}
return null;
}
catch (UnauthorizedAccessException uex)
{
Log.Error($"TryCreateAndVerifyDirectory: access denied creating '{folderPath}': {uex.Message}", uex);
return ConversionResult.Failure($"Access denied creating folder '{folderPath}': {uex.Message}");
}
catch (IOException ioex)
{
Log.Error($"TryCreateAndVerifyDirectory: I/O error creating '{folderPath}': {ioex.Message}", ioex);
return ConversionResult.Failure($"I/O error creating folder '{folderPath}': {ioex.Message}");
}
catch (Exception ex)
{
Log.Error($"TryCreateAndVerifyDirectory: failed to create '{folderPath}': {ex.Message}", ex);
return ConversionResult.Failure($"Unable to create folder '{folderPath}': {ex.Message}");
}
}
/// <summary>
/// Build a destination output file path using the output folder and a target format option.
/// </summary>
/// <param name="outputFolderPath">Destination folder where the output file will be placed.</param>
/// <param name="gisTargetFormatOption">Canonical target format option (e.g. "Shapefile").</param>
/// <returns>Full path to the intended output file. The file is not created by this method.</returns>
/// <remarks>
/// The method uses a UTC timestamp to produce a collision-resistant file name and attempts to map
/// the provided <paramref name="gisTargetFormatOption"/> to a file extension using
/// <see cref="FileExtensionHelpers.FromOption"/>. When the option cannot be mapped the method
/// falls back to the Shapefile extension and logs a warning.
/// </remarks>
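/// <example>
/// A naming sketch; the folder is hypothetical and the timestamp illustrative, assuming the
/// "Shapefile" option maps to ".shp":
/// <code>
/// var target = ConverterUtils.BuildOutputPath(outputDir, "Shapefile");
/// // e.g. {outputDir}\output_20240601_123000.shp
/// </code>
/// </example>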
internal static string BuildOutputPath(string outputFolderPath, string gisTargetFormatOption)
{
var timeStamp = DateTime.UtcNow.ToString("yyyyMMdd_HHmmss");
var maybeOutFileExt = FileExtensionHelpers.FromOption(gisTargetFormatOption);
var outFileExt = maybeOutFileExt ?? FileExtension.Shapefile;
if (maybeOutFileExt == null)
Log.Warn($"BuildOutputPath: could not map option '{gisTargetFormatOption}' to a known FileExtension; falling back to '{outFileExt}'.");
var extDot = FileExtensionHelpers.ToDotExtension(outFileExt);
// Base candidate filename
var baseName = $"output_{timeStamp}";
var candidate = Path.Combine(outputFolderPath, baseName + extDot);
Log.Info($"BuildOutputPath: target output file will be '{candidate}'.");
return candidate;
}
/// <summary>
/// Construct converter-specific options for Aspose conversions.
/// </summary>
/// <param name="sourceFormat">Source format option (e.g. "Gml").</param>
/// <param name="targetFormat">Target format option (not currently used but present for future use).</param>
/// <returns>A populated <see cref="ConversionOptions"/> instance with format-specific settings applied.</returns>
/// <remarks>
/// Presently this method only applies GML-specific reader options (schema restoration). Additional
/// source- or target-specific configuration should be added here as needed. The returned
/// <see cref="ConversionOptions"/> object is a lightweight carrier of options for callers of
/// the actual converter implementations.
/// </remarks>
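/// <example>
/// GML sources get schema restoration enabled so inputs without an attribute schema still convert:
/// <code>
/// var options = ConverterUtils.BuildConversionOptions("Gml", "GeoJson");
/// // options.SourceDriverOptions is a GmlOptions instance with RestoreSchema = true
/// </code>
/// </example>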
public static ConversionOptions BuildConversionOptions(string sourceFormat, string targetFormat)
{
var options = new ConversionOptions();
// Handle source-specific options
// Gml handling:
// the input file(s) can arrive with or without an attribute collection schema.
// To handle both cases we need to set GmlOptions.RestoreSchema = true (it is false by default).
// string.Equals is used so a null sourceFormat cannot throw.
if (string.Equals(sourceFormat, "Gml", StringComparison.OrdinalIgnoreCase))
{
options.SourceDriverOptions = new GmlOptions { RestoreSchema = true };
}
// Add more format-specific handling as needed
return options;
}
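/// <summary>
/// Resolve the concrete source file for a conversion: when the input is an archive it is safely
/// extracted into the temp folder (with zip-slip protection and required-component verification);
/// otherwise the input file's extension is checked against the expected source format.
/// </summary>
/// <param name="inputFilePath">Path to the input file or archive.</param>
/// <param name="sourceFormatOption">Canonical source format option (e.g. "Shapefile").</param>
/// <param name="tempFolderPath">Temporary folder used for archive extraction.</param>
/// <returns>
/// A tuple of the resolved source path, a flag indicating whether extraction occurred, and a
/// <see cref="ConversionResult"/> describing success or failure.
/// </returns>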
internal static (string sourcePath, bool wasExtracted, ConversionResult) PrepareSourceFile(string inputFilePath, string sourceFormatOption, string tempFolderPath)
{
try
{
var fileExtension = FileExtensionHelpers.FromOption(sourceFormatOption);
if (fileExtension == null)
{
Log.Warn($"PrepareSourceFile: could not map option '{sourceFormatOption}' to a known FileExtension.");
return (null, false, ConversionResult.Failure($"Could not map option '{sourceFormatOption}' to a known FileExtension."));
}
var extDot = FileExtensionHelpers.ToDotExtension((FileExtension)fileExtension);
var inputIsArchive = IsArchiveFile(inputFilePath);
if (inputIsArchive)
{
Log.Debug($"{sourceFormatOption} Converter: input detected as archive. Inspecting entries.");
var entries = TryListArchiveEntries(inputFilePath);
if (entries == null)
{
Log.Error($"PrepareSourceFile: failed to list archive entries from {inputFilePath}.");
return (null, false, ConversionResult.Failure($"Failed to inspect archive contents from {inputFilePath}."));
}
// Find the first entry with the expected file extension (case-insensitive)
var fileExtensionEntry = entries.FirstOrDefault(e => string.Equals(Path.GetExtension(e), extDot, StringComparison.OrdinalIgnoreCase));
if (string.IsNullOrWhiteSpace(fileExtensionEntry))
{
Log.Error($"PrepareSourceFile: archive does not contain {extDot} entry.");
return (null, false, ConversionResult.Failure($"Archive does not contain a {extDot} entry."));
}
// Extract archive safely into tempFolderPath
try
{
Log.Debug($"PrepareSourceFile: extracting archive '{inputFilePath}' into '{tempFolderPath}'.");
var tempFull = Path.GetFullPath(tempFolderPath).TrimEnd(Path.DirectorySeparatorChar) + Path.DirectorySeparatorChar;
var skipped = new List<string>();
var extractedFiles = new List<string>();
using (var archive = ArchiveFactory.Open(inputFilePath))
{
foreach (var entry in archive.Entries.Where(en => !en.IsDirectory))
{
var entryKey = entry.Key;
if (string.IsNullOrEmpty(entryKey)) continue;
var destPath = Path.Combine(tempFolderPath, entryKey);
var destFull = Path.GetFullPath(destPath);
// zip-slip guard
if (!destFull.StartsWith(tempFull, StringComparison.OrdinalIgnoreCase))
{
Log.Warn($"PrepareSourceFile: skipping entry '{entryKey}' which would extract outside the temp folder.");
skipped.Add(entryKey);
continue;
}
try
{
Directory.CreateDirectory(Path.GetDirectoryName(destFull) ?? tempFolderPath);
using (var src = entry.OpenEntryStream())
using (var dst = File.Create(destFull))
{
src.CopyTo(dst);
}
extractedFiles.Add(destFull);
Log.Debug($"PrepareSourceFile: extracted '{entryKey}' -> '{destFull}'.");
}
catch (Exception exEntry)
{
Log.Error($"PrepareSourceFile: failed to extract '{entryKey}': {exEntry.Message}");
skipped.Add(entryKey);
}
}
}
// If any entries were skipped or failed to extract -> treat as extraction failure
if (skipped.Count > 0)
{
Log.Error($"PrepareSourceFile: some archive entries skipped or failed: {string.Join(", ", skipped)}");
return (null, false, ConversionResult.Failure($"Archive extraction failed for entries: {string.Join(", ", skipped)}"));
}
// Check for required file extensions
var extractedExts = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (var f in Directory.GetFiles(tempFolderPath, "*.*", SearchOption.AllDirectories))
extractedExts.Add(Path.GetExtension(f));
var requiredFileExtensions = GetRequiredFileExtensions(sourceFormatOption);
var stillMissing = requiredFileExtensions.Where(r => !extractedExts.Contains(r)).ToArray();
if (stillMissing.Length > 0)
{
Log.Error($"PrepareSourceFile: after extraction missing components: {string.Join(", ", stillMissing)}");
return (null, false, ConversionResult.Failure($"Archive missing required components after extraction: {string.Join(", ", stillMissing)}"));
}
// Locate extracted file with the desired extension
var found = Directory.GetFiles(tempFolderPath, $"*{extDot}", SearchOption.AllDirectories).FirstOrDefault();
if (found == null)
{
Log.Error($"PrepareSourceFile: extracted archive but no {extDot} file found.");
return (null, false, ConversionResult.Failure($"Archive extraction did not yield a {extDot} file."));
}
var sourcePath = Path.GetFullPath(found);
Log.Info($"PrepareSourceFile: extracted {sourcePath} from archive.");
return (sourcePath, true, ConversionResult.Success($"Archive extraction successful: {sourcePath}"));
}
catch (Exception ex)
{
Log.Error($"PrepareSourceFile: extraction failed: {ex.Message}", ex);
var requiredExts = GetRequiredFileExtensions(sourceFormatOption);
return (null, false, ConversionResult.Failure($"Failed to extract required {string.Join(", ", requiredExts)} files from archive into {tempFolderPath}. Message: {ex.Message}"));
}
}
else
{
// Single-file path must include the format extension and exist
var ext = Path.GetExtension(inputFilePath) ?? string.Empty;
if (!string.Equals(ext, $"{extDot}", StringComparison.OrdinalIgnoreCase))
{
Log.Error($"PrepareSourceFile: input '{inputFilePath}' is not a {extDot} file.");
return (null, false, ConversionResult.Failure($"{fileExtension.ToString().ToUpper()} requires a {extDot} file."));
}
var sourcePath = Path.GetFullPath(inputFilePath);
Log.Info($"PrepareSourceFile: using input {fileExtension.ToString().ToUpperInvariant()} '{sourcePath}'.");
return (inputFilePath, false, ConversionResult.Success($"Using input {fileExtension.ToString().ToUpperInvariant()} '{sourcePath}'. "));
}
}
catch (Exception ex)
{
Log.Error($"PrepareSourceFile: prepare source file from {inputFilePath} and source format option: {sourceFormatOption} into temp folder: {tempFolderPath} failed. Error message: {ex.Message}", ex);
return (null, false, ConversionResult.Failure($"Prepare source file from {inputFilePath} and source format option: {sourceFormatOption} into temp folder: {tempFolderPath} failed: Error message: {ex.Message}"));
}
}
/// <summary>
/// Returns the list of required file extensions for a given source format option.
/// Each extension includes the leading dot and is returned in lower-case.
/// Returns an empty collection when no strict requirements exist for the format.
/// </summary>
/// <param name="sourceFormatOption">Canonical source format option (e.g. "Shapefile").</param>
/// <returns>Read-only collection of required extensions (e.g. ".shp", ".shx", ".dbf") or an empty collection.</returns>
/// <remarks>
/// The mapping is intentionally conservative and only includes extensions required by the
/// format's on-disk representation. Update this method when supporting additional formats or
/// when the set of required components evolves.
/// </remarks>
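/// <example>
/// A quick sketch of the strict and lenient cases:
/// <code>
/// var shapefileParts = ConverterUtils.GetRequiredFileExtensions("Shapefile"); // .shp, .shx, .dbf
/// var unknown = ConverterUtils.GetRequiredFileExtensions("dwg");              // empty collection
/// </code>
/// </example>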
public static IReadOnlyCollection<string> GetRequiredFileExtensions(string sourceFormatOption)
{
if (string.IsNullOrWhiteSpace(sourceFormatOption))
return Array.Empty<string>();
switch (sourceFormatOption.Trim().ToLowerInvariant())
{
case "csv":
return new[] { ".csv" };
case "esrijson":
return new[] { ".json", ".esrijson" };
case "gdb":
return new[] { ".gdb", ".gdbtable", ".gdbtablx" };
case "geojson":
return new[] { ".json", ".geojson" };
case "geojsonseq":
return new[] { ".json", ".jsonl", ".ndjson" };
case "geopackage":
return new[] { ".gpkg" };
case "gml":
return new[] { ".gml" };
case "gpx":
return new[] { ".gpx" };
case "kml":
return new[] { ".kml" };
case "kmz":
return new[] { ".kmz" };
case "mapinfointerchange":
return new[] { ".mif" };
case "mapinfotab":
return new[] { ".tab", ".map", ".dat", ".id" };
case "osm":
return new[] { ".osm" };
case "shapefile":
return new[] { ".shp", ".shx", ".dbf" };
case "topojson":
return new[] { ".json", ".topojson" };
default:
return Array.Empty<string>();
}
}
}
}