using System.Globalization;
using System.IO.Compression;
using System.Text;
using GitConverter.Lib.Converters;
using GitConverter.Lib.Factories;
using GitConverter.Lib.Models;

namespace GitConverter.TestsApp.Factories
{
    /// <summary>
    /// Unit tests for <see cref="ConverterFactoryInputExtensions"/> focusing on input-based detection
    /// (single-file JSON classification and archive inspection) used to resolve an appropriate
    /// <see cref="IConverter"/> for a given input path.
    /// </summary>
    /// <remarks>
    /// <para>
    /// Purpose
    /// - Verify the detector logic that chooses a converter based on the input artifact rather than
    ///   an explicit user-supplied format option. This covers both single-file inputs (including
    ///   JSON content inspection) and archive inputs where entries are inspected without extraction.
    /// </para>
    /// <para>
    /// Scope
    /// - Classification of JSON variants: GeoJSON (FeatureCollection/Feature), EsriJSON (spatialReference or geometryType),
    ///   TopoJSON (Topology), and GeoJSON Sequence / NDJSON (array or newline-delimited JSON objects).
    /// - Archive detection: extension collection, KMZ detection (doc.kml), JSON voting across multiple entries,
    ///   and strict requirement matching for multi-file formats (e.g., Shapefile .shp/.shx/.dbf, FileGDB markers).
    /// - Defensive validation: missing files, zero-byte files, files without extensions, corrupted/truncated inputs, and
    ///   ambiguous or tie scenarios during voting.
    /// </para>
    /// <para>
    /// Test strategy
    /// - Use a lightweight FakeFactory that implements <see cref="IConverterFactory"/> and records
    ///   the last requested converter key. This isolates the detection logic from actual converter implementations
    ///   (no Aspose dependencies) and lets tests assert which key the detector attempted to resolve.
    /// - Create temporary files and zip archives under a per-test unique temp folder (system temp) and remove them
    ///   in <see cref="Dispose"/> to avoid leaving artifacts on the developer machine or CI agents.
    /// - Keep test inputs minimal but representative; for header-sniffing tests ensure content exceeds the minimum
    ///   sniff length to exercise the heuristics.
    /// - Assertions focus on boolean success/failure of TryCreateForInput,
    ///   the requested converter key captured by the fake factory, and stable substrings in the detect reason to avoid
    ///   brittle text comparisons.
    /// </para>
    /// <para>
    /// Implementation notes and assumptions
    /// - Tests assume header-based JSON sniffing reads up to a fixed number of bytes and that NDJSON detection
    ///   may parse the first non-empty line when full-document parsing fails.
    /// - Tests assume archive voting uses a deterministic tiebreaker preferring more specific formats
    ///   (EsriJson > TopoJson > GeoJson > GeoJsonSeq).
    /// - The fake factory intentionally returns a dummy converter for any requested key to allow callers to proceed
    ///   when detection succeeds.
    /// </para>
    /// <para>
    /// Extensibility
    /// - When adding tests for new formats follow the established pattern: small temp file or zip, call TryCreateForInput,
    ///   assert the factory was asked for the expected converter key and that the detection reason contains a stable diagnostic.
    /// </para>
    /// <para>
    /// Environment and CI considerations
    /// - Tests do not require network access or private data. Integration tests that require large or private
    ///   artifacts should be placed under the TestData folder and guarded to skip in CI when samples are absent.
    /// </para>
    /// </remarks>
    public class ConverterFactoryInputExtensionsTests : IDisposable
    {
        /// <summary>Per-instance scratch directory; every file and archive created by a test lives here.</summary>
        private readonly string _tmpFolder;

        /// <summary>
        /// Creates a unique scratch directory under the system temp path for the current test instance.
        /// </summary>
        public ConverterFactoryInputExtensionsTests()
        {
            _tmpFolder = Path.Combine(Path.GetTempPath(), "GitConverter.Tests", Guid.NewGuid().ToString("N"));
            Directory.CreateDirectory(_tmpFolder);
        }

        /// <summary>
        /// Removes the scratch directory (recursively). Cleanup is best-effort: file-system failures are ignored
        /// so that a locked or already-removed folder never fails a test.
        /// </summary>
        public void Dispose()
        {
            try
            {
                if (Directory.Exists(_tmpFolder))
                {
                    Directory.Delete(_tmpFolder, true);
                }
            }
            catch (IOException)
            {
                // Best-effort cleanup: the folder may be locked or already gone.
            }
            catch (UnauthorizedAccessException)
            {
                // Best-effort cleanup: insufficient permissions must not fail the test run.
            }
        }

        /// <summary>
        /// Writes <paramref name="content"/> (UTF-8) to a new uniquely named file in the scratch directory.
        /// </summary>
        /// <param name="extension">Extension appended verbatim to the generated name (e.g. ".json"); may be empty.</param>
        /// <param name="content">Text to write; <c>null</c> is treated as an empty string.</param>
        /// <returns>The full path of the created file.</returns>
        private string CreateTempFile(string extension, string content)
        {
            var path = Path.Combine(_tmpFolder, Guid.NewGuid().ToString("N") + extension);
            File.WriteAllText(path, content ?? string.Empty, Encoding.UTF8);
            return path;
        }

        /// <summary>
        /// Writes <paramref name="content"/> (UTF-8) to a new uniquely named file that has no extension.
        /// </summary>
        /// <param name="content">Text to write; <c>null</c> is treated as an empty string.</param>
        /// <returns>The full path of the created file.</returns>
        private string CreateTempFileNoExtension(string content)
        {
            // The "N" GUID format contains no dot, so an empty extension yields an extension-less name.
            return CreateTempFile(string.Empty, content);
        }

        /// <summary>
        /// Creates a new uniquely named, extension-less file of length zero in the scratch directory.
        /// </summary>
        /// <returns>The full path of the created file.</returns>
        private string CreateZeroByteFile()
        {
            var path = Path.Combine(_tmpFolder, Guid.NewGuid().ToString("N"));
            File.WriteAllBytes(path, Array.Empty<byte>());
            return path;
        }

        /// <summary>
        /// Creates a .zip archive in the scratch directory containing one entry per supplied tuple.
        /// </summary>
        /// <param name="entries">Entry name and UTF-8 text content pairs; <c>null</c> content is written as empty.</param>
        /// <returns>The full path of the created archive.</returns>
        private string CreateZipWithEntries(params (string Name, string Content)[] entries)
        {
            var zipPath = Path.Combine(_tmpFolder, Guid.NewGuid().ToString("N") + ".zip");

            using (var fileStream = File.Create(zipPath))
            using (var archive = new ZipArchive(fileStream, ZipArchiveMode.Create, leaveOpen: false))
            {
                foreach (var (name, content) in entries)
                {
                    var zipEntry = archive.CreateEntry(name, CompressionLevel.Fastest);

                    // Each entry stream is closed before the next entry is created.
                    using (var entryStream = zipEntry.Open())
                    using (var writer = new StreamWriter(entryStream, Encoding.UTF8))
                    {
                        writer.Write(content ?? string.Empty);
                    }
                }
            }

            return zipPath;
        }

        /// <summary>
        /// Test double for <see cref="IConverterFactory"/> that accepts every key passed to
        /// <see cref="TryCreate"/> and remembers the most recent one.
        /// </summary>
        private sealed class FakeFactory : IConverterFactory
        {
            /// <summary>The key most recently passed to <see cref="TryCreate"/>, or <c>null</c> if never called.</summary>
            public string? LastRequestedKey { get; private set; }

            /// <summary>The converter most recently produced by <see cref="TryCreate"/>, or <c>null</c> if never called.</summary>
            public IConverter? CreatedConverter { get; private set; }

            /// <summary>
            /// Always throws <see cref="KeyNotFoundException"/>; the assertions in this test class rely on
            /// <see cref="TryCreate"/> only.
            /// </summary>
            public IConverter Create(string formatOption) => throw new KeyNotFoundException();

            /// <summary>
            /// Records <paramref name="formatOption"/> and always succeeds with a new dummy converter.
            /// </summary>
            /// <param name="formatOption">The converter key being requested.</param>
            /// <param name="converter">Receives a dummy converter tagged with the requested key.</param>
            /// <returns>Always <c>true</c>.</returns>
            public bool TryCreate(string formatOption, out IConverter converter)
            {
                LastRequestedKey = formatOption;
                converter = new DummyConverter(formatOption);
                CreatedConverter = converter;
                return true;
            }

            /// <summary>Returns an empty collection.</summary>
            public System.Collections.Generic.IReadOnlyCollection<string> GetSupportedOptions() => Array.Empty<string>();
        }

        /// <summary>
        /// Minimal <see cref="IConverter"/> whose <see cref="Convert"/> always reports success.
        /// </summary>
        private sealed class DummyConverter : IConverter
        {
            /// <summary>The key this converter was created for.</summary>
            public string Option { get; }

            public DummyConverter(string option)
            {
                Option = option;
            }

            /// <summary>Ignores all arguments and returns a successful result with the message "ok".</summary>
            public ConversionResult Convert(
                string gisInputFilePath,
                string gisSourceFormatOption,
                string gisTargetFormatOption,
                string outputFolderPath,
                string tempFolderPath)
            {
                return ConversionResult.Success("ok");
            }
        }

        /// <summary>
        /// Verify that files with explicit .geojson extension map to the GeoJson converter key.
        /// </summary>
        /// <remarks>
        /// Purpose: ensure extension-based fast-path mapping works for .geojson files.
        /// Behavior: TryCreateForInput should return true, request the "GeoJson" converter and provide a mapping reason.
        /// </remarks>
        [Fact(DisplayName = "Explicit .geojson extension maps to GeoJson converter")]
        public void GeoJson_Extension_Mapped()
        {
            var f = new FakeFactory();
            var file = CreateTempFile(".geojson", "{ \"type\": \"FeatureCollection\", \"features\": [] }");

            var ok = f.TryCreateForInput(file, out var conv, out _, out var reason);

            Assert.True(ok);
            Assert.NotNull(conv);
            Assert.Equal("GeoJson", f.LastRequestedKey, ignoreCase: true);

            // Ensure reason is present and contains an indication that extension mapping occurred.
            Assert.False(string.IsNullOrWhiteSpace(reason), "Expected non-empty detection reason.");
            Assert.Contains("Mapped extension", reason, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Verify that files with explicit .esrijson extension map to the EsriJson converter key.
        /// </summary>
        /// <remarks>
        /// Purpose: ensure extension-based fast-path mapping works for .esrijson.
        /// Behavior: TryCreateForInput should return true and request "EsriJson".
        /// </remarks>
        [Fact(DisplayName = "Explicit .esrijson extension maps to EsriJson converter")]
        public void EsriJson_Extension_Mapped()
        {
            var f = new FakeFactory();
            var file = CreateTempFile(".esrijson", "{ \"spatialReference\": { \"wkid\": 4326 } }");

            var ok = f.TryCreateForInput(file, out var conv, out _, out var reason);

            Assert.True(ok);
            Assert.NotNull(conv);
            Assert.Equal("EsriJson", f.LastRequestedKey, ignoreCase: true);
            Assert.Contains("Mapped extension", reason, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Ensure FeatureCollection JSON (single-file) is detected as GeoJson.
        /// </summary>
        /// <remarks>
        /// Purpose: validate JSON content-based detection for a typical GeoJSON structure.
        /// Behavior: TryCreateForInput should detect GeoJson and request that converter.
        /// </remarks>
        [Fact(DisplayName = "Generic .json with FeatureCollection detected as GeoJson")]
        public void Json_FeatureCollection_Detected_As_GeoJson()
        {
            var f = new FakeFactory();
            var file = CreateTempFile(".json", "{ \"type\": \"FeatureCollection\", \"features\": [] }");

            var ok = f.TryCreateForInput(file, out var conv, out _, out var reason);

            Assert.True(ok);
            Assert.NotNull(conv);
            Assert.Equal("GeoJson", f.LastRequestedKey, ignoreCase: true);
            Assert.Contains("Detected JSON format", reason, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Ensure JSON containing spatialReference is detected as EsriJson.
        /// </summary>
        /// <remarks>
        /// Purpose: validate EsriJSON fingerprint detection using "spatialReference".
        /// Behavior: TryCreateForInput should detect EsriJson and request that converter.
        /// </remarks>
        [Fact(DisplayName = "Generic .json with spatialReference detected as EsriJson")]
        public void Json_With_SpatialReference_Detected_As_EsriJson()
        {
            var f = new FakeFactory();
            var file = CreateTempFile(".json", "{ \"spatialReference\": { \"wkid\": 3857 }, \"features\": [] }");

            var ok = f.TryCreateForInput(file, out var conv, out _, out var reason);

            Assert.True(ok);
            Assert.NotNull(conv);
            Assert.Equal("EsriJson", f.LastRequestedKey, ignoreCase: true);
            Assert.Contains("Detected JSON format", reason, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Verify NDJSON (newline-delimited JSON) detection for .json files containing multiple JSON objects separated by newlines.
        /// </summary>
        /// <remarks>
        /// Purpose: ensure NDJSON / GeoJSON sequence detection works when multiple JSON-like lines are present.
        /// Behavior: TryCreateForInput should detect GeoJsonSeq and request that converter key.
        /// </remarks>
        [Fact(DisplayName = "NDJSON (.json with multiple JSON lines) detected as GeoJsonSeq")]
        public void Json_Ndjson_Detected_As_GeoJsonSeq()
        {
            var f = new FakeFactory();
            var content = "{\"type\":\"Feature\",\"properties\":{}}\n{\"type\":\"Feature\",\"properties\":{}}\n";
            var file = CreateTempFile(".json", content);

            var ok = f.TryCreateForInput(file, out var conv, out _, out var reason);

            Assert.True(ok);
            Assert.NotNull(conv);
            Assert.Equal("GeoJsonSeq", f.LastRequestedKey, ignoreCase: true);

            // The detector reason can come from the JsonFormatDetector or from header sniffing.
            // Assert on the stable detected key instead of a specific phrase like "NDJSON".
            Assert.Contains("GeoJsonSeq", reason, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Verify TopoJSON fingerprint detection from a .json header.
        /// </summary>
        /// <remarks>
        /// Purpose: ensure TopoJSON detection via presence of topology-related keywords.
        /// Behavior: TryCreateForInput should request "TopoJson".
        /// </remarks>
        [Fact(DisplayName = "TopoJSON fingerprint detected from .json header")]
        public void Json_TopoJson_Detected_As_TopoJson()
        {
            var f = new FakeFactory();
            var file = CreateTempFile(".json", "{ \"type\": \"Topology\", \"topology\": {} }");

            var ok = f.TryCreateForInput(file, out var conv, out _, out var reason);

            Assert.True(ok);
            Assert.NotNull(conv);
            Assert.Equal("TopoJson", f.LastRequestedKey, ignoreCase: true);
            Assert.Contains("Detected JSON format", reason, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Empty or zero-byte files are rejected by the detector.
        /// </summary>
        /// <remarks>
        /// Purpose:
        /// - Validate that the detection code performs a file-size check and fails early for zero-byte files.
        ///
        /// Expected behavior:
        /// - TryCreateForInput returns false, converter is null, and detectReason contains an explanatory phrase.
        /// </remarks>
        [Fact(DisplayName = "Empty (zero-byte) file is rejected")]
        public void EmptyFile_IsRejected()
        {
            var f = new FakeFactory();
            var file = CreateZeroByteFile();

            var ok = f.TryCreateForInput(file, out var conv, out _, out var reason);

            Assert.False(ok);
            Assert.Null(conv);
            Assert.Contains("file is empty", reason, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Files without extensions are rejected by the detector.
        /// </summary>
        /// <remarks>
        /// Purpose:
        /// - Ensure that inputs lacking an extension do not silently proceed to ambiguous detection.
        ///
        /// Expected behavior:
        /// - TryCreateForInput returns false and the detectReason indicates a missing extension.
        /// </remarks>
        [Fact(DisplayName = "File without extension is rejected")]
        public void FileWithoutExtension_IsRejected()
        {
            var f = new FakeFactory();
            var file = CreateTempFileNoExtension("{ \"type\": \"FeatureCollection\" }");

            var ok = f.TryCreateForInput(file, out var conv, out _, out var reason);

            Assert.False(ok);
            Assert.Null(conv);
            Assert.Contains("no extension", reason, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Archives containing only unknown/minimal JSON entries should fail detection (no winner).
        /// </summary>
        /// <remarks>
        /// Purpose:
        /// - Validate archive JSON voting behavior when entries lack distinguishing fingerprints.
        ///
        /// Expected behavior:
        /// - TryCreateForInput returns false and detectReason indicates no classification or ambiguity.
        /// </remarks>
        [Fact(DisplayName = "Archive with only unknown JSON entries fails detection")]
        public void Archive_With_UnknownJsonEntries_Fails()
        {
            var f = new FakeFactory();

            // small JSON blobs that lack distinguishing fingerprints
            var zip = CreateZipWithEntries(
                ("a.json", "{ \"foo\": \"bar\" }"),
                ("b.json", "{ \"x\": 1 }"));

            var ok = f.TryCreateForInput(zip, out var conv, out _, out var reason);

            Assert.False(ok);
            Assert.Null(conv);

            // Either diagnostic is acceptable; match on stable substrings only.
            var mentionsNoJson = reason != null
                && reason.IndexOf("no json entries", StringComparison.OrdinalIgnoreCase) >= 0;
            var mentionsAmbiguous = reason != null
                && reason.IndexOf("ambiguous", StringComparison.OrdinalIgnoreCase) >= 0;
            Assert.True(mentionsNoJson || mentionsAmbiguous, $"Unexpected detectReason: {reason}");
        }

        /// <summary>
        /// When JSON voting produces a tie the deterministic tiebreaker is applied; EsriJson is preferred over GeoJson.
        /// </summary>
        /// <remarks>
        /// Purpose:
        /// - Ensure the tie-resolution logic is deterministic and favors more specific formats.
        ///
        /// Test construction:
        /// - Create a zip with two JSON entries: one that fingerprints as EsriJson and one as GeoJson.
        /// - Both entries are padded to exceed the header-sniffing minimum so heuristics can match.
        ///
        /// Expected behavior:
        /// - TryCreateForInput returns true and the factory was asked for "EsriJson".
        /// </remarks>
        [Fact(DisplayName = "Archive with tied JSON votes uses tiebreaker (EsriJson preferred)")]
        public void Archive_TiedJsonVotes_Tiebreaker()
        {
            // Padding is appended until the document is at least this many characters long.
            const int minimumLength = 700;

            var f = new FakeFactory();

            // Builds a JSON object whose first property is the marker, followed by repeated padding properties.
            string MakeLarge(string marker)
            {
                var sb = new StringBuilder();
                sb.Append("{");
                sb.Append($"\"{marker}\": true,");
                while (sb.Length < minimumLength)
                {
                    sb.Append("\"padding\":\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\",");
                }

                sb.Append("\"end\":true}");
                return sb.ToString();
            }

            var esriJsonContent = MakeLarge("spatialReference"); // Esri marker
            var geoJsonContent = MakeLarge("FeatureCollection"); // GeoJSON marker
            var zip = CreateZipWithEntries(
                ("esri.json", esriJsonContent),
                ("geo.json", geoJsonContent));

            var ok = f.TryCreateForInput(zip, out var conv, out _, out var reason);

            Assert.True(ok, $"Expected detection to succeed via tiebreaker; reason: {reason}");
            Assert.NotNull(conv);
            Assert.Equal("EsriJson", f.LastRequestedKey, ignoreCase: true);
        }

        /// <summary>
        /// Corrupted or truncated JSON files should not be misclassified.
        /// </summary>
        /// <remarks>
        /// Purpose:
        /// - Provide a syntactically truncated JSON and confirm detection fails gracefully.
        ///
        /// Expected behavior:
        /// - TryCreateForInput returns false and detectReason indicates inability to determine JSON format.
        /// </remarks>
        [Fact(DisplayName = "Corrupted / truncated JSON file is rejected")]
        public void Corrupted_Truncated_Json_IsRejected()
        {
            var f = new FakeFactory();
            var file = CreateTempFile(".json", "{ \"type\": \"FeatureCollection\", \"features\": [ { \"type\": \"Feature\" "); // truncated

            var ok = f.TryCreateForInput(file, out var conv, out _, out var reason);

            Assert.False(ok);
            Assert.Null(conv);
            Assert.Contains("could not be determined", reason, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Minified (no-whitespace) but sufficiently large GeoJSON should still be detected as GeoJson.
        /// </summary>
        /// <remarks>
        /// Purpose:
        /// - Ensure header-based substring heuristics work on minified JSON if the identifying tokens are present
        ///   and the header length is large enough for reliable classification.
        ///
        /// Expected behavior:
        /// - TryCreateForInput returns true and the factory was asked for "GeoJson".
        /// </remarks>
        [Fact(DisplayName = "Large minified GeoJSON (no whitespace) is classified as GeoJson")]
        public void Minified_Large_Json_Detected_As_GeoJson()
        {
            const int featureCount = 40;

            var f = new FakeFactory();

            // Build minified featurecollection with many features to exceed the classifier threshold.
            var sb = new StringBuilder();
            sb.Append("{\"type\":\"FeatureCollection\",\"features\":[");
            for (int i = 0; i < featureCount; i++)
            {
                if (i > 0)
                {
                    sb.Append(",");
                }

                // Numbers are formatted with the invariant culture so the decimal separator is always '.',
                // regardless of the machine's regional settings.
                sb.Append("{\"type\":\"Feature\",\"geometry\":{\"type\":\"Point\",\"coordinates\":[");
                sb.Append(i.ToString(CultureInfo.InvariantCulture));
                sb.Append(",");
                sb.Append((i + 0.1).ToString(CultureInfo.InvariantCulture));
                sb.Append("]},\"properties\":{\"id\":");
                sb.Append(i.ToString(CultureInfo.InvariantCulture));
                sb.Append("}}");
            }

            sb.Append("]}");
            var minified = sb.ToString();
            var file = CreateTempFile(".json", minified);

            var ok = f.TryCreateForInput(file, out var conv, out _, out var reason);

            Assert.True(ok, $"Expected minified large GeoJSON to be detected. Reason: {reason}");
            Assert.NotNull(conv);
            Assert.Equal("GeoJson", f.LastRequestedKey, ignoreCase: true);
        }

        /// <summary>
        /// Verify that an archive containing doc.kml resolves to the Kmz converter key, both when the
        /// archive carries a .kmz extension and when it is a plain .zip.
        /// </summary>
        /// <remarks>
        /// Each scenario uses its own fake factory so the recorded key cannot be left over from the other call.
        /// NOTE(review): the doc.kml entry is written with empty content; presumably detection keys on the
        /// entry name only — confirm against the detector.
        /// </remarks>
        [Fact(DisplayName = "KMZ detection by outer extension and doc.kml inside archive")]
        public void Kmz_Detected_By_OuterExtension_And_DocKml()
        {
            // Create a zip that contains doc.kml and copy it to a sibling path whose outer extension is .kmz.
            var kmzFactory = new FakeFactory();
            var zip = CreateZipWithEntries(("doc.kml", ""));
            var kmzPath = Path.ChangeExtension(zip, ".kmz");
            File.Copy(zip, kmzPath);

            var ok = kmzFactory.TryCreateForInput(kmzPath, out var conv, out _, out _);

            Assert.True(ok);
            Assert.NotNull(conv);
            Assert.Equal("Kmz", kmzFactory.LastRequestedKey, ignoreCase: true);

            // Also test that a regular .zip containing doc.kml is treated as KMZ by content.
            var zipFactory = new FakeFactory();
            var zip2 = CreateZipWithEntries(("doc.kml", ""));

            var ok2 = zipFactory.TryCreateForInput(zip2, out var conv2, out _, out _);

            Assert.True(ok2);
            Assert.NotNull(conv2);
            Assert.Equal("Kmz", zipFactory.LastRequestedKey, ignoreCase: true);
        }

        /// <summary>
        /// Verify that files with .ndjson and .jsonl extensions both resolve to the GeoJsonSeq converter key.
        /// </summary>
        /// <remarks>
        /// Each extension is checked with its own fake factory so the recorded key cannot be left over
        /// from the other call.
        /// </remarks>
        [Fact(DisplayName = ".ndjson and .jsonl extensions map to GeoJsonSeq")]
        public void Ndjson_Extensions_Mapped_To_GeoJsonSeq()
        {
            var ndjsonFactory = new FakeFactory();
            var ndjson = CreateTempFile(".ndjson", "{ \"type\": \"Feature\" }\n{ \"type\": \"Feature\" }\n");

            var ok = ndjsonFactory.TryCreateForInput(ndjson, out _, out _, out _);

            Assert.True(ok);
            Assert.Equal("GeoJsonSeq", ndjsonFactory.LastRequestedKey, ignoreCase: true);

            var jsonlFactory = new FakeFactory();
            var jsonl = CreateTempFile(".jsonl", "{ \"type\": \"Feature\" }\n{ \"type\": \"Feature\" }\n");

            var ok2 = jsonlFactory.TryCreateForInput(jsonl, out _, out _, out _);

            Assert.True(ok2);
            Assert.Equal("GeoJsonSeq", jsonlFactory.LastRequestedKey, ignoreCase: true);
        }

        /// <summary>
        /// Verify that a zip containing .shp, .shx and .dbf entries resolves to the Shapefile converter key.
        /// </summary>
        [Fact(DisplayName = "Archive shapefile components detected as Shapefile")]
        public void Archive_Shapefile_Detected_As_Shapefile()
        {
            var f = new FakeFactory();
            var zip = CreateZipWithEntries(("a.shp", "shp"), ("a.shx", "shx"), ("a.dbf", "dbf"));

            var ok = f.TryCreateForInput(zip, out var conv, out _, out _);

            Assert.True(ok);
            Assert.NotNull(conv);
            Assert.Equal("Shapefile", f.LastRequestedKey, ignoreCase: true);
        }

        /// <summary>
        /// Verify that the TryCreateForInput overload without diagnostic out-parameters still returns a converter
        /// and requests the expected key.
        /// </summary>
        [Fact(DisplayName = "TryCreateForInput overload without diagnostics returns converter")]
        public void TryCreateForInput_Overload_NoDiagnostics()
        {
            var f = new FakeFactory();
            var file = CreateTempFile(".geojson", "{ \"type\": \"FeatureCollection\", \"features\": [] }");

            // Use the lightweight overload that doesn't return detectedSourceFormat/reason
            var simpleOk = ConverterFactoryInputExtensions.TryCreateForInput(f, file, out var convSimple);

            Assert.True(simpleOk);
            Assert.NotNull(convSimple);
            Assert.Equal("GeoJson", f.LastRequestedKey, ignoreCase: true);
        }
    }
}