using System.IO.Compression;
using System.Text;
using GitConverter.Lib.Converters;
using GitConverter.Lib.Factories;
using GitConverter.Lib.Models;
namespace GitConverter.TestsApp.Factories
{
/// <summary>
/// Unit tests for <see cref="ConverterFactoryInputExtensions"/> focusing on input-based detection
/// (single-file JSON classification and archive inspection) used to resolve an appropriate
/// <see cref="IConverter"/> for a given input path.
/// </summary>
/// <remarks>
/// <para>
/// Purpose
/// - Verify the detector logic that chooses a converter based on the input artifact rather than
/// an explicit user-supplied format option. This covers both single-file inputs (including
/// JSON content inspection) and archive inputs where entries are inspected without extraction.
/// </para>
/// <para>
/// Scope
/// - Classification of JSON variants: GeoJSON (FeatureCollection/Feature), EsriJSON (spatialReference or geometryType),
/// TopoJSON (Topology), and GeoJSON Sequence / NDJSON (array or newline-delimited JSON objects).
/// - Archive detection: extension collection, KMZ detection (doc.kml), JSON voting across multiple entries,
/// and strict requirement matching for multi-file formats (e.g., Shapefile .shp/.shx/.dbf, FileGDB markers).
/// - Defensive validation: missing files, zero-byte files, files without extensions, corrupted/truncated inputs, and
/// ambiguous or tie scenarios during voting.
/// </para>
/// <para>
/// Test strategy
/// - Use a lightweight FakeFactory that implements <see cref="IConverterFactory"/> and records
/// the last requested converter key. This isolates the detection logic from actual converter implementations
/// (no Aspose dependencies) and lets tests assert which key the detector attempted to resolve.
/// - Create temporary files and zip archives under a per-test unique temp folder (system temp) and remove them
/// in <see cref="Dispose"/> to avoid leaving artifacts on the developer machine or CI agents.
/// - Keep test inputs minimal but representative; for header-sniffing tests ensure content exceeds the minimum
/// sniff length to exercise the heuristics.
/// - Assertions focus on boolean success/failure of <c>TryCreateForInput</c>,
/// the requested converter key captured by the fake factory, and stable substrings in the detect reason to avoid
/// brittle text comparisons.
/// </para>
/// <para>
/// Implementation notes &amp; assumptions
/// - Tests assume header-based JSON sniffing reads up to a fixed number of bytes and that NDJSON detection
/// may parse the first non-empty line when full-document parsing fails.
/// - Archive voting uses a deterministic tiebreaker preferring more specific formats (EsriJson &gt; TopoJson &gt; GeoJson &gt; GeoJsonSeq).
/// - The fake factory intentionally returns a dummy converter for any requested key to allow callers to proceed when detection succeeds.
/// </para>
/// <para>
/// Extensibility
/// - When adding tests for new formats follow the established pattern: small temp file or zip, call TryCreateForInput,
/// assert the factory was asked for the expected converter key and that the detection reason contains a stable diagnostic.
/// </para>
/// <para>
/// Environment &amp; CI considerations
/// - Tests are deterministic and do not require network access or private data. Integration tests that require large or private
/// artifacts should be placed under the TestData folder and guarded to skip in CI when samples are absent.
/// </para>
/// </remarks>
public class ConverterFactoryInputExtensionsTests : IDisposable
{
/// <summary>Scratch directory owned by this test instance; all temp files and archives are created inside it.</summary>
private readonly string _tmpFolder;

/// <summary>
/// Creates a uniquely named scratch directory under the system temp path
/// (<c>GitConverter.Tests/&lt;guid&gt;</c>) so that test instances never share files.
/// </summary>
public ConverterFactoryInputExtensionsTests()
{
    var uniqueName = Guid.NewGuid().ToString("N");
    var root = Path.GetTempPath();

    _tmpFolder = Path.Combine(root, "GitConverter.Tests", uniqueName);
    Directory.CreateDirectory(_tmpFolder);
}
/// <summary>
/// Deletes the scratch directory (recursively) when the test instance is disposed.
/// </summary>
/// <remarks>
/// Cleanup is best-effort: a locked file or a permission problem must not fail the test that just ran,
/// so <see cref="IOException"/> and <see cref="UnauthorizedAccessException"/> are tolerated.
/// Any other exception type is left to propagate instead of being silently swallowed.
/// </remarks>
public void Dispose()
{
    try
    {
        if (Directory.Exists(_tmpFolder))
        {
            Directory.Delete(_tmpFolder, recursive: true);
        }
    }
    catch (IOException)
    {
        // Best-effort: directory in use, or it disappeared between the check and the delete.
    }
    catch (UnauthorizedAccessException)
    {
        // Best-effort: a read-only or otherwise protected entry is left behind.
    }
}
/// <summary>
/// Writes <paramref name="content"/> to a new, uniquely named file inside the scratch directory.
/// </summary>
/// <param name="extension">Suffix appended verbatim to the generated file name (callers in this class pass it with the leading dot, e.g. <c>.json</c>).</param>
/// <param name="content">Text to write; <c>null</c> is written as an empty string.</param>
/// <returns>Full path of the file that was written.</returns>
private string CreateTempFile(string extension, string content)
{
    var fileName = Guid.NewGuid().ToString("N") + extension;
    var fullPath = Path.Combine(_tmpFolder, fileName);
    var text = content ?? string.Empty;

    // NOTE(review): Encoding.UTF8 may emit a byte-order mark; confirm the detector is meant to see one.
    File.WriteAllText(fullPath, text, Encoding.UTF8);
    return fullPath;
}
/// <summary>
/// Writes <paramref name="content"/> to a new file in the scratch directory whose name is a bare GUID
/// (no extension at all).
/// </summary>
/// <param name="content">Text to write; <c>null</c> is written as an empty string.</param>
/// <returns>Full path of the file that was written.</returns>
private string CreateTempFileNoExtension(string content)
{
    var bareName = Guid.NewGuid().ToString("N");
    var fullPath = Path.Combine(_tmpFolder, bareName);
    var text = content ?? string.Empty;

    File.WriteAllText(fullPath, text, Encoding.UTF8);
    return fullPath;
}
/// <summary>
/// Creates a zero-byte file in the scratch directory. The generated name is a bare GUID, so the file
/// also has no extension.
/// </summary>
/// <returns>Full path of the empty file.</returns>
private string CreateZeroByteFile()
{
    var path = Path.Combine(_tmpFolder, Guid.NewGuid().ToString("N"));

    // Array.Empty needs its element type spelled out: there is no argument to infer it from.
    File.WriteAllBytes(path, Array.Empty<byte>());
    return path;
}
/// <summary>
/// Builds a <c>.zip</c> archive in the scratch directory containing one entry per supplied tuple.
/// </summary>
/// <param name="entries">
/// Entry name and text content pairs. A <c>null</c> content is written as an empty string.
/// </param>
/// <returns>Full path of the archive that was created.</returns>
private string CreateZipWithEntries(params (string Name, string Content)[] entries)
{
    var archivePath = Path.Combine(_tmpFolder, Guid.NewGuid().ToString("N") + ".zip");

    // Both are disposed when the method returns, which finalizes the archive on disk before the caller uses it.
    using var fileStream = File.Create(archivePath);
    using var archive = new ZipArchive(fileStream, ZipArchiveMode.Create, leaveOpen: false);

    foreach (var (name, content) in entries)
    {
        var entry = archive.CreateEntry(name, CompressionLevel.Fastest);

        // Scoped to the loop body so each entry stream is closed before the next entry is created.
        using var entryStream = entry.Open();
        using var writer = new StreamWriter(entryStream, Encoding.UTF8);
        writer.Write(content ?? string.Empty);
    }

    return archivePath;
}
/// <summary>
/// Test double for <see cref="IConverterFactory"/> that records which converter key was requested
/// and always hands back a <see cref="DummyConverter"/>.
/// </summary>
private class FakeFactory : IConverterFactory
{
    /// <summary>Key passed to the most recent <see cref="TryCreate"/> call; <c>null</c> until it is called.</summary>
    public string? LastRequestedKey { get; private set; }

    /// <summary>Converter returned by the most recent <see cref="TryCreate"/> call; <c>null</c> until it is called.</summary>
    public IConverter? CreatedConverter { get; private set; }

    /// <summary>Not supported by this fake: always throws <see cref="KeyNotFoundException"/>.</summary>
    public IConverter Create(string formatOption) => throw new KeyNotFoundException();

    /// <summary>
    /// Records <paramref name="formatOption"/> and succeeds unconditionally with a new <see cref="DummyConverter"/>.
    /// </summary>
    /// <param name="formatOption">Converter key being requested.</param>
    /// <param name="converter">Receives the dummy converter created for the key.</param>
    /// <returns>Always <c>true</c>.</returns>
    public bool TryCreate(string formatOption, out IConverter converter)
    {
        LastRequestedKey = formatOption;
        converter = new DummyConverter(formatOption);
        CreatedConverter = converter;
        return true;
    }

    /// <summary>Returns an empty collection; the fake advertises no options.</summary>
    public System.Collections.Generic.IReadOnlyCollection<string> GetSupportedOptions() =>
        Array.Empty<string>();
}
/// <summary>
/// Minimal <see cref="IConverter"/> used by the fake factory; it performs no conversion work.
/// </summary>
private class DummyConverter : IConverter
{
    /// <summary>Creates a dummy converter remembering the key it was requested for.</summary>
    public DummyConverter(string option)
    {
        Option = option;
    }

    /// <summary>Converter key this instance was created for.</summary>
    public string Option { get; }

    /// <summary>Ignores every argument and reports success with the message "ok".</summary>
    public ConversionResult Convert(string gisInputFilePath, string gisSourceFormatOption, string gisTargetFormatOption, string outputFolderPath, string tempFolderPath)
        => ConversionResult.Success("ok");
}
/// <summary>
/// A file with the explicit <c>.geojson</c> extension resolves to the GeoJson converter key.
/// </summary>
/// <remarks>
/// Purpose: cover extension-based mapping for <c>.geojson</c> inputs.
/// Expected: <c>TryCreateForInput</c> returns true, the fake factory is asked for "GeoJson", and the
/// reason is non-empty and mentions "Mapped extension".
/// </remarks>
[Fact(DisplayName = "Explicit .geojson extension maps to GeoJson converter")]
public void GeoJson_Extension_Mapped()
{
    // Arrange
    var factory = new FakeFactory();
    var inputPath = CreateTempFile(".geojson", "{ \"type\": \"FeatureCollection\", \"features\": [] }");

    // Act
    var detected = factory.TryCreateForInput(inputPath, out var converter, out _, out var reason);

    // Assert
    Assert.True(detected);
    Assert.NotNull(converter);
    Assert.Equal("GeoJson", factory.LastRequestedKey, ignoreCase: true);

    // The reason must exist and indicate that the extension mapping produced the result.
    Assert.False(string.IsNullOrWhiteSpace(reason), "Expected non-empty detection reason.");
    Assert.Contains("Mapped extension", reason, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// A file with the explicit <c>.esrijson</c> extension resolves to the EsriJson converter key.
/// </summary>
/// <remarks>
/// Purpose: cover extension-based mapping for <c>.esrijson</c> inputs.
/// Expected: <c>TryCreateForInput</c> returns true, the fake factory is asked for "EsriJson", and the
/// reason mentions "Mapped extension".
/// </remarks>
[Fact(DisplayName = "Explicit .esrijson extension maps to EsriJson converter")]
public void EsriJson_Extension_Mapped()
{
    // Arrange
    var factory = new FakeFactory();
    var inputPath = CreateTempFile(".esrijson", "{ \"spatialReference\": { \"wkid\": 4326 } }");

    // Act
    var detected = factory.TryCreateForInput(inputPath, out var converter, out _, out var reason);

    // Assert
    Assert.True(detected);
    Assert.NotNull(converter);
    Assert.Equal("EsriJson", factory.LastRequestedKey, ignoreCase: true);
    Assert.Contains("Mapped extension", reason, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// A generic <c>.json</c> file holding a FeatureCollection is classified as GeoJson.
/// </summary>
/// <remarks>
/// Purpose: cover content-based JSON detection for a typical GeoJSON document.
/// Expected: <c>TryCreateForInput</c> returns true, the fake factory is asked for "GeoJson", and the
/// reason mentions "Detected JSON format".
/// </remarks>
[Fact(DisplayName = "Generic .json with FeatureCollection detected as GeoJson")]
public void Json_FeatureCollection_Detected_As_GeoJson()
{
    // Arrange
    var factory = new FakeFactory();
    var inputPath = CreateTempFile(".json", "{ \"type\": \"FeatureCollection\", \"features\": [] }");

    // Act
    var detected = factory.TryCreateForInput(inputPath, out var converter, out _, out var reason);

    // Assert
    Assert.True(detected);
    Assert.NotNull(converter);
    Assert.Equal("GeoJson", factory.LastRequestedKey, ignoreCase: true);
    Assert.Contains("Detected JSON format", reason, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// A generic <c>.json</c> file containing a <c>spatialReference</c> member is classified as EsriJson.
/// </summary>
/// <remarks>
/// Purpose: cover the EsriJSON fingerprint based on "spatialReference".
/// Expected: <c>TryCreateForInput</c> returns true, the fake factory is asked for "EsriJson", and the
/// reason mentions "Detected JSON format".
/// </remarks>
[Fact(DisplayName = "Generic .json with spatialReference detected as EsriJson")]
public void Json_With_SpatialReference_Detected_As_EsriJson()
{
    // Arrange
    var factory = new FakeFactory();
    var inputPath = CreateTempFile(".json", "{ \"spatialReference\": { \"wkid\": 3857 }, \"features\": [] }");

    // Act
    var detected = factory.TryCreateForInput(inputPath, out var converter, out _, out var reason);

    // Assert
    Assert.True(detected);
    Assert.NotNull(converter);
    Assert.Equal("EsriJson", factory.LastRequestedKey, ignoreCase: true);
    Assert.Contains("Detected JSON format", reason, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// A <c>.json</c> file made of several newline-delimited JSON objects is classified as GeoJsonSeq.
/// </summary>
/// <remarks>
/// Purpose: cover NDJSON / GeoJSON sequence detection when multiple JSON lines are present.
/// Expected: <c>TryCreateForInput</c> returns true, the fake factory is asked for "GeoJsonSeq", and the
/// reason mentions that key.
/// </remarks>
[Fact(DisplayName = "NDJSON (.json with multiple JSON lines) detected as GeoJsonSeq")]
public void Json_Ndjson_Detected_As_GeoJsonSeq()
{
    // Arrange: two identical Feature lines, each terminated by a newline.
    const string featureLine = "{\"type\":\"Feature\",\"properties\":{}}";
    var factory = new FakeFactory();
    var inputPath = CreateTempFile(".json", featureLine + "\n" + featureLine + "\n");

    // Act
    var detected = factory.TryCreateForInput(inputPath, out var converter, out _, out var reason);

    // Assert
    Assert.True(detected);
    Assert.NotNull(converter);
    Assert.Equal("GeoJsonSeq", factory.LastRequestedKey, ignoreCase: true);

    // The reason text may originate from the JsonFormatDetector or from header sniffing, so only the
    // stable detected key is checked rather than a phrase such as "NDJSON".
    Assert.Contains("GeoJsonSeq", reason, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// A generic <c>.json</c> file with a Topology fingerprint is classified as TopoJson.
/// </summary>
/// <remarks>
/// Purpose: cover TopoJSON detection via topology-related keywords.
/// Expected: <c>TryCreateForInput</c> returns true, the fake factory is asked for "TopoJson", and the
/// reason mentions "Detected JSON format".
/// </remarks>
[Fact(DisplayName = "TopoJSON fingerprint detected from .json header")]
public void Json_TopoJson_Detected_As_TopoJson()
{
    // Arrange
    var factory = new FakeFactory();
    var inputPath = CreateTempFile(".json", "{ \"type\": \"Topology\", \"topology\": {} }");

    // Act
    var detected = factory.TryCreateForInput(inputPath, out var converter, out _, out var reason);

    // Assert
    Assert.True(detected);
    Assert.NotNull(converter);
    Assert.Equal("TopoJson", factory.LastRequestedKey, ignoreCase: true);
    Assert.Contains("Detected JSON format", reason, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// A zero-byte input is rejected by detection.
/// </summary>
/// <remarks>
/// Purpose: confirm that an empty file fails detection with an explanatory reason.
/// Note: the helper's file also has no extension; this test expects the empty-file reason to be the
/// one reported.
/// Expected: <c>TryCreateForInput</c> returns false, the converter is null, and the reason mentions
/// "file is empty".
/// </remarks>
[Fact(DisplayName = "Empty (zero-byte) file is rejected")]
public void EmptyFile_IsRejected()
{
    // Arrange
    var factory = new FakeFactory();
    var emptyPath = CreateZeroByteFile();

    // Act
    var detected = factory.TryCreateForInput(emptyPath, out var converter, out _, out var reason);

    // Assert
    Assert.False(detected);
    Assert.Null(converter);
    Assert.Contains("file is empty", reason, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// An input file that has no extension is rejected by detection.
/// </summary>
/// <remarks>
/// Purpose: make sure extension-less inputs do not silently fall through to ambiguous detection,
/// even when their content looks like GeoJSON.
/// Expected: <c>TryCreateForInput</c> returns false, the converter is null, and the reason mentions
/// "no extension".
/// </remarks>
[Fact(DisplayName = "File without extension is rejected")]
public void FileWithoutExtension_IsRejected()
{
    // Arrange
    var factory = new FakeFactory();
    var inputPath = CreateTempFileNoExtension("{ \"type\": \"FeatureCollection\" }");

    // Act
    var detected = factory.TryCreateForInput(inputPath, out var converter, out _, out var reason);

    // Assert
    Assert.False(detected);
    Assert.Null(converter);
    Assert.Contains("no extension", reason, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// An archive whose JSON entries carry no recognizable fingerprint fails detection (no winner).
/// </summary>
/// <remarks>
/// Purpose: cover archive JSON voting when no entry can be classified.
/// Expected: <c>TryCreateForInput</c> returns false, the converter is null, and the reason mentions
/// either "no json entries" or "ambiguous".
/// </remarks>
[Fact(DisplayName = "Archive with only unknown JSON entries fails detection")]
public void Archive_With_UnknownJsonEntries_Fails()
{
    // Arrange: tiny JSON documents without any distinguishing fingerprint.
    var factory = new FakeFactory();
    var archivePath = CreateZipWithEntries(
        ("a.json", "{ \"foo\": \"bar\" }"),
        ("b.json", "{ \"x\": 1 }")
    );

    // Act
    var detected = factory.TryCreateForInput(archivePath, out var converter, out _, out var reason);

    // Assert
    Assert.False(detected);
    Assert.Null(converter);

    // Either wording is accepted; a null reason is a failure.
    var reasonIsExpected = false;
    if (reason != null)
    {
        var mentionsNoJson = reason.IndexOf("no json entries", StringComparison.OrdinalIgnoreCase) >= 0;
        var mentionsAmbiguous = reason.IndexOf("ambiguous", StringComparison.OrdinalIgnoreCase) >= 0;
        reasonIsExpected = mentionsNoJson || mentionsAmbiguous;
    }

    Assert.True(reasonIsExpected, $"Unexpected detectReason: {reason}");
}
/// <summary>
/// When archive JSON voting ends in a tie, the deterministic tiebreaker picks EsriJson over GeoJson.
/// </summary>
/// <remarks>
/// Purpose: confirm tie resolution is deterministic and favors the more specific format.
/// Construction: a zip with two JSON entries, one carrying a "spatialReference" marker and one carrying a
/// "FeatureCollection" marker. Each entry is padded to at least 700 characters so that header-sniffing
/// heuristics have enough content to work with.
/// Expected: <c>TryCreateForInput</c> returns true and the fake factory is asked for "EsriJson".
/// </remarks>
[Fact(DisplayName = "Archive with tied JSON votes uses tiebreaker (EsriJson preferred)")]
public void Archive_TiedJsonVotes_Tiebreaker()
{
    // Arrange
    var factory = new FakeFactory();
    var esriEntry = MakeLarge("spatialReference"); // Esri marker
    var geoEntry = MakeLarge("FeatureCollection"); // GeoJSON marker
    var archivePath = CreateZipWithEntries(
        ("esri.json", esriEntry),
        ("geo.json", geoEntry)
    );

    // Act
    var detected = factory.TryCreateForInput(archivePath, out var converter, out _, out var reason);

    // Assert
    Assert.True(detected, $"Expected detection to succeed via tiebreaker; reason: {reason}");
    Assert.NotNull(converter);
    Assert.Equal("EsriJson", factory.LastRequestedKey, ignoreCase: true);

    // Builds a JSON object that starts with the marker member and is padded with filler members
    // until the text reaches the minimum length, then closed with an "end" member.
    string MakeLarge(string marker)
    {
        const int minimumLength = 700;

        var json = new StringBuilder("{");
        json.Append($"\"{marker}\": true,");
        for (; json.Length < minimumLength;)
        {
            json.Append("\"padding\":\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\",");
        }

        json.Append("\"end\":true}");
        return json.ToString();
    }
}
/// <summary>
/// A syntactically truncated <c>.json</c> file is not misclassified; detection fails.
/// </summary>
/// <remarks>
/// Purpose: feed a JSON document that is cut off mid-structure and confirm a graceful failure.
/// Expected: <c>TryCreateForInput</c> returns false, the converter is null, and the reason mentions
/// "could not be determined".
/// </remarks>
[Fact(DisplayName = "Corrupted / truncated JSON file is rejected")]
public void Corrupted_Truncated_Json_IsRejected()
{
    // Arrange: the document stops inside the first feature object (brackets and braces never close).
    const string truncatedJson = "{ \"type\": \"FeatureCollection\", \"features\": [ { \"type\": \"Feature\" ";
    var factory = new FakeFactory();
    var inputPath = CreateTempFile(".json", truncatedJson);

    // Act
    var detected = factory.TryCreateForInput(inputPath, out var converter, out _, out var reason);

    // Assert
    Assert.False(detected);
    Assert.Null(converter);
    Assert.Contains("could not be determined", reason, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// A large minified (no-whitespace) GeoJSON document in a <c>.json</c> file is classified as GeoJson.
/// </summary>
/// <remarks>
/// Purpose: confirm detection still works on minified JSON when the identifying tokens are present
/// and the document is large.
/// Construction: a FeatureCollection with 40 Point features. Coordinates are formatted with the
/// invariant culture so the generated document is identical on every machine; formatting with the
/// current culture would emit a decimal comma on some locales and change the coordinate array.
/// Expected: <c>TryCreateForInput</c> returns true and the fake factory is asked for "GeoJson".
/// </remarks>
[Fact(DisplayName = "Large minified GeoJSON (no whitespace) is classified as GeoJson")]
public void Minified_Large_Json_Detected_As_GeoJson()
{
    const int featureCount = 40;
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var f = new FakeFactory();

    // Build a minified FeatureCollection with many features to exceed the classifier threshold.
    var sb = new StringBuilder();
    sb.Append("{\"type\":\"FeatureCollection\",\"features\":[");
    for (int i = 0; i < featureCount; i++)
    {
        sb.Append("{\"type\":\"Feature\",\"geometry\":{\"type\":\"Point\",\"coordinates\":[");
        sb.Append(i.ToString(invariant));
        sb.Append(",");
        // Culture-invariant: always "1.1", never "1,1".
        sb.Append((i + 0.1).ToString(invariant));
        sb.Append("]},\"properties\":{\"id\":");
        sb.Append(i.ToString(invariant));
        sb.Append("}}");
        if (i < featureCount - 1)
        {
            sb.Append(",");
        }
    }
    sb.Append("]}");

    var file = CreateTempFile(".json", sb.ToString());

    var ok = f.TryCreateForInput(file, out var conv, out _, out var reason);

    Assert.True(ok, $"Expected minified large GeoJSON to be detected. Reason: {reason}");
    Assert.NotNull(conv);
    Assert.Equal("GeoJson", f.LastRequestedKey, ignoreCase: true);
}
/// <summary>
/// KMZ inputs are recognized both by a <c>.kmz</c> outer extension and by a <c>doc.kml</c> entry
/// inside a plain <c>.zip</c>.
/// </summary>
/// <remarks>
/// Each scenario uses its own fake factory so that the asserted key can only come from that
/// scenario's detection call, never from state left behind by the other one.
/// Expected: both calls return true with a converter, and the fake factory is asked for "Kmz".
/// </remarks>
[Fact(DisplayName = "KMZ detection by outer extension and doc.kml inside archive")]
public void Kmz_Detected_By_OuterExtension_And_DocKml()
{
    // Scenario 1: a zip containing doc.kml, copied to a sibling path with the .kmz extension.
    var kmzFactory = new FakeFactory();
    var sourceZip = CreateZipWithEntries(("doc.kml", ""));
    var kmzPath = Path.ChangeExtension(sourceZip, ".kmz");
    File.Copy(sourceZip, kmzPath);

    var kmzDetected = kmzFactory.TryCreateForInput(kmzPath, out var kmzConverter, out _, out _);

    Assert.True(kmzDetected);
    Assert.NotNull(kmzConverter);
    Assert.Equal("Kmz", kmzFactory.LastRequestedKey, ignoreCase: true);

    // Scenario 2: a regular .zip containing doc.kml is treated as KMZ by content.
    var zipFactory = new FakeFactory();
    var plainZip = CreateZipWithEntries(("doc.kml", ""));

    var zipDetected = zipFactory.TryCreateForInput(plainZip, out var zipConverter, out _, out _);

    Assert.True(zipDetected);
    Assert.NotNull(zipConverter);
    Assert.Equal("Kmz", zipFactory.LastRequestedKey, ignoreCase: true);
}
/// <summary>
/// Files with the <c>.ndjson</c> or <c>.jsonl</c> extension resolve to the GeoJsonSeq converter key.
/// </summary>
/// <remarks>
/// Every extension is checked against a fresh fake factory so the asserted key cannot be a leftover
/// from the previous extension's call.
/// Expected: each call returns true with a converter, and the fake factory is asked for "GeoJsonSeq".
/// </remarks>
[Fact(DisplayName = ".ndjson and .jsonl extensions map to GeoJsonSeq")]
public void Ndjson_Extensions_Mapped_To_GeoJsonSeq()
{
    const string content = "{ \"type\": \"Feature\" }\n{ \"type\": \"Feature\" }\n";

    foreach (var extension in new[] { ".ndjson", ".jsonl" })
    {
        var f = new FakeFactory();
        var file = CreateTempFile(extension, content);

        var ok = f.TryCreateForInput(file, out var conv, out _, out var reason);

        Assert.True(ok, $"Expected {extension} to be detected. Reason: {reason}");
        Assert.NotNull(conv);
        Assert.Equal("GeoJsonSeq", f.LastRequestedKey, ignoreCase: true);
    }
}
/// <summary>
/// An archive holding <c>.shp</c>, <c>.shx</c> and <c>.dbf</c> entries is detected as a Shapefile.
/// </summary>
/// <remarks>
/// The entries contain placeholder text only; the test relies on the entry names.
/// Expected: <c>TryCreateForInput</c> returns true with a converter and the fake factory is asked
/// for "Shapefile".
/// </remarks>
[Fact(DisplayName = "Archive shapefile components detected as Shapefile")]
public void Archive_Shapefile_Detected_As_Shapefile()
{
    // Arrange
    var factory = new FakeFactory();
    var archivePath = CreateZipWithEntries(
        ("a.shp", "shp"),
        ("a.shx", "shx"),
        ("a.dbf", "dbf"));

    // Act
    var detected = factory.TryCreateForInput(archivePath, out var converter, out _, out _);

    // Assert
    Assert.True(detected);
    Assert.NotNull(converter);
    Assert.Equal("Shapefile", factory.LastRequestedKey, ignoreCase: true);
}
/// <summary>
/// The lightweight <c>TryCreateForInput</c> overload (no detected format or reason outputs) still
/// yields a converter.
/// </summary>
/// <remarks>
/// Expected: the call returns true with a converter and the fake factory is asked for "GeoJson".
/// </remarks>
[Fact(DisplayName = "TryCreateForInput overload without diagnostics returns converter")]
public void TryCreateForInput_Overload_NoDiagnostics()
{
    // Arrange
    var factory = new FakeFactory();
    var inputPath = CreateTempFile(".geojson", "{ \"type\": \"FeatureCollection\", \"features\": [] }");

    // Act: invoked through the static class so the three-argument overload is selected explicitly.
    var detected = ConverterFactoryInputExtensions.TryCreateForInput(factory, inputPath, out var converter);

    // Assert
    Assert.True(detected);
    Assert.NotNull(converter);
    Assert.Equal("GeoJson", factory.LastRequestedKey, ignoreCase: true);
}
}
}