using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.IO;
namespace GitConverter.Lib.Converters
{
/// <summary>
/// Helper that detects the specific JSON GIS format for a given JSON payload or file.
/// </summary>
/// <remarks>
/// <para>
/// <see cref="JsonFormatDetector"/> provides heuristic detection for a small set of common
/// JSON-based GIS formats. Detection is based on top-level JSON structure and property names
/// only; when nothing matches the detector returns <see cref="Format.Unknown"/> rather than
/// guessing.
/// </para>
/// <para>Supported format detections:</para>
/// <list type="bullet">
/// <item><description><see cref="Format.TopoJson"/>: a top-level object whose "type" is "Topology".</description></item>
/// <item><description><see cref="Format.GeoJson"/>: a top-level object whose "type" is "FeatureCollection".
/// Other GeoJSON object types (single features, bare geometries) are not recognized.</description></item>
/// <item><description><see cref="Format.EsriJson"/>: a top-level object with a "features" property and no
/// decisive "type" value.</description></item>
/// <item><description><see cref="Format.GeoJsonSeq"/>: a top-level JSON array, or input that is not a single
/// JSON document but whose first non-empty line is a JSON object (newline-delimited JSON, with or
/// without the RFC 8142 record separator).</description></item>
/// </list>
/// <para>Design notes and limitations:</para>
/// <list type="bullet">
/// <item><description>The detector uses Newtonsoft.Json (Json.NET) and parses the whole payload into a
/// <see cref="JToken"/>. For very large files callers should perform their own streaming-based
/// sniffing rather than passing large payloads to these helpers.</description></item>
/// <item><description>Property names and "type" values are compared case-insensitively. No validation is
/// performed beyond these top-level presence checks.</description></item>
/// <item><description>The result does not depend on the order of members inside the top-level object:
/// a decisive "type" value always wins over the presence of "features".</description></item>
/// </list>
/// <para>
/// Thread-safety: the class holds no mutable state, so its methods are safe for concurrent use.
/// </para>
/// </remarks>
public static class JsonFormatDetector
{
    // Characters stripped from a candidate sequence record before it is parsed: the RFC 8142
    // record separator (U+001E), which .NET does not treat as white space, plus plain white space.
    private static readonly char[] RecordTrimChars = { '\u001e', ' ', '\t', '\r', '\n' };

    /// <summary>
    /// Enumeration of detected JSON GIS formats.
    /// </summary>
    public enum Format
    {
        /// <summary>The content could not be classified.</summary>
        Unknown = 0,

        /// <summary>GeoJSON feature collection.</summary>
        GeoJson,

        /// <summary>Esri JSON feature set.</summary>
        EsriJson,

        /// <summary>GeoJSON sequence (newline-delimited records or a top-level array).</summary>
        GeoJsonSeq,

        /// <summary>TopoJSON topology.</summary>
        TopoJson
    }

    /// <summary>
    /// Detect the format of a JSON file. Throws on invalid paths and I/O errors.
    /// </summary>
    /// <param name="filePath">Path to the JSON file to inspect.</param>
    /// <returns>
    /// The detected <see cref="Format"/>. Returns <see cref="Format.Unknown"/> when the content
    /// cannot be classified.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="filePath"/> is null, empty or consists only of white space.
    /// </exception>
    /// <remarks>
    /// Reads the entire file into memory via <see cref="File.ReadAllText(string)"/> and delegates to
    /// <see cref="DetectFromString(string)"/>. Exceptions raised while reading the file propagate to
    /// the caller; callers that prefer a non-throwing API should use
    /// <see cref="TryDetectFromFile(string, out Format)"/>.
    /// </remarks>
    /// <example>
    /// <code>
    /// var fmt = JsonFormatDetector.DetectFromFile("data.geojson");
    /// if (fmt == JsonFormatDetector.Format.GeoJson) { /* handle GeoJSON */ }
    /// </code>
    /// </example>
    public static Format DetectFromFile(string filePath)
    {
        // ArgumentNullException is kept for empty/white-space paths as well, so existing callers
        // that catch this exact type keep working.
        if (string.IsNullOrWhiteSpace(filePath))
        {
            throw new ArgumentNullException(nameof(filePath));
        }

        return DetectFromString(File.ReadAllText(filePath));
    }

    /// <summary>
    /// Non-throwing variant that attempts to detect the format of a JSON file.
    /// </summary>
    /// <param name="filePath">Path to the JSON file to inspect.</param>
    /// <param name="result">
    /// The detected format when the method returns true (may still be <see cref="Format.Unknown"/>);
    /// <see cref="Format.Unknown"/> when it returns false.
    /// </param>
    /// <returns>
    /// True when the file was read and inspected; false when the path is null/blank, the file does
    /// not exist, or reading/inspecting it raised an exception.
    /// </returns>
    public static bool TryDetectFromFile(string filePath, out Format result)
    {
        result = Format.Unknown;
        if (string.IsNullOrWhiteSpace(filePath))
        {
            return false;
        }

        try
        {
            if (!File.Exists(filePath))
            {
                return false;
            }

            result = DetectFromString(File.ReadAllText(filePath));
            return true;
        }
        catch (Exception)
        {
            // Deliberate best-effort: the contract of this method is "never throw", so any
            // failure while reading or inspecting the file is reported as false.
            result = Format.Unknown;
            return false;
        }
    }

    /// <summary>
    /// Detect the format of a JSON string payload.
    /// </summary>
    /// <param name="json">String containing JSON content; may be an object, an array or a JSON sequence.</param>
    /// <returns>
    /// The detected <see cref="Format"/>. Returns <see cref="Format.Unknown"/> when the input is
    /// null/blank or cannot be classified.
    /// </returns>
    /// <remarks>
    /// <para>Detection strategy:</para>
    /// <list type="number">
    /// <item><description>Parse the entire payload into a <see cref="JToken"/>.</description></item>
    /// <item><description>If that fails with <see cref="JsonReaderException"/>, probe the first non-empty
    /// line (ignoring a leading RFC 8142 record separator). When that line is a JSON object the
    /// payload is reported as <see cref="Format.GeoJsonSeq"/>; otherwise as
    /// <see cref="Format.Unknown"/>.</description></item>
    /// <item><description>A top-level array is reported as <see cref="Format.GeoJsonSeq"/>.</description></item>
    /// <item><description>A top-level object is classified from its properties, in this fixed order of
    /// precedence: "type" equal to "Topology" gives <see cref="Format.TopoJson"/>; "type" equal to
    /// "FeatureCollection" gives <see cref="Format.GeoJson"/>; a "features" property gives
    /// <see cref="Format.EsriJson"/>.</description></item>
    /// <item><description>Any other top-level value is <see cref="Format.Unknown"/>.</description></item>
    /// </list>
    /// <para>
    /// Performance: the payload is parsed into a DOM-style representation, which may be memory
    /// intensive for large inputs.
    /// </para>
    /// </remarks>
    public static Format DetectFromString(string json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return Format.Unknown;
        }

        JToken root;
        try
        {
            root = JToken.Parse(json);
        }
        catch (JsonReaderException)
        {
            // Not a single JSON document; it may still be a sequence of JSON records.
            return LooksLikeJsonSequence(json) ? Format.GeoJsonSeq : Format.Unknown;
        }

        if (root.Type == JTokenType.Array)
        {
            return Format.GeoJsonSeq;
        }

        var obj = root as JObject;
        return obj == null ? Format.Unknown : ClassifyObject(obj);
    }

    /// <summary>
    /// Classify a top-level JSON object independently of the order of its members.
    /// </summary>
    /// <param name="obj">The parsed top-level object.</param>
    /// <returns>The detected format, or <see cref="Format.Unknown"/> when nothing matches.</returns>
    private static Format ClassifyObject(JObject obj)
    {
        bool isTopology = false;
        bool isFeatureCollection = false;
        bool hasFeatures = false;

        // Collect evidence from every property first; deciding inside the loop would make the
        // outcome depend on member order, which JSON does not define.
        foreach (var prop in obj.Properties())
        {
            if (string.Equals(prop.Name, "type", StringComparison.OrdinalIgnoreCase))
            {
                var typeValue = prop.Value?.ToString();
                if (string.Equals(typeValue, "Topology", StringComparison.OrdinalIgnoreCase))
                {
                    isTopology = true;
                }
                else if (string.Equals(typeValue, "FeatureCollection", StringComparison.OrdinalIgnoreCase))
                {
                    isFeatureCollection = true;
                }
            }
            else if (string.Equals(prop.Name, "features", StringComparison.OrdinalIgnoreCase))
            {
                hasFeatures = true;
            }
        }

        if (isTopology)
        {
            return Format.TopoJson;
        }

        if (isFeatureCollection)
        {
            return Format.GeoJson;
        }

        // "features" without a decisive "type" is treated as an Esri feature set. A
        // "spatialReference" member is not required for this, so it is not inspected.
        return hasFeatures ? Format.EsriJson : Format.Unknown;
    }

    /// <summary>
    /// Probe whether text that failed to parse as one JSON document starts with a JSON object record.
    /// </summary>
    /// <param name="text">The raw payload.</param>
    /// <returns>True when the first non-empty line parses as a JSON object.</returns>
    private static bool LooksLikeJsonSequence(string text)
    {
        var firstRecord = ReadFirstNonEmptyLine(text);
        if (firstRecord == null)
        {
            return false;
        }

        try
        {
            // Bare scalars or arrays on the first line are not accepted as sequence records.
            return JToken.Parse(firstRecord) is JObject;
        }
        catch (JsonException)
        {
            return false;
        }
    }

    /// <summary>
    /// Return the first non-empty line from the provided text (used to sniff JSON sequences).
    /// </summary>
    /// <param name="text">Input text to search for a non-empty line.</param>
    /// <returns>
    /// The first line that still has content after surrounding white space and record-separator
    /// characters (U+001E) are removed, in that trimmed form; null when there is none.
    /// </returns>
    private static string ReadFirstNonEmptyLine(string text)
    {
        using (var reader = new StringReader(text))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                var record = line.Trim(RecordTrimChars);
                if (record.Length > 0)
                {
                    return record;
                }
            }
        }

        return null;
    }
}
}