using System;
using System.IO;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;

namespace GitConverter.Lib.Converters
{
    /// <summary>
    /// Helper that detects the specific JSON GIS format for a given JSON payload or file.
    /// </summary>
    /// <remarks>
    /// <para>
    /// JsonFormatDetector provides heuristic detection for a small set of common JSON-based
    /// GIS formats. Detection is based on the top-level JSON structure and property names only;
    /// when nothing matches, the detector returns <see cref="Format.Unknown"/>.
    /// </para>
    /// <para>
    /// Supported format detections:
    /// <list type="bullet">
    /// <item><description><see cref="Format.TopoJson"/>: objects whose top-level <c>"type"</c> is
    /// <c>"Topology"</c>.</description></item>
    /// <item><description><see cref="Format.GeoJson"/>: objects whose top-level <c>"type"</c> is
    /// <c>"FeatureCollection"</c>.</description></item>
    /// <item><description><see cref="Format.EsriJson"/>: objects that have a top-level
    /// <c>"features"</c> property and no decisive <c>"type"</c> value. Esri feature sets usually
    /// also carry <c>"spatialReference"</c>, but its presence is not required.</description></item>
    /// <item><description><see cref="Format.GeoJsonSeq"/>: a top-level JSON array, or a payload
    /// that is not a single JSON document but whose first non-empty line is a JSON value
    /// (newline-delimited JSON, optionally with RFC 8142 record-separator prefixes).</description></item>
    /// </list>
    /// </para>
    /// <para>
    /// Design notes and limitations:
    /// <list type="bullet">
    /// <item><description>The detector uses Newtonsoft.Json (Json.NET) and parses the provided payload
    /// into a <see cref="JToken"/>. For very large files or streaming scenarios callers should
    /// perform their own streaming-based sniffing rather than passing large payloads to these
    /// helpers.</description></item>
    /// <item><description>Property name and <c>"type"</c> value checks are case-insensitive. No deep
    /// content validation is performed.</description></item>
    /// <item><description>A decisive <c>"type"</c> value (<c>"Topology"</c> or
    /// <c>"FeatureCollection"</c>) always takes precedence over the presence of
    /// <c>"features"</c>/<c>"spatialReference"</c>, independent of the order in which the
    /// properties appear in the document.</description></item>
    /// <item><description>Use explicit format options when reliable conversion behavior is
    /// required.</description></item>
    /// </list>
    /// </para>
    /// <para>
    /// Thread-safety: the methods in this static helper keep no state between calls.
    /// </para>
    /// </remarks>
    public static class JsonFormatDetector
    {
        /// <summary>
        /// ASCII record separator (RS) that RFC 8142 GeoJSON text sequences place before each record.
        /// </summary>
        private const char RecordSeparator = '\u001e';

        /// <summary>
        /// Enumeration of detected JSON GIS formats.
        /// </summary>
        public enum Format
        {
            Unknown = 0,
            GeoJson,
            EsriJson,
            GeoJsonSeq,
            TopoJson
        }

        /// <summary>
        /// Detect format from a JSON file. Throws on IO errors.
        /// </summary>
        /// <param name="filePath">Path to the JSON file to inspect.</param>
        /// <returns>
        /// The detected <see cref="Format"/>. Returns <see cref="Format.Unknown"/> when the content
        /// cannot be classified.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="filePath"/> is null, empty or whitespace.
        /// </exception>
        /// <remarks>
        /// <para>
        /// This method reads the entire file into memory via <see cref="File.ReadAllText(string)"/>
        /// and calls <see cref="DetectFromString"/>. Exceptions originating from file I/O propagate
        /// to the caller; callers that prefer a non-throwing API should use
        /// <see cref="TryDetectFromFile"/>.
        /// </para>
        /// <example>
        /// <code>
        /// var fmt = JsonFormatDetector.DetectFromFile("data.geojson");
        /// if (fmt == JsonFormatDetector.Format.GeoJson) { /* handle GeoJSON */ }
        /// </code>
        /// </example>
        /// </remarks>
        public static Format DetectFromFile(string filePath)
        {
            if (string.IsNullOrWhiteSpace(filePath))
            {
                throw new ArgumentNullException(nameof(filePath));
            }

            var text = File.ReadAllText(filePath);
            return DetectFromString(text);
        }

        /// <summary>
        /// Non-throwing variant that attempts to detect format from a JSON file.
        /// </summary>
        /// <param name="filePath">Path to the JSON file to inspect.</param>
        /// <param name="result">
        /// When the method returns true this contains the detected format (may be
        /// <see cref="Format.Unknown"/>). It is <see cref="Format.Unknown"/> when the method
        /// returns false.
        /// </param>
        /// <returns>
        /// True when detection ran without exceptions; false when the path is blank, the file does
        /// not exist, or reading/detecting threw.
        /// </returns>
        public static bool TryDetectFromFile(string filePath, out Format result)
        {
            result = Format.Unknown;

            try
            {
                if (string.IsNullOrWhiteSpace(filePath) || !File.Exists(filePath))
                {
                    return false;
                }

                var text = File.ReadAllText(filePath);
                result = DetectFromString(text);
                return true;
            }
            catch (Exception)
            {
                // Deliberate catch-all: this is the non-throwing API, so any I/O or
                // detection failure is reported through the boolean return value.
                return false;
            }
        }

        /// <summary>
        /// Detect format from a JSON string payload.
        /// </summary>
        /// <param name="json">
        /// String containing JSON content; may represent an object, an array or a
        /// newline-delimited / RS-delimited JSON sequence.
        /// </param>
        /// <returns>
        /// The detected <see cref="Format"/> value. Returns <see cref="Format.Unknown"/> when the
        /// input is null/blank or cannot be classified.
        /// </returns>
        /// <remarks>
        /// <para>
        /// The detection strategy:
        /// <list type="number">
        /// <item><description>Attempt to parse the entire payload into a <see cref="JToken"/>.</description></item>
        /// <item><description>If that fails with <see cref="JsonReaderException"/>, probe the first
        /// non-empty line (after stripping RFC 8142 record separators); if it parses as a JSON value
        /// the payload is reported as <see cref="Format.GeoJsonSeq"/>, otherwise
        /// <see cref="Format.Unknown"/>.</description></item>
        /// <item><description>Top-level arrays are reported as <see cref="Format.GeoJsonSeq"/>.</description></item>
        /// <item><description>Top-level objects: a <c>"type"</c> of <c>"Topology"</c> yields
        /// <see cref="Format.TopoJson"/>, <c>"FeatureCollection"</c> yields
        /// <see cref="Format.GeoJson"/>; otherwise the presence of <c>"features"</c> yields
        /// <see cref="Format.EsriJson"/>.</description></item>
        /// <item><description>Any other top-level value yields <see cref="Format.Unknown"/>.</description></item>
        /// </list>
        /// </para>
        /// <para>
        /// Performance considerations: this method parses the provided string into a DOM-style JSON
        /// representation which may be memory intensive for large payloads.
        /// </para>
        /// </remarks>
        public static Format DetectFromString(string json)
        {
            if (string.IsNullOrWhiteSpace(json))
            {
                return Format.Unknown;
            }

            JToken token;
            try
            {
                token = JToken.Parse(json);
            }
            catch (JsonReaderException)
            {
                // Not a single JSON document; it may still be a JSON sequence (NDJSON / RFC 8142).
                return LooksLikeJsonSequence(json) ? Format.GeoJsonSeq : Format.Unknown;
            }

            if (token.Type == JTokenType.Array)
            {
                return Format.GeoJsonSeq;
            }

            var obj = token as JObject;
            return obj == null ? Format.Unknown : ClassifyObject(obj);
        }

        /// <summary>
        /// Probes whether the first non-empty line of <paramref name="text"/> is a standalone JSON value.
        /// </summary>
        /// <param name="text">Payload that failed to parse as a single JSON document.</param>
        /// <returns>True when the first non-empty line parses as JSON; otherwise false.</returns>
        private static bool LooksLikeJsonSequence(string text)
        {
            var firstLine = ReadFirstNonEmptyLine(text);
            if (firstLine == null)
            {
                return false;
            }

            try
            {
                // RFC 8142 sequences start every record with RS (U+001E), which Json.NET
                // rejects, so strip it (and surrounding whitespace) before parsing.
                JToken.Parse(firstLine.Trim().Trim(RecordSeparator));
                return true;
            }
            catch (JsonException)
            {
                return false;
            }
        }

        /// <summary>
        /// Classifies a top-level JSON object by its <c>"type"</c> value and <c>"features"</c> property.
        /// </summary>
        /// <param name="obj">The parsed top-level object.</param>
        /// <returns>The detected format, or <see cref="Format.Unknown"/> when nothing matches.</returns>
        private static Format ClassifyObject(JObject obj)
        {
            var hasFeatures = false;

            foreach (var prop in obj.Properties())
            {
                var name = prop.Name;

                if (string.Equals(name, "type", StringComparison.OrdinalIgnoreCase))
                {
                    var typeValue = prop.Value?.ToString();

                    if (string.Equals(typeValue, "Topology", StringComparison.OrdinalIgnoreCase))
                    {
                        return Format.TopoJson;
                    }

                    if (string.Equals(typeValue, "FeatureCollection", StringComparison.OrdinalIgnoreCase))
                    {
                        return Format.GeoJson;
                    }
                }
                else if (string.Equals(name, "features", StringComparison.OrdinalIgnoreCase))
                {
                    // Do not return yet: a decisive "type" may still follow, and JSON member
                    // order must not influence the result.
                    hasFeatures = true;
                }
            }

            // "features" without a decisive "type" is treated as Esri JSON, whether or not
            // "spatialReference" is present.
            return hasFeatures ? Format.EsriJson : Format.Unknown;
        }

        /// <summary>
        /// Return the first non-empty line from the provided text (used to sniff NDJSON).
        /// </summary>
        /// <param name="text">Input text to search for a non-empty line.</param>
        /// <returns>
        /// The first line containing non-whitespace characters, or null when none is found.
        /// </returns>
        private static string ReadFirstNonEmptyLine(string text)
        {
            using (var reader = new StringReader(text))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (!string.IsNullOrWhiteSpace(line))
                    {
                        return line;
                    }
                }
            }

            return null;
        }
    }
}