using Microsoft.SharePoint.Client;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Xml;

namespace ConfluenceToSharePointMigrator
{
    /// <summary>
    /// Helpers that turn an exported Confluence HTML file into content for a SharePoint page:
    /// deriving the page name, trimming the HTML, and re-pointing attachment images.
    /// </summary>
    class HTMLFileProcess
    {
        /// <summary>
        /// Derives the page name from the HTML file name.
        /// "---" becomes " - ", every remaining "-" becomes a space, and everything from the
        /// last '_' (or, when there is no '_', from the last '.') to the end is dropped.
        /// A name containing neither separator is returned with only the dash substitutions applied.
        /// </summary>
        /// <param name="pageName">File name of the HTML file.</param>
        /// <param name="htmlFile">HTML file path. Not read by this method.</param>
        /// <returns>The derived page name.</returns>
        public string SharePointPageNameFromHtml(string pageName, string htmlFile)
        {
            // Protect "---" with a placeholder so the single-dash replacement does not consume it.
            string dashPlaceholder = "@$";
            pageName = pageName.Replace("---", dashPlaceholder).Replace("-", " ");

            int suffixStart = pageName.LastIndexOf('_');
            if (suffixStart < 0)
            {
                suffixStart = pageName.LastIndexOf('.');
            }

            // Cut by position: only the trailing suffix is removed, never an identical
            // piece of text that happens to appear earlier in the name.
            if (suffixStart >= 0)
            {
                pageName = pageName.Substring(0, suffixStart);
            }

            return pageName.Replace(dashPlaceholder, " - ");
        }

        /// <summary>
        /// Remove the unwanted content like header, author.
        /// Applies a fixed sequence of regex and string replacements to the HTML and returns the result.
        /// </summary>
        /// <param name="fileContent">html file content</param>
        /// <returns>The content after all replacements.</returns>
        // NOTE(review): the pattern literals in this method look damaged - several are empty or
        // contain raw line breaks, as if the HTML tags inside them were stripped from this copy.
        // The statements are kept exactly as found; restore the original patterns before relying
        // on this method (String.Replace with an empty search string throws ArgumentException).
        public string FrameHTMLContentForSharePointPage(string fileContent)
        {
            //Remove main-header
            Regex headerPattern = new Regex(@"
.*?\\d\\d, \\d\\d\\d\\d", "");
            //Remove title
            fileContent = Regex.Replace(fileContent, ".*?", "");
            //Remove attachment content section
            Regex attachmentPattern = new Regex("
(.*?\n.*?)*.*?");
            fileContent = attachmentPattern.Replace(fileContent, "
");
            //Remove the document generated information
            fileContent = Regex.Replace(fileContent, "

Document generated by Confluence on.*?

", "");
            fileContent = Regex.Replace(fileContent, "
", "");
            //Add strong tag for table header.
            fileContent = fileContent.Replace("class=\"confluenceTh\">", "class=\"confluenceTh\">");
            fileContent = fileContent.Replace("", "");
            //fileContent = fileContent.Replace("class=\"confluenceTh\"","");
            //fileContent = fileContent.Replace("", "");
            //fileContent = fileContent.Replace("", "");
            fileContent = fileContent.Replace("", "");
            //Add table row alignment as left if its single row.
            foreach (Match rowMatch in Regex.Matches(fileContent, ""))
            {
                if (Regex.Matches(rowMatch.ToString(), "", "
"));
            }
            return fileContent;
        }

        /// <summary>
        /// Upload the file or image. Replace the uploaded url in html file.
        /// Every <c> src="attachments/..."</c> attribute found in the HTML is handled: the
        /// referenced file is passed to <c>Migration.SharePointFileUpload</c> and the attribute is
        /// rewritten to point at the site's "Shared Documents" folder with "?height=250" appended.
        /// </summary>
        /// <param name="_fileContent">HTML content to scan and rewrite.</param>
        /// <param name="_ConfluencePath">Directory that the relative "attachments/..." paths are combined with.</param>
        /// <param name="_sharePointSiteURL">Site URL used as the prefix of the rewritten image URLs.</param>
        /// <param name="_ctx">Client context handed to the upload call.</param>
        /// <returns>The HTML with the attachment image sources rewritten.</returns>
        public string UploadImageURLToHtml(string _fileContent, string _ConfluencePath, string _sharePointSiteURL, ClientContext _ctx)
        {
            Console.WriteLine(" - Image upload");
            Migration upload = new Migration();
            Regex imageFile = new Regex(" src=\"attachments/.*?\"");

            foreach (Match imageMatch in imageFile.Matches(_fileContent))
            {
                string imageSrc = imageMatch.Value;

                // Reduce the attribute to the bare relative path: drop the ` src="` prefix and
                // the closing quote, then cut off any query string (?width=..., ?effects=..., ...)
                // so it cannot leak into the local file path or the rewritten URL.
                string imageLocation = imageSrc.Replace(" src=\"", "").Replace("\"", "");
                int queryStart = imageLocation.IndexOf('?');
                if (queryStart >= 0)
                {
                    imageLocation = imageLocation.Substring(0, queryStart);
                }

                // Paths that contain no '.' are left untouched.
                if (imageLocation.IndexOf('.') < 0)
                {
                    continue;
                }

                //Upload image to SharePoint
                upload.SharePointFileUpload(Path.Combine(_ConfluencePath, imageLocation.Replace("/", "\\")), _ctx);

                //Replace uploaded image URL ("/fileName" keeps its leading slash)
                string fileName = imageLocation.Substring(imageLocation.LastIndexOf('/'));
                string uploadedURL = " src=\"" + _sharePointSiteURL + "/Shared%20Documents" + fileName + "?height=250\"";
                _fileContent = _fileContent.Replace(imageSrc, uploadedURL);
            }

            return _fileContent;
        }
    }
}