Handle import of documents with self-referring styles.
As a workaround, read the RTF content of the file and use a regular expression to remove the self-referring style links before importing it, like this:
// Read the raw RTF markup and strip the self-referring style links before handing it to the importer.
string rtf = FixSelfReferringStyles(File.ReadAllText("inputFile.rtf"));
var provider = new Telerik.Windows.Documents.Flow.FormatProviders.Rtf.RtfFormatProvider();
var document = provider.Import(rtf);
...
/// <summary>
/// Removes the <c>\slink</c> control word from every style definition that links to itself,
/// i.e. where the number after <c>\s</c> equals the number after <c>\slink</c>.
/// </summary>
/// <param name="rtf">The raw RTF markup to process.</param>
/// <returns>The RTF markup without self-referring style links; everything else is left unchanged.</returns>
private static string FixSelfReferringStyles(string rtf)
{
    const string linkControlWord = @"\slink";

    // Group 1: the style number; group 2: the number of the linked style inside the same "{...}" group.
    string regexString = @"{\\s([0-9]+)[^}]*\\slink([0-9]+)";

    return Regex.Replace(rtf, regexString, match =>
    {
        if (match.Groups[1].Value != match.Groups[2].Value)
        {
            // The style links to a different style - keep it as is.
            return match.Value;
        }

        // The pattern ends with "\slink<N>", so everything before the control word is what must be kept.
        int linkStart = match.Groups[2].Index - match.Index - linkControlWord.Length;
        string head = match.Value.Substring(0, linkStart);

        // The delimiter space in front of a control word is optional in RTF: remove it when it is
        // there, but do not require it, otherwise tightly written markup would never be fixed.
        if (head.Length > 0 && head[head.Length - 1] == ' ')
        {
            head = head.Substring(0, head.Length - 1);
        }

        return head;
    });
}
Run the attached sample project. The available Doc1.docx contains an image placeholder with borders on all sides. After inserting an image in the placeholder, some of the borders overlap the image:
The expected result is to have the borders around all sides:
When a cell is removed because it has no content, its table cell borders are not transferred to the neighboring cell.
When a table cell contains a paragraph with no runs it is stripped from the document. However, its table cell borders should be transferred/reapplied to its neighboring cell to maintain consistency.
When a document containing a field without a separator is inserted using the RadFlowDocumentEditor.InsertDocument(*) method, the following error is thrown:
System.InvalidOperationException: 'Start and end inlines should belong to paragraph in one block container.'
Workaround:
// Workaround entry point: merges headers, footers and body content block by block
// instead of calling RadFlowDocumentEditor.InsertDocument, then opens the result.
static void Main(string[] args)
{
    string outputFile = $@"..\..\MergedResult.docx";
    var docxProvider = new DocxFormatProvider();
    File.Delete(outputFile);

    // Load the target document and the two documents whose parts are merged into it.
    var target = docxProvider.Import(File.ReadAllBytes(@"..\..\HeaderFooter.docx"));
    var source = docxProvider.Import(File.ReadAllBytes(@"..\..\SubHeaderFooter.docx"));
    var contentSource = docxProvider.Import(File.ReadAllBytes(@"..\..\ContentOnly.docx"));

    MergeHeaders(target, source);
    MergeFooters(target, source);
    MergeContent(target, source);
    MergeContent(target, contentSource);

    // Save the merged document and open it with the associated application.
    byte[] mergedBytes = docxProvider.Export(target);
    File.WriteAllBytes(outputFile, mergedBytes);
    Process.Start(outputFile);
}
/// <summary>
/// Imports every block of every section of <paramref name="source"/> and appends it
/// to the last section of <paramref name="target"/>.
/// </summary>
/// <param name="target">The document that receives the imported blocks.</param>
/// <param name="source">The document whose section blocks are imported.</param>
private static void MergeContent(RadFlowDocument target, RadFlowDocument source)
{
    // Style conflicts between the two documents are resolved with the UseTargetStyle mode.
    var elementImporter = new DocumentElementImporter(target, source, ConflictingStylesResolutionMode.UseTargetStyle);

    foreach (Telerik.Windows.Documents.Flow.Model.Section sourceSection in source.Sections)
    {
        foreach (BlockBase sourceBlock in sourceSection.Blocks)
        {
            BlockBase copy = elementImporter.Import(sourceBlock);
            target.Sections.Last().Blocks.Add(copy);
        }
    }
}
/// <summary>
/// Imports the blocks of the default header of the first section of <paramref name="source"/>
/// and appends them to the default header of the first section of <paramref name="target"/>.
/// </summary>
/// <param name="target">The document whose default header receives the blocks.</param>
/// <param name="source">The document whose default header blocks are imported.</param>
private static void MergeHeaders(RadFlowDocument target, RadFlowDocument source)
{
    Header sourceHeader = source.Sections.First().Headers.Default;
    if (sourceHeader == null)
    {
        // No default header in the source - there is nothing to merge.
        return;
    }

    // NOTE(review): Headers.Add() is expected to create the default header when none exists yet
    // (see the Telerik headers/footers documentation) - confirm against the referenced library version.
    var targetSection = target.Sections.First();
    Header targetHeader = targetSection.Headers.Default ?? targetSection.Headers.Add();

    // Style conflicts between the two documents are resolved with the UseTargetStyle mode.
    DocumentElementImporter importer = new DocumentElementImporter(target, source, ConflictingStylesResolutionMode.UseTargetStyle);
    foreach (BlockBase block in sourceHeader.Blocks)
    {
        BlockBase importedBlock = importer.Import(block);
        targetHeader.Blocks.Add(importedBlock);
    }
}
/// <summary>
/// Imports the blocks of the default footer of the first section of <paramref name="source"/>
/// and inserts them, in their original order, at the beginning of the default footer of the
/// first section of <paramref name="target"/>.
/// </summary>
/// <param name="target">The document whose default footer receives the blocks.</param>
/// <param name="source">The document whose default footer blocks are imported.</param>
private static void MergeFooters(RadFlowDocument target, RadFlowDocument source)
{
    Footer sourceFooter = source.Sections.First().Footers.Default;
    if (sourceFooter == null)
    {
        // No default footer in the source - there is nothing to merge.
        return;
    }

    // NOTE(review): Footers.Add() is expected to create the default footer when none exists yet
    // (see the Telerik headers/footers documentation) - confirm against the referenced library version.
    var targetSection = target.Sections.First();
    Footer targetFooter = targetSection.Footers.Default ?? targetSection.Footers.Add();

    // Style conflicts between the two documents are resolved with the UseTargetStyle mode.
    DocumentElementImporter importer = new DocumentElementImporter(target, source, ConflictingStylesResolutionMode.UseTargetStyle);

    // Insert at increasing indices so the imported blocks precede the existing footer content.
    int i = 0;
    foreach (BlockBase block in sourceFooter.Blocks)
    {
        BlockBase importedBlock = importer.Import(block);
        targetFooter.Blocks.Insert(i++, importedBlock);
    }
}
StackOverflowException when importing a document with style based on itself.
Use the following code to strip the faulty "based on" definition from the RTF:
// Load the raw RTF, strip the faulty "based on" definition and only then import the document.
var provider = new RtfFormatProvider();
string rtf = this.ReplaceSelfBasedOnStyle(File.ReadAllText(ofd.FileName));
this.flowDocument = provider.Import(rtf);
...
/// <summary>
/// Removes the <c>\sbasedon0 </c> control word from every style definition matched by the
/// "Normal" style pattern below.
/// </summary>
/// <param name="rtf">The raw RTF markup to process.</param>
/// <returns>The RTF markup with the matched definitions rewritten.</returns>
private string ReplaceSelfBasedOnStyle(string rtf)
{
    // Matches a "{\s<N> ... \<word> Normal;}" style definition.
    string pattern = @"{[\n]*\\s[0-9]+[^;]* \\\w* (Normal);}";
    MatchCollection normalStyleDefinitions = Regex.Matches(rtf, pattern);

    for (int index = 0; index < normalStyleDefinitions.Count; index++)
    {
        string definition = normalStyleDefinitions[index].Value;
        string withoutBasedOn = definition.Replace(@"\sbasedon0 ", string.Empty);
        rtf = rtf.Replace(definition, withoutBasedOn);
    }

    return rtf;
}
Styles whose names differ only in spaces are treated as one style.
Workaround:
// Rename the styles whose names differ only in spaces, then import the adjusted RTF.
string rtf = this.RenameStyleDifferentInOnlySpaces(File.ReadAllText(fileName));
var document = provider.Import(rtf);
...
/// <summary>
/// Walks the style definitions found in the RTF markup and renames every style whose name,
/// with the spaces removed, equals the space-less name of a style seen earlier.
/// </summary>
/// <param name="rtf">The raw RTF markup to process.</param>
/// <returns>The RTF markup with the conflicting style names renamed.</returns>
private string RenameStyleDifferentInOnlySpaces(string rtf)
{
    // Group 1: the style number; group 2: the style name (everything up to the ';').
    string pattern = @"{\\(?:\*\\c)?s([0-9]+)[^}]*\n?[^}]*\\[^' ]* ?'?([^;]*)";
    MatchCollection styleDefinitions = Regex.Matches(rtf, pattern);

    // Space-less names of the styles processed so far.
    HashSet<string> seenNames = new HashSet<string>();

    for (int index = 0; index < styleDefinitions.Count; index++)
    {
        Match definition = styleDefinitions[index];
        string normalizedName = definition.Groups[2].Value.Replace(" ", string.Empty);

        if (seenNames.Contains(normalizedName))
        {
            // Name clash: rename the style inside the markup and track the new name instead.
            string renamed = this.ReplaceOldStyleName(ref rtf, seenNames, definition);
            normalizedName = renamed.Replace(" ", string.Empty);
        }

        seenNames.Add(normalizedName);
    }

    return rtf;
}
/// <summary>
/// Renames the style captured by <paramref name="match"/> by appending "0" characters until the
/// space-less name is not contained in <paramref name="styles"/>, and applies the new name to the markup.
/// </summary>
/// <param name="rtf">The RTF markup; updated in place with the renamed style definition.</param>
/// <param name="styles">The space-less style names that are already taken.</param>
/// <param name="match">A style definition match whose group 2 holds the style name.</param>
/// <returns>The new style name (spaces preserved).</returns>
private string ReplaceOldStyleName(ref string rtf, HashSet<string> styles, Match match)
{
    Group nameGroup = match.Groups[2];
    string oldStyleName = nameGroup.Value;

    StringBuilder styleNameBuilder = new StringBuilder(oldStyleName + "0");
    while (styles.Contains(styleNameBuilder.ToString().Replace(" ", string.Empty)))
    {
        styleNameBuilder.Append("0");
    }

    string newStyleName = styleNameBuilder.ToString();

    // Replace only the captured name span. A plain string replace of the name inside the match
    // would also rewrite control words that happen to contain it (e.g. a style called "next"
    // inside "\snext2") and throws for an empty style name.
    string oldMatch = match.Value;
    int nameStart = nameGroup.Index - match.Index;
    string newMatch = oldMatch.Substring(0, nameStart)
        + newStyleName
        + oldMatch.Substring(nameStart + nameGroup.Length);

    rtf = rtf.Replace(oldMatch, newMatch);
    return newStyleName;
}
PdfFormatProvider: Tab stop distance different from the default is not exported correctly.
Workaround: Use spaces instead.
Original DOCX document:
Exported DOCX document:
Workaround:
// Import the DOCX file into a RadFlowDocument.
var docXprovider = new Telerik.Windows.Documents.Flow.FormatProviders.Docx.DocxFormatProvider();
Telerik.Windows.Documents.Flow.Model.RadFlowDocument document;
using (Stream input = File.OpenRead("PT1987 VU22888 Moodle Specification List [2024032716].docx"))
{
    document = docXprovider.Import(input);
}

// Add the built-in Normal style to the style repository and set its local
// spacing-after and line-spacing values explicitly.
Style normalStyle = document.StyleRepository.AddBuiltInStyle(BuiltInStyleNames.NormalStyleId);
normalStyle.ParagraphProperties.SpacingAfter.LocalValue = 0;
normalStyle.ParagraphProperties.LineSpacing.LocalValue = 1;
Use the following code to insert the document:
// Inserts Content.rtf into Template.rtf with RadFlowDocumentEditor and writes the merged document to disk.
static void Main(string[] args)
{
    var templateDocument = GetDocument("Template.rtf");
    var contentDocument = GetDocument("Content.rtf");

    // Conflicting source styles are renamed and the last paragraph marker of the source is inserted too.
    var options = new InsertDocumentOptions
    {
        ConflictingStylesResolutionMode = ConflictingStylesResolutionMode.RenameSourceStyle,
        InsertLastParagraphMarker = true
    };

    var editor = new RadFlowDocumentEditor(templateDocument);
    editor.InsertDocument(contentDocument, options);

    string mergedDocumentFilePath = "MergeDocumentsWithWordsProcessing.rtf";
    File.Delete(mergedDocumentFilePath);
    WriteDocToFile(templateDocument, mergedDocumentFilePath);
}
/// <summary>
/// Opens the file at <paramref name="rtfFilePath"/> and imports it with the RTF format provider.
/// </summary>
/// <param name="rtfFilePath">Path of the RTF file to import.</param>
/// <returns>The imported document.</returns>
private static Telerik.Windows.Documents.Flow.Model.RadFlowDocument GetDocument(string rtfFilePath)
{
    var rtfImporter = new Telerik.Windows.Documents.Flow.FormatProviders.Rtf.RtfFormatProvider();

    // The stream is disposed as soon as the import has finished.
    using (Stream rtfStream = File.OpenRead(rtfFilePath))
    {
        return rtfImporter.Import(rtfStream);
    }
}
/// <summary>
/// Exports <paramref name="doc"/> to RTF text, writes it to <paramref name="filename"/>
/// and starts a process for the written file.
/// </summary>
/// <param name="doc">The document to export.</param>
/// <param name="filename">Path of the file to write and open.</param>
private static void WriteDocToFile(Telerik.Windows.Documents.Flow.Model.RadFlowDocument doc, string filename)
{
    var rtfExporter = new Telerik.Windows.Documents.Flow.FormatProviders.Rtf.RtfFormatProvider();
    File.WriteAllText(filename, rtfExporter.Export(doc));
    Process.Start(filename);
}
Observed result: The After spacing is reset
Expected result: keep the style settings from the original documents.
The generated document looks OK before printing:
However, clicking the Print Preview button in MS Word leads to missing text in the fields: