Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .changelog/20250828140415_ck_8219.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
type: Fix
scope:
- ckeditor5-paste-from-office
closes:
- https://github.com/ckeditor/ckeditor5/issues/19029
---

Improve the merging and numbering of multi-level lists in Microsoft Word content when list items are separated by borders.
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
/**
* @license Copyright (c) 2003-2025, CKSource Holding sp. z o.o. All rights reserved.
* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-licensing-options
*/

/**
* @module paste-from-office/filters/unwrapmslistsparaborders
*/

import {
ViewTreeWalker,
type ViewElement,
type ViewDocumentFragment,
type ViewItem,
type ViewUpcastWriter
} from 'ckeditor5/src/engine.js';

/**
 * Border sides inspected by this filter. Each side name is interpolated into the regular
 * `border-<side>` style name and into the Word-specific `mso-border-<side>-alt` style name.
 */
const MS_PARA_BORDER_DIRECTIONS = [ 'top', 'bottom', 'left', 'right' ] as const;

/**
 * Union of the side names listed in `MS_PARA_BORDER_DIRECTIONS`.
 */
type MSParaBorderDirections = typeof MS_PARA_BORDER_DIRECTIONS[number];

/**
 * When a user sets a top or bottom border on list items, Microsoft Word wraps whole list items
 * (which may be `<h1>` elements) in `<div>` elements. This is problematic because the wrappers
 * split block lists apart, which makes continuing a multi-level list hard to achieve.
 *
 * The HTML generated by Microsoft Word looks like this:
 *
 *		<div style="mso-element:para-border-div;border-bottom:solid #A7A9AC 2.25pt; ...">
 *			<h1 style="mso-list:l2 level1 lfo1; mso-border-bottom-alt:solid #A7A9AC 2.25pt; ...">Title</h1>
 *		</div>
 *
 * Items inside such wrappers carry `mso-border-${ direction }-alt` styles that mirror the wrapper
 * border styles. This is a fallback mechanism used by Microsoft Word.
 *
 * In the scenario above, the `<div>` is removed and the wrapper border is applied directly to
 * the `<h1>` element, which results in the following HTML structure:
 *
 *		<h1 style="mso-list:l2 level1 lfo1; border-bottom:solid #A7A9AC 2.25pt; ...">Title</h1>
 *
 * Microsoft Word sometimes also emits children with `mso-border-${ direction }-alt` styles that are
 * empty (or contain only `&nbsp;`). Such children never receive the border.
 *
 * A `mso-element: para-border-div` wrapper is unwrapped (its children are moved into the wrapper's
 * parent, at the wrapper's position) when:
 *
 * 1. There is at least one non-empty child element with matching alternative border styles.
 * 2. There is at least one list-like item inside the wrapper.
 *
 * The wrapper borders are assigned to the non-empty matching children only.
 *
 * See more: https://github.com/ckeditor/ckeditor5-commercial/issues/8044
 *
 * @internal
 */
export function unwrapMSListsParaBorders( documentFragment: ViewDocumentFragment, writer: ViewUpcastWriter ): void {
	const walker = new ViewTreeWalker( {
		boundaries: writer.createRangeIn( documentFragment ),
		ignoreElementEnd: true
	} );

	// Wrappers are only collected during the walk; the tree is restructured afterwards.
	const wrappersToUnwrap = new Set<ViewElement>();

	for ( const { item: wrapper } of walker ) {
		if ( !isParaBorderWrapperElement( wrapper ) ) {
			continue;
		}

		if ( !hasAnyListLikeElement( wrapper, writer ) ) {
			continue;
		}

		// The wrapper normally carries regular borders, but depending on the Word version it may
		// carry alternative (MSO) borders as well, so both sets are read and compared below.
		const regularBorders = pickElementBorders( wrapper );
		const alternativeBorders = pickElementBorders( wrapper, true );

		// Collect non-empty child elements whose alternative borders mirror the wrapper borders.
		// Word puts alternative borders on every child, including blank ones, and blank children
		// must not end up with a border.
		const matchingChildren: Array<ViewElement> = [];

		for ( const child of wrapper.getChildren() ) {
			if ( !child.is( 'element' ) || isViewItemTextEmpty( child ) ) {
				continue;
			}

			const childAlternativeBorders = pickElementBorders( child, true );

			// The regular and alternative wrapper borders may differ slightly (for example in width),
			// so a match against either of them is enough.
			const isMatching =
				compareElementBorders( regularBorders, childAlternativeBorders ) ||
				compareElementBorders( alternativeBorders, childAlternativeBorders );

			if ( isMatching ) {
				matchingChildren.push( child );
			}
		}

		if ( matchingChildren.length ) {
			// Replace the fallback styles of every matching child with the wrapper's regular borders.
			for ( const child of matchingChildren ) {
				writer.removeStyle( 'border', child );

				for ( const [ side, value ] of regularBorders ) {
					writer.setStyle( `border-${ side }`, value, child );
					writer.removeStyle( `mso-border-${ side }-alt`, child );
				}
			}

			wrappersToUnwrap.add( wrapper );
		}

		// Skip the wrapper's subtree: the walk continues right after the wrapper element.
		walker.jumpTo( writer.createPositionAfter( wrapper ) );
	}

	// Restructure the tree only after the walk has finished, so the tree walker is not disturbed.
	for ( const wrapper of wrappersToUnwrap ) {
		const wrapperParent = wrapper.parent;

		if ( !wrapperParent ) {
			continue;
		}

		writer.insertChild( wrapper.index!, wrapper.getChildren(), wrapperParent );
		writer.remove( wrapper );
	}
}

/**
 * Checks whether any element inside the given element carries a non-empty `mso-list` style.
 * Only the items of the range created inside `element` are inspected, not `element` itself.
 */
function hasAnyListLikeElement( element: ViewElement, writer: ViewUpcastWriter ) {
	const innerItems = writer.createRangeIn( element ).getItems();

	for ( const node of innerItems ) {
		if ( !node.is( 'element' ) ) {
			continue;
		}

		if ( node.getStyle( 'mso-list' ) ) {
			return true;
		}
	}

	return false;
}

/**
 * Tells whether the given view item is an element whose `mso-element` style equals
 * `para-border-div`, which is how Word's paragraph border wrappers seem to be marked.
 */
function isParaBorderWrapperElement( wrapper: ViewItem ): wrapper is ViewElement {
	if ( !wrapper.is( 'element' ) ) {
		return false;
	}

	const msoElementKind = wrapper.getStyle( 'mso-element' );

	return msoElementKind === 'para-border-div';
}

/**
 * Collects the per-side border styles of the element into a map keyed by side name.
 *
 * With `alternative` set, the `mso-border-${ direction }-alt` styles are read instead of the
 * regular `border-${ direction }` ones. Microsoft Word uses the alternative styles as
 * compatibility fallbacks, which makes them useful for unwrapping the element.
 *
 * Sides whose style is missing or equal to `none` are left out of the result.
 */
function pickElementBorders( wrapper: ViewElement, alternative?: boolean ): ElementBordersMap {
	const borders: ElementBordersMap = new Map();

	for ( const direction of MS_PARA_BORDER_DIRECTIONS ) {
		const styleName = alternative ? `mso-border-${ direction }-alt` : `border-${ direction }`;

		if ( !wrapper.hasStyle( styleName ) ) {
			continue;
		}

		const styleValue = wrapper.getStyle( styleName );

		if ( styleValue !== 'none' ) {
			borders.set( direction, styleValue! );
		}
	}

	return borders;
}

/**
 * Tells whether two border maps are equal: they must have the same number of entries and every
 * side stored in `a` must map to the very same style value in `b`.
 */
function compareElementBorders( a: ElementBordersMap, b: ElementBordersMap ): boolean {
	const haveSameSize = a.size === b.size;

	return haveSameSize && Array.from( a ).every( ( [ side, value ] ) => b.get( side ) === value );
}

/**
 * Maps a border side to the style value read for that side.
 */
type ElementBordersMap = Map<MSParaBorderDirections, string>;

/**
 * Checks whether the view item contains no text, ignoring whitespace-only text nodes.
 *
 * Elements are inspected together with their whole subtree. Items that are neither text nodes
 * nor elements are treated as empty.
 */
function isViewItemTextEmpty( viewItem: ViewItem ) {
	// Explicit stack instead of recursion; the visiting order does not affect the result.
	const pending: Array<ViewItem> = [ viewItem ];

	while ( pending.length ) {
		const current = pending.pop()!;

		if ( current.is( '$text' ) ) {
			if ( current.data && current.data.trim() !== '' ) {
				return false;
			}
		} else if ( current.is( 'element' ) ) {
			pending.push( ...current.getChildren() );
		}
	}

	return true;
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { removeMSAttributes } from '../filters/removemsattributes.js';
import { transformTables } from '../filters/table.js';
import { removeInvalidTableWidth } from '../filters/removeinvalidtablewidth.js';
import { ViewUpcastWriter, type ViewDocument } from 'ckeditor5/src/engine.js';
import { unwrapMSListsParaBorders } from '../filters/unwrapmslistsparaborders.js';
import type { PasteFromOfficeNormalizer, PasteFromOfficeNormalizerData } from '../normalizer.js';

const msWordMatch1 = /<meta\s*name="?generator"?\s*content="?microsoft\s*word\s*\d+"?\/?>/i;
Expand Down Expand Up @@ -51,6 +52,7 @@ export class PasteFromOfficeMSWordNormalizer implements PasteFromOfficeNormalize
const writer = new ViewUpcastWriter( this.document );
const { body: documentFragment, stylesString } = data._parsedData;

unwrapMSListsParaBorders( documentFragment, writer );
transformBookmarks( documentFragment, writer );
transformListItemLikeElementsIntoLists( documentFragment, stylesString, this.hasMultiLevelListPlugin );
replaceImagesSourceWithBase64( documentFragment, data.dataTransfer.getData( 'text/rtf' ) );
Expand Down
Loading