$attributes Optional attributes as array or JSON string. * * @return array Array with opening and closing comment tags. */ public function create_block_comments( string $block_name, array $attributes = [] ): array { $attr_str = ''; if ( ! empty( $attributes ) ) { $attr_str = ' ' . WPSEO_Utils::format_json_encode( $attributes ); } return [ 'open' => '', 'close' => '', ]; } /** * Extracts code blocks from the content. * * @param DOMElement|null $body The body element. * @param DOM_Processor|null $dom_processor The DOM processor instance. * * @return array, content: string}> The code blocks. */ public function extract_code_blocks( ?DOMElement $body, ?DOM_Processor $dom_processor ): array { if ( $body === null || $dom_processor === null ) { return []; } // Pre-process the content to identify code blocks enclosed by special markers. // This first pass ensures code isn't broken up or incorrectly parsed as HTML. // The markers (HTML entity codes) are used to identify the start and end of code sections. $in_code_block = false; $code_content = ''; $code_blocks = []; $code_block_elements = []; // Get all paragraph elements that might contain code blocks. $paragraphs = $body->getElementsByTagName( 'p' ); if ( $paragraphs->length === 0 ) { return $code_blocks; } foreach ( $paragraphs as $p ) { // Get the HTML content of the paragraph. $p_html = $dom_processor->get_node_html( $p ); // Check if this paragraph contains a code block start marker. if ( ! $in_code_block && \strpos( $p_html, self::CODE_BLOCK_START_MARKER ) !== false ) { // We found the start of a code block. $in_code_block = true; $code_content = ''; $code_block_elements = [ $p ]; // Extract content after the marker. $content_parts = \explode( self::CODE_BLOCK_START_MARKER, $p_html, 2 ); if ( ! empty( $content_parts[1] ) ) { $code_content .= $content_parts[1]; } continue; } // Check if this paragraph contains a code block end marker. if ( $in_code_block && \strpos( $p_html, self::CODE_BLOCK_END_MARKER ) !== false ) { // We found the end of a code block. $code_block_elements[] = $p; // Extract content before the marker. $content_parts = \explode( self::CODE_BLOCK_END_MARKER, $p_html, 2 ); if ( ! empty( $content_parts[0] ) ) { $code_content .= $content_parts[0]; } // Store the code block info. $code_blocks[] = [ 'elements' => $code_block_elements, 'content' => $code_content, ]; $in_code_block = false; continue; } // If we're inside a code block, add the paragraph's content. if ( $in_code_block ) { $code_block_elements[] = $p; $code_content .= $p_html; } } return $code_blocks; } /** * Determine if the anchor element is a TOC node by checking if the href start with #h-. * * @param DOMElement|null $anchor The anchor element to check. * * @return bool True if the anchor element is a TOC node, false otherwise. */ private function is_toc_node( ?DOMElement $anchor ): bool { if ( $anchor === null ) { return false; } $href = $anchor->getAttribute( 'href' ); return \strpos( $href, '#h-' ) === 0; } /** * Get the TOC nodes from the element. * * @param DOMElement|null $element The element to get the TOC nodes from. * * @return array The TOC nodes. */ public function get_toc_nodes( ?DOMElement $element ): array { if ( $element === null ) { return []; } // Used for storing TOC sections. $toc_sections = []; // Used for creating TOC sections in case of multiple TOC blocks. $toc_index = 0; // Only increment the toc_index if we encounter the first TOC node. $has_encountered_toc_node = false; // Used for tracking the position of the TOC nodes which is later used to skip the node. $current_position = 0; if ( $element->childNodes->length !== 0 ) { foreach ( $element->childNodes as $node_index => $node ) { // Skip non-element nodes for list detection. if ( $node->nodeType !== \XML_ELEMENT_NODE ) { continue; } if ( $node->nodeName === 'p' ) { $anchor = $node->getElementsByTagName( 'a' ); if ( $anchor->length > 0 ) { if ( $this->is_toc_node( $anchor->item( 0 ) ) ) { // Flip the flag to indicate that we have encountered a TOC node and can safely increment the toc_index. $has_encountered_toc_node = true; $toc_sections[ $toc_index ][] = [ 'node' => $node, 'position' => $current_position, ]; } // If next element is not a TOC node, end current TOC section and start a new one. if ( $element->childNodes[ ( $node_index + 1 ) ] !== null && $has_encountered_toc_node && ! $this->is_toc_node( $element->childNodes[ ( $node_index + 1 ) ]->getElementsByTagName( 'a' )->item( 0 ) ) ) { ++$toc_index; } } } ++$current_position; } } return $toc_sections; } /** * Generates the list structure for the TOC block items similar to the TOC block functionality. * * @param array $headings Array of heading elements with content, href, and level properties. * * @return string The HTML structure for the table of contents. */ private function create_toc_heading_structure( array $headings ): string { // Return empty string if no headings provided. if ( empty( $headings ) ) { return ''; } // Initialize the HTML structure with the opening ul tag. $heading_structure = '

' . \wp_strip_all_tags( $current_heading['content'] ) . ''; // Find all child headings (those with deeper nesting levels). $child_headings = []; $child_index = ( $heading_index + 1 ); // Collect all consecutive child headings that have deeper levels. while ( $child_index < $total_headings ) { // If we encounter a heading with same or higher hierarchy, stop collecting children. if ( $headings[ $child_index ]['level'] <= $current_heading['level'] ) { break; } // Add this heading to our child collection and mark it as processed. $child_headings[] = $headings[ $child_index ]; $processed_headings[ $child_index ] = true; ++$child_index; } // If we found child headings, create a nested list for them. if ( ! empty( $child_headings ) ) { $heading_structure .= '
- ' . \wp_strip_all_tags( $child_heading['content'] ) . '
'; } // Close this heading's list item. $heading_structure .= '

'; return $heading_structure; } /** * Creates a TOC block. * * @param DOMElement|null $element The element to attach the TOC block to. * * @return string The TOC block. */ public function create_toc_block( ?DOMElement $element ): string { // Early bail if the content is empty or the TOC block placeholder is not found. if ( $element === null ) { return ''; } // Get the headings from the element. $headings = []; foreach ( $element->childNodes as $node ) { if ( $node->nodeType !== \XML_ELEMENT_NODE ) { continue; } $tag = $node->nodeName; /** * During export on React app, we convert H1 to H2 and hence we can safely ignore the H1 headings. * We only consider H2 and H3 headings since the TOC block has maxHeadingLevel attribute set to 3. * We construct the TOC list items as it would be generated by the block's renderTableOfContents() function. * This ensures that the TOC block is rendered correctly in the editor and on the post preview generated. */ if ( $tag === 'h2' || $tag === 'h3' ) { // Accept only headings that are present at the top level within the body element. if ( $node->parentNode->nodeName !== 'body' ) { continue; } $headings[] = [ 'content' => $node->textContent, 'href' => '#' . $node->getAttribute( 'id' ), 'level' => (int) \substr( $tag, 1 ), ]; } } // Generate the heading structure expected by the Table of Contents block. $heading_structure = $this->create_toc_heading_structure( $headings ); $block = $this->create_block_comments( 'yoast-seo/table-of-contents' ); return $block['open'] . '

' . \PHP_EOL . '

' . \esc_html__( 'Table of contents', 'wordpress-seo-premium' ) . '

' . \wp_kses_post( $heading_structure ) . '

' . \PHP_EOL . $block['close']; } /** * Determines the elements to skip from the code blocks. * * @param array}> $code_blocks The code blocks. * * @return array The elements to skip. */ public function determine_elements_to_skip( array $code_blocks ): array { $skip_elements = []; if ( ! empty( $code_blocks ) ) { foreach ( $code_blocks as $code_block ) { if ( empty( $code_block['elements'] ) ) { continue; } foreach ( $code_block['elements'] as $element_to_skip ) { $skip_elements[] = $element_to_skip; } } } return $skip_elements; } /** * Extracts a cover image from the body element. * * This method identifies a cover image by looking for: * 1. Paragraphs with images at exact 768px width (which is a common indicator of a cover image). * 2. First removes any identified cover image from the content to avoid duplication. * 3. Creates a WordPress cover block with the proper image and layout. * * @param DOMElement|null $body Body Element. * * @return string The cover block if found, null otherwise. */ public function extract_cover_image( ?DOMElement $body ): string { if ( $body === null ) { return ''; } // Get all paragraph elements. $paragraphs = $body->getElementsByTagName( 'p' ); // Check if we have paragraphs. if ( $paragraphs->length === 0 ) { return ''; } // Loop through each paragraph and check for first image as cover image. foreach ( $paragraphs as $p ) { $img_found = false; $img_src = ''; $img_alt = ''; // Check for spans that might contain the image with the specific style. $spans = $p->getElementsByTagName( 'span' ); if ( $spans->length === 0 ) { continue; } foreach ( $spans as $span ) { $style = $span->getAttribute( 'style' ); // Check if the span has a style with width of exactly 768.00px. // This specific width is a common indicator of a full-width cover image. if ( \preg_match( '/width:\s*768\.00px/i', $style ) ) { $imgs = $span->getElementsByTagName( 'img' ); if ( ! empty( $imgs ) && $imgs->length > 0 ) { $img = $imgs->item( 0 ); $img_found = true; $img_src = $img->getAttribute( 'src' ); $img_alt = $img->getAttribute( 'alt' ); break; } } } // If no spans with 768px width, check for direct img elements with a width of 768. if ( ! $img_found ) { $imgs = $p->getElementsByTagName( 'img' ); if ( $imgs->length > 0 ) { $img = $imgs->item( 0 ); $width = $img->getAttribute( 'width' ); $img_src = $img->getAttribute( 'src' ); $img_alt = $img->getAttribute( 'alt' ); // Check width attribute. if ( $width === '768' ) { $img_found = true; } else { // Also check style attribute for width: 768px. $style = $img->getAttribute( 'style' ); if ( \preg_match( '/width:\s*768\.00px/i', $style ) || \preg_match( '/width:\s*768px/i', $style ) ) { $img_found = true; } } } } // If we found a suitable image in this paragraph. if ( $img_found && $img_src ) { // This is our cover image - remove it from the body content to avoid duplication. if ( $p->parentNode ) { $p->parentNode->removeChild( $p ); } // Escape the image source URL. $safe_img_src = \esc_url( $img_src ); // Generate the cover block comments. $cover_block = $this->create_block_comments( 'cover', [ 'url' => $safe_img_src, 'id' => 0, 'isDark' => false, 'dimRatio' => 50, ], ); // Generate the paragraph block comments. $paragraph_block = $this->create_block_comments( 'paragraph' ); // Create a cover block with appropriate settings for WordPress. return $cover_block['open'] . \PHP_EOL . '

' . \PHP_EOL . '' . \PHP_EOL . ' $' . \esc_attr( $img_alt ) . '$ ' . \PHP_EOL . '

' . \PHP_EOL . $paragraph_block['open'] . \PHP_EOL . '

' . \PHP_EOL . $paragraph_block['close'] . \PHP_EOL . '

' . \PHP_EOL . '

' . \PHP_EOL . $cover_block['close']; } // Only process the first paragraph that contains an image. if ( $p->getElementsByTagName( 'img' )->length > 0 ) { break; } } return ''; } /** * Creates a code block from the code content. * * @param string $code_content Code content. * * @return string The code block. */ public function create_code_block( string $code_content ): string { // Clean up the code content. $code_content = $this->clean_code_content( $code_content ); // Get block comments. $block = $this->create_block_comments( 'code' ); return $block['open'] . \PHP_EOL . "\t

" . \PHP_EOL
				. "\t\t" . \wp_kses_post( $code_content ) . '' . \PHP_EOL
				. "\t

" . \PHP_EOL . $block['close']; } /** * Sanitizes and cleans up code content by removing HTML artifacts. * * @param string $code_content The raw code content. * * @return string Cleaned code content. */ public function clean_code_content( string $code_content ): string { // Clean up the code content. $code_content = \trim( $code_content ); // Bail if the code content is empty. if ( $code_content === '' ) { return ''; } // Remove HTML tags that might be part of the styling but not the code. $code_content = \preg_replace( '/<\/p>/', \PHP_EOL, $code_content ); $code_content = \preg_replace( '/]*>/', '', $code_content ); // Strip all span tags - first explicitly remove opening and closing tags that might be at beginning/end. $code_content = \preg_replace( '/^<\/?span[^>]*>/', '', $code_content ); $code_content = \preg_replace( '/<\/?span[^>]*>$/', '', $code_content ); // Final cleaup - Balance the tags. $code_content = \force_balance_tags( $code_content ); // Remove the markers for the code block. $code_content = \str_replace( '', '', $code_content ); $code_content = \str_replace( '', '', $code_content ); return $code_content; } /** * Extracts an image from a paragraph element. * * @param DOMElement $paragraph The paragraph element. * * @return string The image block if found, null otherwise. */ public function extract_image_from_paragraph( ?DOMElement $paragraph ): string { if ( $paragraph === null ) { return ''; } // First check for direct img elements. $imgs = $paragraph->getElementsByTagName( 'img' ); // Check for spans that contain images. $spans = $paragraph->getElementsByTagName( 'span' ); if ( $spans->length === 0 ) { return ''; } foreach ( $spans as $span ) { $imgs = $span->getElementsByTagName( 'img' ); if ( $imgs->length > 0 ) { $img = $imgs->item( 0 ); // Get the original image source and alt. $src = $img->getAttribute( 'src' ); $alt = $img->getAttribute( 'alt' ); // Check for style attributes that might indicate image dimensions. $style = $img->getAttribute( 'style' ); $width = null; $height = null; // Parse dimensions from style attribute. if ( \preg_match( '/width:\s*([0-9]+(?:\.[0-9]+)?)(px|%|em|rem|vw|vh)?/', $style, $width_matches ) ) { $width = $width_matches[1]; } if ( \preg_match( '/height:\s*([0-9]+(?:\.[0-9]+)?)(px|%|em|rem|vw|vh)?/', $style, $height_matches ) ) { $height = $height_matches[1]; } // Also check for width/height attributes. if ( ! $width ) { $width = $img->getAttribute( 'width' ); } if ( ! $height ) { $height = $img->getAttribute( 'height' ); } // Build block attributes. $json_attrs = [ 'id' => 0, 'sizeSlug' => 'large', ]; if ( $width && \is_numeric( $width ) ) { $json_attrs['width'] = \absint( $width ); } if ( $height && \is_numeric( $height ) ) { $json_attrs['height'] = \absint( $height ); } $image_block = $this->create_block_comments( 'image', $json_attrs ); // Create the image block. return $image_block['open'] . \PHP_EOL . '

' . \PHP_EOL . $image_block['close']; } } return ''; } /** * Creates a separator block. * * @return string The separator block. */ public function create_separator_block(): string { $block = $this->create_block_comments( 'separator' ); return $block['open'] . \PHP_EOL . "\t

" . \PHP_EOL . $block['close']; } /** * Creates a paragraph with a link. * * @param DOMElement|null $anchor The node element interpreted as an anchor. * * @return string The paragraph block with a link. */ public function create_paragraph_with_link( ?DOMElement $anchor ): string { if ( $anchor === null ) { return ''; } $href = $anchor->getAttribute( 'href' ); $text = $anchor->textContent; $block = $this->create_block_comments( 'paragraph' ); return $block['open'] . \PHP_EOL . '

' . \esc_html( $text ) . '

' . \PHP_EOL . $block['close']; } /** * Creates a paragraph block from a p element. * * @param DOMElement|null $paragraph The paragraph element. * @param DOM_Processor|null $dom_processor The DOM processor instance. * * @return string The paragraph block. */ public function create_paragraph_block( ?DOMElement $paragraph, ?DOM_Processor $dom_processor ): string { if ( $paragraph === null || $dom_processor === null ) { return ''; } $content = $dom_processor->process_inline_elements( $paragraph ); // Check for text alignment in inline styles. $block_attrs = []; $style = $dom_processor->get_style_attribute( $paragraph, false ); // Process alignment from inline style. if ( \preg_match( '/text-align:\s*(center|right|left)/i', $style, $matches ) ) { $block_attrs['align'] = $matches[1]; } // Get block comments with attributes. $block = $this->create_block_comments( 'paragraph', $block_attrs ); // Get the paragraph class and style. $p_class = $dom_processor->get_class_attribute( $paragraph ); $p_style = $dom_processor->get_style_attribute( $paragraph ); return $block['open'] . \PHP_EOL . "\t' . \wp_kses_post( $content ) . '

' . \PHP_EOL . $block['close']; } /** * Creates a table block from a table element. * * @param DOMElement|null $table The table element. * @param DOM_Processor|null $dom_processor The DOM processor instance. * * @return string The table block. */ public function create_table_block( ?DOMElement $table, ?DOM_Processor $dom_processor ): string { if ( $table === null || $dom_processor === null ) { return ''; } // First, get all rows from the table, regardless of where they are. $all_rows = $table->getElementsByTagName( 'tr' ); if ( $all_rows->length === 0 ) { return ''; } // Initialize arrays to store table data. $all_row_data = []; $header_row_count = 0; // Process all rows. foreach ( $all_rows as $row_index => $row ) { $row_data = []; $is_header_row = false; // Check if this is a header row. // Consider it a header if it's in a thead or if it has th cells. $parent = $row->parentNode; if ( $parent && $parent->nodeName === 'thead' ) { $is_header_row = true; } // Get all cells in this row (both th and td). $th_cells = $row->getElementsByTagName( 'th' ); $td_cells = $row->getElementsByTagName( 'td' ); // If there are th cells, this might be a header row. if ( $th_cells->length > 0 ) { $is_header_row = true; } // Process all cells in the row. $cells = ( $th_cells->length > 0 ) ? $th_cells : $td_cells; foreach ( $cells as $cell ) { // Get attributes we want to preserve. $colspan = $cell->getAttribute( 'colspan' ); $rowspan = $cell->getAttribute( 'rowspan' ); // Get the cell's HTML content. $cell_html = $dom_processor->process_inline_elements( $cell ); // Store cell data with attributes. $cell_data = [ 'content' => $cell_html, 'attributes' => [ 'colspan' => ( $colspan !== '' ) ? \absint( $colspan ) : null, 'rowspan' => ( $rowspan !== '' ) ? \absint( $rowspan ) : null, ], ]; $row_data[] = $cell_data; } // Skip empty rows. if ( empty( $row_data ) ) { continue; } // Store the row data with a flag indicating if it's a header. $all_row_data[] = [ 'cells' => $row_data, 'is_header' => $is_header_row, ]; // Count header rows. if ( $is_header_row ) { ++$header_row_count; } } // If we have no data, return empty string. if ( empty( $all_row_data ) ) { return ''; } // Determine max columns considering colspan attributes. $max_columns = 0; foreach ( $all_row_data as $row ) { $row_cols = 0; foreach ( $row['cells'] as $cell ) { $colspan = ( $cell['attributes']['colspan'] !== '' ) ? $cell['attributes']['colspan'] : 1; $row_cols += $colspan; } $max_columns = \max( $max_columns, $row_cols ); } // Build the Gutenberg table block. $block = ''; $block .= '

'; // If we have header rows, add them to thead. if ( $header_row_count > 0 ) { $block .= ''; $header_rows_processed = 0; foreach ( $all_row_data as $row_index => $row ) { if ( $row['is_header'] ) { $block .= ''; foreach ( $row['cells'] as $cell ) { $attrs = ''; if ( $cell['attributes']['colspan'] && $cell['attributes']['colspan'] > 1 ) { $attrs .= ' colspan="' . \esc_attr( $cell['attributes']['colspan'] ) . '"'; } if ( $cell['attributes']['rowspan'] && $cell['attributes']['rowspan'] > 1 ) { $attrs .= ' rowspan="' . \esc_attr( $cell['attributes']['rowspan'] ) . '"'; } $is_cell_empty = \trim( \wp_strip_all_tags( $cell['content'] ) ); if ( $is_cell_empty === '' ) { $cell['content'] = ''; } $block .= '' . \wp_kses_post( $cell['content'] ) . ''; } $block .= ''; ++$header_rows_processed; // Remove the row from all_row_data to avoid duplication. unset( $all_row_data[ $row_index ] ); // If we've processed all header rows, stop. if ( $header_rows_processed >= $header_row_count ) { break; } } } $block .= ''; } // Add tbody with remaining rows. $block .= ''; // If we have no body rows, add an empty row. if ( empty( $all_row_data ) ) { $block .= ''; $block .= \str_repeat( '', $max_columns ); $block .= ''; } else { // Add all remaining rows to tbody (reset array keys first). $all_row_data = \array_values( $all_row_data ); foreach ( $all_row_data as $row ) { $block .= ''; foreach ( $row['cells'] as $cell ) { $attrs = ''; if ( $cell['attributes']['colspan'] && $cell['attributes']['colspan'] > 1 ) { $attrs .= ' colspan="' . \esc_attr( $cell['attributes']['colspan'] ) . '"'; } if ( $cell['attributes']['rowspan'] && $cell['attributes']['rowspan'] > 1 ) { $attrs .= ' rowspan="' . \esc_attr( $cell['attributes']['rowspan'] ) . '"'; } $is_cell_empty = \trim( \wp_strip_all_tags( $cell['content'] ) ); if ( $is_cell_empty === '' ) { $cell['content'] = ''; } $block .= '' . \wp_kses_post( $cell['content'] ) . ''; } $block .= ''; } } $block .= ''; $block .= '

'; $block .= ''; return $block; } /** * Creates a heading block from a heading element. * * @param DOMElement|null $heading The heading element. * @param DOM_Processor|null $dom_processor The DOM processor instance. * * @return string The heading block. */ public function create_heading_block( ?DOMElement $heading, ?DOM_Processor $dom_processor ): string { if ( $heading === null || $dom_processor === null ) { return ''; } $level = \substr( $heading->nodeName, 1, 1 ); // Process inline formatting elements instead of using plain text. $content = $dom_processor->process_inline_elements( $heading ); // Check for text alignment in style attribute. $style = $dom_processor->get_style_attribute( $heading, false ); $block_attrs = [ 'level' => \absint( $level ) ]; $alignment = null; // Process alignment from inline style. if ( \preg_match( '/text-align:\s*(center|right|left)/i', $style, $matches ) ) { $alignment = $matches[1]; $block_attrs['align'] = $alignment; // Remove the text-align property from the style attribute. $style = \preg_replace( '/text-align:\s*(center|right|left)(;|$)/i', '', $style ); $style = \trim( $style, " \t\n\r\0\x0B;" ); } // Get the heading id attribute. $id_attr = $heading->getAttribute( 'id' ); $id_html = ! empty( $id_attr ) ? ' id="' . \wp_kses_data( $id_attr ) . '"' : ''; // Get the heading style attribute. $style_attr = ! empty( $style ) ? ' style="' . \wp_kses_data( $style ) . '"' : ''; // Start with required wp-block-heading class. $classes = 'wp-block-heading'; // Add alignment class if needed. if ( $alignment ) { $classes .= ' has-text-align-' . \esc_attr( $alignment ); } // Get block comments. $block = $this->create_block_comments( 'heading', $block_attrs ); return $block['open'] . \PHP_EOL . "\t' . \wp_kses_post( $content ) . '' . \PHP_EOL . $block['close']; } /** * Creates a list block from a list element (ul or ol). * * @param DOMElement|null $list_element The list element. * @param DOM_Processor|null $dom_processor The DOM processor instance. * * @return string The list block. */ public function create_list_block( ?DOMElement $list_element, ?DOM_Processor $dom_processor ): string { if ( $list_element === null || empty( $list_element ) || $dom_processor === null ) { return ''; } // Determine list type. $is_ordered = $list_element->nodeName === 'ol'; // Create block attributes. $attrs = ( $is_ordered === true ) ? [ 'ordered' => true ] : []; // Get block comments. $block = $this->create_block_comments( 'list', $attrs ); // Start the list block. $output = $block['open'] . \PHP_EOL; $output .= '<' . $list_element->nodeName . ' class="wp-block-list">'; // Process all list items. foreach ( $list_element->childNodes as $child ) { if ( $child->nodeType !== \XML_ELEMENT_NODE || $child->nodeName !== 'li' ) { continue; } // Process this list item. $output .= $this->process_list_item( $child, $dom_processor ); } // Close the list block. $output .= 'nodeName . '>' . \PHP_EOL; $output .= $block['close']; return $output; } /** * Process a list item and its content, including any nested lists. * * @param DOMElement|null $list_item The list item element. * @param DOM_Processor|null $dom_processor The DOM processor instance. * * @return string The processed list item HTML with Gutenberg comments. */ public function process_list_item( ?DOMElement $list_item, ?DOM_Processor $dom_processor ): string { if ( $list_item === null || empty( $list_item ) || $dom_processor === null ) { return ''; } // Start the list item block. $output = '' . \PHP_EOL; $output .= '

'; // Extract nested lists. $nested_lists = []; $content = ''; // First pass: separate content and nested lists. foreach ( $list_item->childNodes as $node ) { // Capture nested lists. if ( $node->nodeType === \XML_ELEMENT_NODE && ( $node->nodeName === 'ul' || $node->nodeName === 'ol' ) ) { $nested_lists[] = $node; } // Process content nodes (text or other elements). elseif ( $node->nodeType === \XML_TEXT_NODE ) { $text = \trim( $node->textContent ); if ( ! empty( $text ) ) { $content .= $text; } } elseif ( $node->nodeType === \XML_ELEMENT_NODE ) { $content .= $dom_processor->get_node_html( $node ); } } // Add the content. $output .= $content; // Process nested lists. foreach ( $nested_lists as $nested_list ) { // Always use unordered list for nested lists. $output .= '' . \PHP_EOL; $output .= '

'; // Process nested list items. foreach ( $nested_list->childNodes as $nested_child ) { if ( $nested_child->nodeType !== \XML_ELEMENT_NODE || $nested_child->nodeName !== 'li' ) { continue; } // Recursively process nested list items. $output .= $this->process_list_item( $nested_child, $dom_processor ); } $output .= '' . \PHP_EOL; $output .= ''; } // Close the list item. $output .= '

' . \PHP_EOL; $output .= ''; return $output; } /** * Creates a bookmark paragraph that can be shared via link. * * @param DOMElement|null $paragraph The paragraph element. * * @return string The bookmark paragraph block if found, null otherwise. */ public function create_bookmark_paragraph( ?DOMElement $paragraph ): string { if ( $paragraph === null ) { return ''; } // Check if this matches our bookmark structure. $spans = $paragraph->getElementsByTagName( 'span' ); $anchors = $paragraph->getElementsByTagName( 'a' ); // Must have exactly one span followed by an anchor with ID. if ( $spans->length === 0 || $anchors->length === 0 ) { return ''; } $span = $spans->item( 0 ); $anchor = $anchors->item( 0 ); // The anchor should have an ID and be empty. $anchor_id = $anchor->getAttribute( 'id' ); // Ensure ID is present and anchor has no text content. // Prevent conflict with footnotes. if ( $anchor_id === '' || \trim( $anchor->textContent ) !== '' ) { return ''; } // Get the span text content. $span_text = \trim( $span->textContent ); if ( $span_text === '' ) { return ''; } // Create a paragraph block with a link and an ID for the bookmark. $block = $this->create_block_comments( 'paragraph' ); return $block['open'] . \PHP_EOL . '

' . '' . \esc_html( $span_text ) . '' . '

' . \PHP_EOL . $block['close']; } }