<?php
/**
 * LLMs.txt Generator module.
 *
 * Serves `/llms.txt` (and optionally `/llms-full.txt`) per the
 * https://llmstxt.org spec — a markdown index of high-value content
 * written for LLM consumption.
 *
 * - `/llms.txt`      always served when the module is enabled. Lists each
 *                    selected post-type's recent posts as markdown bullets
 *                    with title, URL, and excerpt.
 * - `/llms-full.txt` opt-in via the "include full content" toggle. Same
 *                    index PLUS the full plain-text content of each item
 *                    inlined, capped per-post to keep total file size
 *                    sane.
 *
 * URL interception: `init` priority 1 reads REQUEST_URI directly and
 * serves the response with `Content-Type: text/plain; charset=utf-8`,
 * then exits. No rewrite rules registered, no flush_rewrite_rules() needed.
 *
 * No caching in v1 — regenerates on every request. Most client sites have
 * tens to a few hundred posts and that's fast. Add a transient layer if
 * cost ever becomes visible.
 *
 * Filterable for advanced overrides:
 *   - `bw_dev_llms_post_types`         array of post-type slugs offered in the UI
 *   - `bw_dev_llms_txt_content`        final string returned for /llms.txt
 *   - `bw_dev_llms_full_txt_content`   final string returned for /llms-full.txt
 *
 * @package BW_Dev
 */

defined( 'ABSPATH' ) || exit;

class BW_Dev_Module_Llms_Txt implements BW_Dev_Module_Interface {

	/**
	 * Default values for every setting this module stores.
	 *
	 * Returned as-is by default_settings() and used as the fallback when a
	 * stored value is missing (see the settings()->get() calls in this class).
	 */
	private const DEFAULTS = array(
		'title'             => '',
		'summary'           => '',
		'intro'             => '',
		'post_types'        => array( 'post', 'page' ),
		'max_items'         => 50,
		// On by default — fresh installs serve both /llms.txt and /llms-full.txt
		// from day one. Disable only if your content library is large enough
		// that regenerating full text on every request becomes visible.
		'include_full'      => true,
		'max_chars'         => 10000,
	);

	/**
	 * Machine identifier of this module; used as the settings key throughout
	 * the class.
	 *
	 * @return string Always 'llms_txt'.
	 */
	public function slug(): string {
		return 'llms_txt';
	}

	/**
	 * Human-readable, translatable module name.
	 *
	 * @return string Localized label.
	 */
	public function label(): string {
		$label = __( 'LLMs.txt Generator', 'bw-dev' );
		return $label;
	}

	/**
	 * Identifier of the group this module is filed under.
	 *
	 * @return string Always 'indexing'.
	 */
	public function group(): string {
		return 'indexing';
	}

	/**
	 * Default settings for this module.
	 *
	 * @return array The DEFAULTS constant, unmodified.
	 */
	public function default_settings(): array {
		$defaults = self::DEFAULTS;
		return $defaults;
	}

	/**
	 * Normalize submitted settings into the shape this module stores.
	 *
	 * - title: single-line text; summary / intro: multi-line text.
	 * - post_types: only slugs present in available_post_types(), de-duplicated,
	 *   in submission order.
	 * - max_items clamped to 1–1000, max_chars clamped to 100–1,000,000.
	 * - include_full: true for any non-empty submitted value.
	 *
	 * @param array $data Raw submitted values for this module.
	 * @return array Sanitized settings.
	 */
	public function sanitize( array $data ): array {
		$clean = array(
			'title'   => '',
			'summary' => '',
			'intro'   => '',
		);

		if ( isset( $data['title'] ) ) {
			$clean['title'] = sanitize_text_field( wp_unslash( (string) $data['title'] ) );
		}
		foreach ( array( 'summary', 'intro' ) as $field ) {
			if ( isset( $data[ $field ] ) ) {
				$clean[ $field ] = sanitize_textarea_field( wp_unslash( (string) $data[ $field ] ) );
			}
		}

		// Keep only post types the UI actually offers; keying by slug de-duplicates
		// while preserving first-seen order.
		$allowed   = $this->available_post_types();
		$requested = array();
		if ( isset( $data['post_types'] ) && is_array( $data['post_types'] ) ) {
			$requested = $data['post_types'];
		}
		$picked = array();
		foreach ( $requested as $candidate ) {
			$candidate = sanitize_key( (string) $candidate );
			if ( ! isset( $allowed[ $candidate ] ) ) {
				continue;
			}
			$picked[ $candidate ] = $candidate;
		}
		$clean['post_types'] = array_values( $picked );

		$max_items = (int) ( $data['max_items'] ?? self::DEFAULTS['max_items'] );
		$max_chars = (int) ( $data['max_chars'] ?? self::DEFAULTS['max_chars'] );

		$clean['max_items']    = min( 1000, max( 1, $max_items ) );
		$clean['include_full'] = ! empty( $data['include_full'] );
		$clean['max_chars']    = min( 1000000, max( 100, $max_chars ) );

		return $clean;
	}

	/**
	 * Hook the request interceptor into WordPress.
	 *
	 * maybe_serve() is attached to `init` at priority 1.
	 */
	public function register(): void {
		$callback = array( $this, 'maybe_serve' );
		add_action( 'init', $callback, 1 );
	}

	/* ---------------------------------------------------------------------
	 * URL interception
	 * ------------------------------------------------------------------- */

	/**
	 * Intercept requests for /llms.txt and /llms-full.txt and serve them.
	 *
	 * The request path is matched case-insensitively, ignoring a trailing
	 * slash, and relative to the site's home path so sub-directory installs
	 * (home URL such as https://example.com/blog) match the same URLs that
	 * home_url( '/llms.txt' ) produces.
	 *
	 * When called during `init`, generation is deferred to the very end of
	 * that action. Post types, shortcodes and blocks are conventionally
	 * registered on `init` at the default priority, so generating any earlier
	 * would skip custom post types and leave shortcodes unexpanded.
	 *
	 * /llms-full.txt is only served while the "include full" setting is on.
	 * A served request ends in serve_text(), which exits.
	 */
	public function maybe_serve(): void {
		$uri  = isset( $_SERVER['REQUEST_URI'] ) ? wp_unslash( $_SERVER['REQUEST_URI'] ) : '';
		$path = wp_parse_url( (string) $uri, PHP_URL_PATH );
		if ( ! is_string( $path ) ) {
			return;
		}
		$path = strtolower( rtrim( $path, '/' ) );

		// Strip the home path prefix (sub-directory installs / multisite sub-sites).
		$home_path = wp_parse_url( home_url(), PHP_URL_PATH );
		$home_path = is_string( $home_path ) ? strtolower( rtrim( $home_path, '/' ) ) : '';
		if ( '' !== $home_path && 0 === strpos( $path, $home_path . '/' ) ) {
			$path = substr( $path, strlen( $home_path ) );
		}

		if ( '/llms.txt' === $path ) {
			$serve_full = false;
		} elseif ( '/llms-full.txt' === $path && $this->include_full_enabled() ) {
			$serve_full = true;
		} else {
			return;
		}

		$serve = function () use ( $serve_full ): void {
			if ( $serve_full ) {
				$body = (string) apply_filters( 'bw_dev_llms_full_txt_content', $this->generate_full() );
			} else {
				$body = (string) apply_filters( 'bw_dev_llms_txt_content', $this->generate_index() );
			}
			$this->serve_text( $body );
		};

		if ( doing_action( 'init' ) ) {
			// Run after everything else hooked to `init` has registered its
			// post types, shortcodes and blocks.
			add_action( 'init', $serve, PHP_INT_MAX );
			return;
		}

		// Called outside `init` (e.g. directly): nothing to wait for.
		$serve();
	}

	/**
	 * Send the response headers, print the body verbatim and end the request.
	 *
	 * @param string $body Plain-text (markdown) response body.
	 */
	private function serve_text( string $body ): void {
		$headers = array(
			'Content-Type'  => 'text/plain; charset=utf-8',
			'Cache-Control' => 'public, max-age=3600',
			'X-Robots-Tag'  => 'noindex, follow',
		);
		foreach ( $headers as $name => $value ) {
			header( $name . ': ' . $value );
		}

		echo $body; // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- plain text body, not HTML.
		exit;
	}

	/**
	 * Whether the "include full content" setting is on.
	 *
	 * @return bool Stored value cast to bool; falls back to the DEFAULTS entry.
	 */
	private function include_full_enabled(): bool {
		$fallback = self::DEFAULTS['include_full'];
		$stored   = bw_dev()->settings()->get( $this->slug(), 'include_full', $fallback );

		return (bool) $stored;
	}

	/* ---------------------------------------------------------------------
	 * Generation
	 * ------------------------------------------------------------------- */

	/**
	 * Build the /llms.txt body: header block followed by link-only sections.
	 *
	 * @return string Markdown with trailing whitespace trimmed and exactly one final newline.
	 */
	private function generate_index(): string {
		$document = $this->header_block() . $this->build_sections( false );

		return rtrim( $document ) . "\n";
	}

	/**
	 * Build the /llms-full.txt body: header block followed by sections with
	 * each item's full text inlined.
	 *
	 * @return string Markdown with trailing whitespace trimmed and exactly one final newline.
	 */
	private function generate_full(): string {
		$document = $this->header_block() . $this->build_sections( true );

		return rtrim( $document ) . "\n";
	}

	/**
	 * Title + summary + intro at the top of either file.
	 *
	 * An empty title falls back to the site name and an empty summary to the
	 * site tagline. Those fallbacks are HTML-entity decoded because the output
	 * is plain text and WordPress stores both options HTML-escaped (a site
	 * called "Tom & Jerry" is stored as "Tom &amp; Jerry").
	 *
	 * @return string Markdown: `# title`, optional `> summary` blockquote and
	 *                optional intro, each followed by a blank line.
	 */
	private function header_block(): string {
		$title   = (string) bw_dev()->settings()->get( $this->slug(), 'title',   '' );
		$summary = (string) bw_dev()->settings()->get( $this->slug(), 'summary', '' );
		$intro   = (string) bw_dev()->settings()->get( $this->slug(), 'intro',   '' );

		if ( '' === $title ) {
			$title = wp_specialchars_decode( (string) get_bloginfo( 'name' ), ENT_QUOTES );
		}
		if ( '' === $summary ) {
			$summary = wp_specialchars_decode( (string) get_bloginfo( 'description' ), ENT_QUOTES );
		}

		$out = '# ' . $title . "\n\n";
		if ( '' !== $summary ) {
			// A blockquote must stay on one line, so fold any line breaks.
			$out .= '> ' . $this->collapse_lines( $summary ) . "\n\n";
		}
		if ( '' !== $intro ) {
			$out .= rtrim( $intro ) . "\n\n";
		}
		return $out;
	}

	/**
	 * One markdown section per enabled post type.
	 *
	 * Each section is a `## <plural label>` heading followed by one bullet per
	 * post (`- [Title](url): description`), newest-modified first. Post types
	 * that are not registered, or that have no matching posts, are skipped.
	 *
	 * Only published posts WITHOUT a password are listed: descriptions and
	 * full text are read straight from post_content, which would otherwise
	 * expose password-protected content publicly.
	 *
	 * @param bool $include_full When true, each bullet is followed by optional
	 *                           SEO title/description lines, the published and
	 *                           updated dates, and the indented full text.
	 * @return string Markdown for all sections ('' when nothing qualifies).
	 */
	private function build_sections( bool $include_full ): string {
		$enabled   = (array) bw_dev()->settings()->get( $this->slug(), 'post_types', self::DEFAULTS['post_types'] );
		$max_items = (int) bw_dev()->settings()->get( $this->slug(), 'max_items',  self::DEFAULTS['max_items'] );
		$max_chars = (int) bw_dev()->settings()->get( $this->slug(), 'max_chars',  self::DEFAULTS['max_chars'] );

		// Re-clamp in case stored values bypassed sanitize().
		$max_items = max( 1, min( 1000,    $max_items ) );
		$max_chars = max( 100, min( 1000000, $max_chars ) );

		$out = '';
		foreach ( $enabled as $pt ) {
			$pt_obj = get_post_type_object( $pt );
			if ( ! $pt_obj ) {
				continue;
			}
			$posts = get_posts(
				array(
					'post_type'      => $pt,
					'post_status'    => 'publish',
					// Never expose password-protected content in a public file.
					'has_password'   => false,
					'posts_per_page' => $max_items,
					'orderby'        => 'modified',
					'order'          => 'DESC',
					'no_found_rows'  => true,
				)
			);
			if ( empty( $posts ) ) {
				continue;
			}

			$out .= '## ' . $pt_obj->labels->name . "\n\n";

			foreach ( $posts as $post ) {
				$url = get_permalink( $post );
				// get_the_title() returns display-ready HTML (entities such as
				// &#8217; / &amp;); convert it to plain text for the markdown.
				$title     = trim( html_entity_decode( wp_strip_all_tags( (string) get_the_title( $post ) ), ENT_QUOTES | ENT_HTML5, 'UTF-8' ) );
				$seo_title = $this->seo_title( $post );
				$seo_desc  = $this->seo_description( $post );
				$excerpt   = $this->bullet_description( $post, $seo_desc );

				$out .= sprintf(
					'- [%1$s](%2$s)%3$s' . "\n",
					$this->collapse_lines( $title ),
					(string) $url,
					'' !== $excerpt ? ': ' . $this->collapse_lines( $excerpt ) : ''
				);

				if ( $include_full ) {
					$out .= "\n";
					// Only print the SEO title when it adds information.
					if ( '' !== $seo_title && 0 !== strcasecmp( $seo_title, $title ) ) {
						$out .= '  SEO Title: ' . $this->collapse_lines( $seo_title ) . "\n";
					}
					if ( '' !== $seo_desc ) {
						$out .= '  SEO Description: ' . $this->collapse_lines( $seo_desc ) . "\n";
					}
					$out .= '  Published: ' . get_the_date( 'Y-m-d', $post ) . ', updated ' . get_the_modified_date( 'Y-m-d', $post ) . "\n\n";
					$out .= $this->indent( $this->build_full_text( $post, $max_chars ), '  ' ) . "\n\n";
				}
			}
			wp_reset_postdata();

			$out .= "\n";
		}
		return $out;
	}

	/**
	 * One-line description shown after `- [Title](url):` in both files.
	 *
	 * Source priority: the already-resolved SEO description, then the post
	 * excerpt, then a 30-word snippet cut from the post body. Whatever is
	 * chosen has its tags stripped and HTML entities decoded.
	 *
	 * @param WP_Post $post
	 * @param string  $seo_desc Already-resolved SEO description (or '').
	 * @return string Plain-text description, possibly ''.
	 */
	private function bullet_description( $post, string $seo_desc ): string {
		$source = $seo_desc;

		if ( '' === $source ) {
			$source = (string) $post->post_excerpt;
			if ( '' === trim( $source ) ) {
				$source = wp_trim_words( wp_strip_all_tags( (string) $post->post_content ), 30, '…' );
			}
		}

		$plain = wp_strip_all_tags( $source );

		return trim( html_entity_decode( $plain, ENT_QUOTES | ENT_HTML5, 'UTF-8' ) );
	}

	/**
	 * Read the SEO meta title from whichever SEO plugin set it. Returns ''
	 * if none is set, or if every candidate still contains unresolved template
	 * placeholders (we don't try to expand those — the post title is fine).
	 *
	 * Recognized placeholder syntaxes:
	 *   - `%%name%%`            (any `%%` occurrence)
	 *   - `%name%` / `%name(arg)%` single-percent variables (Rank Math style)
	 *
	 * Order: Yoast → Rank Math → SEOPress → AIOSEO.
	 *
	 * @param WP_Post $post
	 * @return string Trimmed SEO title, or ''.
	 */
	private function seo_title( $post ): string {
		$keys = array(
			'_yoast_wpseo_title',     // Yoast SEO
			'rank_math_title',        // Rank Math
			'_seopress_titles_title', // SEOPress
			'_aioseo_title',          // All in One SEO
		);
		foreach ( $keys as $key ) {
			$val = get_post_meta( $post->ID, $key, true );
			if ( ! is_string( $val ) ) {
				continue;
			}
			$val = trim( $val );
			if ( '' === $val ) {
				continue;
			}
			// Skip unresolved template placeholders. The single-percent branch
			// requires a variable-like token directly between the percent
			// signs, so prose such as "Save 20% today and 30% tomorrow" passes.
			if ( 1 === preg_match( '/%%|%[a-z][a-z0-9_]*(?:\([^)%]*\))?%/i', $val ) ) {
				continue;
			}
			return $val;
		}
		return '';
	}

	/**
	 * Read the SEO meta description from whichever SEO plugin set it.
	 * Returns '' if none is set, or if every candidate still contains
	 * unresolved template placeholders.
	 *
	 * Recognized placeholder syntaxes:
	 *   - `%%name%%`            (any `%%` occurrence)
	 *   - `%name%` / `%name(arg)%` single-percent variables (Rank Math style)
	 *
	 * Order: Yoast → Rank Math → SEOPress → AIOSEO.
	 *
	 * @param WP_Post $post
	 * @return string Trimmed SEO description, or ''.
	 */
	private function seo_description( $post ): string {
		$keys = array(
			'_yoast_wpseo_metadesc',  // Yoast SEO
			'rank_math_description',  // Rank Math
			'_seopress_titles_desc',  // SEOPress
			'_aioseo_description',    // All in One SEO
		);
		foreach ( $keys as $key ) {
			$val = get_post_meta( $post->ID, $key, true );
			if ( ! is_string( $val ) ) {
				continue;
			}
			$val = trim( $val );
			if ( '' === $val ) {
				continue;
			}
			// Skip unresolved template placeholders. The single-percent branch
			// requires a variable-like token directly between the percent
			// signs, so prose such as "Save 20% today and 30% tomorrow" passes.
			if ( 1 === preg_match( '/%%|%[a-z][a-z0-9_]*(?:\([^)%]*\))?%/i', $val ) ) {
				continue;
			}
			return $val;
		}
		return '';
	}

	/**
	 * Plain-text version of a post's full content, capped at $max characters.
	 *
	 * The cap counts UTF-8 characters, not bytes, so truncation never splits a
	 * multi-byte character and matches the "max characters" setting. When the
	 * text is cut, it is trimmed back to the last space if that space lies in
	 * the final 20% of the kept text, and a "[content truncated]" marker is
	 * appended.
	 *
	 * @param WP_Post $post
	 * @param int     $max  Maximum number of characters to keep.
	 * @return string Plain text, possibly ending with the truncation marker.
	 */
	private function build_full_text( $post, int $max ): string {
		// Run the_content filter so blocks + shortcodes are expanded.
		$GLOBALS['post'] = $post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
		setup_postdata( $post );
		$html = (string) apply_filters( 'the_content', $post->post_content );
		wp_reset_postdata();

		// NOTE(review): the `true` argument asks wp_strip_all_tags() to also
		// remove line breaks, which would leave the \n{3,} collapse below
		// with nothing to do — confirm whether paragraph breaks were meant
		// to survive.
		$text = wp_strip_all_tags( $html, true );
		$text = html_entity_decode( $text, ENT_QUOTES | ENT_HTML5, 'UTF-8' );
		$text = (string) preg_replace( "/\n{3,}/", "\n\n", $text );
		$text = trim( $text );

		if ( mb_strlen( $text, 'UTF-8' ) > $max ) {
			$text = mb_substr( $text, 0, $max, 'UTF-8' );
			// Trim back to the last word boundary if it's not too costly.
			// Byte offsets are safe here: an ASCII space is never part of a
			// multi-byte UTF-8 sequence.
			$cut = strrpos( $text, ' ' );
			if ( false !== $cut && $cut > strlen( $text ) * 0.8 ) {
				$text = substr( $text, 0, $cut );
			}
			$text .= "\n\n[content truncated]";
		}
		return $text;
	}

	/**
	 * Fold every run of whitespace (including line breaks) into a single
	 * space and trim the ends, so the value fits on one markdown line.
	 *
	 * @param string $s Text that may span several lines.
	 * @return string Single-line text.
	 */
	private function collapse_lines( string $s ): string {
		$collapsed = preg_replace( '/\s+/u', ' ', $s );
		if ( null === $collapsed ) {
			// The /u modifier makes preg_replace() fail (NULL) on invalid
			// UTF-8; retry byte-wise instead of silently emptying the value.
			$collapsed = (string) preg_replace( '/\s+/', ' ', $s );
		}
		return trim( $collapsed );
	}

	/**
	 * Prefix every non-empty line of $text with $prefix.
	 *
	 * Lines are split on LF or CRLF and re-joined with LF; empty lines stay
	 * empty (no trailing-whitespace-only lines are produced).
	 *
	 * @param string $text   Possibly multi-line text.
	 * @param string $prefix String prepended to each non-empty line.
	 * @return string Indented text, or $text unchanged if splitting fails.
	 */
	private function indent( string $text, string $prefix ): string {
		$lines = preg_split( "/\r?\n/", $text );
		if ( ! is_array( $lines ) ) {
			return $text;
		}

		$indented = array();
		foreach ( $lines as $line ) {
			$indented[] = ( '' === $line ) ? '' : $prefix . $line;
		}

		return implode( "\n", $indented );
	}

	/* ---------------------------------------------------------------------
	 * Settings tab
	 * ------------------------------------------------------------------- */

	/**
	 * Post types offered for inclusion, as a slug => plural label map.
	 *
	 * Starts from every public post type except `attachment` (pages of media
	 * are not useful to LLMs) and passes the map through the
	 * `bw_dev_llms_post_types` filter, so a site can add a private CPT
	 * (e.g. internal docs) it explicitly wants exposed.
	 *
	 * @return array Map of post-type slug => label.
	 */
	public function available_post_types(): array {
		$objects = get_post_types( array( 'public' => true ), 'objects' );
		unset( $objects['attachment'] );

		// array_map() over a single array keeps the slug keys.
		$labels = array_map(
			static function ( $type_object ) {
				return $type_object->labels->name;
			},
			$objects
		);

		return (array) apply_filters( 'bw_dev_llms_post_types', $labels );
	}

	/**
	 * Print the settings fields for this module.
	 *
	 * Reads the current values through bw_dev()->settings()->get() (falling
	 * back to DEFAULTS) and outputs, in order: a short description, a box
	 * linking to the live /llms.txt (and /llms-full.txt when enabled) URLs,
	 * the form table with one row per setting, and a "How it works" list.
	 *
	 * Every input is named BW_Dev_Settings::OPTION[<slug>][<field>]. Only the
	 * fields are printed here — no <form> element or submit button.
	 */
	public function render_tab(): void {
		// Common name prefix for every input rendered below.
		$prefix     = BW_Dev_Settings::OPTION . '[' . $this->slug() . ']';
		$title      = (string) bw_dev()->settings()->get( $this->slug(), 'title',        '' );
		$summary    = (string) bw_dev()->settings()->get( $this->slug(), 'summary',      '' );
		$intro      = (string) bw_dev()->settings()->get( $this->slug(), 'intro',        '' );
		$enabled_pt = (array)  bw_dev()->settings()->get( $this->slug(), 'post_types',   self::DEFAULTS['post_types'] );
		$max_items  = (int)    bw_dev()->settings()->get( $this->slug(), 'max_items',    self::DEFAULTS['max_items'] );
		$inc_full   = (bool)   bw_dev()->settings()->get( $this->slug(), 'include_full', self::DEFAULTS['include_full'] );
		$max_chars  = (int)    bw_dev()->settings()->get( $this->slug(), 'max_chars',    self::DEFAULTS['max_chars'] );
		$pts        = $this->available_post_types();
		$llms_url   = home_url( '/llms.txt' );
		$full_url   = home_url( '/llms-full.txt' );
		?>
		<p class="description">
			<?php esc_html_e( 'Generates an LLM-friendly markdown index at /llms.txt (per the llmstxt.org spec) — and optionally /llms-full.txt with the full content of each listed item inlined.', 'bw-dev' ); ?>
		</p>

		<div style="background:#fff;border-left:4px solid #2271b1;padding:12px 16px;margin:14px 0;max-width:720px;">
			<strong><?php esc_html_e( 'Live URLs', 'bw-dev' ); ?></strong>
			<p style="margin:6px 0 0;">
				<?php esc_html_e( 'Index:', 'bw-dev' ); ?>
				<a href="<?php echo esc_url( $llms_url ); ?>" target="_blank" rel="noopener"><code><?php echo esc_html( $llms_url ); ?></code></a>
				<?php if ( $inc_full ) : ?>
					<br /><?php esc_html_e( 'Full:', 'bw-dev' ); ?>
					<a href="<?php echo esc_url( $full_url ); ?>" target="_blank" rel="noopener"><code><?php echo esc_html( $full_url ); ?></code></a>
				<?php endif; ?>
			</p>
		</div>

		<table class="form-table" role="presentation">
			<tbody>
				<tr>
					<th scope="row">
						<label for="bw-dev-llms-title"><?php esc_html_e( 'Title', 'bw-dev' ); ?></label>
					</th>
					<td>
						<input type="text" id="bw-dev-llms-title" name="<?php echo esc_attr( $prefix . '[title]' ); ?>" value="<?php echo esc_attr( $title ); ?>" class="regular-text" placeholder="<?php echo esc_attr( get_bloginfo( 'name' ) ); ?>" />
						<p class="description"><?php esc_html_e( 'Heading at the top of llms.txt. Leave blank to use the site name.', 'bw-dev' ); ?></p>
					</td>
				</tr>
				<tr>
					<th scope="row">
						<label for="bw-dev-llms-summary"><?php esc_html_e( 'Summary', 'bw-dev' ); ?></label>
					</th>
					<td>
						<textarea id="bw-dev-llms-summary" name="<?php echo esc_attr( $prefix . '[summary]' ); ?>" rows="2" class="large-text" placeholder="<?php echo esc_attr( get_bloginfo( 'description' ) ); ?>"><?php echo esc_textarea( $summary ); ?></textarea>
						<p class="description"><?php esc_html_e( 'Rendered as a markdown blockquote (>) under the title. One or two sentences. Leave blank to use the site tagline.', 'bw-dev' ); ?></p>
					</td>
				</tr>
				<tr>
					<th scope="row">
						<label for="bw-dev-llms-intro"><?php esc_html_e( 'Intro markdown (optional)', 'bw-dev' ); ?></label>
					</th>
					<td>
						<textarea id="bw-dev-llms-intro" name="<?php echo esc_attr( $prefix . '[intro]' ); ?>" rows="4" class="large-text code"><?php echo esc_textarea( $intro ); ?></textarea>
						<p class="description"><?php esc_html_e( 'Free-form markdown inserted between the summary and the post-type sections. Useful for "About this site" copy oriented at LLMs.', 'bw-dev' ); ?></p>
					</td>
				</tr>
				<tr>
					<th scope="row"><?php esc_html_e( 'Include post types', 'bw-dev' ); ?></th>
					<td>
						<fieldset>
							<?php foreach ( $pts as $pt_slug => $pt_label ) :
								$id = 'bw-dev-llms-pt-' . sanitize_html_class( $pt_slug );
								?>
								<label for="<?php echo esc_attr( $id ); ?>" style="display:inline-block;margin-right:14px;">
									<input type="checkbox" id="<?php echo esc_attr( $id ); ?>" name="<?php echo esc_attr( $prefix . '[post_types][]' ); ?>" value="<?php echo esc_attr( $pt_slug ); ?>" <?php checked( in_array( $pt_slug, $enabled_pt, true ) ); ?> />
									<?php echo esc_html( $pt_label ); ?>
									<span style="color:#646970;font-size:11px;">(<?php echo esc_html( $pt_slug ); ?>)</span>
								</label>
							<?php endforeach; ?>
						</fieldset>
						<p class="description"><?php esc_html_e( 'Each enabled post type becomes a section in llms.txt with its most-recently-updated entries.', 'bw-dev' ); ?></p>
					</td>
				</tr>
				<tr>
					<th scope="row">
						<label for="bw-dev-llms-max-items"><?php esc_html_e( 'Max items per post type', 'bw-dev' ); ?></label>
					</th>
					<td>
						<input type="number" min="1" max="1000" id="bw-dev-llms-max-items" name="<?php echo esc_attr( $prefix . '[max_items]' ); ?>" value="<?php echo esc_attr( (string) $max_items ); ?>" class="small-text" />
						<p class="description"><?php esc_html_e( 'Cap on how many entries from each post type appear in the file. 1–1000.', 'bw-dev' ); ?></p>
					</td>
				</tr>
				<tr>
					<th scope="row"><?php esc_html_e( 'Also serve /llms-full.txt', 'bw-dev' ); ?></th>
					<td>
						<label for="bw-dev-llms-include-full">
							<input type="checkbox" id="bw-dev-llms-include-full" name="<?php echo esc_attr( $prefix . '[include_full]' ); ?>" value="1" <?php checked( $inc_full ); ?> />
							<?php esc_html_e( 'Generate the companion file with full content inlined (on by default)', 'bw-dev' ); ?>
						</label>
						<p class="description"><?php esc_html_e( 'When on, /llms-full.txt is served alongside /llms.txt. Same index but each entry has the full plain-text content beneath it. Larger file; LLMs that want context (not just URLs) prefer this. Both files regenerate on every request, so they always reflect current content.', 'bw-dev' ); ?></p>
					</td>
				</tr>
				<tr>
					<th scope="row">
						<label for="bw-dev-llms-max-chars"><?php esc_html_e( 'Max characters per post (full mode)', 'bw-dev' ); ?></label>
					</th>
					<td>
						<input type="number" min="100" max="1000000" id="bw-dev-llms-max-chars" name="<?php echo esc_attr( $prefix . '[max_chars]' ); ?>" value="<?php echo esc_attr( (string) $max_chars ); ?>" class="regular-text" style="width:120px;" />
						<p class="description"><?php esc_html_e( 'In /llms-full.txt only. Each post\'s body is truncated to this length (at the nearest word boundary) so the total file stays manageable. 100–1,000,000.', 'bw-dev' ); ?></p>
					</td>
				</tr>
			</tbody>
		</table>

		<h3><?php esc_html_e( 'How it works', 'bw-dev' ); ?></h3>
		<ul style="list-style:disc;margin-left:20px;">
			<li><?php esc_html_e( 'Output format: markdown, served as text/plain so browsers display it cleanly. 1-hour Cache-Control.', 'bw-dev' ); ?></li>
			<li><?php esc_html_e( 'Files are regenerated on every request — no caching layer in this version. Most sites have under 200 posts and the cost is negligible.', 'bw-dev' ); ?></li>
			<li><?php esc_html_e( 'X-Robots-Tag: noindex, follow — these files exist for LLM consumption, not search-engine indexing.', 'bw-dev' ); ?></li>
			<li><?php esc_html_e( 'Filterable: bw_dev_llms_post_types, bw_dev_llms_txt_content, bw_dev_llms_full_txt_content.', 'bw-dev' ); ?></li>
		</ul>
		<?php
	}

	/**
	 * Uninstall routine for this module. Intentionally does nothing — see
	 * the comment in the body.
	 */
	public function uninstall(): void {
		// Settings live under bw_dev_settings — root option drop covers them.
	}
}
