<?php
// Refuse direct access: stop immediately unless ABSPATH is already defined.
defined( 'ABSPATH' ) || exit;

/**
 * Settings: defaults, registration, sanitization, getters.
 *
 * All mapping-style settings share one format:
 *     label : value1, value2, value3
 *
 * Blank lines and lines starting with `#` are ignored by the parsers. The
 * label separator is the first `:` that sits outside any `{...}` group.
 *
 * Sections:
 *   - parameter_aliases         : dimension key : URL param aliases (non-standard keys are custom dims)
 *   - referrer_classification   : medium name  : referrer hostnames (first match sets visit medium)
 *   - click_ids                 : source/medium : click-ID param names
 *   - channels                  : channel label : source/medium pairs
 */
class BW_Lead_AI_Settings {

	const OPTION_GROUP = 'bw_lead_ai_option_group';

	/** Standard dimension keys that map to built-in visit fields. */
	const STANDARD_ALIAS_KEYS = array( 'source', 'medium', 'campaign', 'term', 'content', 'adgroup' );

	/** Merge tags reserved by the plugin — custom dimension keys must not collide with these. */
	const RESERVED_TAG_NAMES = array(
		'source_medium', 'channel', 'first_channel', 'first_source', 'first_medium',
		'first_page', 'last_page', 'submit_page', 'visits', 'pages', 'tagged_visits',
		'summary', 'summary_detailed',
	);

	/** Lazily created shared instance, see instance(). */
	private static $instance = null;

	/**
	 * Return the shared instance, creating it on first use.
	 *
	 * @return self
	 */
	public static function instance() {
		if ( null === self::$instance ) {
			self::$instance = new self();
		}
		return self::$instance;
	}

	/**
	 * Hook settings registration and default seeding onto `admin_init`.
	 */
	public function register() {
		add_action( 'admin_init', array( $this, 'register_settings' ) );
		add_action( 'admin_init', array( $this, 'maybe_seed_defaults' ) );
	}

	/**
	 * Defaults for the main settings option.
	 *
	 * @return array Option array with every key the sanitizer produces.
	 */
	public static function defaults() {
		return array(
			'parameter_aliases'       => self::default_parameter_aliases_text(),
			'referrer_classification' => self::default_referrer_classification_text(),
			'click_ids'               => self::default_click_ids_text(),
			'channels'                => self::default_channels_text(),

			// Form field targets (legacy non-GF support).
			'field_targets'           => array(
				'summary'      => array( 'attr' => '', 'val' => '' ),
				'source'       => array( 'attr' => '', 'val' => '' ),
				'medium'       => array( 'attr' => '', 'val' => '' ),
				'sources'      => array( 'attr' => '', 'val' => '' ),
				'terms'        => array( 'attr' => '', 'val' => '' ),
				'first_page'   => array( 'attr' => '', 'val' => '' ),
			),

			// Merging behavior.
			'source_medium_separator' => ' / ',

			'debug'                   => 0,
		);
	}

	/**
	 * Register the main option and the UTM option, each with its own
	 * sanitize callback and default value.
	 */
	public function register_settings() {
		register_setting(
			self::OPTION_GROUP,
			BW_LEAD_AI_OPTION,
			array(
				'type'              => 'array',
				'sanitize_callback' => array( $this, 'sanitize' ),
				'default'           => self::defaults(),
			)
		);
		register_setting(
			self::OPTION_GROUP . '_utm',
			BW_LEAD_AI_UTM_OPTION,
			array(
				'type'              => 'array',
				'sanitize_callback' => array( $this, 'sanitize_utm' ),
				'default'           => array(),
			)
		);
	}

	/**
	 * Store the defaults when the main option does not exist yet.
	 */
	public function maybe_seed_defaults() {
		if ( false === get_option( BW_LEAD_AI_OPTION ) ) {
			update_option( BW_LEAD_AI_OPTION, self::defaults() );
		}
	}

	/**
	 * Sanitize callback for the main settings option.
	 *
	 * Keys missing from the submission fall back to their default value, so a
	 * form that only posts part of the option does not wipe the rest.
	 *
	 * @param mixed $input Raw submitted value.
	 * @return array Clean option array (defaults when $input is not an array).
	 */
	public function sanitize( $input ) {
		$defaults = self::defaults();
		if ( ! is_array( $input ) ) {
			return $defaults;
		}
		$out = array();

		// Parameter aliases can arrive either combined (hidden input passthrough from
		// other tabs) or split across three fields (Settings tab: dedicated source and
		// medium inputs plus a textarea for everything else). Rebuild the combined
		// string when the split form is present so that source/medium always exist as
		// the first two rows.
		$has_split = isset( $input['parameter_aliases_source'] )
			|| isset( $input['parameter_aliases_medium'] )
			|| isset( $input['parameter_aliases_other'] );
		if ( $has_split ) {
			$out['parameter_aliases'] = $this->build_parameter_aliases_from_split( $input, $defaults );
		} elseif ( isset( $input['parameter_aliases'] ) ) {
			$out['parameter_aliases'] = $this->sanitize_multiline( $input['parameter_aliases'] );
		} else {
			$out['parameter_aliases'] = $defaults['parameter_aliases'];
		}

		// The remaining mapping textareas all share the same treatment.
		foreach ( array( 'referrer_classification', 'click_ids', 'channels' ) as $key ) {
			$out[ $key ] = isset( $input[ $key ] )
				? $this->sanitize_multiline( $input[ $key ] )
				: $defaults[ $key ];
		}

		// Preserve spaces in the separator — `sanitize_text_field` trims whitespace,
		// which would silently convert the default " / " into "/" on first save.
		// A non-scalar submission is treated as absent rather than stringified.
		$has_separator                  = isset( $input['source_medium_separator'] ) && is_scalar( $input['source_medium_separator'] );
		$out['source_medium_separator'] = $has_separator
			? $this->sanitize_separator( $input['source_medium_separator'] )
			: $defaults['source_medium_separator'];

		$out['debug'] = ! empty( $input['debug'] ) ? 1 : 0;

		// Field targets: only the known keys are accepted, and `attr` must be one
		// of the whitelisted lookup modes.
		$out['field_targets'] = $defaults['field_targets'];
		if ( isset( $input['field_targets'] ) && is_array( $input['field_targets'] ) ) {
			foreach ( $out['field_targets'] as $key => $def ) {
				if ( isset( $input['field_targets'][ $key ] ) && is_array( $input['field_targets'][ $key ] ) ) {
					$attr = isset( $input['field_targets'][ $key ]['attr'] ) ? sanitize_key( $input['field_targets'][ $key ]['attr'] ) : '';
					$val  = isset( $input['field_targets'][ $key ]['val'] ) ? sanitize_text_field( $input['field_targets'][ $key ]['val'] ) : '';
					if ( ! in_array( $attr, array( '', 'id', 'class', 'name', 'selector' ), true ) ) {
						$attr = '';
					}
					$out['field_targets'][ $key ] = array( 'attr' => $attr, 'val' => $val );
				}
			}
		}

		$out['parameter_aliases'] = $this->validate_parameter_aliases( $out['parameter_aliases'] );

		return $out;
	}

	/**
	 * Check parameter_aliases for duplicate labels and custom dim keys that
	 * collide with reserved merge-tag names. Emits settings errors for problems
	 * and returns a cleaned string with any invalid rows stripped, so the stored
	 * option never carries rows the parser would ignore anyway.
	 *
	 * Lines the parser skips without looking at their label — `#` comments,
	 * lines without a separator, and rows without any value — are kept verbatim
	 * and never count towards duplicate detection. Otherwise a comment such as
	 * `# campaign : note` would cause the real `campaign` row to be dropped.
	 *
	 * @param string $text Sanitized multiline alias text.
	 * @return string Validated text, one row per line.
	 */
	private function validate_parameter_aliases( $text ) {
		$seen = array();
		$out  = array();
		foreach ( preg_split( '/\r\n|\r|\n/', (string) $text ) as $line ) {
			$trim = trim( $line );
			if ( '' === $trim ) {
				continue;
			}
			// Comments are ignored by parse_labeled_list(); keep them untouched.
			if ( '#' === substr( $trim, 0, 1 ) ) {
				$out[] = $line;
				continue;
			}
			$split_pos = self::find_label_separator( $trim );
			if ( false === $split_pos ) {
				// Keep lines the parser can't understand — users may be mid-edit.
				$out[] = $line;
				continue;
			}
			$label = sanitize_key( trim( substr( $trim, 0, $split_pos ) ) );
			if ( '' === $label ) {
				continue;
			}
			// A row with nothing but commas/whitespace after the separator has no
			// values; the parser skips it, so it must not reserve the label.
			if ( '' === trim( (string) substr( $trim, $split_pos + 1 ), ", \t\n\r\0\x0B" ) ) {
				$out[] = $line;
				continue;
			}
			if ( isset( $seen[ $label ] ) ) {
				$this->add_alias_error(
					'bw_lead_ai_alias_duplicate_' . $label,
					/* translators: %s is a label key. */
					esc_html__( 'Parameter Aliases: duplicate label "%s" — each label may only appear once. The extra row was dropped.', 'bw-lead-ai' ),
					$label
				);
				continue;
			}
			$seen[ $label ] = 1;

			if ( ! in_array( $label, self::STANDARD_ALIAS_KEYS, true )
				&& in_array( $label, self::RESERVED_TAG_NAMES, true ) ) {
				$this->add_alias_error(
					'bw_lead_ai_alias_reserved_' . $label,
					/* translators: %s is a label key. */
					esc_html__( 'Parameter Aliases: "%s" is a reserved merge tag name and cannot be used as a custom dimension key. The row was dropped — pick a different key.', 'bw-lead-ai' ),
					$label
				);
				continue;
			}
			$out[] = $line;
		}
		return implode( "\n", $out );
	}

	/**
	 * Queue a settings error about a parameter-alias row.
	 *
	 * Does nothing when add_settings_error() is not defined, so calling
	 * sanitize() in a context where that function is unavailable cannot fatal.
	 *
	 * @param string $code     Unique error code.
	 * @param string $template Already-escaped message containing one `%s`.
	 * @param string $label    Offending label; escaped before insertion.
	 */
	private function add_alias_error( $code, $template, $label ) {
		if ( ! function_exists( 'add_settings_error' ) ) {
			return;
		}
		add_settings_error( BW_LEAD_AI_OPTION, $code, sprintf( $template, esc_html( $label ) ) );
	}

	/**
	 * Sanitize callback for the UTM option.
	 *
	 * @param mixed $input Raw submitted value; expected to be a list of arrays.
	 * @return array List of rows with a fixed key set; non-array items are dropped.
	 */
	public function sanitize_utm( $input ) {
		if ( ! is_array( $input ) ) {
			return array();
		}
		$text_fields = array( 'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content' );
		$out         = array();
		foreach ( $input as $item ) {
			if ( ! is_array( $item ) ) {
				continue;
			}
			$row = array(
				'link_to_uri' => isset( $item['link_to_uri'] ) ? esc_url_raw( $item['link_to_uri'] ) : '',
			);
			foreach ( $text_fields as $field ) {
				$row[ $field ] = isset( $item[ $field ] ) ? sanitize_text_field( $item[ $field ] ) : '';
			}
			$row['note'] = isset( $item['note'] ) ? sanitize_textarea_field( $item['note'] ) : '';
			$out[]       = $row;
		}
		return $out;
	}

	/**
	 * Build the parameter_aliases string from the Settings tab's split inputs.
	 * Source and medium always occupy the first two rows; if the user clears
	 * them they fall back to the default values so visit capture keeps working.
	 *
	 * @param array $input    Raw submitted option array.
	 * @param array $defaults Result of defaults().
	 * @return string Combined multiline alias text.
	 */
	private function build_parameter_aliases_from_split( $input, $defaults ) {
		$source_csv = $this->sanitize_csv( isset( $input['parameter_aliases_source'] ) ? $input['parameter_aliases_source'] : '' );
		$medium_csv = $this->sanitize_csv( isset( $input['parameter_aliases_medium'] ) ? $input['parameter_aliases_medium'] : '' );
		$other_raw  = isset( $input['parameter_aliases_other'] ) ? $input['parameter_aliases_other'] : '';

		// Parse the defaults once, and only when at least one field needs them.
		if ( '' === $source_csv || '' === $medium_csv ) {
			$default_aliases = self::parse_parameter_aliases( $defaults['parameter_aliases'] );
			if ( '' === $source_csv ) {
				$source_csv = implode( ', ', $default_aliases['standard']['source'] );
			}
			if ( '' === $medium_csv ) {
				$medium_csv = implode( ', ', $default_aliases['standard']['medium'] );
			}
		}

		$lines   = array( 'source : ' . $source_csv, 'medium : ' . $medium_csv );
		$cleaned = $this->sanitize_multiline( $other_raw );
		if ( '' !== $cleaned ) {
			// Drop any user-typed source/medium rows from the "other" textarea — those
			// always come from the dedicated inputs and must not appear twice.
			// Comment lines are left alone even when they mention source/medium.
			foreach ( explode( "\n", $cleaned ) as $line ) {
				if ( '#' !== substr( $line, 0, 1 ) ) {
					$split_pos = self::find_label_separator( $line );
					if ( false !== $split_pos ) {
						$label = sanitize_key( trim( substr( $line, 0, $split_pos ) ) );
						if ( 'source' === $label || 'medium' === $label ) {
							continue;
						}
					}
				}
				$lines[] = $line;
			}
		}
		return implode( "\n", $lines );
	}

	/**
	 * Minimal sanitizer for the source/medium separator: strip tags, drop control
	 * characters, but preserve leading/trailing whitespace so users can keep
	 * separators like " / " or " • " intact. Avoids `sanitize_text_field` and
	 * `wp_strip_all_tags` because both of those trim whitespace.
	 *
	 * @param mixed $val Raw separator; non-scalar values yield ''.
	 * @return string
	 */
	private function sanitize_separator( $val ) {
		if ( ! is_scalar( $val ) ) {
			return '';
		}
		$val = wp_check_invalid_utf8( (string) $val );
		// Strip tags without trimming. `strip_tags` leaves whitespace alone.
		$val = (string) preg_replace( '@<(script|style)[^>]*?>.*?</\\1>@si', '', $val );
		$val = strip_tags( $val );
		// Drop control characters (including \r\n\t) but keep the space (0x20).
		// preg_replace() returns null on failure, hence the cast.
		return (string) preg_replace( '/[\x00-\x1F\x7F]/u', '', $val );
	}

	/**
	 * Normalize a comma-separated list to "a, b, c".
	 *
	 * Each entry is sanitized first and empties are removed afterwards, so an
	 * entry that sanitizes to nothing cannot leave a blank slot behind (which
	 * would also make an effectively empty list look non-empty to callers).
	 *
	 * @param mixed $val Raw CSV; non-scalar values yield ''.
	 * @return string
	 */
	private function sanitize_csv( $val ) {
		if ( ! is_scalar( $val ) ) {
			return '';
		}
		$parts = array_map( 'sanitize_text_field', explode( ',', (string) $val ) );
		$parts = array_filter( $parts, 'strlen' );
		return implode( ', ', $parts );
	}

	/**
	 * Sanitize multiline text line by line, dropping lines that end up empty.
	 *
	 * @param mixed $val Raw textarea content; non-scalar values yield ''.
	 * @return string Lines joined with "\n".
	 */
	private function sanitize_multiline( $val ) {
		if ( ! is_scalar( $val ) ) {
			return '';
		}
		$clean = array();
		foreach ( preg_split( '/\r\n|\r|\n/', (string) $val ) as $line ) {
			$line = sanitize_text_field( $line );
			if ( '' !== $line ) {
				$clean[] = $line;
			}
		}
		return implode( "\n", $clean );
	}

	/**
	 * Get the full settings array, merged with defaults.
	 *
	 * The merge is top-level only (wp_parse_args).
	 *
	 * @return array
	 */
	public static function get() {
		$opt = get_option( BW_LEAD_AI_OPTION, array() );
		if ( ! is_array( $opt ) ) {
			$opt = array();
		}
		return wp_parse_args( $opt, self::defaults() );
	}

	/**
	 * Parse "label : value1, value2, ..." multiline text into an ordered list.
	 * Returns: [ [ 'label' => '...', 'values' => [ '...', ... ] ], ... ]
	 *
	 * The label separator is the first `:` that sits outside any `{...}` group,
	 * so labels like `{bw:source}` (used in channel rules) parse correctly even
	 * though they contain a literal `:` inside the braces.
	 *
	 * Blank lines, `#` comments, lines without a separator, and rows with an
	 * empty label or no values are skipped.
	 *
	 * @param string $text Multiline mapping text.
	 * @return array
	 */
	public static function parse_labeled_list( $text ) {
		$rows  = array();
		$lines = preg_split( '/\r\n|\r|\n/', (string) $text );
		foreach ( $lines as $line ) {
			$line = trim( $line );
			if ( '' === $line || '#' === substr( $line, 0, 1 ) ) {
				continue;
			}
			$split_pos = self::find_label_separator( $line );
			if ( false === $split_pos ) {
				continue;
			}
			$label  = trim( substr( $line, 0, $split_pos ) );
			$rest   = (string) substr( $line, $split_pos + 1 );
			$values = array();
			foreach ( array_map( 'trim', explode( ',', $rest ) ) as $val ) {
				if ( '' !== $val ) {
					$values[] = $val;
				}
			}
			if ( '' === $label || empty( $values ) ) {
				continue;
			}
			$rows[] = array( 'label' => $label, 'values' => $values );
		}
		return $rows;
	}

	/**
	 * Find the position of the first `:` that sits outside any `{...}` group.
	 * Returns false if no such separator exists on the line.
	 *
	 * An unmatched `}` is ignored rather than driving the depth negative.
	 *
	 * @param string $line Single line of mapping text.
	 * @return int|false Byte offset of the separator, or false.
	 */
	private static function find_label_separator( $line ) {
		$depth = 0;
		$len   = strlen( $line );
		for ( $i = 0; $i < $len; $i++ ) {
			$ch = $line[ $i ];
			if ( '{' === $ch ) {
				$depth++;
			} elseif ( '}' === $ch ) {
				if ( $depth > 0 ) {
					$depth--;
				}
			} elseif ( ':' === $ch && 0 === $depth ) {
				return $i;
			}
		}
		return false;
	}

	/**
	 * Parse parameter_aliases into standard dimensions + custom dimensions.
	 *
	 * Returns: [
	 *     'standard' => [ source => [...], medium => [...], ... ],
	 *     'custom'   => [ key    => [...], ... ],
	 * ]
	 *
	 * Rows whose label is one of STANDARD_ALIAS_KEYS populate the standard
	 * bucket. Any other label becomes a custom dimension exposed via
	 * `{bw:<label>}`. Rows with reserved merge-tag labels are dropped, and the
	 * first row wins when a key appears more than once.
	 *
	 * @param string $text Multiline alias text.
	 * @return array
	 */
	public static function parse_parameter_aliases( $text ) {
		$out  = array( 'standard' => array(), 'custom' => array() );
		$rows = self::parse_labeled_list( $text );
		foreach ( $rows as $row ) {
			$key = sanitize_key( $row['label'] );
			if ( '' === $key || isset( $out['standard'][ $key ] ) || isset( $out['custom'][ $key ] ) ) {
				continue;
			}
			if ( in_array( $key, self::STANDARD_ALIAS_KEYS, true ) ) {
				$out['standard'][ $key ] = $row['values'];
				continue;
			}
			if ( in_array( $key, self::RESERVED_TAG_NAMES, true ) ) {
				continue;
			}
			$out['custom'][ $key ] = $row['values'];
		}
		// Ensure every standard key exists so the frontend config is complete.
		foreach ( self::STANDARD_ALIAS_KEYS as $std ) {
			if ( ! isset( $out['standard'][ $std ] ) ) {
				$out['standard'][ $std ] = array();
			}
		}
		return $out;
	}

	/**
	 * Parse referrer_classification into an ordered list of classes.
	 * Returns: [ [ 'medium' => 'organic', 'sources' => [ 'google', ... ] ], ... ]
	 *
	 * @param string $text Multiline classification text.
	 * @return array Rows in input order; rows whose label sanitizes to '' are skipped.
	 */
	public static function parse_referrer_classification( $text ) {
		$out  = array();
		$rows = self::parse_labeled_list( $text );
		foreach ( $rows as $row ) {
			$medium = sanitize_key( $row['label'] );
			if ( '' === $medium ) {
				continue;
			}
			$out[] = array( 'medium' => $medium, 'sources' => $row['values'] );
		}
		return $out;
	}

	/**
	 * Parse click_ids text into a list of [param, source, medium] rows.
	 *
	 * Format per line: `source/medium : param1, param2, ...`
	 * One output row per param so the JS lookup stays the same as before.
	 * Rows whose label is not a complete `source/medium` pair are skipped.
	 *
	 * @param string $text Multiline click-ID text.
	 * @return array
	 */
	public static function parse_click_ids( $text ) {
		$out  = array();
		$rows = self::parse_labeled_list( $text );
		foreach ( $rows as $row ) {
			$pair = array_map( 'trim', explode( '/', $row['label'], 2 ) );
			if ( count( $pair ) !== 2 || '' === $pair[0] || '' === $pair[1] ) {
				continue;
			}
			$source = $pair[0];
			$medium = $pair[1];
			foreach ( $row['values'] as $param ) {
				$param = sanitize_key( $param );
				if ( '' === $param ) {
					continue;
				}
				$out[] = array(
					'param'  => $param,
					'source' => $source,
					'medium' => $medium,
				);
			}
		}
		return $out;
	}

	/**
	 * Default parameter aliases text (standard dimensions only).
	 *
	 * @return string
	 */
	public static function default_parameter_aliases_text() {
		return implode( "\n", array(
			'source   : utm_source, source, src, ctm_source',
			'medium   : utm_medium, medium, med, ctm_medium',
			'campaign : utm_campaign, campaign, cmp, ctm_campaign',
			'term     : utm_term, trm, term, key, keyword, ctm_term',
			'content  : utm_content, content, cnt, creative, crv, ctm_content',
			'adgroup  : utm_adgroup, adgroup, adg, ctm_adgroup',
		) );
	}

	/**
	 * Default referrer classification text. Labels are the medium the visit
	 * will be assigned when its referrer host matches one of the values.
	 *
	 * This is only a fallback — explicit UTMs or click-IDs always win.
	 *
	 * @return string
	 */
	public static function default_referrer_classification_text() {
		return implode( "\n", array(
			'organic : google, yahoo, bing, duckduckgo, ecosia',
			'social  : facebook, instagram, youtube, twitter, x.com, linkedin, pinterest, tiktok, reddit',
		) );
	}

	/**
	 * Default click-ID inference text. Label is the source/medium the visit
	 * will be assigned when any of the listed params is present.
	 *
	 * @return string
	 */
	public static function default_click_ids_text() {
		return implode( "\n", array(
			'google/cpc         : gclid, gclsrc, gbraid, wbraid',
			'facebook/social    : fbclid',
			'bing/cpc           : msclkid',
			'doubleclick/display: dclid',
			'tiktok/cpc         : ttclid',
			'linkedin/cpc       : li_fat_id',
			'twitter/cpc        : twclid',
			'yandex/cpc         : yclid',
		) );
	}

	/**
	 * Default channel mapping rules. Order matters: first match wins.
	 *
	 * Format per line: `Label : source1/medium1, source2/medium2, ...`
	 * `*` matches any value. `{bw:source}` / `{bw:medium}` in the label are
	 * substituted with the actual visit values at render time.
	 *
	 * @return string
	 */
	public static function default_channels_text() {
		return implode( "\n", array(
			'Google Ads : google/cpc, google/ppc, google/paid, google/display, google/shopping, google/video',
			'Bing Ads : bing/cpc, microsoft/cpc',
			'Facebook Ads : facebook/cpc, facebook/paid, meta/cpc, meta/paid',
			'Instagram Ads : instagram/cpc, instagram/paid',
			'LinkedIn Ads : linkedin/cpc, linkedin/paid',
			'TikTok Ads : tiktok/cpc, tiktok/paid',
			'YouTube Ads : youtube/cpc, youtube/paid, youtube/video',
			'Twitter Ads : twitter/cpc, x/cpc',
			'Google Organic : google/organic',
			'Bing Organic : bing/organic',
			'DuckDuckGo : duckduckgo/organic',
			'Yahoo : yahoo/organic',
			'Organic Search : */organic',
			'Email : */email, */newsletter, */e-mail',
			'Social : */social',
			'Display : */display, */banner',
			'Affiliate : */affiliate',
			'Direct : (direct)/(none)',
			'{bw:source} : */referral',
			'Unknown : (none)/(none), (unknown)/(unknown), (not set)/(not set)',
		) );
	}

	/**
	 * Parse channels text into an ordered list of rules.
	 *
	 * Returns: [ [ 'label' => '...', 'patterns' => [ [source, medium], ... ] ], ... ]
	 * Each pattern token uses '*' for wildcard. '{bw:source}' and '{bw:medium}' in the
	 * label are preserved — the JS resolver substitutes them at render time.
	 *
	 * Values without a `/` are ignored, an empty side of a pair becomes `*`, and
	 * rules left without any pattern are dropped.
	 *
	 * @param string $text Multiline channel rules.
	 * @return array
	 */
	public static function parse_channels( $text ) {
		$rules = array();
		$rows  = self::parse_labeled_list( $text );
		foreach ( $rows as $row ) {
			$patterns = array();
			foreach ( $row['values'] as $pair ) {
				$pieces = array_map( 'trim', explode( '/', $pair, 2 ) );
				if ( count( $pieces ) !== 2 ) {
					continue;
				}
				$patterns[] = array(
					'source' => '' === $pieces[0] ? '*' : $pieces[0],
					'medium' => '' === $pieces[1] ? '*' : $pieces[1],
				);
			}
			if ( empty( $patterns ) ) {
				continue;
			}
			$rules[] = array(
				'label'    => $row['label'],
				'patterns' => $patterns,
			);
		}
		return $rules;
	}
}
