#!/usr/bin/env python3
"""
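Removes definitions (property, prefixed-property, media-feature, pseudo-class, pseudo-element, function, prefixed-function, named-value)
from the target XML file (webkit.xml by default, see --file) located in the manuallySupportedData directory
if a definition with the same tag name and id exists in any other XML file in manuallySupportedData or in its
parent directory (the parent of this script's directory, e.g. webrefGeneratedXmls). Files that have the same
name as the target are never used for the comparison.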

Usage:
  - Dry run (default):
      ./remove_duplicate_webkit_defs.py --file="mozilla.xml"
  - Actually modify webkit.xml (creates a .bak backup):
      ./remove_duplicate_webkit_defs.py --apply --file="mozilla.xml"

Notes:
  - Only elements with an "id" attribute are considered.
  - Only the following tag names are processed:
      property, prefixed-property, media-feature, pseudo-class, pseudo-element, function, prefixed-function, named-value
  - XML namespace urn:schemas-jetbrains-com:css-xml is supported.
"""
import argparse
import os
import sys
import shutil
import xml.etree.ElementTree as ET


# Name of the sub-directory (below the base directory) that holds the target file.
TARGET_FILES_DIR_NAME = 'manuallySupportedData'

# Local (namespace-stripped) tag names of the definitions this script looks at.
PROCESS_TAGS = {
    'property',
    'prefixed-property',
    'media-feature',
    'pseudo-class',
    'pseudo-element',
    'function',
    'prefixed-function',
    'named-value',
}

# XML namespace used in these files: once as a prefix map, once in
# ElementTree's "{uri}" notation.
NS = {'css': 'urn:schemas-jetbrains-com:css-xml'}
CSS_NS = '{' + NS['css'] + '}'

def _gather_ids_from_dir(dir_path: str, exclude_name: str, ids_by_tag: dict[str, dict[str, set[str]]]):
    """Record the ids of all processed definitions found in one directory.

    Every ``*.xml`` entry directly inside ``dir_path`` (no recursion) is
    parsed, except entries named ``exclude_name``. For each element, at any
    depth, whose namespace-stripped tag is in ``PROCESS_TAGS`` and which has
    a non-empty ``id`` attribute, the file name is added to
    ``ids_by_tag[tag][id]``.

    Args:
        dir_path: Directory to scan; nothing happens if it is not a directory.
        exclude_name: File name to skip (the file being cleaned up).
        ids_by_tag: Mapping ``tag -> id -> {file names}``; updated in place.

    Returns:
        None. Results are delivered through ``ids_by_tag``.
    """
    if not os.path.isdir(dir_path):
        return
    # Sorted only to make the order of any warnings below reproducible.
    for name in sorted(os.listdir(dir_path)):
        if not name.endswith('.xml') or name == exclude_name:
            continue
        path = os.path.join(dir_path, name)
        try:
            root = ET.parse(path).getroot()
        except (ET.ParseError, OSError, ValueError) as exc:
            # Still best effort, but say so: a skipped file means its
            # definitions cannot be detected as duplicates.
            print(f"[WARN] Skipping {path}: {exc}", file=sys.stderr)
            continue
        for elem in root.iter():
            # Comments / processing instructions carry a non-string tag.
            if not isinstance(elem.tag, str):
                continue
            # "{namespace}name" -> "name"; tags without a namespace are unchanged.
            local = elem.tag.split('}', 1)[-1]
            if local not in PROCESS_TAGS:
                continue
            _id = elem.attrib.get('id')
            if _id:
                ids_by_tag.setdefault(local, {}).setdefault(_id, set()).add(name)


def find_other_ids(search_dirs: list[str], target_basename: str) -> dict:
    """
    Collect the definition ids present in every XML file other than the target.

    The result maps local_tag_name -> dict(id -> set(file_names)). Each tag in
    PROCESS_TAGS gets an entry up front, so a tag without any hit maps to an
    empty dict. Files named target_basename are left out in every directory
    of search_dirs.
    """
    collected: dict[str, dict[str, set[str]]] = {}
    for tag_name in PROCESS_TAGS:
        collected[tag_name] = {}

    for directory in search_dirs:
        _gather_ids_from_dir(directory, target_basename, collected)

    return collected


def _local_tag_name(tag: str) -> str:
    """Return ``tag`` without a leading ``{namespace}`` part, if it has one."""
    return tag.split('}', 1)[-1]


def remove_duplicates_from_file(base_directory: str, apply_changes: bool, target_file: str) -> int:
    """Remove definitions from the target file that are also defined elsewhere.

    The target is ``<base_directory>/<TARGET_FILES_DIR_NAME>/<target_file>``.
    A direct child of its root element is a duplicate when its
    namespace-stripped tag is in ``PROCESS_TAGS`` and another XML file in
    ``base_directory`` or in the target's own directory contains an element
    with the same tag and ``id``.

    Args:
        base_directory: Directory that contains the ``TARGET_FILES_DIR_NAME``
            sub-directory; its own XML files are searched as well.
        apply_changes: When false, only report the duplicates (dry run).
            When true, copy the target to ``<target>.bak`` and rewrite it
            without the duplicates.
        target_file: File name of the XML file to clean up.

    Returns:
        0 on success (including "nothing to do" and dry runs), 2 if the
        target file does not exist, 3 if it cannot be parsed.
    """
    target_dir = os.path.join(base_directory, TARGET_FILES_DIR_NAME)
    target_path = os.path.join(target_dir, target_file)
    if not os.path.exists(target_path):
        print(f"[ERROR] {target_path} not found", file=sys.stderr)
        return 2

    # Search duplicates across both the base directory and manuallySupportedData
    ids_by_tag = find_other_ids([base_directory, target_dir], target_file)

    try:
        # The default parser drops comments and processing instructions, which
        # would then be missing from the rewritten file. Keep those that sit
        # inside the root element so they survive the round trip.
        parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True, insert_pis=True))
        tree = ET.parse(target_path, parser=parser)
        root = tree.getroot()
    except Exception as e:
        print(f"[ERROR] Failed to parse {target_path}: {e}", file=sys.stderr)
        return 3

    # Collect elements to remove (direct children of <definitions>), together
    # with the tag/id they matched on so the report does not recompute them.
    to_remove = []
    for elem in root:
        # Comments and processing instructions have a non-string tag.
        if not isinstance(elem.tag, str):
            continue
        local = _local_tag_name(elem.tag)
        elem_id = elem.attrib.get('id')
        if local in PROCESS_TAGS and elem_id and elem_id in ids_by_tag.get(local, {}):
            to_remove.append((elem, local, elem_id))

    if not to_remove:
        print('[INFO] No duplicate definitions found; no changes needed.')
        return 0

    print(f"[INFO] Found {len(to_remove)} duplicate definition(s) to remove from {target_file}:")
    for _, local, eid in to_remove:
        files = sorted(ids_by_tag.get(local, {}).get(eid, []))
        where = ", ".join(files) if files else "<unknown file>"
        print(f"  - <{local} id=\"{eid}\"/>  (also in: {where})")

    if not apply_changes:
        print('[INFO] Dry run mode. Re-run with --apply to modify the file.')
        return 0

    # Backup first (an existing .bak from an earlier run is overwritten)
    backup_path = target_path + '.bak'
    shutil.copyfile(target_path, backup_path)
    print(f"[INFO] Backup created at {backup_path}")

    # Remove elements and write back
    for elem, _, _ in to_remove:
        root.remove(elem)

    # Preserve namespace declaration: serialize the css namespace as the
    # default namespace instead of an auto-generated "ns0:" prefix.
    ET.register_namespace('', NS['css'])
    tree.write(target_path, encoding='utf-8', xml_declaration=True)
    print(f"[INFO] Updated {target_path}")
    return 0


def main():
    """Command-line entry point: parse the options, run the clean-up and exit with its status code."""
    cli = argparse.ArgumentParser(description='Remove duplicate definitions from an XML file in this directory')
    cli.add_argument('--apply', action='store_true', help='Apply changes (otherwise dry run)')
    cli.add_argument('--file', '-f', dest='file', default='webkit.xml', help='Target XML file name (default: webkit.xml)')
    options = cli.parse_args()

    # The base directory is the parent of the directory this script lives in.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    base_directory = os.path.dirname(script_dir)

    # Only the file name counts; a directory part given on the command line is dropped.
    target_name = os.path.basename(options.file)

    sys.exit(remove_duplicates_from_file(base_directory, options.apply, target_name))


if __name__ == '__main__':
    main()
