import sys
import os
from bs4 import BeautifulSoup

# Build context taken from the DOCS_BUILD_CONTEXT environment variable;
# None when the variable is not set.
BUILD_CONTEXT = os.getenv('DOCS_BUILD_CONTEXT')

def _split_meta_list(tag):
    """Return the comma-separated values of a meta tag's ``content`` attribute.

    Whitespace around each value is stripped and empty entries are dropped.
    A tag without a ``content`` attribute yields an empty list.
    """
    content = tag.get('content') or ''
    return [item.strip() for item in content.split(',') if item.strip()]


def _in_build_context(soup):
    """Return whether the page should be processed for ``BUILD_CONTEXT``.

    The page is skipped (False) only when ``BUILD_CONTEXT`` is set, the page
    has a ``docs-build-context`` meta tag, and none of the contexts listed in
    that tag equals ``BUILD_CONTEXT``.
    """
    if not BUILD_CONTEXT:
        return True

    context_tag = soup.find('meta', attrs={'name': 'docs-build-context'})
    if context_tag is None:
        print("docs-build-context not set. Treating", BUILD_CONTEXT, "as valid.")
        return True

    for context in _split_meta_list(context_tag):
        if context == BUILD_CONTEXT:
            print("  ... in", context, "- Processing")
            return True
        print("  ... not in", context)
    return False


def _remove_column_from_table(table, column_name, remove_empty_rows):
    """Remove the column whose header text equals *column_name* from *table*.

    The column position is the index of the first matching ``<th>`` among all
    ``<th>`` elements of the table; the cell at that position is then removed
    from every row that has enough cells. Tables without a matching header
    are left untouched.
    """
    column_index = None
    for index, header in enumerate(table.find_all('th')):
        if header.get_text(strip=True) == column_name:
            column_index = index
            break
    if column_index is None:
        return

    rows = table.find_all('tr')
    for row in rows:
        cells = row.find_all(['td', 'th'])
        if column_index < len(cells):
            cells[column_index].extract()

    if remove_empty_rows:
        # Drop rows that lost their only cell.
        for row in rows:
            if not row.find_all(['td', 'th']):
                row.decompose()


def remove_column_from_tables(file_path):
    """Remove the columns named in an HTML file's meta tags from its tables.

    The file is parsed once, modified in memory and written back once. The
    behaviour is driven by ``<meta>`` tags in the document:

    * ``docs-build-context``: comma-separated contexts. When the module-level
      ``BUILD_CONTEXT`` is set and this tag is present, the file is processed
      only if one of the listed contexts equals ``BUILD_CONTEXT``.
    * ``remove-column-from-html-table``: comma-separated header names of the
      columns to remove (required).
    * ``remove-column-emptied-row``: when its content is ``1``, rows left
      without any cell after the removal are deleted as well.

    The file is rewritten only when it contains at least one table.

    Args:
        file_path: Path of the HTML file to modify in place.

    Raises:
        SystemExit: With status 1 when the file names no column to remove.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        soup = BeautifulSoup(file, 'lxml')

    print("Checking if in valid context for", file_path)
    if not _in_build_context(soup):
        return

    # Find columns to delete
    column_tag = soup.find('meta', attrs={'name': 'remove-column-from-html-table'})
    column_names = _split_meta_list(column_tag) if column_tag is not None else []
    if not column_names:
        print(f"No column to remove specified in '{file_path}' header")
        sys.exit(1)

    # Remove empty rows? Attribute values are strings, so compare with '1'.
    row_tag = soup.find('meta', attrs={'name': 'remove-column-emptied-row'})
    remove_empty_rows = (
        row_tag is not None
        and (row_tag.get('content') or '').strip() == '1'
    )

    has_tables = False
    for column_name in column_names:
        print(f"Removing column '{column_name}' from '{file_path}'")
        for table in soup.find_all('table'):
            has_tables = True
            _remove_column_from_table(table, column_name, remove_empty_rows)

    if has_tables:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(str(soup))

    if remove_empty_rows:
        print("... removed rows made empty by column removal")

if __name__ == '__main__':
    # Every command-line argument is the path of an HTML file to process.
    if not sys.argv[1:]:
        print("Usage: python remove_column.py <html_file> <html_file> ...")
        sys.exit(0)

    for html_file in sys.argv[1:]:
        remove_column_from_tables(html_file)