import { censoredKeywords } from "./keywords/genericKeywords";
import { combinedCensorship } from "./keywords/combinationKeywords";
import { furryBannedKeywords } from "./keywords/generatorSpecificKeywords";
import { nonFurryBannedKeywords } from "./keywords/generatorSpecificKeywords";
import { replacementsKeywords } from "./keywords/replacementKeywords";
import { isSDXL } from "../generators/isSDXL";

/**
 * Entry point of the keyword pipeline.
 *
 * A prompt that `isCensored` flags is rewritten by `filterSensitivePrompt`;
 * every prompt, flagged or not, is then passed through `antiCooker` and
 * finally `replaceTerms`.
 *
 * @param input - Raw prompt text.
 * @param generator - Generator identifier, forwarded unchanged to the helpers.
 * @returns The processed prompt.
 */
export function keywordCensorship(input: string, generator: any) {
  // isCensored is the cheap gate; the regex-heavy filter only runs on
  // prompts that fail it, which saves compute on the common clean case.
  const screened = isCensored(input, generator)
    ? filterSensitivePrompt(input, generator)
    : input;

  return replaceTerms(antiCooker(screened, generator));
}

/**
 * Tidies emphasis parentheses in a prompt and caps oversized `:weight` values.
 *
 * 1. Parentheses: whitespace touching a parenthesis is removed, runs of
 *    repeated parentheses collapse to a single one, comma-separated segments
 *    are trimmed and re-joined with ", ", and full-width colons become ":".
 *    A prompt that contains no parenthesis at all skips this step untouched.
 * 2. Weights: every ":<number>" whose number is at or above the threshold is
 *    rewritten to the capped value.
 *
 * @param rawInput - Prompt text. Falsy values yield ""; any other non-string
 *   value is stringified instead of throwing.
 * @param generator - Passed to `isSDXL` to select the weight limits.
 * @returns The tidied prompt.
 */
export function antiCooker(rawInput: any, generator: any): string {
  // Limits per generator family. Both families currently share the same
  // values; the lookup exists so they can diverge without touching the clamp.
  const WEIGHT_LIMITS = {
    sdxl: { threshold: 1.4, replacement: ": 1.3" },
    other: { threshold: 1.4, replacement: ": 1.3" },
  };

  function simplifyParentheses(input: string): string {
    if (!input) return "";

    // Nothing to tidy when the text has no parentheses.
    if (!/[()]/.test(input)) return input;

    const compacted = input
      .replace(/\s*\(\s*/g, "(") // drop whitespace hugging "("
      .replace(/\s*\)\s*/g, ")") // drop whitespace hugging ")"
      .replace(/\({2,}/g, "(") // "(((" -> "("
      .replace(/\){2,}/g, ")"); // ")))" -> ")"

    return compacted
      .split(",")
      .map((piece) => {
        const segment = piece.trim();
        // A segment fully wrapped in one pair of parentheses has its inner
        // text tidied recursively and is then re-wrapped.
        if (segment.startsWith("(") && segment.endsWith(")")) {
          return "(" + simplifyParentheses(segment.slice(1, -1)) + ")";
        }
        return segment;
      })
      .join(", ")
      .replace(/：/g, ":");
  }

  // Falsy input becomes ""; other non-strings are stringified so the
  // string methods below cannot throw.
  const text: string =
    typeof rawInput === "string" ? rawInput : rawInput ? String(rawInput) : "";
  const tidied = simplifyParentheses(text);

  const { threshold, replacement } = isSDXL(generator)
    ? WEIGHT_LIMITS.sdxl
    : WEIGHT_LIMITS.other;

  // Matches ":" followed by a number that may contain stray spaces ("1. 9").
  return tidied.replace(/:(\s*\d+(\.\s*\d+)?)/g, (match: string, digits: string) => {
    const weight = parseFloat(digits.replace(/\s+/g, ""));
    return weight >= threshold ? replacement : match;
  });
}

// TODO:
// - For the returned prompt, remove all special symbols outside of ().
// - Add a majority-letter keyword matching rule: for words longer than 5 letters, matching (letter count - 1) letters is enough; for words longer than 8 letters, matching (letter count - 2) letters is enough.
// - isCensored currently flags words that merely contain a keyword, even when the keyword is not the full word.
// - Celebrity / keyword bypass where people split the keyword with a comma.

/**
 * Lightweight check for whether a prompt contains banned content.
 *
 * The prompt is reduced to lower-case letters and spaces, then tested in two
 * passes:
 *  1. Whole-word matches (optionally followed by s/es/ed/er) against the
 *     generic and site-specific keyword lists, plus the combination rule:
 *     a group triggers when at least two of its keywords appear as words.
 *  2. Spaced-out matches: each keyword's letters may be separated by any
 *     amount of whitespace in the prompt. This pass has no word boundaries,
 *     so it also fires when a keyword is embedded in a longer word.
 *
 * Keywords are treated as literal text (regex metacharacters are escaped) and
 * blank keywords are ignored, so a malformed list entry can neither throw nor
 * flag every prompt.
 *
 * @param string - Raw prompt text.
 * @param generator - Not read by this function; kept for signature compatibility.
 * @returns `true` when any check matches.
 */
export function isCensored(string: string, generator: string): boolean { // Compute light censorship check
  const escapeRegExp = (literal: string): string =>
    literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // Step 1: keep only letters and spaces, lower-cased and trimmed.
  const normalizedString = string
    .replace(/[^a-zA-Z ]+/g, "")
    .toLowerCase()
    .trim();

  // Step 2: individual words, used by the combination rule.
  const words = normalizedString.split(/\s+/);

  // Step 3: the site-specific list is chosen from the deployment's base URL.
  const generatorKeywords =
    process.env.NEXT_PUBLIC_BASE_URL === "https://www.createaifurry.com"
      ? furryBannedKeywords
      : nonFurryBannedKeywords;

  // Whole-word match with the common suffixes allowed.
  const matchesWholeWord = (keyword: string): boolean => {
    if (keyword.trim() === "") return false; // a blank keyword would match anything
    const regex = new RegExp(`\\b${escapeRegExp(keyword)}(s|es|ed|er)?\\b`, "i");
    return regex.test(normalizedString);
  };

  // Match with arbitrary whitespace allowed between the keyword's letters.
  const matchesSpacedOut = (keyword: string): boolean => {
    const letters = keyword.replace(/\s+/g, "").split("");
    if (letters.length === 0) return false; // an empty pattern would match anything
    const pattern = letters.map((letter) => `${escapeRegExp(letter)}\\s*`).join("");
    const regex = new RegExp(pattern + "(s|es|ed|er)?", "i");
    return regex.test(normalizedString);
  };

  // --- First pass: whole words ---
  if (censoredKeywords.some((keyword: string) => matchesWholeWord(keyword))) return true;

  if (
    combinedCensorship.some(
      (group: string[]) => group.filter((keyword) => words.includes(keyword)).length >= 2,
    )
  ) {
    return true;
  }

  if (generatorKeywords.some((keyword: string) => matchesWholeWord(keyword))) return true;

  // --- Second pass: spaced-out letters (combination rule intentionally not re-applied) ---
  if (censoredKeywords.some((keyword: string) => matchesSpacedOut(keyword))) return true;
  if (generatorKeywords.some((keyword: string) => matchesSpacedOut(keyword))) return true;

  return false;
}

/**
 * Normalises a flagged prompt and strips banned keywords from it.
 *
 * Order of operations:
 *  1. Look-alike punctuation is mapped to its plain form (`deobfuscate`).
 *  2. The text is lower-cased and run through the regex chain below, which
 *     unwraps parentheses around words, re-wraps "word:1.3"-style segments,
 *     and removes digits/symbols outside parentheses.
 *  3. Comma spacing is normalised to ", ".
 *  4. Generic and site-specific keywords are removed in three passes: as
 *     written, with their spaces removed, and with their spaces replaced by
 *     ", ". Each pass also removes the s/es/ed/er suffix forms that
 *     `isCensored` flags.
 *  5. A combination group is removed when at least two of its keywords
 *     (or all of them, for groups shorter than two) occur in the text, the
 *     same threshold `isCensored` uses.
 *  6. Empty weight groups and redundant commas are cleaned up.
 *
 * @param text - Raw prompt text.
 * @param generator - Not read by this function; kept for signature compatibility.
 * @returns The lower-cased, cleaned prompt.
 */
function filterSensitivePrompt(text: string, generator: string): string {
  const escapeRegExp = (literal: string): string =>
    literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  function deobfuscate(string: string): string {
    // Single-character look-alikes only: the lookup below is per character,
    // so multi-character keys could never match and are not listed.
    const replacements: { [key: string]: string } = {
      "$": "s",
      "_": " ",
      "-": " ",
      "|": " ",
      "，": ",",
      ",": ",",
      "、": ",",
      "𑑍": ",",
      "（": "(",
      "）": ")",
      "「": "(",
      "」": ")",
      "【": "(",
      "】": ")",
      "{": "(",
      "}": ")",
      ":": ":",
      "˸": ":",
      "׃": ":",
      '∶': ":",
      "︓": ":",
      "：": ":",
      "﹕": ":"
      // ... add more replacements as needed
    };

    // Array.from walks the string by code point, so characters outside the
    // Basic Multilingual Plane stay whole and can be found in the table
    // (split("") would hand them over as two separate surrogate halves).
    return Array.from(string)
      .map((char) => replacements[char] || char)
      .join("");
  }

  let normalizedText = deobfuscate(text)
    .toLowerCase()
    .replace(/[\(\)]?(\b[a-zA-Z0-9]+\b)[\(\)]?/g, '$1') // Remove parentheses around alphanumeric words and unbalanced parentheses prevent cases like jennifer)lawrence
    .replace(/\s{2,}/g, ' ') // Replace multiple spaces with a single space
    .replace(/, (?=[^,]*[a-zA-Z])(?=[^,]*[0-9])(?=[^,]*:)(?=[^,]*\.)[^,]+,/g, (match) => { // Wrap content between commas that contains letters, numbers, colon, and period in any order this is to change gigantic:1.3 to (gigantic:1.3)
      const content = match.slice(2, -1); // Remove the leading ", " and trailing ","
      return `, (${content}),`;
    })
    .replace(/(?<!\([^)]*)[0-9!@#$%^&*_\-+=\[\]{}|\\:;"'<>\.?/~`]+(?![^()]*\))/g, '') // Remove all special symbols and numbers outside parentheses, except commas
    .replace(/([a-zA-Z])[^a-zA-Z\s,]+(?=[a-zA-Z])/g, '$1') // Remove special symbols between letters but keep commas and spaces
    .replace(/\(([^()]*?)\)/g, (match, content) => { // Process content inside parentheses
      return '(' + content
        .replace(/[^a-zA-Z\s]+(?=:)/g, '')  // Keeps only letters and spaces before the colon inside parentheses
        + ')';
    })
    .trim(); // Trim leading and trailing spaces

  // Give every comma the ", " spacing that the comma-joined keyword pass
  // looks for. Without this, "first,second" is missed by that pass and is
  // then turned into "first, second" by the final clean-up below.
  normalizedText = normalizedText.replace(/\s*,\s*/g, ', ');

  const generatorKeywords = (process.env.NEXT_PUBLIC_BASE_URL === "https://www.createaifurry.com") ? furryBannedKeywords : nonFurryBannedKeywords

  // Removes every stand-alone occurrence of each (optionally transformed)
  // keyword, including its s/es/ed/er forms, from normalizedText.
  function filterKeywords(
    keywordList: string[],
    transformFn: (keyword: string) => string = (keyword) => keyword
  ): void {
    keywordList.forEach((keyword) => {
      const processedKeyword = transformFn(keyword);

      // With a blank keyword the pattern would reduce to the bare suffixes.
      if (processedKeyword.trim() === '') return;

      const escapedKeyword = escapeRegExp(processedKeyword);

      // The keyword must not be glued to a word character on either side.
      const regexGeneral = new RegExp(`(?<![\\w\\d])${escapedKeyword}(?:s|es|ed|er)?(?![\\w\\d])`, 'gi');
      normalizedText = normalizedText.replace(regexGeneral, '');
    });
  }

  // First pass: Run filter with original keywords (with spaces)
  filterKeywords(censoredKeywords);
  filterKeywords(generatorKeywords);

  // Second pass: Run filter with keywords where spaces are removed
  filterKeywords(censoredKeywords, (keyword: string) => keyword.replace(/\s+/g, ''));
  filterKeywords(generatorKeywords, (keyword: string) => keyword.replace(/\s+/g, ''));

  // Third pass: Run filter with keywords where spaces are replaced with ", "
  filterKeywords(censoredKeywords, (keyword: string) => keyword.replace(/\s+/g, ', '));
  filterKeywords(generatorKeywords, (keyword: string) => keyword.replace(/\s+/g, ', '));

  // Combination groups: same trigger as isCensored (two or more keywords of
  // the group present). Groups with fewer than two keywords still require
  // all of them, as before.
  combinedCensorship.forEach((group: string[]) => {
    const presentCount = group.filter((keyword) => normalizedText.includes(keyword)).length;
    if (presentCount >= Math.min(2, group.length)) {
      // Remove all keywords from this group
      group.forEach((keyword) => {
        const regex = new RegExp(`\\b${escapeRegExp(keyword)}(s|es|ed|er)?\\b`, 'gi');
        normalizedText = normalizedText.replace(regex, '');
      });
    }
  });

  return normalizedText
    .replace(/\(\s*[:\d.]+\s*\)/g, '') // Remove patterns like (:1.3) or (:1.4) where no letters are present
    .replace(/(\s*,\s*)+/g, ', ') // Replace any sequence of commas and spaces with a single ", "
    .replace(/^,\s*|,\s*$/g, '') // Remove any leading or trailing commas
    .trim()
}

/**
 * Swaps every term listed in `replacementsKeywords` for its counterpart.
 *
 * Terms are matched as whole words, case-insensitively, and are treated as
 * literal text. When the prompt contains at least one such term, parentheses,
 * colons and stand-alone numbers are stripped first. The first occurrence of
 * a term is replaced; later occurrences of the same term are removed.
 *
 * @param inputString - Prompt text.
 * @returns The prompt with terms replaced; returned unchanged when the
 *   replacement table has no usable entries.
 */
function replaceTerms(inputString: string): string {
  const escapeRegExp = (literal: string): string =>
    literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // Lower-cased lookup so the case-insensitive match always finds its entry.
  // A key that is already lower-case wins over a mixed-case duplicate.
  const lookup = new Map<string, string>();
  for (const [term, replacement] of Object.entries(replacementsKeywords)) {
    if (term === "") continue; // an empty alternative would match everywhere
    const lowered = term.toLowerCase();
    if (term === lowered || !lookup.has(lowered)) {
      lookup.set(lowered, replacement as string);
    }
  }

  // With no terms the pattern would degenerate to \b()\b and match at every
  // word boundary.
  if (lookup.size === 0) return inputString;

  const pattern = `\\b(${Array.from(lookup.keys()).map(escapeRegExp).join("|")})\\b`;

  // Use the same matching rule for the presence check as for the replacement,
  // so weights and numbers are stripped exactly when a term will be replaced.
  if (new RegExp(pattern, "i").test(inputString)) {
    // Remove parentheses, colons, and numbers
    inputString = inputString.replace(/[\(\):]|\b\d+(\.\d+)?\b/g, "");
  }

  const alreadyReplaced = new Set<string>(); // terms that have been replaced once

  return inputString.replace(new RegExp(pattern, "gi"), (matched: string) => {
    const lowerMatch = matched.toLowerCase();

    if (alreadyReplaced.has(lowerMatch)) {
      return ""; // drop repeats of a term that was already replaced
    }
    alreadyReplaced.add(lowerMatch);
    return lookup.get(lowerMatch) ?? matched;
  });
}

// test prompt
//// jennifer)lawrence), *-+ jennifer)lawrence, (lawrence hi:1.3), (lawrence), (rape), rape, (rap:1.3), ((hithere)), thisse, ss, dog, tei, child13, (child13), (child13:1.3), jennifer(lawrence), (gigantic:1.3), hi, gigantic13, ant13, (love13:1.3), hiii, , chi.ld. (ch.ild), (ch.ild:), ch.ild,. hi,