import { sortBy, uniqBy } from "lodash";

// Keywords used to detect "total" rows; suggestUnnecessaryRows passes this list
// to findRowsWithCellsContainingAnyWord.
// Hardcoded for now; may move to config in future.
// Frozen so this shared module-level list cannot be mutated by accident.
const TOTAL_ROW_KEYWORDS = Object.freeze(["total", "subtotal", "計"]);

/**
 * Produces the canonical form of a string used for comparisons:
 * Unicode-normalised (default form of String.prototype.normalize) and then
 * lower-cased with toLocaleLowerCase.
 * @param {string} str
 * @returns {string}
 */
const canonicalString = str => {
  const normalised = str.normalize();
  return normalised.toLocaleLowerCase();
};

/**
 * @typedef RowWithIndex
 * @type {Object}
 * @property {*} row Row returned matching some query
 * @property {number} rowIndex Index referring to the row in the sheet's data
 */

/**
 * Collects every row of the sheet for which the predicate returns a truthy value.
 * @param {*} sheetData Object whose `data` property holds the array of rows
 * @param {function} rowPredicate Called as (row, rowIndex) for each row in sheetData.data. Return truthy to keep the row, falsy to drop it.
 * @returns {Array<RowWithIndex>} Kept rows, each paired with its index in sheetData.data, in sheet order
 */
const findRowsMatching = (sheetData, rowPredicate) =>
  sheetData.data.reduce((matches, row, rowIndex) => {
    if (rowPredicate(row, rowIndex)) {
      matches.push({ row, rowIndex });
    }
    return matches;
  }, []);

/**
 * Blank rows are those where all cells are empty.
 * A cell counts as empty only when it is null, undefined or the empty string;
 * falsy values that are real data (0, false, NaN) are NOT treated as empty.
 * A row with no cells at all is blank.
 * @param {*} sheetData
 * @returns {Array<RowWithIndex>}
 */
const findBlankRows = sheetData => {
  // Explicit check instead of `!cell`, which would also swallow 0 and false.
  const isCellEmpty = cell =>
    cell === null || cell === undefined || cell === "";
  const blankRowPredicate = row => row.every(isCellEmpty);
  return findRowsMatching(sheetData, blankRowPredicate);
};

/**
 * Header rows are those where the leftmost cell has a value but all other cells are empty.
 * A cell counts as empty only when it is null, undefined or the empty string;
 * falsy values that are real data (0, false, NaN) are NOT treated as empty, so a
 * row such as ["Item", 0, 0] is a data row rather than a header.
 * @param {*} sheetData
 * @returns {Array<RowWithIndex>}
 */
const findHeaderRows = sheetData => {
  // Explicit check instead of `!cell`, which would also swallow 0 and false.
  const isCellEmpty = cell =>
    cell === null || cell === undefined || cell === "";
  const headerRowPredicate = row => {
    // Optional chaining keeps a missing (null/undefined) row from throwing;
    // such a row has no first cell and is therefore not a header.
    const firstCell = row?.[0];
    if (isCellEmpty(firstCell)) {
      return false;
    }
    return row.slice(1).every(isCellEmpty);
  };
  return findRowsMatching(sheetData, headerRowPredicate);
};

/**
 * Boundary-matches the words in a list against the cells of each row and returns the rows which have any match.
 *
 * The matching strategy for each word in the list depends on the character script of ALL of its characters.
 * - Words made only of Latin-script letters: the cell text is split on whitespace or punctuation and each resulting
 *    'word' is compared ignoring case.
 *    (e.g: "this, total!", "this, Total!", "this, totalality!"; 'total' matches the first two strings)
 * - Any other word (other scripts, or containing spaces, digits, punctuation...): the normalize()'d, lower-cased word is
 *    searched for literally in the normalize()'d, lower-cased cell text. Regex syntax characters in the word are escaped,
 *    so words such as "c++" or "(税込)" are matched as plain text. The matching strategy can be tweaked via
 *    options.unicodeMatchStrategy.
 *
 * Cell values are converted with toString() before matching; null/undefined cells are treated as "".
 * @param {*} sheetData
 * @param {Array<string>} wordList List of words to match against using boundary matching. An empty/missing list matches nothing.
 * @param {object} options Options for tweaking matching strategies
 * @param {string} [options.unicodeMatchStrategy='end'] How non-Latin words should be matched.
 * - "end": the word must appear at the end of the cell text
 * - any other value: the word may appear anywhere in the cell text
 * @returns {Array<RowWithIndex>}
 */
const findRowsWithCellsContainingAnyWord = (
  sheetData,
  wordList = [],
  { unicodeMatchStrategy = "end" } = {}
) => {
  if (!wordList?.length) {
    return [];
  }

  // Backslash-escapes every regex syntax character so the text is matched literally.
  // Only syntax characters are escaped, which keeps the pattern valid under the `u` flag.
  const escapeForRegex = text => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  const latinMatcher = needle => {
    const normalised = needle.toLocaleLowerCase();
    return haystack =>
      haystack
        .toLocaleLowerCase()
        // The capture group keeps the separators in the result; they never equal a Latin-only needle.
        .split(/(\s|\p{P})/u)
        .some(w => w.localeCompare(normalised) === 0);
  };
  const unicodeMatcher = needle => {
    const mustMatchEnd = unicodeMatchStrategy === "end" ? "$" : "";
    // Canonicalise first, then escape: the needle is user data, not a pattern.
    const literalNeedle = escapeForRegex(canonicalString(needle));
    const regex = new RegExp(`${literalNeedle}${mustMatchEnd}`, "iu");
    return haystack => regex.test(canonicalString(haystack));
  };

  // Build one matcher per word up front so the per-cell work is just a function call.
  const isLatinRegex = /^\p{sc=Latn}+$/iu;
  const matchers = wordList.map(w => {
    const isLatin = isLatinRegex.test(w);
    if (isLatin) {
      return latinMatcher(w);
    }
    return unicodeMatcher(w);
  });

  const cellContainingAnyWordPredicate = row =>
    row.some(cell => {
      const cellValue = (cell ?? "").toString();
      return matchers.some(hasMatch => hasMatch(cellValue));
    });

  return findRowsMatching(sheetData, cellContainingAnyWordPredicate);
};

/**
 * Builds a predicate that reports whether a row's cell data appears in `list`.
 *
 * Rows are compared cell by cell after each cell is converted to a string
 * (null/undefined become "") and canonicalised with canonicalString, so the
 * comparison ignores case and Unicode composition differences.
 * @param {Array<Array<*>>} list Rows of cell data to look rows up in
 * @returns {function} (row) => boolean; always false when `list` is empty or missing
 */
const isRowInListPredicate = list => {
  if (!list?.length) {
    return () => false;
  }

  // JSON-encode the canonical cells so cell boundaries stay unambiguous.
  // Joining with a plain separator such as "_" would make ["a_b", "c"] and
  // ["a", "b_c"] indistinguishable.
  const calculateRowKey = rowOfCellData =>
    JSON.stringify(
      rowOfCellData.map(cellValue =>
        canonicalString((cellValue ?? "").toString())
      )
    );

  // Keys are computed once here so each lookup is a single Set membership test.
  const knownRowKeys = new Set(list.map(calculateRowKey));

  return row => knownRowKeys.has(calculateRowKey(row));
};

/**
 * Returns the rows of the sheet whose cell data matches any row in `list`
 * (as decided by isRowInListPredicate).
 *
 * Nothing is returned when `list` is empty/missing, when the sheet has no
 * rows, or when the first sheet row and the first list row have a different
 * number of cells.
 * @param {*} sheetData
 * @param {Array<Array<*>>} list Rows of cell data to look for
 * @returns {Array<RowWithIndex>}
 */
const findRowsMatchingDataInList = (sheetData, list) => {
  if (!list?.length) {
    return [];
  }

  // An empty sheet has no first row to compare against and nothing to match.
  const firstSheetRow = sheetData.data[0];
  if (!firstSheetRow) {
    return [];
  }
  // Only the first rows are compared as a cheap column-count sanity check.
  if (firstSheetRow.length !== list[0].length) {
    return [];
  }

  const predicate = isRowInListPredicate(list);
  return findRowsMatching(sheetData, predicate);
};

/**
 * Suggests rows of the sheet that are likely unnecessary.
 *
 * Candidates are the blank rows, the header rows, the rows with a cell
 * matching one of TOTAL_ROW_KEYWORDS and the rows whose data appears in
 * `inclusionList`. Rows whose data appears in `exclusionList` are then
 * removed from the candidates.
 * @param {object} params
 * @param {*} params.sheetData
 * @param {Array<Array<*>>} [params.inclusionList=[]] Row data that should always be suggested
 * @param {Array<Array<*>>} [params.exclusionList=[]] Row data that should never be suggested
 * @returns {Array<RowWithIndex>} Each suggested row once, sorted by rowIndex
 */
const suggestUnnecessaryRows = ({
  sheetData,
  inclusionList = [],
  exclusionList = []
}) => {
  const rowsToSelect = [
    ...findBlankRows(sheetData),
    ...findHeaderRows(sheetData),
    ...findRowsWithCellsContainingAnyWord(sheetData, TOTAL_ROW_KEYWORDS),
    ...findRowsMatchingDataInList(sheetData, inclusionList)
  ];

  // A row can be picked by several finders; keep one entry per sheet row.
  const uniqueRows = uniqBy(rowsToSelect, "rowIndex");

  // Build the exclusion predicate once: it does not depend on the row being
  // tested, so rebuilding it per row only repeats the same work.
  const isExcluded = isRowInListPredicate(exclusionList);
  const rowsMinusExclusions = uniqueRows.filter(({ row }) => !isExcluded(row));

  return sortBy(rowsMinusExclusions, "rowIndex");
};

// Individual finders, exposed under `forTest` on the default export.
const finderFunctionsForTest = {
  findBlankRows,
  findHeaderRows,
  findRowsWithCellsContainingAnyWord,
  findRowsMatchingDataInList
};

// Default export: `suggest` is the entry point, `forTest` bundles the finders.
export default {
  suggest: suggestUnnecessaryRows,
  forTest: finderFunctionsForTest
};
