import { stringHash } from '@sancare/ui-frontend-commons/src/misc/string-utils'
import asciiFoldingFilter from '@sancare/ui-frontend-commons/src/text-processing/asciiFoldingFilter'
import stopwords from '@sancare/ui-frontend-commons/src/text-processing/frenchStopwords'
import { findWordInChunks } from '@sancare/ui-frontend-commons/src/text-processing/textQueryHandlers'
import { processSearchKeyword } from '@sancare/ui-frontend-commons/src/text-processing/textQueryHandlers'
import _ from 'lodash'
import lodashFp from 'lodash/fp'

import { mergeIntervals } from '@/store/modules/document-entity-explorer/utils'

// Tokenizer used by parseContentWords(). A "word" is either:
//   * a run of ASCII letters, accented Latin-1 letters (À-Ö, Ø-ö, ø-ÿ), digits, '+' or '-'
//   * a run of check mark / ballot box symbols (U+2713 to U+2718, U+25A0, U+25A1, U+2610 to U+2612)
// The regex carries the `g` flag, so exec() remembers its position in `lastIndex` between calls.
const wordRegex = /[A-Za-zÀ-ÖØ-öø-ÿ0-9+\-]+|[\u2713\u2714\u2715\u2716\u2717\u2718\u25A0\u25A1\u2610\u2611\u2612]+/g

// Collects the [hintStart, hintEnd] pairs of every hint that targets one document
// (report or textualHealthEntry). Hints whose start is not strictly before their end are skipped.
const getHintsLimits = (sourceType, sourceId, hints) => {
  const targetsSource = (hint) => sourceType === hint.targetType && sourceId === hint.targetId
  const hasValidRange = (hint) => hint.hintStart < hint.hintEnd

  return _.reduce(hints, (pairs, hint) => {
    if (targetsSource(hint) && hasValidRange(hint)) {
      pairs.push([hint.hintStart, hint.hintEnd])
    }
    return pairs
  }, [])
}

/**
 * Finds every occurrence of the "soft absent" expressions inside a parsed document.
 * An expression is a list of exact words that must appear consecutively in the document.
 *
 * @param {Array|null|undefined} parsedReportWords parsed words; `exactWord`, `start` and `end` are read
 * @param {Array} softAbsentSearches processed searches; only their `exactPatterns` are read
 * @returns {Array<Array<number>>} one [start, end] pair of text offsets per occurrence
 */
const getSoftAbsentLimits = (parsedReportWords, softAbsentSearches) => {
  const softAbsentLimits = []

  // callers may hand over a missing or empty document: there is nothing to look for
  if (!parsedReportWords || !parsedReportWords.length) {
    return softAbsentLimits
  }

  // as for now, softAbsent searches contain only one exact pattern each;
  // the patterns are gathered once instead of once per word
  const patterns = []
  _.forEach(softAbsentSearches, (softAbsentSearch) => {
    _.forEach(softAbsentSearch.exactPatterns, (patternWords) => {
      patterns.push(patternWords)
    })
  })
  if (!patterns.length) {
    return softAbsentLimits
  }

  parsedReportWords.forEach((word, wordIdx) => {
    patterns.forEach((patternWords) => {
      // the expression must fit in the remaining words and start with the current one
      if (wordIdx > parsedReportWords.length - patternWords.length || patternWords[0] !== word.exactWord) {
        return
      }
      const isExpression = _.every(patternWords, (patternWord, patternWordIdx) => {
        return patternWord === parsedReportWords[wordIdx + patternWordIdx].exactWord
      })
      if (isExpression) {
        softAbsentLimits.push([word.start, parsedReportWords[wordIdx + patternWords.length - 1].end])
      }
    })
  })

  return softAbsentLimits
}

/**
 * Tokenizes a document's content with wordRegex and returns one entry per word, carrying
 * the information needed by the keyword matching (getKeywordLimits()).
 *
 * Each entry holds: position (rank of the word), content (raw word), stemmedWord and exactWord
 * (normalized forms), start / end (offsets in the content) and, for negation, doubt, antecedent
 * and familyAntecedent, the result of findWordInChunks() on the matching index family.
 *
 * @param {string|null|undefined} sourceContent text to tokenize; a missing text yields no word
 * @param {Object} [existingIndices] optional `negation`, `doubt`, `antecedent` and `familyAntecedent`
 *   lists of `{ startIndex, endIndex }`
 * @returns {Array<Object>} the parsed words, in text order
 */
const parseContentWords = (sourceContent, existingIndices) => {
  const wordList = []

  // exec() would otherwise stringify a missing content and tokenize the words "null" / "undefined"
  if (sourceContent === null || sourceContent === undefined) {
    return wordList
  }

  const indices = existingIndices || {}
  // Turns one family of { startIndex, endIndex } into [start, end] pairs ordered by start
  const toOrderedChunks = (indexSets) => _.orderBy(
    _.map(indexSets || [], (indexSet) => [indexSet.startIndex, indexSet.endIndex]),
    (lim) => lim[0]
  )
  const negationIndices = toOrderedChunks(indices.negation)
  const doubtIndices = toOrderedChunks(indices.doubt)
  const antecedentIndices = toOrderedChunks(indices.antecedent)
  const familyAntecedentIndices = toOrderedChunks(indices.familyAntecedent)

  // wordRegex is shared and stateful (`g` flag): always restart from the beginning, even if a
  // previous call was interrupted by an exception and left lastIndex in the middle of a text
  wordRegex.lastIndex = 0

  let match
  while ((match = wordRegex.exec(sourceContent)) !== null) {
    const word = match[0]
    const start = match.index
    const end = start + word.length
    wordList.push({
      position: wordList.length,
      content: word,
      // keep it sync with backend ES config analyzer.filter
      stemmedWord: asciiFoldingFilter(word.toLowerCase().stem()),
      exactWord: asciiFoldingFilter(word.toLowerCase()),
      start,
      end,
      negation: findWordInChunks(negationIndices, start, end),
      doubt: findWordInChunks(doubtIndices, start, end),
      antecedent: findWordInChunks(antecedentIndices, start, end),
      familyAntecedent: findWordInChunks(familyAntecedentIndices, start, end),
    })
  }

  return wordList
}

// Builds the limit descriptor of one match for JustificationSummary.vue: the text indices
// of the match plus negation / doubt booleans derived from the parsed word
const getParsedWordLimits = (parsedReportWord, matchIndices) => {
  const { negation, doubt } = parsedReportWord

  return {
    textIndices: matchIndices,
    isNegative: Boolean(negation),
    isDoubt: Boolean(doubt),
  }
}

// Groups the found words into combinations in which two successive words are separated by at
// most `wordDistance` other positions, and keeps the combinations holding at least as many
// words as the original keyword.
// see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-span-near-query.html
//   distance (slop) is the maximum number of intervening unmatched positions
function checkWordDistance(foundWords, search) {
  const maxPositionGap = search.modifiers.wordDistance + 1
  const requiredWordCount = search.originalKeyword
    .split(' ')
    .filter((keyword) => keyword.trim() !== '')
    .length

  const combinations = []
  foundWords.forEach((word) => {
    // extend every combination whose last word is close enough...
    combinations.forEach((combination) => {
      const lastWord = combination[combination.length - 1]
      if (word.position - lastWord.position <= maxPositionGap) {
        combination.push(word)
      }
    })
    // ...and let the word start a combination of its own
    combinations.push([word])
  })

  return combinations.filter((combination) => combination.length >= requiredWordCount)
}

/**
 * Computes the highlight limits of one keyword search inside one parsed document.
 *
 * Words are collected when they take part in an exact pattern, start with a prefix, take part
 * in an ordered stem sequence or equal a stem of the search. Words flagged as (family)
 * antecedent are skipped unless the search modifiers include them, and words lying inside a
 * soft absent expression are skipped. The collected words are then optionally grouped by word
 * distance, groups that do not cover every search argument are dropped, and runs of
 * consecutive words are merged into one limit each.
 *
 * @param {Array|null|undefined} parsedReportWords parsed words of the document
 * @param {Object} keywordSearch processed search: `exactPatterns`, `prefixes`, `orderedStems`,
 *   `stems`, `modifiers` (and `originalKeyword` when a word distance is set)
 * @param {Array} softAbsentSearches processed searches of the expressions to ignore
 * @returns {Array<Object>} limits as built by getParsedWordLimits(), unique per text range
 */
const getKeywordLimits = (parsedReportWords, keywordSearch, softAbsentSearches) => {
  const limits = []

  // Guard first: the soft absent lookup below needs an actual list of words
  if (!parsedReportWords || !parsedReportWords.length) {
    return limits
  }

  const softAbsentLimits = getSoftAbsentLimits(parsedReportWords, softAbsentSearches)
  const modifiers = keywordSearch.modifiers

  // true when the document words starting at wordIdx equal expectedWords, compared on `property`
  const matchesSequenceAt = (wordIdx, expectedWords, property) => {
    return wordIdx <= parsedReportWords.length - expectedWords.length
      && _.every(expectedWords, (expectedWord, offset) => expectedWord === parsedReportWords[wordIdx + offset][property])
  }

  // We only keep the keywordSearch where all arguments match:
  //  * all exactPattern are at least once in the content
  //  * all stems have at least one match
  //  * all prefixes have at least one match
  //  * all orderedStems are at least once in the content
  const collectedWords = []
  _.forEach(parsedReportWords, (word, wordIdx) => {
    // don't return hint that was found in antecedents, unless the search includes them
    if ((word.antecedent && !modifiers?.antecedent) || (word.familyAntecedent && !modifiers?.familyAntecedent)) {
      return
    }

    // don't return hint that was found in softAbsentContent
    if (findWordInChunks(softAbsentLimits, word.start, word.end) !== undefined) {
      return
    }

    // check for the exactPatterns
    _.forEach(keywordSearch.exactPatterns, (patternWords) => {
      if (matchesSequenceAt(wordIdx, patternWords, 'exactWord')) {
        collectedWords.push(...parsedReportWords.slice(wordIdx, wordIdx + patternWords.length))
      }
    })

    // check for the prefixes
    _.forEach(keywordSearch.prefixes, (prefix) => {
      if (word.exactWord.startsWith(prefix)) {
        collectedWords.push(word)
      }
    })

    // stopwords can't be found in stems, so next steps can be skipped
    if (stopwords.includes(word.exactWord)) {
      return
    }

    // check for the orderedStems: the precomputed stemmedWord is compared (same normalization
    // as the stems check), a sequence may end on the last word of the document, and every
    // word of the sequence is recorded because the completeness filter below expects them all
    _.forEach(keywordSearch.orderedStems, (stemList) => {
      if (matchesSequenceAt(wordIdx, stemList, 'stemmedWord')) {
        collectedWords.push(...parsedReportWords.slice(wordIdx, wordIdx + stemList.length))
      }
    })

    // check for the stems
    _.forEach(keywordSearch.stems, (stem) => {
      if (stem === word.stemmedWord) {
        collectedWords.push(word)
      }
    })
  })

  // If nothing was found, we stop here and go to the next keyword
  if (!collectedWords.length) {
    return limits
  }

  const foundWords = _.orderBy(_.uniqBy(collectedWords, (w) => w.position), (w) => w.position, 'asc')

  // Check word distance
  let foundWordLists
  if (!modifiers || modifiers.wordDistance === null) {
    foundWordLists = [foundWords]
  } else {
    foundWordLists = checkWordDistance(foundWords, keywordSearch)
  }

  // filter foundWordsList to keep only list where all words were found
  foundWordLists = _.filter(foundWordLists, (wordList) => {
    return _.every(keywordSearch.exactPatterns, (pattern) => _.every(pattern, (searchWord) => _.find(wordList, (w) => w.exactWord === searchWord)))
      && _.every(keywordSearch.prefixes, (prefix) => _.find(wordList, (w) => w.exactWord.startsWith(prefix)))
      && _.every(keywordSearch.orderedStems, (stem) => _.every(stem, (searchWord) => _.find(wordList, (w) => w.stemmedWord === searchWord)))
      && _.every(keywordSearch.stems, (searchWord) => _.find(wordList, (w) => w.stemmedWord === searchWord))
  })

  // Prepare chunks: each run of consecutive positions becomes one limit,
  // flagged from the first word of the run
  _.forEach(foundWordLists, (wordList) => {
    let lastFirstWord = null
    let lastEndWord = null

    _.forEach(wordList, (w) => {
      if (!lastFirstWord) {
        lastFirstWord = w
        lastEndWord = w
      } else if (lastEndWord.position !== (w.position - 1)) {
        limits.push(getParsedWordLimits(lastFirstWord, [lastFirstWord.start, lastEndWord.end]))
        lastFirstWord = w
        lastEndWord = w
      } else {
        lastEndWord = w
      }
    })
    limits.push(getParsedWordLimits(lastFirstWord, [lastFirstWord.start, lastEndWord.end]))
  })
  return _.uniqBy(limits, (lim) => `${lim.textIndices[0]}-${lim.textIndices[1]}`)
}

// A keyword search matches as soon as one of its limits is a usable hit: doubtful hits never
// count, and negated hits count only when the search carries the negation modifier
function isMatching(limits, presentKeywordSearch) {
  const isUsableHit = (limit) => !limit.isDoubt && (!limit.isNegative || presentKeywordSearch.modifiers.negation)

  return limits.some(isUsableHit)
}

/**
 * Computes, for one keyword condition, the hint limits found in every report and textual
 * health entry of a patient's stays.
 *
 * Side effect: `keyword.matching` is set to true as soon as one document holds a limit that
 * isMatching() accepts.
 *
 * @param {Object} keyword condition: `value` (searched text), optional `documentType` (restricts
 *   the documents looked at) and optional `softAbsentContents` (expressions to ignore)
 * @param {Object} parsedContent holds `stays`, each with `reports` and `textualHealthEntries`
 *   whose items carry `parsedWords`
 * @returns {Object} `{ condition, stays }` where `stays` is keyed by stay id
 */
const getKeywordConditionMatches = (keyword, parsedContent) => {
  const matches = {
    condition: keyword,
    stays: {}
  }
  const presentKeywordSearch = processSearchKeyword(keyword.value, true)

  // Soft absent contents are searched as exact expressions: every double quote they contain
  // is stripped (not only the first one) before wrapping them in quotes
  const softAbsentSearches = _.map(keyword.softAbsentContents, (softAbsentContent) => {
    return processSearchKeyword(`"${String(softAbsentContent).replace(/"/g, '')}"`, true)
  })

  // Computes the limits of one document and flags the keyword when they hold a usable hit
  const computeLimits = (source) => {
    const limits = getKeywordLimits(source.parsedWords, presentKeywordSearch, softAbsentSearches)
    if (isMatching(limits, presentKeywordSearch)) {
      keyword.matching = true
    }
    return limits
  }

  _.forEach(parsedContent.stays, (stay) => {
    const stayMatches = {
      id: stay.id,
      stayStart: stay.stayStart,
      stayEnd: stay.stayEnd,
      stayDuration: stay.stayDuration,
      reports: {},
      textualHealthEntries: {},
      categoricalLabResults: {},
    }
    matches.stays[stay.id] = stayMatches

    _.forEach(stay.reports, (report) => {
      if (!keyword.documentType || report.documentTemplate === keyword.documentType) {
        stayMatches.reports[report.id] = {
          id: report.id,
          title: report.documentTitle,
          limits: computeLimits(report),
          text: report.content
        }
      }
    })

    _.forEach(stay.textualHealthEntries, (healthEntry) => {
      if (!keyword.documentType || healthEntry.type === keyword.documentType) {
        stayMatches.textualHealthEntries[healthEntry.id] = {
          id: healthEntry.id,
          limits: computeLimits(healthEntry),
          text: healthEntry.content
        }
      }
    })
  })

  return matches
}

/**
 * Tells whether a text matches a condition in which '*' stands for any run of characters
 * (possibly empty). Every other character of the condition is matched literally and the
 * whole text has to match:
 *   '*search' // suffix
 *   'search*' // prefix
 *   'sea*rch' // wildcard
 *   'search'  // strict equality
 *
 * @param {string|number} conditionText condition, stringified before use
 * @param {string|null} toBeSearchedText
 * @returns {boolean} always false when toBeSearchedText is empty or missing
 */
const wildCardMatch = (conditionText, toBeSearchedText) => {
  if (!toBeSearchedText) {
    return false
  }
  // conditions may be numeric codes: normalize before using string methods
  const condition = String(conditionText)
  if (!condition.includes('*')) {
    return condition === toBeSearchedText
  }

  // Escape the literal parts so that characters such as '.', '+' or '(' keep their plain
  // meaning, then let each '*' match anything
  const pattern = condition
    .split('*')
    .map((literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('[\\s\\S]*')

  return new RegExp(`^${pattern}$`).test(toBeSearchedText)
}

// Collects, for one drug condition, the drug events of a patient's stays whose drug matches
// the condition value, either on its shortUcd or on its atc code (see wildCardMatch()).
// drugCondition.matching is set to true as soon as one event matches.
const getDrugConditionMatches = (drugCondition, parsedContent) => {
  const stays = {}
  const matchesCondition = (code) => wildCardMatch(drugCondition.value, String(code ?? ''))

  _.forEach(parsedContent.stays, (stay) => {
    const { id, stayStart, stayEnd, stayDuration } = stay
    const drugEvents = {}
    stays[id] = { id, stayStart, stayEnd, stayDuration, drugEvents }

    _.forEach(stay.drugEvents, (drugEvent) => {
      const { shortUcd, atc } = drugEvent.drug
      if (matchesCondition(shortUcd) || matchesCondition(atc)) {
        drugEvents[drugEvent.id] = drugEvent
        drugCondition.matching = true
      }
    })
  })

  return { condition: drugCondition, stays }
}

/**
 * Splits a text into consecutive "chunks" delimited by a list of offsets.
 * Every limit but the last one starts a chunk that runs up to the next limit; the last limit
 * is expected to be the end of the text, so it only closes the final chunk.
 *
 * @param {string} reportContent text to split
 * @param {Array<number>} limits offsets in the text, in ascending order
 * @returns {Array<Object>} chunks `{ string, chunkStart, chunkEnd, hash }`, one per pair of
 *   successive limits (none when fewer than two limits are given)
 */
const splitText = (reportContent, limits) => {
  return limits.slice(0, -1).map((chunkStart, idx) => {
    const chunkEnd = limits[idx + 1]
    const string = reportContent.substring(chunkStart, chunkEnd)

    return {
      string,
      chunkStart,
      chunkEnd,
      hash: stringHash(`${chunkStart}___${string}`),
    }
  })
}

/**
 * Splits the content of one source item into chunks and flags the chunks covered by the
 * keyword matches recorded for that item.
 *
 * The chunk boundaries are the start and end of the content plus every text index of the
 * matches. Each chunk then receives `isHighlighted`, `isNegative`, `isDoubt` and
 * `studyNodeIds` (the study nodes of every condition with a limit covering the chunk).
 *
 * @param {string} sourceType key of the per-stay match map to read (the keyword matches built
 *   in this file use 'reports' and 'textualHealthEntries')
 * @param {Object} sourceItem item with an `id` and a `content`
 * @param {KeywordMatches} keywordsMatches
 * @param {number|string} stayId id of the stay the item belongs to
 * @returns {Array<Object>} the chunks, in text order (empty when the item has no content)
 */
const computeSingleSourceChunks = (sourceType, sourceItem, keywordsMatches, stayId) => {
  if (!sourceItem?.content) return []

  // Matches recorded for this very item; computed once, as the list is the same for every chunk
  const sourceMatches = keywordsMatches.filter((matches) => matches.stays[stayId] && matches.stays[stayId][sourceType][sourceItem.id])

  const limits = _.uniq([
    // start and end of the text
    0,
    sourceItem.content.length,
    // [[s1, e1], [s2, e2], []] => [s1, e1, s2, e2], without falsey values (zeros included)
    ..._.compact(_.flattenDeep(_.map(sourceMatches, (matches) => {
      return _.map(matches.stays[stayId][sourceType][sourceItem.id].limits, (lims) => lims.textIndices)
    }))),
  ])

  limits.sort((a, b) => a - b)                         // sort limits to build content in the right order

  const chunks = splitText(sourceItem.content, limits)

  _.forEach(chunks, (chunk) => {
    chunk.isHighlighted = false

    chunk.isNegative = false
    chunk.isDoubt = false
    chunk.studyNodeIds = []
    _.forEach(sourceMatches, (matches) => {
      // first limit of this condition that fully covers the chunk
      const match = _.find(matches.stays[stayId][sourceType][sourceItem.id].limits, (lim) => {
        return chunk.chunkStart >= lim.textIndices[0]
          && chunk.chunkEnd <= lim.textIndices[1]
      })

      if (match) {
        chunk.studyNodeIds = _.uniq(_.concat(chunk.studyNodeIds, matches.condition.studyNodeIds))
        chunk.isHighlighted = true
        chunk.isNegative = chunk.isNegative || match.isNegative
        chunk.isDoubt = chunk.isDoubt || match.isDoubt
        // Why is the search match tell source chunk is negative???
        // it should be the source chunk that tell search the match is negative !
      }
    })
  })

  return chunks
}

// Builds the chunk covering content[start, end): its text, its offsets, a hash derived from
// the start offset and the text, and the highlight / annotation information it is given
const createChunk = (content, start, end, isHighlighted = false, annotationValues = []) => {
  const string = content.slice(start, end)

  return {
    string,
    chunkStart: start,
    chunkEnd: end,
    hash: stringHash(`${start}___${string}`),
    isHighlighted,
    annotationValues,
  }
}

// Splits the content of a source item into chunks from its ML hints: the hints that are not
// removed are merged with mergeIntervals(), every merged interval becomes a highlighted chunk
// carrying its annotation values, and the text between intervals becomes plain chunks
const computeSingleSourceChunksFromMlHints = (sourceItem, mlHints) => {
  if (!sourceItem?.content) return []

  const activeHints = mlHints.filter((hint) => !hint.removed)
  const chunks = []
  let cursor = 0

  mergeIntervals(activeHints).forEach(({ startIdx, endIdx, annotationValues }) => {
    // plain text lying before the interval, if any
    if (cursor < startIdx) {
      chunks.push(createChunk(sourceItem.content, cursor, startIdx))
    }
    chunks.push(createChunk(sourceItem.content, startIdx, endIdx, true, annotationValues))
    cursor = endIdx
  })

  // plain text lying after the last interval, if any
  const contentEnd = sourceItem.content.length
  if (cursor < contentEnd) {
    chunks.push(createChunk(sourceItem.content, cursor, contentEnd))
  }

  return chunks
}


/**
 * Builds, for one stay, the chunks of every report and textual health entry according to the
 * matches of a set of searches, plus a per-item flag for the categorical lab results.
 *
 * Here `studyNodesMatches.keywords` is read as a collection keyed by search id whose entries
 * are indexed as `entry[dataType][itemId]`.
 * NOTE(review): other helpers of this file read `keywords` as an array of `{ condition, stays }`
 * objects — confirm which shape the callers of this function provide.
 *
 * @param {Object} stay holds `reports`, `textualHealthEntries` and `categoricalLabResults`
 * @param {Array} searchIds ids of the searches whose limits delimit the chunks
 * @param {StudyNodeMatches} studyNodesMatches
 * @returns {Object} `{ reports, textualHealthEntries, categoricalLabResults }`, each keyed by
 *   item id: a list of chunks (or null when the item has no limit) for the two textual types,
 *   a boolean for the categorical lab results
 */
const computeMultipleSearchesChunks = (stay, searchIds, studyNodesMatches) => {
  const chunks = {
    reports: {},
    textualHealthEntries: {},
  }

  _.forEach(['reports', 'textualHealthEntries'], (dataType) => {
    _.forEach(stay[dataType], (item) => {
      if (item && item.content) {

        // Text indices of the selected searches for this item, flattened into one list of offsets
        const limits = _.flow([
          // keep the entries whose key is one of searchIds; numeric-looking keys are compared as numbers
          // (assumes lodash/fp hands the key to the pickBy predicate as second argument — TODO confirm)
          lodashFp.pickBy((ssm, searchId) => searchIds.indexOf(isNaN(Number(searchId)) ? searchId : Number(searchId)) !== -1),
          lodashFp.map((ssm) => _.map(ssm[dataType][item.id], (lims) => lims.textIndices)),
          _.flattenDeep,  // [[s1, e1], [s2, e2], []] => [s1, e1, s2, e2]
          _.compact,      // remove falsey values (zeros included)
          _.uniq,         // remove duplicates
        ])(studyNodesMatches.keywords)

        limits.sort((a, b) => a - b)

        // Items without any limit get no chunk at all
        if (!limits || !limits.length) {
          chunks[dataType][item.id] = null
          return
        }

        // The start and the end of the text are added to the limits before splitting
        chunks[dataType][item.id] = splitText(
          item.content,
          _.uniq(_.concat(limits, [0, item.content.length])).sort((a, b) => a - b)
        )

        // A chunk is highlighted when a limit fully covers it; negation and doubt are cumulated.
        // NOTE(review): this loop goes through every entry of studyNodesMatches.keywords, not only
        // the ones selected by searchIds above — confirm this is intended.
        _.forEach(chunks[dataType][item.id], (chunk) => {
          chunk.isNegative = false
          chunk.isDoubt = false
          chunk.isHighlighted = false
          _.forEach(studyNodesMatches.keywords, (singleSavedSearchMatches) => {
            _.forEach(singleSavedSearchMatches[dataType][item.id], (limits) => {
              if (chunk.chunkStart >= limits.textIndices[0] && chunk.chunkEnd <= limits.textIndices[1]) {
                chunk.isHighlighted = true
                chunk.isNegative = chunk.isNegative || limits.isNegative
                chunk.isDoubt = chunk.isDoubt || limits.isDoubt
              }
            })
          })
        })
      }
    })
  })

  // Categorical lab results have no text: each item is only flagged true when at least one of
  // the searches holds a truthy entry for it.
  // Note: `studyNodesMatches.keywords[id]` is dereferenced without a check, so a search id
  // without entry in `keywords` raises a TypeError here.
  chunks['categoricalLabResults'] = {}
  _.forEach(stay['categoricalLabResults'], (item) => {
    chunks['categoricalLabResults'][item.id] = _.some(_.map(searchIds, (id) => studyNodesMatches.keywords[id]['categoricalLabResults'] && studyNodesMatches.keywords[id]['categoricalLabResults'][item.id]))
  })

  return chunks
}

// Single-search variant of computeMultipleSearchesChunks(): the id is wrapped in a one-element list
const computeSingleSearchChunks = (stay, searchId, studyNodesMatches) => computeMultipleSearchesChunks(stay, [searchId], studyNodesMatches)

// Lists the study nodes at the origin of the matches of one non textual source item.
// Categorical lab results are looked up in the keyword matches and drug events in the drug
// matches; any other source type yields no study node.
const getSimpleSourceMatchOrigins = (sourceType, sourceItem, studyNodesMatches, stayId) => {
  let candidateMatches = []
  if (sourceType === 'categoricalLabResults') {
    candidateMatches = studyNodesMatches.keywords
  } else if (sourceType === 'drugEvents') {
    candidateMatches = studyNodesMatches.drugs
  }

  const studyNodeIds = candidateMatches
    // matches holding an entry for this item in this stay
    .filter((matches) => matches.stays[stayId] && matches.stays[stayId][sourceType][sourceItem.id])
    .reduce((ids, matches) => ids.concat(matches.condition.studyNodeIds), [])

  return { studyNodeIds }
}

// Lists every subset of searchIds that holds at least 2 elements.
// Subsets are enumerated through bit masks: bit j of the mask tells whether searchIds[j]
// belongs to the subset. Each subset keeps the order of searchIds.
const getIntersectionSubsets = (searchIds) => {
  const size = searchIds.length
  // There are 2^n subsets, one per mask in [0, 2^n)
  const subsetCount = 1 << size
  const bitMasks = Array.from({ length: size }, (unused, bit) => 1 << bit)

  const subsets = []
  for (let mask = 0; mask < subsetCount; mask++) {
    const subset = []
    bitMasks.forEach((bitMask, bit) => {
      if ((mask & bitMask) > 0) {
        subset.push(searchIds[bit])
      }
    })
    // subsets of size 0 or 1 are not intersections
    if (subset.length >= 2) {
      subsets.push(subset)
    }
  }

  return subsets
}

/**
 * Finds the groups of searches whose diagnosis patterns intersect and computes the chunks of
 * each group.
 *
 * The diagnoses of a search are the values of its fields of type revaloDiagnosis, revaloDP or
 * revaloDAS. A search is grouped with every other search holding a diagnosis that matches one
 * of its own patterns; every subset of at least 2 searches of such a group is returned once.
 *
 * @param {Object} stay stay handed over to computeMultipleSearchesChunks()
 * @param {Array} staySearches items holding a `search` with an `id` and `fields`
 * @param {Object} savedSearchMatches matches handed over to computeMultipleSearchesChunks()
 * @returns {Array<Object>} one `{ searchIds, chunks }` per group, `searchIds` in ascending order
 */
const computeSearchIntersectionChunks = (stay, staySearches, savedSearchMatches) => {
  const diagnosisFieldTypes = ['revaloDiagnosis', 'revaloDP', 'revaloDAS']

  // Unique diagnosis patterns of each search, keyed by search id
  const searchDiagnoses = {}
  _.forEach(staySearches, (staySavedSearch) => {
    const diagnosisFields = _.filter(staySavedSearch.search.fields, (field) => diagnosisFieldTypes.includes(field.type))
    searchDiagnoses[staySavedSearch.search.id] = _.uniq(_.flatten(_.map(diagnosisFields, (field) => field.value)))
  })

  // Turns a diagnosis pattern into an anchored regex: every '*' stands for one or more
  // characters, every other character (including all the '+' and '.') is literal
  const toDiagnosisRegex = (diagnosis) => {
    const pattern = String(diagnosis)
      .split('*')
      .map((literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
      .join('.+')
    return new RegExp(`^${pattern}$`)
  }

  // We search for searches intersecting each other.
  // As soon as there is at least one pattern in common, we will calculate the chunks of (S1 ∩ S2)
  const similarSearchesGroups = []
  _.forEach(searchDiagnoses, (outerDiagnosisList, outerSearchId) => {
    _.forEach(outerDiagnosisList, (outerDiagnosis) => {
      // compiled once per pattern, then tested against the diagnoses of the other searches
      const outerDiagnosisRegex = toDiagnosisRegex(outerDiagnosis)
      const diagMatchingSearches = []
      _.forEach(searchDiagnoses, (innerDiagnosisList, innerSearchId) => {
        if (innerSearchId !== outerSearchId && _.some(innerDiagnosisList, (innerDiagnosis) => outerDiagnosisRegex.test(innerDiagnosis))) {
          diagMatchingSearches.push(Number(innerSearchId))
        }
      })

      // The group is sorted, in order to make groups comparisons easier
      const searchGroup = [Number(outerSearchId), ..._.uniq(diagMatchingSearches)].sort((a, b) => a - b)

      if (searchGroup.length <= 1) {
        return
      }

      // Now we have a set of searchIds in searchGroup.
      // We calculate all subsets that contain at least 2 searches
      const combinations = getIntersectionSubsets(searchGroup)

      // Finally, we add the combinations into similarSearchesGroups (if they are not already present)
      _.forEach(combinations, (combination) => {
        if (_.every(similarSearchesGroups, (knownGroup) => !_.isEqual(knownGroup, combination))) {
          similarSearchesGroups.push(combination)
        }
      })
    })
  })

  // Now that we have the similar search groups, we calculate the chunks associated with every intersection
  return _.map(similarSearchesGroups, (searchGroup) => ({
    searchIds: searchGroup,
    chunks: computeMultipleSearchesChunks(stay, searchGroup, savedSearchMatches),
  }))
}

export {
  parseContentWords,
  computeSingleSearchChunks,
  computeSingleSourceChunks,
  computeSingleSourceChunksFromMlHints,
  getSimpleSourceMatchOrigins,
  computeSearchIntersectionChunks,
  getHintsLimits,
  splitText,
  getKeywordConditionMatches,
  getDrugConditionMatches,
  getKeywordLimits,
  wildCardMatch,
}
