/* eslint-disable @typescript-eslint/explicit-module-boundary-types */
// TODO: Convert to TypeScript and/or rewrite
import compromise from 'compromise'
import stopword from 'stopword'

/**
 * Strip citation markers and stray punctuation from a string, then run the
 * result through compromise's `normalize()`.
 *
 * @param {string} str - Raw text; surrounding whitespace is trimmed first.
 * @param {Object} [normalizer] - Options object forwarded unchanged to
 *   compromise's `normalize()`. Defaults to enabling `plurals`,
 *   `parentheses`, `possessives` and `honorifics`.
 * @returns {*} Whatever compromise's `.out()` produces for the normalized
 *   document (presumably the text — confirm against the compromise version in use).
 */
export function normalize (
  str,
  normalizer = {
    plurals: true,
    parentheses: true,
    possessives: true,
    honorifics: true
  }
) {
  const trimmed = str.trim()

  // Bracketed citation markers made only of word characters and dots,
  // e.g. "[12]" or "[a.b]", are dropped together with their brackets.
  const withoutCitations = trimmed.replace(/\[[\d\w.]+\]/g, '')

  // Remaining : ; [ ] { } " ' characters and every "- " (hyphen + space) go too.
  const cleaned = withoutCitations.replace(/(:|;|\[|\]|\{|\}|"|'|- )/g, '')

  const doc = compromise(cleaned)
  return doc.normalize(normalizer).out()
}

/**
 * Split a sentence into lower-cased candidate keywords.
 *
 * Steps, in order:
 *  1. remove digit runs of two or more digits (optionally with dots between
 *     them, e.g. "12" or "3.14");
 *  2. remove a dot that directly follows a word character ("e.g." -> "eg");
 *  3. split on single spaces and keep tokens of at least three characters;
 *  4. pass the tokens to `stopword.removeStopwords`;
 *  5. lower-case what is left.
 *
 * @param {string} sentence - Text to tokenize.
 * @returns {string[]} Lower-cased tokens in their original order.
 */
export function extractWords (sentence) {
  const withoutNumbers = sentence.replace(/\d+\.*\d+/g, '')
  const withoutDots = withoutNumbers.replace(/(\w+|\d+)(\.)/g, '$1')

  const candidates = []
  for (const token of withoutDots.split(' ')) {
    if (token.length >= 3) {
      candidates.push(token)
    }
  }

  // Stop-word removal sees the tokens in their original case; lower-casing
  // happens afterwards.
  const kept = stopword.removeStopwords(candidates)
  return kept.map(token => token.toLowerCase())
}

/**
 * Compute term frequency, inverse document frequency and their product for
 * every distinct word found in a collection of documents.
 *
 * Each document is passed through `normalize` and `extractWords`. For each
 * distinct word:
 *  - `tf` is the total number of occurrences across all documents;
 *  - `idf` is `log10(N / (1 + df))`, where `N` is the number of documents and
 *    `df` the number of documents whose extracted word list contains the word;
 *  - `tfidf` is `tf * idf`.
 *
 * Because of the `1 + df` smoothing, a word present in every document gets a
 * slightly negative `idf`.
 *
 * @param {string[]} documents - Raw document texts.
 * @returns {{word: string, tf: number, idf: number, tfidf: number}[]}
 *   One entry per distinct word, in order of first appearance.
 */
export function calcWordMetrics (documents) {
  const numDocument = documents.length

  // Tokenize every document exactly once.
  const documentWords = documents.map(doc => extractWords(normalize(doc)))

  // Document frequency is counted on whole tokens. Substring-matching the
  // normalized text would count "art" for a document that only contains
  // "start", and could miss words whose extracted form (lower-cased, dots
  // stripped) no longer appears verbatim in the text.
  const documentWordSets = documentWords.map(words => new Set(words))

  // A Map gives O(1) lookup per token and keeps first-seen insertion order.
  const keywordByWord = new Map()
  for (const words of documentWords) {
    for (const word of words) {
      const known = keywordByWord.get(word)
      if (known !== undefined) {
        known.tf += 1
        continue
      }

      let documentFrequency = 0
      for (const wordSet of documentWordSets) {
        if (wordSet.has(word)) {
          documentFrequency += 1
        }
      }

      keywordByWord.set(word, {
        word,
        tf: 1,
        idf: Math.log10(numDocument / (1 + documentFrequency))
      })
    }
  }

  const keywords = [...keywordByWord.values()]
  for (const keyword of keywords) {
    keyword.tfidf = keyword.tf * keyword.idf
  }
  return keywords
}

/**
 * Score the sentences of a text with an averaged tf-idf value.
 *
 * The text is split on ". "; pieces of 20 characters or fewer are discarded.
 * Each remaining sentence becomes `{ docIdx, str, words, tfidf }`, where
 * `docIdx` is its position among the kept pieces, `str` the trimmed text and
 * `words` the result of `extractWords(normalize(...))`.
 *
 * For every word occurrence in a sentence, the word's count within that
 * sentence is multiplied by its idf (0 when the word has no metric). The
 * sentence score is the sum of those products divided by the number of
 * words. Sentences without words keep a score of 0.
 *
 * Only sentences with more than 5 and fewer than 20 words, and without "{"
 * or "}" in their text, are returned. No top-k cut-off is applied.
 *
 * @param {Object} params
 * @param {string} params.text - Text to split and score.
 * @param {{word: string, idf: number}[]|null} [params.wordMetrics=null] -
 *   Precomputed word metrics. When falsy, they are computed with
 *   `calcWordMetrics` over the kept sentences.
 * @returns {{docIdx: number, str: string, words: string[], tfidf: number}[]}
 *   Qualifying sentences in document order.
 */
export function calcSentenceMetrics ({
  text,
  wordMetrics = null
}) {
  const sentences = text
    .split('. ')
    .filter(piece => piece.length > 20)
    .map((piece, i) => ({
      docIdx: i,
      str: piece.trim(),
      words: extractWords(normalize(piece))
    }))

  const metrics = wordMetrics || calcWordMetrics(sentences.map(s => s.str))

  // Index idf by word once instead of scanning `metrics` for every word.
  // The first entry for a word wins, matching an indexOf-based lookup.
  const idfByWord = new Map()
  for (const metric of metrics) {
    if (!idfByWord.has(metric.word)) {
      idfByWord.set(metric.word, metric.idf)
    }
  }

  for (const sentence of sentences) {
    sentence.tfidf = 0
    if (!sentence.words.length) {
      continue
    }

    // Occurrences of each word within this sentence.
    const countByWord = new Map()
    for (const word of sentence.words) {
      countByWord.set(word, (countByWord.get(word) || 0) + 1)
    }

    // Every occurrence contributes count * idf, so repeated words are
    // weighted once per occurrence.
    let total = 0
    for (const word of sentence.words) {
      const idf = idfByWord.has(word) ? idfByWord.get(word) : 0
      total += countByWord.get(word) * idf
    }

    sentence.tfidf = total / sentence.words.length
  }

  // `filter` preserves order and `docIdx` was assigned in ascending order,
  // so the result is already in document order and needs no sorting.
  return sentences.filter(s =>
    s.words.length > 5 &&
    s.words.length < 20 &&
    !s.str.includes('{') &&
    !s.str.includes('}')
  )
}
