import type { ArticleSummary } from '../store/modules/article-list';
import type { SimpleVNodeList } from './marked/marked';
import { markedFull, parseHTML } from './marked/marked';
import type { BlockRenderer } from './marked/Renderer';

/**
 * Accumulator used while extracting weighted words and word pairs from text.
 */
interface WordData {
  /** Every word collected so far, each with its own weight. */
  words: { word: string; weight: number }[];
  /** Pairs of adjacent words (`first` followed by `second`) with a weight. */
  bigraphs: { first: string; second: string; weight: number }[];
  /**
   * Most recent word collected; used as the `first` half of the next bigraph.
   * Reset to `undefined` when the data is closed.
   */
  lastWord?: string;
  /**
   * Set to the trailing word when the text ended without a separator after it,
   * so that a following fragment can be glued onto it; `undefined` otherwise.
   */
  currentWord?: string;
  /** True once the data has been closed; closed data is never glued to a neighbour. */
  closed: boolean;
}

/**
 * Lower-case names of the HTML elements treated as inline (phrasing) markup.
 * `datifyVisibleContents` checks each element's lower-cased tag against this list.
 */
// prettier-ignore
const inlineTags: string[] = [
  'a', 'abbr', 'b', 'bdi', 'bdo',
  'cite', 'code', 'em', 'i', 'mark',
  'q', 's', 'small', 'span', 'strong',
  'sub', 'sup', 'time', 'u', 'var',
];

/**
 * Base renderer that reduces a document to plain content of type `T`, keeping
 * track of where block boundaries ("line breaks") fall.
 *
 * Block-level constructs (paragraphs, headings, list items, table cells, code
 * blocks, `hr`, `br`) are passed through `close`, while inline constructs
 * (emphasis, links, code spans, ...) return their text untouched so that it
 * keeps flowing with the surrounding content. Images and checkboxes contribute
 * no content at all.
 *
 * Subclasses supply the four primitives `start`, `close`, `ofText` and `cat`.
 */
abstract class LineBreakReader<T> implements BlockRenderer<T> {
  /** No options are supported; present to satisfy the renderer interface. */
  setOptions(): void {}

  /** A code block becomes a closed run of its raw text. */
  code(code: string): T {
    return this.close(this.ofText(code));
  }

  /** A block quote is closed as a unit. */
  blockquote(quote: T): T {
    return this.close(quote);
  }

  /**
   * Raw HTML. When `long` is false the HTML contributes nothing; otherwise it
   * is parsed and only its visible text is kept.
   */
  html(html: string, long: boolean): T {
    if (!long) {
      return this.start();
    }
    return this.datifyVisibleContents(parseHTML(html));
  }

  /**
   * Recursively flattens a parsed HTML node list into content.
   *
   * Text nodes and the children of inline elements (see `inlineTags`) are
   * concatenated directly so they flow with the surrounding text. Every other
   * element is treated as block-level: the content gathered so far is closed
   * before the element's children are appended, and closed again afterwards,
   * so the element's text never merges with its neighbours.
   *
   * @param arg parsed nodes; strings are text, anything else is an element
   *   with a `tag` and `children`
   * @returns the accumulated content
   */
  datifyVisibleContents(arg: SimpleVNodeList): T {
    let content = this.start();
    for (const element of arg) {
      if (typeof element === 'string') {
        content = this.cat(content, this.ofText(element));
        continue;
      }
      if (!inlineTags.includes(element.tag.toLowerCase())) {
        // Block-level element: put a boundary on both sides of its contents.
        content = this.close(content);
        content = this.cat(
          content,
          this.datifyVisibleContents(element.children)
        );
        content = this.close(content);
        continue;
      }
      // Inline element: its text continues the current run.
      content = this.cat(content, this.datifyVisibleContents(element.children));
    }
    return content;
  }

  /** A heading is closed as a unit. */
  heading(text: T): T {
    return this.close(text);
  }

  /** A list is just its (already closed) items. */
  list(body: T): T {
    return body;
  }

  /** Each list item is closed as a unit. */
  listitem(text: T): T {
    return this.close(text);
  }

  /** Each paragraph is closed as a unit. */
  paragraph(text: T): T {
    return this.close(text);
  }

  /** Each table cell is closed as a unit. */
  tablecell(text: T): T {
    return this.close(text);
  }

  /** A table row is just its (already closed) cells. */
  tablerow(text: T): T {
    return text;
  }

  /** A table is its header followed by its body, when a body is given. */
  table(header: T, body?: T | undefined): T {
    if (body) {
      return this.cat(header, body);
    }
    return header;
  }

  /** A horizontal rule is an empty, closed run. */
  hr(): T {
    return this.close(this.start());
  }

  /** Checkboxes carry no text. */
  checkbox(): T {
    return this.start();
  }

  /** Styling hints are irrelevant here; present to satisfy the renderer interface. */
  styleNextElement(): void {}

  /** Inline: passes the text through unchanged. */
  strong(text: T): T {
    return text;
  }

  /** Inline: passes the text through unchanged. */
  sub(text: T): T {
    return text;
  }

  /** Inline: passes the text through unchanged. */
  sup(text: T): T {
    return text;
  }

  /** Inline: passes the text through unchanged. */
  marked(text: T): T {
    return text;
  }

  /** Inline: passes the text through unchanged. */
  em(text: T): T {
    return text;
  }

  /** Inline: passes the text through unchanged. */
  codespan(text: T): T {
    return text;
  }

  /** Inline: passes the text through unchanged. */
  del(text: T): T {
    return text;
  }

  /** Inline: passes the text through unchanged. */
  text(text: T): T {
    return text;
  }

  /** Only the link text is kept; the target and title are ignored. */
  link(_href: string, _title: string | null, text: T): T {
    return text;
  }

  /** Images carry no text. */
  image(): T {
    return this.start();
  }

  /** A hard line break is an empty, closed run. */
  br(): T {
    return this.close(this.start());
  }

  /** Creates fresh, empty content. */
  abstract start(): T;
  /** Marks content as ending at a boundary and returns it. */
  abstract close(type: T): T;
  /** Creates content from a piece of raw text. */
  abstract ofText(text: string): T;
  /** Concatenates `b` onto `a` and returns the combined content. */
  abstract cat(a: T, b: T): T;
}

/**
 * Renderer that collects weighted words and bigraphs (adjacent word pairs).
 *
 * The real work lives in the static helpers so it can also be used without an
 * instance; the instance methods simply forward to them.
 */
class WordCountRender extends LineBreakReader<WordData> {
  /** Creates an empty, open accumulator. */
  static start(): WordData {
    return {
      words: [],
      bigraphs: [],
      closed: false,
    };
  }

  /**
   * Marks `type` as closed and forgets its trailing word state, so nothing
   * that follows is glued to it or paired with its last word.
   */
  static close(type: WordData): WordData {
    type.currentWord = undefined;
    type.lastWord = undefined;
    type.closed = true;
    return type;
  }

  /** Multiplies every word and bigraph weight in `text` by `weight`, in place. */
  static scale(text: WordData, weight: number): WordData {
    text.words.forEach(entry => {
      entry.weight *= weight;
    });
    text.bigraphs.forEach(pair => {
      pair.weight *= weight;
    });
    return text;
  }

  /**
   * Splits raw text into lower-cased words made of letters, digits and `-`;
   * every other character acts as a separator. Each word gets weight 1 and
   * each adjacent pair becomes a bigraph of weight 1.
   *
   * `currentWord` ends up set only when the text finishes on a word rather
   * than on a separator.
   */
  static ofText(text: string): WordData {
    const result = WordCountRender.start();
    const tokens = text.replace(/[^a-zA-Z0-9-]/g, ' ').split(/\s+/g);
    for (const token of tokens) {
      if (token === '') {
        // An empty token means a separator sat at this edge of the text.
        result.currentWord = undefined;
        continue;
      }
      const word = token.toLowerCase();
      result.words.push({ word, weight: 1 });
      if (result.lastWord) {
        result.bigraphs.push({
          first: result.lastWord,
          second: word,
          weight: 1,
        });
      }
      result.lastWord = word;
      result.currentWord = word;
    }
    return result;
  }

  /**
   * Appends `b` onto `a` (mutating both) and returns `a`.
   *
   * If `a` ends in an unfinished word and neither side is closed, that word
   * is fused with the first word of `b`. Otherwise, when `a` still remembers
   * a last word, a bigraph bridging the two sides is added.
   */
  static cat(a: WordData, b: WordData): WordData {
    if (a.closed || b.closed) {
      // A boundary lies between the two: never fuse partial words across it.
      a.currentWord = undefined;
      b.currentWord = undefined;
    }
    if (b.words.length === 0) {
      return a;
    }

    const head = b.words[0];
    const pending = a.currentWord;
    if (pending) {
      const headBefore = head.word;
      head.word = pending + headBefore;
      // Keep b's first bigraph pointing at the fused word.
      if (b.bigraphs.length > 0 && b.bigraphs[0].first === headBefore) {
        b.bigraphs[0].first = head.word;
      }
      // The unfinished word now lives on as b's fused first word.
      a.words.pop();
      if (a.bigraphs.length > 0) {
        const trailing = a.bigraphs[a.bigraphs.length - 1];
        if (trailing.second === pending) {
          trailing.second = head.word;
        }
      }
      a.currentWord = undefined;
    } else if (a.lastWord) {
      a.bigraphs.push({
        first: a.lastWord,
        second: head.word,
        weight: 1,
      });
    }

    a.words.push(...b.words);
    a.bigraphs.push(...b.bigraphs);
    a.currentWord = b.currentWord;
    a.lastWord = b.lastWord;
    return a;
  }

  setOptions(): void {
    // noop
  }

  start(): WordData {
    return WordCountRender.start();
  }

  close(type: WordData): WordData {
    return WordCountRender.close(type);
  }

  ofText(text: string): WordData {
    return WordCountRender.ofText(text);
  }

  cat(a: WordData, b: WordData): WordData {
    return WordCountRender.cat(a, b);
  }

  /**
   * Heading words count extra: x6 for level 1 (also used when no level is
   * given), x4 for level 2, x2 for anything deeper.
   */
  heading(text: WordData, level?: number): WordData {
    const depth = level || 1;
    let weight = 2;
    if (depth < 2) {
      weight = 6;
    } else if (depth < 3) {
      weight = 4;
    }
    return super.heading(WordCountRender.scale(text, weight));
  }
}

/**
 * One search-index entry as returned by `getSearchKeys`:
 * `[key, weight, articleId]`, where `key` is a word or a bigraph key, `weight`
 * is the accumulated (negated) weight and `articleId` is the article's `_id`.
 */
export type WordKey = [string, number, string];

/**
 * Builds the search-index entries for an article.
 *
 * Deleted or non-public articles yield no entries. Otherwise the words of the
 * body are combined with the words of `documentName` and `hierarchyWeight`
 * (both weighted x48), and every word and bigraph is summed into one score per
 * key. Keys longer than 7 characters have each contribution multiplied by
 * `sqrt(length - 7)`. Scores are stored negated so that better matches sort
 * first.
 *
 * @param article the article to index
 * @returns one `[key, negatedWeight, article._id]` tuple per distinct key,
 *   single words first, then bigraphs (joined with `\uFFFF`)
 */
export default function getSearchKeys(
  article: ArticleSummary & PouchDB.Core.IdMeta
): WordKey[] {
  if (article.deleted || !article.isPublic) {
    return [];
  }

  // Puts `text`, heavily weighted and closed, in front of `rest`.
  const prependBoosted = (text: string, rest: WordData): WordData =>
    WordCountRender.cat(
      WordCountRender.close(
        WordCountRender.scale(WordCountRender.ofText(text), 48)
      ),
      rest
    );

  const bodyData = markedFull(article.documentBody || '', WordCountRender)
    .results;
  // Titles get a very high weight.
  const titledData = prependBoosted(article.documentName, bodyData);
  const wordData = prependBoosted(article.hierarchyWeight || '', titledData);

  const scores = new Map<string, number>();
  // Longer keys count for more; the score is negative so better keys sort first.
  const credit = (key: string, weight: number): void => {
    const adjusted =
      key.length > 7 ? weight * Math.sqrt(key.length - 7) : weight;
    scores.set(key, (scores.get(key) || 0) - adjusted);
  };

  for (const { word, weight } of wordData.words) {
    // Skip very short words and common filler words.
    if (word.length > 2 && !commonWordsSet.has(word)) {
      credit(word, weight);
    }
  }
  for (const { first, second, weight } of wordData.bigraphs) {
    if (commonWordsSet.has(first) || commonWordsSet.has(second)) {
      continue;
    }
    credit(`${first}\uFFFF${second}`, weight);
  }

  return Array.from(
    scores,
    ([word, weight]): WordKey => [word, weight, article._id]
  );
}

/** Frequent words that carry too little meaning to be worth indexing. */
// prettier-ignore
const commonWordsList = [
  'the', 'of', 'and', 'in', 'to', 'was', 'is', 'for', 'as', 'on',
  'with', 'by', 'he', 'that', 'at', 'from', 'his', 'it', 'an', 'were',
  'are', 'which', 'doc', 'https', 'this', 'also', 'be', 'or', 'has', 'had',
  'first', 'one', 'their', 'its', 'after', 'new', 'not', 'but', 'who', 'they',
  'two', 'have', 'her', 'she', 'been', 'other', 'all', 'when', 'time', 'during',
  'there', 'into', 'school', 'more', 'may', 'years', 'over', 'up', 'only', 'year',
];

/** Set form of the common-word list, for constant-time membership checks. */
export const commonWordsSet = new Set<string>(commonWordsList);

/** Lines of text gathered so far, plus whether the last line is finished. */
type LineData = { contents: string[]; closed: boolean };

/**
 * Renderer that collects a document's text as a list of lines. Content that
 * has been closed always starts a new line; open content continues the line
 * before it.
 */
class LineRenderer extends LineBreakReader<LineData> {
  /** Creates open content holding a single empty line. */
  start(): LineData {
    return {
      contents: [''],
      closed: false,
    };
  }

  /** Marks the content as finished so following content starts a new line. */
  close(type: LineData): LineData {
    type.closed = true;
    return type;
  }

  /** Creates open content holding `text` as its only line. */
  ofText(text: string): LineData {
    return {
      contents: [text],
      closed: false,
    };
  }

  /**
   * Appends `b` onto `a` (mutating `a`) and returns `a`.
   *
   * When either side is closed, all of `b`'s lines are added as new lines.
   * When both are open, `b`'s first line continues `a`'s last line and any
   * further lines of `b` are appended after it. The result is closed exactly
   * when `b` is.
   */
  cat(a: LineData, b: LineData): LineData {
    if (a.closed || b.closed) {
      a.contents.push(...b.contents);
    } else {
      const [head, ...rest] = b.contents;
      if (head !== undefined) {
        if (a.contents.length === 0) {
          a.contents.push(head);
        } else {
          a.contents[a.contents.length - 1] += head;
        }
      }
      // Keep b's remaining lines instead of dropping them.
      a.contents.push(...rest);
    }
    a.closed = b.closed;
    return a;
  }
}

/**
 * Normalises text for searching: every character that is not an ASCII letter
 * or digit becomes a single space, then the result is lower-cased.
 *
 * @param l the text to normalise
 * @returns the normalised text, the same length as the input
 */
export function searchifyText(l: string): string {
  const nonAlphanumeric = /[^A-Za-z0-9]/g;
  const spaced = l.replace(nonAlphanumeric, ' ');
  return spaced.toLowerCase();
}

/**
 * Extracts the text lines of a document in two parallel forms.
 *
 * @param content the document source to render
 * @returns `rawLines`: the non-blank lines with whitespace runs collapsed to
 *   one space and the ends trimmed; `searchableLines`: the same lines passed
 *   through `searchifyText`, index for index
 */
export function getRawContents(
  content: string
): { rawLines: string[]; searchableLines: string[] } {
  const rendered = markedFull(content, LineRenderer).results;

  const rawLines: string[] = [];
  for (const line of rendered.contents) {
    if (!line.trim()) {
      // Blank or whitespace-only lines are dropped.
      continue;
    }
    rawLines.push(line.replace(/\s+/g, ' ').trim());
  }

  const searchableLines = rawLines.map(line => searchifyText(line));
  return { rawLines, searchableLines };
}
