import { first } from 'rxjs/operators';

import { Injectable } from '@angular/core';

import { StopWords } from '@report/shared/enums/stopwords.enum';

import { LocalesManager } from '@shared/services/locales-manager.service';

import { AnswerData } from '@shared/models/answer.model';

/** A word extracted from an answer, optionally carrying its part-of-speech tag. */
interface WordEntry {
  value: string;
  PoS?: string;
}

/**
 * Counts word usage for text dimensions.
 *
 * Each answer is reduced to a list of distinct words; every word is mapped to
 * its position in a shared label list that is extended on the fly, and the
 * positions are returned so callers can aggregate them.
 */
@Injectable({
  providedIn: 'root',
})
export class WordCounter {
  /** Separates a tagged word from the index of the value group it was matched to. */
  private static readonly GROUP_SEPARATOR = '\u001D';

  /** Part-of-speech tags that are counted; tagged words with any other PoS are ignored. */
  private static readonly ACCEPTED_POS: string[] = ['NOUN', 'ADJ', 'VERB', 'ADV'];

  /** Characters treated as word boundaries when splitting free text. */
  private static readonly WORD_SEPARATORS = new RegExp(
    `[ \f\n\r\t\v\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005` +
      `\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\u3031-\u3035\u309b\u309c\u30a0\u30fc\uff70]`,
    'g',
  );

  /** Punctuation characters that are replaced by a space before splitting free text. */
  private static readonly UNICODE_PUNCTUATION = new RegExp(
    `[!-#%-*,-/:;?@\\[-\\]_{}\xa1\xa7\xab\xb6\xb7\xbb\xbf\u037e\u0387\u055a-\u055f` +
      `\u0589\u058a\u05be\u05c0\u05c3\u05c6\u05f3\u05f4\u0609\u060a\u060c\u060d\u061b\u061e\u061f\u066a-\u066d` +
      `\u06d4\u0700-\u070d\u07f7-\u07f9\u0830-\u083e\u085e\u0964\u0965\u0970\u0af0\u0df4\u0e4f\u0e5a\u0e5b\u0f04-` +
      `\u0f12\u0f14\u0f3a-\u0f3d\u0f85\u0fd0-\u0fd4\u0fd9\u0fda\u104a-\u104f\u10fb\u1360-\u1368\u1400\u166d\u166e` +
      `\u169b\u169c\u16eb-\u16ed\u1735\u1736\u17d4-\u17d6\u17d8-\u17da\u1800-\u180a\u1944\u1945\u1a1e\u1a1f\u1aa0-` +
      `\u1aa6\u1aa8-\u1aad\u1b5a-\u1b60\u1bfc-\u1bff\u1c3b-\u1c3f\u1c7e\u1c7f\u1cc0-\u1cc7\u1cd3\u2010-\u2027` +
      `\u2030-\u2043\u2045-\u2051\u2053-\u205e\u207d\u207e\u208d\u208e\u2329\u232a\u2768-\u2775\u27c5\u27c6\u27e6-` +
      `\u27ef\u2983-\u2998\u29d8-\u29db\u29fc\u29fd\u2cf9-\u2cfc\u2cfe\u2cff\u2d70\u2e00-\u2e2e\u2e30-\u2e3b\u3001-` +
      `\u3003\u3008-\u3011\u3014-\u301f\u3030\u303d\u30a0\u30fb\ua4fe\ua4ff\ua60d-\ua60f\ua673\ua67e\ua6f2-\ua6f7` +
      `\ua874-\ua877\ua8ce\ua8cf\ua8f8-\ua8fa\ua92e\ua92f\ua95f\ua9c1-\ua9cd\ua9de\ua9df\uaa5c-\uaa5f\uaade\uaadf` +
      `\uaaf0\uaaf1\uabeb\ufd3e\ufd3f\ufe10-\ufe19\ufe30-\ufe52\ufe54-\ufe61\ufe63\ufe68\ufe6a\ufe6b\uff01-\uff03` +
      `\uff05-\uff0a\uff0c-\uff0f\uff1a\uff1b\uff1f\uff20\uff3b-\uff3d\uff3f\uff5b\uff5d\uff5f-\uff65]`,
    `g`,
  );

  constructor(private lm: LocalesManager) {}

  /**
   * Maps the distinct words of one answer to indexes in `textLabels`.
   *
   * Words come from the answer's `word` tags (restricted to the accepted
   * parts of speech and not consisting only of stop words) when there are any;
   * otherwise from the answer's `value`, either tokenized as free text or, with
   * `useRawAnswer`, split on `;`.
   *
   * The array parameters below are accumulators shared between calls: they are
   * extended in place whenever a new word or group is encountered. `rawAnswer`
   * itself is never modified.
   *
   * @param rawAnswer Answer to analyse. A missing answer or missing value yields no words.
   * @param dimensionValues In/out: receives the index of every newly added label.
   * @param textLabels In/out: lower-cased word labels; new words are appended.
   * @param locales Locales whose stop words are stripped when tokenizing free text.
   * @param valueGroupKeys In/out: group keys (answer language and PoS tags).
   * @param valueGroupTypes In/out: parallel to `valueGroupKeys`; `'language'` or `'PoS'`.
   * @param valueGroupValues In/out: per group index, the label indexes belonging to it.
   * @param labelsValueGroup In/out: display labels for language groups, filled from the
   *   locales manager once its locale list is emitted.
   * @param useRawAnswer When true, the untagged value is split on `;` instead of tokenized.
   * @returns One `textLabels` index per word found in the answer.
   */
  public countWords(
    rawAnswer: AnswerData,
    dimensionValues: (string | number)[],
    textLabels: (string | number)[],
    locales: string[] = ['en'],
    valueGroupKeys?: string[],
    valueGroupTypes?: string[],
    valueGroupValues?: number[][],
    labelsValueGroup?: { [key: string]: string },
    useRawAnswer?: boolean,
  ): number[] {
    const answer: number[] = [];
    const rawTags: any[] = (rawAnswer && rawAnswer.tags) || [];

    // Look the language tag up once instead of scanning the tags twice.
    const languageTag = rawTags.find((tag) => tag && tag.type === 'language');
    const answerLanguage: string = languageTag && languageTag['value'];
    let answerLanguageIndex: number = answerLanguage && valueGroupKeys ? valueGroupKeys.indexOf(answerLanguage) : -1;

    // Only filter by stop words when some exist for the language: calling
    // `replace(undefined, '')` would strip the literal text "undefined".
    const stopWords = answerLanguage ? this.stopWordsFor(answerLanguage) : undefined;

    const tags: any[] = rawTags
      .filter(
        (tag) => tag && tag.type === 'word' && WordCounter.ACCEPTED_POS.indexOf(tag.PoS) >= 0 && tag.value,
      )
      .filter((tag) => !stopWords || tag.value.toLowerCase().replace(stopWords, '') !== '')
      .map((tag) => {
        const groupIndex = valueGroupKeys ? valueGroupKeys.indexOf(tag.PoS) : -1;

        // Suffix a copy rather than the caller's tag, so counting the same
        // answer again does not keep appending separators to its value.
        return groupIndex >= 0
          ? Object.assign({}, tag, { value: tag.value + WordCounter.GROUP_SEPARATOR + groupIndex })
          : tag;
      });

    let words: WordEntry[];

    if (tags.length) {
      // De-duplicate case-insensitively; the last tag seen for a word wins.
      const uniqueByValue = new Map<string, WordEntry>();

      for (const tag of tags) {
        uniqueByValue.set((tag.value || '').toLowerCase(), tag);
      }

      words = Array.from(uniqueByValue.values());
    } else {
      const text = rawAnswer && rawAnswer.value;

      if (text === undefined || text === null) {
        words = [];
      } else if (useRawAnswer) {
        words = text.split(';').map((value: string): WordEntry => ({ value }));
      } else {
        words = this.parseText(text, locales).map((value: string): WordEntry => ({ value }));
      }
    }

    // Register the answer language as a value group the first time it is seen.
    if (answerLanguage && valueGroupKeys && answerLanguageIndex < 0) {
      answerLanguageIndex = valueGroupKeys.length;
      valueGroupKeys.push(answerLanguage);

      if (valueGroupTypes) {
        valueGroupTypes.push('language');
      }

      if (labelsValueGroup) {
        this.lm
          .isoLocales()
          .pipe(first())
          .subscribe((languages) => {
            const langObj = languages.find((lang) => lang.code === answerLanguage);

            if (langObj) {
              labelsValueGroup[answerLanguage] = `${langObj.name} (${langObj.native})`;
            }
          });
      }
    }

    for (const entry of words) {
      const word = entry && entry.value && entry.value.toLowerCase();
      const wordGroup = entry && entry.PoS;

      let index = textLabels.indexOf(word);
      let valueGroupKeyIndex: number = valueGroupKeys ? valueGroupKeys.indexOf(wordGroup) : -1;

      // Register the word's part of speech as a value group on first use.
      if (wordGroup && valueGroupKeys && valueGroupKeyIndex < 0) {
        valueGroupKeyIndex = valueGroupKeys.length;
        valueGroupKeys.push(wordGroup);

        if (valueGroupTypes) {
          valueGroupTypes.push('PoS');
        }
      }

      if (index < 0) {
        index = textLabels.length;
        textLabels.push(word);
        dimensionValues.push(index);
      }

      if (valueGroupValues) {
        this.addToGroup(valueGroupValues, answerLanguageIndex, index);
        this.addToGroup(valueGroupValues, valueGroupKeyIndex, index);
      }

      answer.push(index);
    }

    return answer;
  }

  /** Adds `labelIndex` to the group at `groupIndex` (creating it if needed), ignoring negative group indexes. */
  private addToGroup(groups: number[][], groupIndex: number, labelIndex: number): void {
    if (groupIndex < 0) {
      return;
    }

    if (!groups[groupIndex]) {
      groups[groupIndex] = [];
    }

    if (groups[groupIndex].indexOf(labelIndex) < 0) {
      groups[groupIndex].push(labelIndex);
    }
  }

  /**
   * Returns the stop-word pattern for a locale, or a falsy value when there is none.
   * The hyphen-less form of the locale is tried first (`pt-BR` -> `ptBR`), then the
   * locale exactly as given.
   */
  private stopWordsFor(locale: string) {
    return StopWords[locale.replace(/-/g, '')] || StopWords[locale];
  }

  /**
   * Tokenizes free text into distinct lower-cased words.
   *
   * Separators and punctuation become spaces, the stop words of every given
   * locale are removed, digits are stripped from each token, and tokens shorter
   * than two characters are discarded.
   *
   * @param text Free text to tokenize.
   * @param locales Locales whose stop words should be removed.
   * @returns Distinct tokens in order of first appearance.
   */
  private parseText(text: string, locales: string[]): string[] {
    let processedText: string = text.toLowerCase();
    processedText = processedText.replace(WordCounter.WORD_SEPARATORS, ' ');
    processedText = processedText.replace(WordCounter.UNICODE_PUNCTUATION, ' ');

    for (const lang of locales || []) {
      // Guard and replacement must use the same lookup, otherwise hyphenated
      // locales never have their stop words removed.
      const stopWords = this.stopWordsFor(lang);

      if (stopWords) {
        processedText = processedText.replace(stopWords, '');
      }
    }

    const splittedText: string[] = processedText
      .split(' ')
      .map((item) => item.replace(/\d+/g, ''))
      .filter((item) => item && item.length > 1);

    return Array.from(new Set(splittedText));
  }
}
