import {
  chainDependencies,
  create,
  sqrtDependencies,
  subtractDependencies,
} from 'mathjs';

import { PostingsDataQuery } from '@revelio/data-access';

// Build a trimmed-down mathjs instance from just the dependency sets listed
// below and keep only its `chain` helper.
const chain = create({
  chainDependencies,
  subtractDependencies,
  sqrtDependencies,
}).chain;

// Used to clean up postings data for early periods of low data acquisition.
// Data is kept from the first point that has more than 5% of peak active
// postings, more than 2% of peak removed postings, at least 2 new postings
// and a salary within 1.75 standard deviations of the mean salary.

// A single entry of the `posting` list in `PostingsDataQuery`
// (the list type is made non-nullable before it is indexed).
type Posting = NonNullable<PostingsDataQuery['posting']>[number];

/**
 * Drops the leading, unreliable part of a posting's `category` series.
 *
 * The series is scanned for the first entry that
 *  - has truthy `active`, `removed`, `new` and `salary` metrics,
 *  - has at least 2 new postings,
 *  - has more than 5% of the peak active postings and more than 2% of the
 *    peak removed postings (both thresholds are rounded up),
 *  - has a salary within 1.75 standard deviations of the mean salary, and
 *  - starts a run that stays at or above the active threshold (the entry
 *    itself plus up to the 9 entries that follow it are checked).
 * That entry and everything after it are kept; everything before it is
 * removed.
 *
 * @param posting - Posting whose `category` series should be sanitised.
 * @returns The posting unchanged when it has no `category` or no non-zero
 *   salary; otherwise a shallow copy whose `category` holds the kept entries
 *   (an empty array when no entry qualifies). The input is not mutated.
 */
export const sanitiseMainPostingData = (posting: Posting) => {
  const category = posting?.category;

  if (!category) {
    return posting;
  }

  // Single pass: track peak volumes and collect every non-zero salary.
  let peakActive = 0;
  let peakRemoved = 0;
  const salaries: number[] = [];

  for (const item of category) {
    if (!item?.metrics) continue;

    peakActive = Math.max(peakActive, item.metrics.active || 0);
    peakRemoved = Math.max(peakRemoved, item.metrics.removed || 0);

    // Missing and zero salaries are excluded from the statistics.
    if (item.metrics.salary) {
      salaries.push(item.metrics.salary);
    }
  }

  if (salaries.length === 0) {
    return posting;
  }

  const meanSalary =
    salaries.reduce((sum, salary) => sum + salary, 0) / salaries.length;

  // Population variance as the mean of squared deviations. Unlike
  // E[x^2] - mean^2 this can never come out negative through floating-point
  // cancellation, so the square root below is always a real number.
  const salaryVariance =
    salaries.reduce((sum, salary) => {
      const deviation = salary - meanSalary;
      return sum + deviation * deviation;
    }, 0) / salaries.length;
  const salaryStdDev = chain(salaryVariance).sqrt().done();

  const activeThreshold = Math.ceil((metrics5Percent(peakActive)));
  const removedThreshold = Math.ceil((peakRemoved / 100) * 2);

  // Index of the first data point that passes every quality check.
  const firstValidIndex = category.findIndex((item, index) => {
    if (!item?.metrics) return false;

    const { active, removed, new: newPostings, salary } = item.metrics;

    // All four metrics must be present and non-zero.
    if (!(active && removed && newPostings && salary)) {
      return false;
    }

    const passesThresholds =
      newPostings >= 2 &&
      active > activeThreshold &&
      removed > removedThreshold &&
      Math.abs(salary - meanSalary) <= 1.75 * salaryStdDev;

    if (!passesThresholds) {
      return false;
    }

    // Guard against an isolated spike: this point and up to the 9 following
    // ones must all stay at or above the active threshold.
    return category
      .slice(index, index + 10)
      .every((el) => (el?.metrics?.active || 0) >= activeThreshold);
  });

  // Everything from the first valid data point onwards is kept as-is.
  const sanitizedCategories: NonNullable<Posting>['category'] =
    firstValidIndex === -1 ? [] : category.slice(firstValidIndex);

  return { ...posting, category: sanitizedCategories };
};

// Returns 5% of the given peak value (not yet rounded).
const metrics5Percent = (peak: number): number => (peak / 100) * 5;
