import { ColumnSet } from '@revelio/layout';
import { PipelineType } from '@revelio/data-access';
import { flattenColumnSet } from './columns/columns.model';
import { NEW_DELIVERABLE_DEFAULTS } from './deliverables.repository';
import { CompletePipeline, Deliverable } from './deliverables.model';

// Long-form descriptions for the sentiment sub-datasets. They are kept apart
// from the menu structure below so the list of entries stays easy to scan.
const reviewTrendsDetail =
  'This dataset contains employee review data aggregated to the level of company, region, position, and month. Each row contains aggregated ratings for this level of granularity. All ratings span from 1 to 5, with 5 being the highest rating.';
const individualReviewsDetail =
  'Includes employee reviews for all companies, with the full text of each review, split into positive and negative text, segmented by various employee characteristics (occupation, seniority, geography). The coverage is global, history begins in 2009, and the frequency of update is monthly but can feasibly be moved to weekly.';
const sentimentScoresDetail =
  'This dataset contains employee sentiment scores that were generated using our sentiment model. This model uses Natural Language Processing to capture employee sentiment on specific topics such as management and diversity. For each review, we compute a weighted sentiment score based on how relevant a given topic was for the positive or negative portion of the review, assigning a positive (negative) score to topics that had an overall positive (negative) impact on the review. These scores are then aggregated to arrive at a company-wide sentiment score.';

/**
 * Menu entries attached to the "Sentiment" column of `dataSets` via its
 * `menuItems` property. Each entry pairs a pipeline type with its display
 * label and description.
 */
const sentimentItems = [
  {
    id: PipelineType.ReviewTrends,
    label: 'Sentiment Ratings',
    detail: reviewTrendsDetail,
  },
  {
    id: PipelineType.IndividualReviews,
    label: 'Sentiment Reviews',
    detail: individualReviewsDetail,
  },
  {
    id: PipelineType.SentimentScores,
    label: 'Sentiment Effects',
    detail: sentimentScoresDetail,
  },
];

/**
 * Menu entries attached to the "Job Postings" column of `dataSets` via its
 * `menuItems` property. Each entry pairs a pipeline type with its display
 * label and description.
 */
const jobPostingItems = [
  {
    id: PipelineType.Posting,
    label: 'Job Postings Dynamics',
    // The brand name is spelled "LinkedIn" (capital I) in both places it occurs.
    detail:
      'Includes active postings, new postings, removed postings, salaries, and full text of postings for any company, segmented by various employee characteristics (occupation, seniority, geography, keywords, skills, etc). Our postings data comes from two sources: LinkedIn postings and, for an additional cost, company websites. The coverage of both sources is global. History begins in 2008 for company website postings, and it begins in August 2021 for LinkedIn postings. The frequency of update is weekly.',
  },
  {
    id: PipelineType.PostingsIndividual,
    label: 'Individual Job Postings',
    detail: 'Job Postings data at the individual level.',
  },
];

/**
 * Catalogue of datasets offered for deliverables, expressed as column sets.
 *
 * There is currently a single column set with no heading. Each column carries
 * an `id`, a display `label`, and a longer `detail` description. Two columns
 * ("Job Postings" and "Sentiment") additionally carry `menuItems` holding the
 * concrete pipeline types grouped under them.
 *
 * NOTE(review): `getDatasetId` returns the position of an entry within
 * `flattenColumnSet(dataSets[0])`, so reordering, inserting, or removing
 * entries here presumably changes the ids it produces — confirm how
 * `flattenColumnSet` orders nested `menuItems` before editing.
 */
export const dataSets: ColumnSet<PipelineType>[] = [
  {
    heading: null,
    columns: [
      {
        id: PipelineType.WfDynam,
        label: 'Workforce Dynamics',
        detail:
          'Includes counts, inflows, outflows, salaries for the entire workforce of any company, segmented by various employee characteristics (occupation, seniority, geography, demographics, skills, etc). The coverage is global, history begins in 2008, and the frequency of update is monthly.',
      },
      {
        id: PipelineType.SkillDynam,
        label: 'Skills Dynamics',
        detail:
          'Includes the same variables as Workforce Dynamics (count, inflow, outflow, etc), with the exception that it also contains skills. Skills, unlike occupation, seniority, and geography, can co-occur in the same position, so summing up the skills will not equal headcount.',
      },
      {
        id: PipelineType.Transition,
        label: 'Transitions',
        detail:
          'Includes the previous and new roles, location, seniority, and salary of individuals leaving (or joining) any company or changing roles within a company. The coverage is global, history begins in 2008, and the frequency of updates is monthly.',
      },
      {
        id: PipelineType.Individual,
        label: 'Individual User',
        detail:
          'Includes three files: The first has position-level information on current and historical positions. The second has educational history with universities and degrees. The third has user-level information, like personal details and demographic characteristics.',
      },
      {
        // Grouping entry: the id is written as a string literal rather than a
        // PipelineType member reference; the concrete pipeline types are the
        // entries of `jobPostingItems`.
        id: 'jobpostings',
        label: 'Job Postings',
        detail:
          'Includes aggregated job posting statistics at the company and month level, as well as individual-level job postings data with details like posting date, location, role, and salary.',
        menuItems: jobPostingItems,
      },
      {
        // Grouping entry: the id is written as a string literal rather than a
        // PipelineType member reference; the concrete pipeline types are the
        // entries of `sentimentItems`.
        id: 'sentiment',
        label: 'Sentiment',
        detail:
          'Includes aggregated employee review data at various levels (company, region, position, month) with ratings from 1 to 5, along with detailed employee characteristics and the full text of reviews. Additionally, it includes sentiment scores generated through Natural Language Processing, capturing sentiment on specific topics and aggregating them into company-wide sentiment scores. The dataset has global coverage since 2009, with monthly updates that can potentially be made weekly.',
        menuItems: sentimentItems,
      },
      {
        id: PipelineType.Layoffs,
        label: 'Layoffs',
        detail:
          'Includes post data and effective date for all layoffs in every company in the US, segmented by geography. The coverage is just US, history differs by state and can begin any time from 2008 to 2020 (most states have long history), and the frequency of update is monthly.',
      },
      {
        id: PipelineType.CompanyInfo,
        label: 'Company Reference',
        detail:
          'Includes company information (name, headquarters, year founded), unique identifiers (websites) and other security identifiers (ticker, CUSIP, ISIN, SEDOL). The file can also be configured to include all subsidiaries of the companies of interest.',
      },
    ],
  },
];

/**
 * Resolves a pipeline type to its position within the flattened first column
 * set of `dataSets`.
 *
 * @param pipelineType - Pipeline type to look up.
 * @returns The zero-based index of the matching entry, or -1 when no entry
 *   has that id.
 */
export const getDatasetId = (pipelineType: PipelineType) => {
  const flattened = flattenColumnSet(dataSets[0]);
  return flattened.findIndex((column) => column.id === pipelineType);
};

/**
 * Looks up the display label of a pipeline type in the flattened first column
 * set of `dataSets`.
 *
 * @param pipelineType - Pipeline type to look up.
 * @returns The matching entry's `label`, or `undefined` when no entry has
 *   that id.
 */
export const getPipelineTypeTitleById = (pipelineType: PipelineType) => {
  const match = flattenColumnSet(dataSets[0]).find(
    (column) => column.id === pipelineType
  );
  return match?.label;
};

/**
 * Builds a new deliverable entity for the given pipeline type.
 *
 * The entity's id comes from `getDatasetId(pipelineType)`. Delivery settings
 * (`s3_delivery`, `snowflake_delivery`) are copied from the first entry of
 * `deliverables`, if there is one, so the new deliverable starts with the same
 * delivery configuration as an existing one.
 *
 * @param pipelineType - Pipeline type the new deliverable is created for.
 * @param deliverables - Existing deliverables keyed by id; may be empty.
 * @returns The id that was assigned (`newId`) and the constructed entity
 *   (`newEntity`).
 */
export const createNewEntity = (
  pipelineType: PipelineType,
  deliverables: Record<string | number, Deliverable<CompletePipeline>>
) => {
  const newId = getDatasetId(pipelineType);

  // Object.keys always yields strings, and property access with that string
  // reaches the entry regardless of whether it was stored under a numeric or
  // a string key. Index with the key as-is: coercing it through Number()
  // turns non-numeric keys into NaN (and rewrites keys such as "01"), which
  // makes the lookup miss and silently drops the delivery settings.
  const firstKey = Object.keys(deliverables)[0];
  const existingDeliverable =
    firstKey === undefined ? undefined : deliverables[firstKey];
  const s3_delivery = existingDeliverable?.s3_delivery;
  const snowflake_delivery = existingDeliverable?.snowflake_delivery;

  // Property order matters here: anything in NEW_DELIVERABLE_DEFAULTS is
  // applied after `id`, while `pipeline`, `s3_delivery` and
  // `snowflake_delivery` are applied after (and therefore over) the defaults,
  // even when their value is undefined.
  const entityToAdd = {
    id: newId,
    ...NEW_DELIVERABLE_DEFAULTS,
    pipeline: {
      pipeline_type: pipelineType,
    },
    s3_delivery,
    snowflake_delivery,
  };

  return {
    newId,
    newEntity: entityToAdd,
  };
};
