import React from 'react';

import { h3Style, pNormal } from '@/components/hallucination-index-2024/common';
import Chart from '../common/chart';
import Heatmap from '../common/heatmap';
import ResultSnapshot from '../common/result-snapshot';
import RagTaskExamples from '../common/rag-task-examples';
import {
  mcrHeatmapModels,
  IMcrLcrModel,
} from '@/components/hallucination-index-2024/model-data-helpers';

import AnthropicLogo from '@/assets/anthropic.svg';
import GoogleLogo from '@/assets/google.svg';
import QwenLogo from '@/assets/qwen.svg';
import MistralAILogo from '@/assets/mistral-ai.svg';
import PricingInfo from '../common/pricing-info';
import { RagTaskExampleData } from '../rag-task-examples-data';

// const ragTaskExamples = [
//   {
//     name: 'Gemini Flash',
//     responseCorrect: 'Yes',
//     contextAdherence: 1,
//     prompt: {
//       context: `In the city, the population was distributed as 28.3% under the age of 18, 11.3% from 18 to 24, 32.7% from 25 to 44, 18.2% from 45 to 64, and 9.6% who were 65 years of age or older. The median age was 31 years. For every 100 females, there were 97.3 males. For every 100 females age 18 and over, there were 94.5 males.`,
//       task: `Answer the question using the information in the context.`,
//       question: `Which gender group is larger: females or males?`,
//       response: `2.7%`,
//     },
//   },
//   {
//     name: 'Mistral-7B-Instruct-v0.3',
//     responseCorrect: 'No',
//     contextAdherence: 0,
//     prompt: {
//       context: `In the city, the population was distributed as 28.3% under the age of 18, 11.3% from 18 to 24, 32.7% from 25 to 44, 18.2% from 45 to 64, and 9.6% who were 65 years of age or older. The median age was 31 years. For every 100 females, there were 97.3 males. For every 100 females age 18 and over, there were 94.5 males.`,
//       task: `Answer the question using the information in the context.`,
//       question: `Which gender group is larger: females or males?`,
//       response: `The documents do not provide the information about the percentage of employees who left the company during FY 2024.`,
//     },
//   },
// ];

/**
 * Entries passed as `items` to <ResultSnapshot /> in the MCR section.
 * Each entry holds a category label, a vendor logo element, the model's
 * display name, explanatory copy and the score associated with it.
 */
const resultSnapshotItems = [
  {
    category: 'Best closed-source model',
    icon: <GoogleLogo />,
    modelName: `Google’s Gemini-1.5-flash-001`,
    // Built line by line so source indentation and trailing spaces do not
    // leak into the copy; the empty entries are the blank separator lines.
    text: [
      'During testing, many models scored a perfect 1.0 score:',
      '',
      '• Anthropic: claude-3-5-sonnet, claude-3-opus',
      '• Cohere: command-r-plus',
      '• Google: gemini-1.5-flash-001, gemini-1.5-pro-001',
      '• Mistral: mistral-large',
      '• OpenAI: gpt-4o-2024-05-13',
      '',
      'We ultimately chose Gemini-1.5-flash-001 for its low cost.',
    ].join('\n'),
    score: 1.0,
  },
  {
    category: 'Worst closed-source model',
    icon: <AnthropicLogo />,
    modelName: `Anthropic’s Claude-3-Haiku`,
    text: `Claude-3-haiku performed the worst but still scored a 0.96.`,
    score: 0.96,
  },
  {
    category: 'Best open-source model',
    icon: <QwenLogo />,
    modelName: `Alibaba's qwen2-72b-instruct`,
    text: `Alibaba's qwen2-72b-instruct scored a perfect 1.0 and had flawless performance up to 25k tokens. Note that llama-3-70b-instruct does not support beyond 8K context length.`,
    score: 1.0,
  },
  {
    category: 'Worst open-source model',
    icon: <MistralAILogo />,
    modelName: `Mistral-7b-instruct-v0.3`,
    text: `Mistral-7b-instruct-v0.3 had good performance however when considering cost, we felt there were better options.`,
    score: 0.94,
  },
  {
    category: 'Best performance for the cost',
    icon: <GoogleLogo />,
    modelName: `Google’s Gemini-1.5-flash-001`,
    text: `Gemini-1.5-flash-001 scored a perfect 1.0. Similar to our rationale for when evaluating models for small context testing, Gemini-1.5-flash-001 also performed the best at a fraction of the cost, making it our choice for this category.`,
    score: 1.0,
  },
  {
    category: `Best small open model`,
    icon: <QwenLogo />,
    modelName: `Alibaba's qwen2-7b-instruct`,
    text: `Alibaba's qwen2-7b-instruct scored the best among the 7b models.`,
    score: 0.96,
  },
];

// The single pre-selected filter: `promptType` restricted to `Simple`.
const simplePromptTypeFilter = { name: 'promptType', values: ['Simple'] };

/** Filter selection handed to <Chart /> as `defaultSelectedChartFilters`. */
const defaultSelectedChartFilters = [simplePromptTypeFilter];

// Text supplied as the heatmap's `description`; kept apart from the props
// object so the object itself stays compact.
const heatmapDescription = `This heatmap shows the model's ability to recall information in different parts of the context. The x-axis represents the length of the context during the experiment, and the y-axis represents the location of the information. Green indicates successful recall, while red indicates failure.`;

/** Props spread onto <Heatmap /> for the MCR section. */
const heatmapData = {
  // `as const` keeps the literal type 'mcr' instead of widening to string.
  type: 'mcr' as const,
  models: mcrHeatmapModels,
  title: 'Recall heatmap',
  description: heatmapDescription,
};

/** Props accepted by the MCR section component. */
interface Props {
  /** Models forwarded to the chart as its `models` prop. */
  mcrModels: IMcrLcrModel[];
  /** Examples forwarded to the RAG task examples component as `items`. */
  ragTaskExamples: RagTaskExampleData[];
}

/**
 * Medium Context RAG (MCR) section: intro copy, the results chart with
 * pricing info, the result snapshot, the recall heatmap and the RAG task
 * examples.
 */
function MCR({ mcrModels, ragTaskExamples }: Props) {
  // Heading block: title and token range next to the explanatory paragraph.
  const intro = (
    <div className="flex flex-col gap-6 md:flex-row">
      <div className="md:w-1/2">
        <h3 className={h3Style}>Medium Context RAG (MCR)</h3>
        <p className="my-3 text-lg leading-8 !text-black lg:text-[22px]">
          5k to 25k tokens
        </p>
      </div>

      <p className={`${pNormal} !text-[#454547] md:w-1/2`}>
        The Medium Context RAG aims to determine the most effective model for
        comprehending long contexts spanning from 5k to 25k tokens. It focuses
        on identifying any loss of information and reasoning ability within
        these extensive contexts. Additionally, we experiment with a prompting
        technique known as Chain-of-Note to improve performance as it has
        worked for short context. This task is akin to doing RAG on a few book
        chapters.
      </p>
    </div>
  );

  // Chart fed with the MCR models, followed by the pricing note.
  const chartBlock = (
    <div className="my-16">
      <Chart
        maxYTickOffset={0.1}
        minYTickOffset={-0.04}
        models={mcrModels}
        enabledFilters={[
          'type',
          'contextLength',
          'promptType',
          'pricePerToken',
          'responseCost',
        ]}
        defaultSelectedChartFilters={defaultSelectedChartFilters}
      />
      <PricingInfo />
    </div>
  );

  return (
    <section className="my-12 lg:my-24">
      {intro}
      {chartBlock}
      {/* Snapshot of best/worst models */}
      <ResultSnapshot items={resultSnapshotItems} />
      {/* Recall heatmap */}
      <Heatmap {...heatmapData} />
      {/* Example RAG tasks */}
      <RagTaskExamples items={ragTaskExamples} />
    </section>
  );
}

export default MCR;
