import ToggleDiv from "../../../components/common/ToggleDiv";
import DemoApp from "../../../SpreadApp/DemoApp";
import { DataFrame, TestModel } from "../../../SpreadApp/types";
import { ColumnType, ModelType } from "../../../webcore/client/types";

/** Header row of the demo sheet; one name per column. */
const seasonColumnNames = ["Temperature", "Humidity", "Season"];

/**
 * Sample weather observations, one row per reading, in the same column
 * order as `seasonColumnNames`. Every cell is stored as a string.
 */
const seasonObservations = [
    ["80", "50", "Summer"],
    ["20", "20", "Winter"],
    ["60", "60", "Spring"],
    ["50", "70", "Spring"],
    ["60", "20", "Fall"],
    ["25", "8", "Winter"],
    ["50", "20", "Fall"],
    ["70", "40", "Summer"],
];

/**
 * Starting spreadsheet contents shared by both demos on this page.
 * `types` lists one column type per column; `data` is the header row
 * followed by the observation rows.
 */
const testDataframe: DataFrame = {
    types: [ColumnType.NUMBER, ColumnType.NUMBER, ColumnType.CATEGORY],
    data: [seasonColumnNames, ...seasonObservations],
};

/**
 * Pre-built models handed to the prediction demo so visitors can try a
 * classifier without training one first.
 *
 * The single entry describes a "seasons" classification model over the
 * "Temperature" and "Humidity" columns and pairs it with a hand-written
 * `predictor` that stands in for a trained model.
 */
const testModels: TestModel[] = [
    {
        modelInfo: {
            ModelName: "seasons",
            ModelId: "test-classification-1",
            InputColumns: ["Temperature", "Humidity"],
            OutputColumn: "Season",
            Status: "completed",
            ModelType: ModelType.CLASSIFICATION,
            ModelMetrics: "{}",
            // NOTE(review): the prediction demo below is rendered with
            // sheetName "Season Predictor Demo"; confirm DemoApp does not
            // need SheetId to match it.
            SheetId: "Classification Demo",
            // Evaluated once, when this module is first loaded.
            DateCreated: new Date().toString()
        },
        /**
         * Hard-coded decision rules mimicking a small decision tree.
         *
         * @param input1 Temperature cell value, as a string.
         * @param input2 Humidity cell value, as a string.
         * @returns One of "Summer", "Winter", "Spring" or "Fall".
         */
        predictor: (input1: string, input2: string) => {
            // Cells arrive as strings. A non-numeric cell becomes NaN, every
            // comparison below is then false, and the result is "Fall".
            const temp = Number(input1);
            const humidity = Number(input2);

            // Inclusive bound: the demo sheet contains a (70, 40) row labelled
            // "Summer"; a strict `>` would send it to the humidity check and
            // answer "Spring", contradicting the data shown next to the demo.
            if (temp >= 70) return "Summer";
            if (temp <= 32.6) return "Winter";
            if (humidity > 33.4) return "Spring";

            return "Fall";
        }
    }
]

/**
 * Documentation page for classification models.
 *
 * Renders three side-by-side sections (explanatory text on the left, an
 * optional example on the right):
 *   1. Overview, with a collapsible explanation of the algorithm.
 *   2. Model training guidance, next to a training demo sheet.
 *   3. Prediction guidance, next to a demo sheet with a premade predictor.
 */
const ClassificationPage = () => {
    return (
        <div>
            {/* Section 1: overview (no example; the empty column keeps the layout) */}
            <div className="docs-container-with-example">
                <div className="docs-content-with-example">
                    <h1>
                        Classification
                    </h1>
                    <h2>
                        Overview
                    </h2>
                    <p>
                        Classification models are used to categorize an item based on common attributes.
                        Classification is useful when a dataset has a column of a limited set of values, or <b>categories</b>.
                    </p>
                    <p>
                        These models can be used to make more sense of a dataset by simplifying a set of attributes into one attribute (its category).
                    </p>
                    <ToggleDiv title="How the algorithm works">
                        <p>
                            Our classification model training is based on a decision tree algorithm.
                        </p>
                        <p>
                            A decision tree machine learning model works by making a series of yes/no decisions based on features of the data.
                            Imagine you have a dataset of houses, and you want to predict whether each house is expensive or cheap.
                            You might have features like the number of bedrooms, the size of the backyard, and the distance from the city center.
                        </p>
                        <p>
                            To create a decision tree model, you start at the top of the tree with a question that splits the data into two groups.
                            For example, you might ask, "Does the house have more than 3 bedrooms?" If the answer is yes, you go down one branch of the tree,
                            and if the answer is no, you go down the other branch.
                        </p>
                        <p>
                            At each subsequent node, you ask another yes/no question based on another feature.
                            For example, if the house has more than 3 bedrooms, you might ask, "Is the backyard larger than 500 square feet?"
                            If the answer is yes, you go down one branch, and if the answer is no, you go down the other branch.
                        </p>
                        <p>
                            You keep asking these questions and splitting the data until you reach a leaf node, which gives you a prediction.
                            For example, if you end up at a leaf node that says "Expensive," then you predict that the house is expensive.
                        </p>
                        <p>
                            The decision tree algorithm learns how to choose the best features and questions to ask by
                            minimizing a cost function that measures how well the tree predicts the outcomes on the training data.
                            Once the tree is trained, you can use it to make predictions on new data by following the same sequence of questions down the tree.
                        </p>
                    </ToggleDiv>
                </div>
                <div className="docs-example"></div>
            </div>

            {/* Section 2: model training, with a demo sheet that has no premade models */}
            <div className="docs-container-with-example">
                <div className="docs-content-with-example">
                    <h2>
                        Model Training
                    </h2>
                    <h3>
                        Inputs
                    </h3>
                    <p>
                        Inputs for classification model training can be of any type.
                    </p>
                    <h3>
                        Outputs
                    </h3>
                    <p>
                        Outputs for classification model training must be Category types.
                    </p>
                    <h3>
                        How to make your model better
                    </h3>
                    <h4>Fewer categories</h4>
                    <p>
                        The best way to make your classification algorithm better is to limit the number of categories you use.
                        A classification model that is trying to categorize data into a hundred categories is usually going to be much less accurate
                        than one only trying to categorize into two categories.
                    </p>
                    <h4>More data</h4>
                    <p>
                        More items per category will usually improve the accuracy of your predictor.
                        With not enough data for each category, the model will not be as familiar with what items of that category will look like in those edge cases
                        and will likely be less accurate.
                    </p>
                    <h4>Category parity</h4>
                    <p>
                        Additionally, making sure that you have a relatively equal number of items for each category can improve the performance of your model.
                        Training with items that are mostly one category will make the algorithm highly favor that one category over the others.
                    </p>
                    <h4>Attribute relevance</h4>
                    <p>
                        Make sure your data doesn't contain attributes/columns that are irrelevant to the categories.
                        By including these attributes, you are telling the model to try to incorporate this information in its decision which can lead it astray.
                        For example, what you had for dinner last night doesn't say much about whether or not a basketball player will be voted an all-star.
                    </p>
                </div>

                <div className="docs-example">
                    <p>
                        Below, we have set up a spreadsheet of weather data and its corresponding season.
                        Since there are only four seasons, we can categorize this data, making it an ideal choice for classification.
                        Try to create a classification predictor for seasons below.
                    </p>
                    <div>
                        <DemoApp startDataframe={testDataframe} sheetName="Season Classifier Demo" testModels={[]} isTrainingDemo />
                    </div>
                </div>
            </div>

            {/* Section 3: predictions, with a demo sheet that uses the premade predictor */}
            <div className="docs-container-with-example">
                <div className="docs-content-with-example">
                    <h2>
                        Predictions
                    </h2>
                    <p>
                        Columns used in predictions using classification models should be of the same type as the type of the corresponding input column.
                    </p>
                    <p>
                        For example, if the type of the first column used in training was a number, your first input should be a number.
                    </p>
                    <p>
                        The output of the prediction will be one of the categories that you trained with.
                    </p>
                </div>
                <div className="docs-example">
                    <p>
                        Using the same season data from the training demo and a premade classification predictor,
                        try to predict seasons using our predictor.
                    </p>
                    <div>
                        <DemoApp startDataframe={testDataframe} sheetName="Season Predictor Demo" testModels={testModels} isTrainingDemo={false} />
                    </div>
                </div>
            </div>
        </div>
    )
};

export default ClassificationPage;