feat: add image for chunk method (#139)

* feat: add image of chunk method * feat: add image for chunk method
2024-03-21 16:45:03 +08:00
parent 5875c8ba08
commit f4ec7cfa76
39 changed files with 2823 additions and 196 deletions
--- a/web/src/pages/add-knowledge/components/knowledge-file/index.tsx
+++ b/web/src/pages/add-knowledge/components/knowledge-file/index.tsx
@@ -220,7 +220,7 @@ const KnowledgeFile = () => {
      key: 'create_date',
    },
    {
-      title: 'Category',
+      title: 'Chunk Method',
      dataIndex: 'parser_id',
      key: 'parser_id',
      render: (text) => {
--- a/web/src/pages/add-knowledge/components/knowledge-setting/category-panel.tsx
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/category-panel.tsx
@@ -0,0 +1,71 @@
+import SvgIcon from '@/components/svg-icon';
+import { useSelectParserList } from '@/hooks/userSettingHook';
+import { Col, Divider, Empty, Row, Typography } from 'antd';
+import { useMemo } from 'react';
+import styles from './index.less';
+import { ImageMap, TextMap } from './utils';
+
+const { Title, Text } = Typography;
+
+/**
+ * Panel that explains the currently selected chunk method: its label, its
+ * description from `TextMap`, and the example images listed in `ImageMap`.
+ * When no images are registered for the method, an empty-state placeholder
+ * is rendered instead.
+ *
+ * @param chunkMethod - Parser id to describe; it is matched against the
+ *   `value` of the parser list entries and against the `ImageMap` keys.
+ */
+const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
+  const parserList = useSelectParserList();
+
+  const item = useMemo(() => {
+    const parser = parserList.find((x) => x.value === chunkMethod);
+    if (parser) {
+      return {
+        title: parser.label,
+        // Fall back to '' so the shape matches the "not found" branch even
+        // when the parser has no TextMap entry.
+        description:
+          TextMap[parser.value as keyof typeof TextMap]?.description ?? '',
+      };
+    }
+    return { title: '', description: '' };
+  }, [parserList, chunkMethod]);
+
+  const imageList = useMemo(() => {
+    // Own-property check: the `in` operator would also accept inherited keys
+    // such as 'constructor' or 'toString' and hand back a non-array value.
+    if (Object.prototype.hasOwnProperty.call(ImageMap, chunkMethod)) {
+      return ImageMap[chunkMethod as keyof typeof ImageMap];
+    }
+    return [];
+  }, [chunkMethod]);
+
+  return (
+    <section className={styles.categoryPanelWrapper}>
+      {imageList.length > 0 ? (
+        <>
+          <Title level={5} className={styles.topTitle}>
+            {item.title} Category
+          </Title>
+          <Text>{item.description}</Text>
+          <Title level={5}>{item.title} Image Examples</Title>
+          <Text>
+            We've prepared detailed visual guides to make understanding easier
+            for you.
+          </Text>
+          <Row gutter={[10, 10]} className={styles.imageRow}>
+            {imageList.map((x) => (
+              <Col span={12} key={x}>
+                <SvgIcon
+                  name={x}
+                  width={'100%'}
+                  className={styles.image}
+                ></SvgIcon>
+              </Col>
+            ))}
+          </Row>
+          <Title level={5}>{item.title} Dialogue Examples</Title>
+          <Divider></Divider>
+        </>
+      ) : (
+        <Empty description={''} image={null}>
+          <p>
+            This will display a visual explanation of the knowledge base
+            categories
+          </p>
+          <SvgIcon name={'chunk-method/chunk-empty'} width={'100%'}></SvgIcon>
+        </Empty>
+      )}
+    </section>
+  );
+};
+
+export default CategoryPanel;
--- a/web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx
@@ -2,7 +2,6 @@ import { normFile } from '@/utils/fileUtil';
 import { PlusOutlined } from '@ant-design/icons';
 import {
  Button,
-  Divider,
  Flex,
  Form,
  Input,
@@ -11,8 +10,6 @@ import {
  Select,
  Slider,
  Space,
-  Spin,
-  Typography,
  Upload,
 } from 'antd';
 import {
@@ -20,174 +17,162 @@ import {
  useSubmitKnowledgeConfiguration,
 } from './hooks';

+import { FormInstance } from 'antd/lib';
 import styles from './index.less';

-const { Title } = Typography;
 const { Option } = Select;

-const Configuration = () => {
+/**
+ * Knowledge-base settings form (name, photo, description, language,
+ * permissions, embedding model and chunk method). When the chunk method is
+ * 'naive', an extra "Max token number" slider and number input are shown,
+ * both bound to `parser_config.chunk_token_num`.
+ *
+ * @param form - antd form instance owned by the parent, so the parent can
+ *   observe field values of this form.
+ */
+const ConfigurationForm = ({ form }: { form: FormInstance }) => {
  const { submitKnowledgeConfiguration, submitLoading } =
    useSubmitKnowledgeConfiguration();
-  const { form, parserList, embeddingModelOptions, loading } =
-    useFetchKnowledgeConfigurationOnMount();
+  const { parserList, embeddingModelOptions } =
+    useFetchKnowledgeConfigurationOnMount(form);

  const onFinishFailed = (errorInfo: any) => {
    console.log('Failed:', errorInfo);
  };

  return (
-    <div className={styles.configurationWrapper}>
-      <Title level={5}>Configuration</Title>
-      <p>Update your knowledge base details especially parsing method here.</p>
-      <Divider></Divider>
-      <Spin spinning={loading}>
-        <Form
-          form={form}
-          name="validateOnly"
-          layout="vertical"
-          autoComplete="off"
-          onFinish={submitKnowledgeConfiguration}
-          onFinishFailed={onFinishFailed}
+    <Form
+      form={form}
+      name="validateOnly"
+      layout="vertical"
+      autoComplete="off"
+      onFinish={submitKnowledgeConfiguration}
+      onFinishFailed={onFinishFailed}
+    >
+      <Form.Item
+        name="name"
+        label="Knowledge base name"
+        rules={[{ required: true }]}
+      >
+        <Input />
+      </Form.Item>
+      <Form.Item
+        name="avatar"
+        label="Knowledge base photo"
+        valuePropName="fileList"
+        getValueFromEvent={normFile}
+      >
+        <Upload
+          listType="picture-card"
+          maxCount={1}
+          beforeUpload={() => false}
+          showUploadList={{ showPreviewIcon: false, showRemoveIcon: false }}
        >
-          <Form.Item
-            name="name"
-            label="Knowledge base name"
-            rules={[{ required: true }]}
-          >
-            <Input />
-          </Form.Item>
-          <Form.Item
-            name="avatar"
-            label="Knowledge base photo"
-            valuePropName="fileList"
-            getValueFromEvent={normFile}
-          >
-            <Upload
-              listType="picture-card"
-              maxCount={1}
-              beforeUpload={() => false}
-              showUploadList={{ showPreviewIcon: false, showRemoveIcon: false }}
-            >
-              <button style={{ border: 0, background: 'none' }} type="button">
-                <PlusOutlined />
-                <div style={{ marginTop: 8 }}>Upload</div>
-              </button>
-            </Upload>
-          </Form.Item>
-          <Form.Item name="description" label="Description">
-            <Input />
-          </Form.Item>
-          <Form.Item
-            label="Language"
-            name="language"
-            initialValue={'Chinese'}
-            rules={[{ required: true, message: 'Please input your language!' }]}
-          >
-            <Select placeholder="select your language">
-              <Option value="English">English</Option>
-              <Option value="Chinese">Chinese</Option>
-            </Select>
-          </Form.Item>
-          <Form.Item
-            name="permission"
-            label="Permissions"
-            tooltip="coming soon"
-            rules={[{ required: true }]}
-          >
-            <Radio.Group>
-              <Radio value="me">Only me</Radio>
-              <Radio value="team">Team</Radio>
-            </Radio.Group>
-          </Form.Item>
-          <Form.Item
-            name="embd_id"
-            label="Embedding Model"
-            rules={[{ required: true }]}
-            tooltip="xx"
-          >
-            <Select
-              placeholder="Please select a country"
-              options={embeddingModelOptions}
-            ></Select>
-          </Form.Item>
-          <Form.Item
-            name="parser_id"
-            label="Knowledge base category"
-            tooltip="xx"
-            rules={[{ required: true }]}
-          >
-            <Select placeholder="Please select a country">
-              {parserList.map((x) => (
-                <Option value={x.value} key={x.value}>
-                  {x.label}
-                </Option>
-              ))}
-            </Select>
-          </Form.Item>
-          <Form.Item noStyle dependencies={['parser_id']}>
-            {({ getFieldValue }) => {
-              const parserId = getFieldValue('parser_id');
+          <button style={{ border: 0, background: 'none' }} type="button">
+            <PlusOutlined />
+            <div style={{ marginTop: 8 }}>Upload</div>
+          </button>
+        </Upload>
+      </Form.Item>
+      <Form.Item name="description" label="Description">
+        <Input />
+      </Form.Item>
+      <Form.Item
+        label="Language"
+        name="language"
+        initialValue={'Chinese'}
+        rules={[{ required: true, message: 'Please input your language!' }]}
+      >
+        <Select placeholder="select your language">
+          <Option value="English">English</Option>
+          <Option value="Chinese">Chinese</Option>
+        </Select>
+      </Form.Item>
+      <Form.Item
+        name="permission"
+        label="Permissions"
+        tooltip="coming soon"
+        rules={[{ required: true }]}
+      >
+        <Radio.Group>
+          <Radio value="me">Only me</Radio>
+          <Radio value="team">Team</Radio>
+        </Radio.Group>
+      </Form.Item>
+      <Form.Item
+        name="embd_id"
+        label="Embedding Model"
+        rules={[{ required: true }]}
+        tooltip="xx"
+      >
+        <Select
+          placeholder="Please select an embedding model"
+          options={embeddingModelOptions}
+        ></Select>
+      </Form.Item>
+      <Form.Item
+        name="parser_id"
+        label="Chunk method"
+        tooltip="xx"
+        rules={[{ required: true }]}
+      >
+        <Select placeholder="Please select a chunk method">
+          {parserList.map((x) => (
+            <Option value={x.value} key={x.value}>
+              {x.label}
+            </Option>
+          ))}
+        </Select>
+      </Form.Item>
+      <Form.Item noStyle dependencies={['parser_id']}>
+        {({ getFieldValue }) => {
+          const parserId = getFieldValue('parser_id');

-              if (parserId === 'naive') {
-                return (
-                  <Form.Item label="Chunk token number" tooltip="xxx">
-                    <Flex gap={20} align="center">
-                      <Flex flex={1}>
-                        <Form.Item
-                          name={['parser_config', 'chunk_token_num']}
-                          noStyle
-                          initialValue={128}
-                          rules={[
-                            { required: true, message: 'Province is required' },
-                          ]}
-                        >
-                          <Slider
-                            className={styles.variableSlider}
-                            max={2048}
-                          />
-                        </Form.Item>
-                      </Flex>
-                      <Form.Item
-                        name={['parser_config', 'chunk_token_num']}
-                        noStyle
-                        rules={[
-                          { required: true, message: 'Street is required' },
-                        ]}
-                      >
-                        <InputNumber
-                          className={styles.sliderInputNumber}
-                          max={2048}
-                          min={0}
-                        />
-                      </Form.Item>
-                    </Flex>
+          // The token limit only applies to the 'naive' chunk method.
+          if (parserId === 'naive') {
+            return (
+              <Form.Item label="Max token number" tooltip="xxx">
+                <Flex gap={20} align="center">
+                  <Flex flex={1}>
+                    <Form.Item
+                      name={['parser_config', 'chunk_token_num']}
+                      noStyle
+                      initialValue={128}
+                      rules={[
+                        {
+                          required: true,
+                          message: 'Max token number is required',
+                        },
+                      ]}
+                    >
+                      <Slider className={styles.variableSlider} max={2048} />
+                    </Form.Item>
+                  </Flex>
+                  {/* Same field name as the slider, so both controls stay in sync. */}
+                  <Form.Item
+                    name={['parser_config', 'chunk_token_num']}
+                    noStyle
+                    rules={[
+                      {
+                        required: true,
+                        message: 'Max token number is required',
+                      },
+                    ]}
+                  >
+                    <InputNumber
+                      className={styles.sliderInputNumber}
+                      max={2048}
+                      min={0}
+                    />
                  </Form.Item>
-                );
-              }
-              return null;
-            }}
-          </Form.Item>
-          <Form.Item>
-            <div className={styles.buttonWrapper}>
-              <Space>
-                <Button htmlType="reset" size={'middle'}>
-                  Cancel
-                </Button>
-                <Button
-                  htmlType="submit"
-                  type="primary"
-                  size={'middle'}
-                  loading={submitLoading}
-                >
-                  Save
-                </Button>
-              </Space>
-            </div>
-          </Form.Item>
-        </Form>
-      </Spin>
-    </div>
+                </Flex>
+              </Form.Item>
+            );
+          }
+          return null;
+        }}
+      </Form.Item>
+      <Form.Item>
+        <div className={styles.buttonWrapper}>
+          <Space>
+            <Button htmlType="reset" size={'middle'}>
+              Cancel
+            </Button>
+            <Button
+              htmlType="submit"
+              type="primary"
+              size={'middle'}
+              loading={submitLoading}
+            >
+              Save
+            </Button>
+          </Space>
+        </div>
+      </Form.Item>
+    </Form>
  );
 };

-export default Configuration;
+export default ConfigurationForm;
--- a/web/src/pages/add-knowledge/components/knowledge-setting/hooks.ts
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/hooks.ts
@@ -15,6 +15,7 @@ import {
  getUploadFileListFromBase64,
 } from '@/utils/fileUtil';
 import { Form, UploadFile } from 'antd';
+import { FormInstance } from 'antd/lib';
 import pick from 'lodash/pick';
 import { useCallback, useEffect } from 'react';
 import { LlmModelType } from '../../constant';
@@ -39,9 +40,8 @@ export const useSubmitKnowledgeConfiguration = () => {
  return { submitKnowledgeConfiguration, submitLoading };
 };

-export const useFetchKnowledgeConfigurationOnMount = () => {
-  const [form] = Form.useForm();
-  const loading = useOneNamespaceEffectsLoading('kSModel', ['getKbDetail']);
+export const useFetchKnowledgeConfigurationOnMount = (form: FormInstance) => {
+  // const [form] = Form.useForm();

  const knowledgeDetails = useSelectKnowledgeDetails();
  const parserList = useSelectParserList();
@@ -69,5 +69,15 @@ export const useFetchKnowledgeConfigurationOnMount = () => {
    });
  }, [form, knowledgeDetails]);

-  return { form, parserList, embeddingModelOptions, loading };
+  return { parserList, embeddingModelOptions };
+};
+
+/**
+ * Returns the loading state reported by `useOneNamespaceEffectsLoading` for
+ * the `getKbDetail` effect of the `kSModel` namespace.
+ */
+export const useSelectKnowledgeDetailsLoading = () => {
+  const detailsLoading = useOneNamespaceEffectsLoading('kSModel', [
+    'getKbDetail',
+  ]);
+  return detailsLoading;
+};
+
+/**
+ * Creates an antd form instance and watches its `parser_id` field.
+ *
+ * @returns `form` — the created form instance; `chunkMethod` — the watched
+ *   value of the `parser_id` field.
+ */
+export const useHandleChunkMethodChange = () => {
+  const [formInstance] = Form.useForm();
+
+  return {
+    form: formInstance,
+    chunkMethod: Form.useWatch('parser_id', formInstance),
+  };
+};
--- a/web/src/pages/add-knowledge/components/knowledge-setting/index.less
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/index.less
@@ -31,3 +31,15 @@
    width: 100%;
  }
 }
+
+.categoryPanelWrapper { // root <section> of the CategoryPanel component
+  .topTitle { // first title of the panel
+    margin-top: 0;
+  }
+  .imageRow { // row holding the example images
+    margin-top: 16px;
+  }
+  .image { // each example image inside the row
+    width: 100%;
+  }
+}
--- a/web/src/pages/add-knowledge/components/knowledge-setting/index.tsx
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/index.tsx
@@ -1,3 +1,36 @@
-import Configuration from './configuration';
+import { Col, Divider, Row, Spin, Typography } from 'antd';
+import CategoryPanel from './category-panel';
+import ConfigurationForm from './configuration';
+import {
+  useHandleChunkMethodChange,
+  useSelectKnowledgeDetailsLoading,
+} from './hooks';
+
+import styles from './index.less';
+
+const { Title } = Typography;
+
+/**
+ * Knowledge-base settings page: a heading followed by a two-column layout
+ * with the configuration form on the left and the chunk-method panel on the
+ * right, wrapped in a spinner driven by the knowledge-details loading state.
+ */
+const Configuration = () => {
+  const detailsLoading = useSelectKnowledgeDetailsLoading();
+  const { form: settingForm, chunkMethod: selectedChunkMethod } =
+    useHandleChunkMethodChange();
+
+  // Both columns take half of the row; the panel follows the form's value.
+  const columns = (
+    <Row gutter={32}>
+      <Col span={12}>
+        <ConfigurationForm form={settingForm} />
+      </Col>
+      <Col span={12}>
+        <CategoryPanel chunkMethod={selectedChunkMethod} />
+      </Col>
+    </Row>
+  );
+
+  return (
+    <div className={styles.configurationWrapper}>
+      <Title level={5}>Configuration</Title>
+      <p>Update your knowledge base details especially parsing method here.</p>
+      <Divider />
+      <Spin spinning={detailsLoading}>{columns}</Spin>
+    </div>
+  );
+};

 export default Configuration;
--- a/web/src/pages/add-knowledge/components/knowledge-setting/utils.ts
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/utils.ts
@@ -0,0 +1,84 @@
+/**
+ * Builds the image names `chunk-method/<prefix>-01`, `chunk-method/<prefix>-02`, …
+ *
+ * @param prefix - Base name placed before the running number.
+ * @param length - How many names to generate; numbering starts at 1.
+ * @returns The generated names in ascending order.
+ */
+const getImageName = (prefix: string, length: number) =>
+  Array.from(
+    { length },
+    // Pad to two digits instead of hard-coding a leading '0', so the tenth
+    // image becomes '-10' rather than '-010'.
+    (_, idx) => `chunk-method/${prefix}-${String(idx + 1).padStart(2, '0')}`,
+  );
+
+export const ImageMap = { // chunk-method key -> list of example image names
+  book: getImageName('book', 4),
+  laws: getImageName('law', 4), // NOTE(review): key is 'laws' but prefix is 'law' — confirm against the asset file names
+  manual: getImageName('manual', 4),
+  media: getImageName('media', 2),
+  naive: getImageName('naive', 2),
+  paper: getImageName('paper', 2),
+  presentation: getImageName('presentation', 2),
+  qa: getImageName('qa', 2),
+  resume: getImageName('resume', 2),
+  table: getImageName('table', 2),
+};
+
+/**
+ * User-facing description text per chunk method, keyed like `ImageMap`.
+ * Every `title` is currently an empty string.
+ */
+export const TextMap = {
+  book: {
+    title: '',
+    description: `Supported file formats are docx, excel, pdf, txt.
+  Since a book is long and not all the parts are useful, if it's a PDF,
+  please set up the page ranges for every book in order to eliminate negative effects and save computing time for analyzing.`,
+  },
+  laws: {
+    title: '',
+    description: `Supported file formats are docx, pdf, txt.`,
+  },
+  manual: { title: '', description: `Only pdf is supported.` },
+  media: { title: '', description: '' },
+  naive: {
+    title: '',
+    description: `Supported file formats are docx, pdf, txt.
+  This method applies the naive way to chunk files.
+  Successive text will be sliced into pieces using 'delimiter'.
+  Next, these successive pieces are merged into chunks whose token number is no more than 'Max token number'.`,
+  },
+  paper: {
+    title: '',
+    description: `Only pdf is supported.
+  The special part is that, the abstract of the paper will be sliced as an entire chunk, and will not be sliced partly.`,
+  },
+  presentation: {
+    title: '',
+    description: `The supported file formats are pdf, pptx.
+  Every page will be treated as a chunk. And the thumbnail of every page will be stored.
+  PPT file will be parsed by using this method automatically, setting-up for every PPT file is not necessary.`,
+  },
+  qa: {
+    title: '',
+    description: `Excel and csv(txt) format files are supported.
+  If the file is in excel format, there should be 2 columns, question and answer, without a header.
+  And question column is ahead of answer column.
+  And it's O.K if it has multiple sheets as long as the columns are rightly composed.
+
+  If it's in csv format, it should be UTF-8 encoded. Use TAB as delimiter to separate question and answer.
+
+  All the deformed lines will be ignored.
+  Every pair of Q&A will be treated as a chunk.`,
+  },
+  resume: {
+    title: '',
+    description: `The supported file formats are pdf, docx and txt.`,
+  },
+  // NOTE(review): the trailing "visual:" section sits inside the table
+  // description; it looks like it belongs to a separate entry — confirm
+  // where it should live before moving it.
+  table: {
+    title: '',
+    description: `Excel and csv(txt) format files are supported.
+  For csv or txt file, the delimiter between columns is TAB.
+  The first line must be column headers.
+  Column headers must be meaningful terms in order to help our NLP model understand them.
+  It's good to enumerate some synonyms using slash '/' to separate, and even better to
+  enumerate values using brackets like 'gender/sex(male, female)'.
+  Here are some examples for headers:
+      1. supplier/vendor\tcolor(yellow, red, brown)\tgender/sex(male, female)\tsize(M,L,XL,XXL)
+      2. 姓名/名字\t电话/手机/微信\t最高学历（高中，职高，硕士，本科，博士，初中，中技，中专，专科，专升本，MPA，MBA，EMBA）
+  Every row in table will be treated as a chunk.
+
+visual:
+  Image files are supported. Video is coming soon.
+  If the picture has text in it, OCR is applied to extract the text as a description of it.
+  If the text extracted by OCR is not enough, visual LLM is used to get the descriptions.`,
+  },
+};