feat: add image for chunk method (#139)

* feat: add image of chunk method

* feat: add image for chunk method
This commit is contained in:
balibabu
2024-03-21 16:45:03 +08:00
committed by GitHub
parent 5875c8ba08
commit f4ec7cfa76
39 changed files with 2823 additions and 196 deletions

View File

@@ -220,7 +220,7 @@ const KnowledgeFile = () => {
key: 'create_date',
},
{
title: 'Category',
title: 'Chunk Method',
dataIndex: 'parser_id',
key: 'parser_id',
render: (text) => {

View File

@@ -0,0 +1,71 @@
import SvgIcon from '@/components/svg-icon';
import { useSelectParserList } from '@/hooks/userSettingHook';
import { Col, Divider, Empty, Row, Typography } from 'antd';
import { useMemo } from 'react';
import styles from './index.less';
import { ImageMap, TextMap } from './utils';
const { Title, Text } = Typography;
/**
 * Side panel describing the currently selected chunk method.
 *
 * Looks up the method's label in the parser list and its description in
 * `TextMap`, then renders the example images registered for it in `ImageMap`.
 * When no images are registered for `chunkMethod`, an empty-state placeholder
 * is rendered instead.
 *
 * @param chunkMethod - Identifier of the selected chunk method; compared with
 *   each parser list entry's `value` and used as the `ImageMap` key.
 */
const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
  const parserList = useSelectParserList();

  // Title and description for the selected method; empty strings when the
  // method is not present in the parser list.
  const item = useMemo(() => {
    const parser = parserList.find((x) => x.value === chunkMethod);
    if (parser) {
      return {
        title: parser.label,
        description: TextMap[parser.value as keyof typeof TextMap]?.description,
      };
    }
    return { title: '', description: '' };
  }, [parserList, chunkMethod]);

  const imageList = useMemo(() => {
    // Own-property check on purpose: the `in` operator also matches inherited
    // keys such as "constructor" or "toString", which would hand back a
    // non-array value and break the `.map` call in the render below.
    if (Object.prototype.hasOwnProperty.call(ImageMap, chunkMethod)) {
      return ImageMap[chunkMethod as keyof typeof ImageMap];
    }
    return [];
  }, [chunkMethod]);

  return (
    <section className={styles.categoryPanelWrapper}>
      {imageList.length > 0 ? (
        <>
          <Title level={5} className={styles.topTitle}>
            {item.title} Category
          </Title>
          <Text>{item.description}</Text>
          <Title level={5}>{item.title} Image Examples</Title>
          <Text>
            We've prepared detailed visual guides to make understanding easier
            for you.
          </Text>
          <Row gutter={[10, 10]} className={styles.imageRow}>
            {imageList.map((x) => (
              <Col span={12} key={x}>
                <SvgIcon
                  name={x}
                  width={'100%'}
                  className={styles.image}
                ></SvgIcon>
              </Col>
            ))}
          </Row>
          <Title level={5}>{item.title} Dialogue Examples</Title>
          <Divider></Divider>
        </>
      ) : (
        <Empty description={''} image={null}>
          <p>
            This will display a visual explanation of the knowledge base
            categories
          </p>
          <SvgIcon name={'chunk-method/chunk-empty'} width={'100%'}></SvgIcon>
        </Empty>
      )}
    </section>
  );
};
export default CategoryPanel;

View File

@@ -2,7 +2,6 @@ import { normFile } from '@/utils/fileUtil';
import { PlusOutlined } from '@ant-design/icons';
import {
Button,
Divider,
Flex,
Form,
Input,
@@ -11,8 +10,6 @@ import {
Select,
Slider,
Space,
Spin,
Typography,
Upload,
} from 'antd';
import {
@@ -20,174 +17,162 @@ import {
useSubmitKnowledgeConfiguration,
} from './hooks';
import { FormInstance } from 'antd/lib';
import styles from './index.less';
const { Title } = Typography;
const { Option } = Select;
const Configuration = () => {
const ConfigurationForm = ({ form }: { form: FormInstance }) => {
const { submitKnowledgeConfiguration, submitLoading } =
useSubmitKnowledgeConfiguration();
const { form, parserList, embeddingModelOptions, loading } =
useFetchKnowledgeConfigurationOnMount();
const { parserList, embeddingModelOptions } =
useFetchKnowledgeConfigurationOnMount(form);
const onFinishFailed = (errorInfo: any) => {
console.log('Failed:', errorInfo);
};
return (
<div className={styles.configurationWrapper}>
<Title level={5}>Configuration</Title>
<p>Update your knowledge base details especially parsing method here.</p>
<Divider></Divider>
<Spin spinning={loading}>
<Form
form={form}
name="validateOnly"
layout="vertical"
autoComplete="off"
onFinish={submitKnowledgeConfiguration}
onFinishFailed={onFinishFailed}
<Form
form={form}
name="validateOnly"
layout="vertical"
autoComplete="off"
onFinish={submitKnowledgeConfiguration}
onFinishFailed={onFinishFailed}
>
<Form.Item
name="name"
label="Knowledge base name"
rules={[{ required: true }]}
>
<Input />
</Form.Item>
<Form.Item
name="avatar"
label="Knowledge base photo"
valuePropName="fileList"
getValueFromEvent={normFile}
>
<Upload
listType="picture-card"
maxCount={1}
beforeUpload={() => false}
showUploadList={{ showPreviewIcon: false, showRemoveIcon: false }}
>
<Form.Item
name="name"
label="Knowledge base name"
rules={[{ required: true }]}
>
<Input />
</Form.Item>
<Form.Item
name="avatar"
label="Knowledge base photo"
valuePropName="fileList"
getValueFromEvent={normFile}
>
<Upload
listType="picture-card"
maxCount={1}
beforeUpload={() => false}
showUploadList={{ showPreviewIcon: false, showRemoveIcon: false }}
>
<button style={{ border: 0, background: 'none' }} type="button">
<PlusOutlined />
<div style={{ marginTop: 8 }}>Upload</div>
</button>
</Upload>
</Form.Item>
<Form.Item name="description" label="Description">
<Input />
</Form.Item>
<Form.Item
label="Language"
name="language"
initialValue={'Chinese'}
rules={[{ required: true, message: 'Please input your language!' }]}
>
<Select placeholder="select your language">
<Option value="English">English</Option>
<Option value="Chinese">Chinese</Option>
</Select>
</Form.Item>
<Form.Item
name="permission"
label="Permissions"
tooltip="coming soon"
rules={[{ required: true }]}
>
<Radio.Group>
<Radio value="me">Only me</Radio>
<Radio value="team">Team</Radio>
</Radio.Group>
</Form.Item>
<Form.Item
name="embd_id"
label="Embedding Model"
rules={[{ required: true }]}
tooltip="xx"
>
<Select
placeholder="Please select a country"
options={embeddingModelOptions}
></Select>
</Form.Item>
<Form.Item
name="parser_id"
label="Knowledge base category"
tooltip="xx"
rules={[{ required: true }]}
>
<Select placeholder="Please select a country">
{parserList.map((x) => (
<Option value={x.value} key={x.value}>
{x.label}
</Option>
))}
</Select>
</Form.Item>
<Form.Item noStyle dependencies={['parser_id']}>
{({ getFieldValue }) => {
const parserId = getFieldValue('parser_id');
<button style={{ border: 0, background: 'none' }} type="button">
<PlusOutlined />
<div style={{ marginTop: 8 }}>Upload</div>
</button>
</Upload>
</Form.Item>
<Form.Item name="description" label="Description">
<Input />
</Form.Item>
<Form.Item
label="Language"
name="language"
initialValue={'Chinese'}
rules={[{ required: true, message: 'Please input your language!' }]}
>
<Select placeholder="select your language">
<Option value="English">English</Option>
<Option value="Chinese">Chinese</Option>
</Select>
</Form.Item>
<Form.Item
name="permission"
label="Permissions"
tooltip="coming soon"
rules={[{ required: true }]}
>
<Radio.Group>
<Radio value="me">Only me</Radio>
<Radio value="team">Team</Radio>
</Radio.Group>
</Form.Item>
<Form.Item
name="embd_id"
label="Embedding Model"
rules={[{ required: true }]}
tooltip="xx"
>
<Select
placeholder="Please select a country"
options={embeddingModelOptions}
></Select>
</Form.Item>
<Form.Item
name="parser_id"
label="Chunk method"
tooltip="xx"
rules={[{ required: true }]}
>
<Select placeholder="Please select a country">
{parserList.map((x) => (
<Option value={x.value} key={x.value}>
{x.label}
</Option>
))}
</Select>
</Form.Item>
<Form.Item noStyle dependencies={['parser_id']}>
{({ getFieldValue }) => {
const parserId = getFieldValue('parser_id');
if (parserId === 'naive') {
return (
<Form.Item label="Chunk token number" tooltip="xxx">
<Flex gap={20} align="center">
<Flex flex={1}>
<Form.Item
name={['parser_config', 'chunk_token_num']}
noStyle
initialValue={128}
rules={[
{ required: true, message: 'Province is required' },
]}
>
<Slider
className={styles.variableSlider}
max={2048}
/>
</Form.Item>
</Flex>
<Form.Item
name={['parser_config', 'chunk_token_num']}
noStyle
rules={[
{ required: true, message: 'Street is required' },
]}
>
<InputNumber
className={styles.sliderInputNumber}
max={2048}
min={0}
/>
</Form.Item>
</Flex>
if (parserId === 'naive') {
return (
<Form.Item label="Max token number" tooltip="xxx">
<Flex gap={20} align="center">
<Flex flex={1}>
<Form.Item
name={['parser_config', 'chunk_token_num']}
noStyle
initialValue={128}
rules={[
{ required: true, message: 'Province is required' },
]}
>
<Slider className={styles.variableSlider} max={2048} />
</Form.Item>
</Flex>
<Form.Item
name={['parser_config', 'chunk_token_num']}
noStyle
rules={[{ required: true, message: 'Street is required' }]}
>
<InputNumber
className={styles.sliderInputNumber}
max={2048}
min={0}
/>
</Form.Item>
);
}
return null;
}}
</Form.Item>
<Form.Item>
<div className={styles.buttonWrapper}>
<Space>
<Button htmlType="reset" size={'middle'}>
Cancel
</Button>
<Button
htmlType="submit"
type="primary"
size={'middle'}
loading={submitLoading}
>
Save
</Button>
</Space>
</div>
</Form.Item>
</Form>
</Spin>
</div>
</Flex>
</Form.Item>
);
}
return null;
}}
</Form.Item>
<Form.Item>
<div className={styles.buttonWrapper}>
<Space>
<Button htmlType="reset" size={'middle'}>
Cancel
</Button>
<Button
htmlType="submit"
type="primary"
size={'middle'}
loading={submitLoading}
>
Save
</Button>
</Space>
</div>
</Form.Item>
</Form>
);
};
export default Configuration;
export default ConfigurationForm;

View File

@@ -15,6 +15,7 @@ import {
getUploadFileListFromBase64,
} from '@/utils/fileUtil';
import { Form, UploadFile } from 'antd';
import { FormInstance } from 'antd/lib';
import pick from 'lodash/pick';
import { useCallback, useEffect } from 'react';
import { LlmModelType } from '../../constant';
@@ -39,9 +40,8 @@ export const useSubmitKnowledgeConfiguration = () => {
return { submitKnowledgeConfiguration, submitLoading };
};
export const useFetchKnowledgeConfigurationOnMount = () => {
const [form] = Form.useForm();
const loading = useOneNamespaceEffectsLoading('kSModel', ['getKbDetail']);
export const useFetchKnowledgeConfigurationOnMount = (form: FormInstance) => {
// const [form] = Form.useForm();
const knowledgeDetails = useSelectKnowledgeDetails();
const parserList = useSelectParserList();
@@ -69,5 +69,15 @@ export const useFetchKnowledgeConfigurationOnMount = () => {
});
}, [form, knowledgeDetails]);
return { form, parserList, embeddingModelOptions, loading };
return { parserList, embeddingModelOptions };
};
/**
 * Loading flag for the knowledge base detail request.
 *
 * Thin wrapper that delegates to `useOneNamespaceEffectsLoading` with the
 * 'kSModel' namespace and its 'getKbDetail' effect, and returns that result
 * unchanged.
 */
export const useSelectKnowledgeDetailsLoading = () => {
  return useOneNamespaceEffectsLoading('kSModel', ['getKbDetail']);
};
/**
 * Creates the configuration form instance and watches its `parser_id` field.
 *
 * @returns `form` — the instance created by `Form.useForm()`; `chunkMethod` —
 *   the current value of the `parser_id` field as reported by `Form.useWatch`.
 */
export const useHandleChunkMethodChange = () => {
  const [form] = Form.useForm();

  return { form, chunkMethod: Form.useWatch('parser_id', form) };
};

View File

@@ -31,3 +31,15 @@
width: 100%;
}
}
// Styles for the chunk method explanation panel (class names are referenced
// as styles.categoryPanelWrapper / topTitle / imageRow / image).
.categoryPanelWrapper {
// Removes the top margin of the first heading in the panel.
.topTitle {
margin-top: 0;
}
// Vertical gap between the intro text and the grid of example images.
.imageRow {
margin-top: 16px;
}
// Each example image fills the width of its grid column.
.image {
width: 100%;
}
}

View File

@@ -1,3 +1,36 @@
import Configuration from './configuration';
import { Col, Divider, Row, Spin, Typography } from 'antd';
import CategoryPanel from './category-panel';
import ConfigurationForm from './configuration';
import {
useHandleChunkMethodChange,
useSelectKnowledgeDetailsLoading,
} from './hooks';
import styles from './index.less';
const { Title } = Typography;
/**
 * Knowledge base configuration page.
 *
 * Renders the configuration form and the chunk method panel side by side,
 * both wrapped in a spinner driven by `useSelectKnowledgeDetailsLoading`.
 * The form instance and the watched chunk method come from
 * `useHandleChunkMethodChange`, so the panel follows the form's selection.
 */
const Configuration = () => {
  const loading = useSelectKnowledgeDetailsLoading();
  const { form, chunkMethod } = useHandleChunkMethodChange();

  return (
    <div className={styles.configurationWrapper}>
      <Title level={5}>Configuration</Title>
      <p>Update your knowledge base details especially parsing method here.</p>
      <Divider />
      <Spin spinning={loading}>
        <Row gutter={32}>
          {/* Left half: editable settings */}
          <Col span={12}>
            <ConfigurationForm form={form} />
          </Col>
          {/* Right half: explanation of the selected chunk method */}
          <Col span={12}>
            <CategoryPanel chunkMethod={chunkMethod} />
          </Col>
        </Row>
      </Spin>
    </div>
  );
};
export default Configuration;

View File

@@ -0,0 +1,84 @@
/**
 * Builds the icon names of a chunk method's example images.
 *
 * Names follow the pattern `chunk-method/<prefix>-NN`, where `NN` is the
 * 1-based image index zero-padded to two digits (01, 02, ..., 10, 11, ...).
 *
 * @param prefix - File name prefix of the image series, e.g. `book`.
 * @param length - Number of images in the series; zero, negative or NaN
 *   values yield an empty list instead of throwing.
 * @returns The image names in ascending index order.
 */
const getImageName = (prefix: string, length: number): string[] =>
  Array.from(
    { length },
    // padStart keeps 1-9 as "01".."09" but does not produce "010" for the
    // tenth image the way a hard-coded "0" prefix would.
    (_, idx) => `chunk-method/${prefix}-${String(idx + 1).padStart(2, '0')}`,
  );
/**
 * Example image names per chunk method.
 *
 * Each key is a chunk method identifier; each value is the list of image
 * names produced by `getImageName(prefix, count)`. The category panel looks a
 * method up here by key and renders one icon per returned name.
 */
export const ImageMap = {
book: getImageName('book', 4),
// NOTE(review): the key is `laws` but the image prefix is `law` — confirm
// this matches the actual asset file names.
laws: getImageName('law', 4),
manual: getImageName('manual', 4),
media: getImageName('media', 2),
naive: getImageName('naive', 2),
paper: getImageName('paper', 2),
presentation: getImageName('presentation', 2),
qa: getImageName('qa', 2),
resume: getImageName('resume', 2),
table: getImageName('table', 2),
};
/**
 * User-facing explanatory text per chunk method.
 *
 * Keys mirror the chunk method identifiers used by `ImageMap`. The category
 * panel reads `description` for the selected method; `title` is currently
 * empty for every entry.
 *
 * The image/video guidance lives under `media`; it must not be appended to
 * the `table` description, where it would be shown for spreadsheet files.
 */
export const TextMap = {
  book: {
    title: '',
    description: `Supported file formats are docx, excel, pdf, txt.
Since a book is long and not all the parts are useful, if it's a PDF,
please setup the page ranges for every book in order to eliminate negative effects and save computing time for analyzing.`,
  },
  laws: {
    title: '',
    description: `Supported file formats are docx, pdf, txt.`,
  },
  manual: { title: '', description: `Only pdf is supported.` },
  media: {
    title: '',
    description: `Image files are supported. Video is coming soon.
If the picture has text in it, OCR is applied to extract the text as a description of it.
If the text extracted by OCR is not enough, visual LLM is used to get the descriptions.`,
  },
  naive: {
    title: '',
    description: `Supported file formats are docx, pdf, txt.
This method applies the naive ways to chunk files.
Successive text will be sliced into pieces using 'delimiter'.
Next, these successive pieces are merged into chunks whose token number is no more than 'Max token number'.`,
  },
  paper: {
    title: '',
    description: `Only pdf is supported.
The special part is that, the abstract of the paper will be sliced as an entire chunk, and will not be sliced partly.`,
  },
  presentation: {
    title: '',
    description: `The supported file formats are pdf, pptx.
Every page will be treated as a chunk. And the thumbnail of every page will be stored.
PPT file will be parsed by using this method automatically, setting-up for every PPT file is not necessary.`,
  },
  qa: {
    title: '',
    description: `Excel and csv(txt) format files are supported.
If the file is in excel format, there should be 2 column question and answer without header.
And question column is ahead of answer column.
And it's O.K if it has multiple sheets as long as the columns are rightly composed.
If it's in csv format, it should be UTF-8 encoded. Use TAB as delimiter to separate question and answer.
All the deformed lines will be ignored.
Every pair of Q&A will be treated as a chunk.`,
  },
  resume: {
    title: '',
    description: `The supported file formats are pdf, docx and txt.`,
  },
  table: {
    title: '',
    description: `Excel and csv(txt) format files are supported.
For csv or txt file, the delimiter between columns is TAB.
The first line must be column headers.
Column headers must be meaningful terms in order to make our NLP model understanding.
It's good to enumerate some synonyms using slash '/' to separate, and even better to
enumerate values using brackets like 'gender/sex(male, female)'.
Here are some examples for headers:
1. supplier/vendor\tcolor(yellow, red, brown)\tgender/sex(male, female)\tsize(M,L,XL,XXL)
2. 姓名/名字\t电话/手机/微信\t最高学历高中职高硕士本科博士初中中技中专专科专升本MPAMBAEMBA
Every row in table will be treated as a chunk.`,
  },
};