feat: add pages to ChunkMethodModal (#143)

This commit is contained in:
balibabu
2024-03-22 16:57:09 +08:00
committed by GitHub
parent 1edbd36baf
commit 2f4c71b4b4
36 changed files with 1036 additions and 1322 deletions

View File

@@ -1,10 +1,23 @@
import { IModalManagerChildrenProps } from '@/components/modal-manager';
import {
useFetchTenantInfo,
useSelectParserList,
} from '@/hooks/userSettingHook';
import { Modal, Space, Tag } from 'antd';
import React, { useEffect, useState } from 'react';
Button,
Divider,
Form,
InputNumber,
Modal,
Space,
Switch,
Tag,
} from 'antd';
import React, { useEffect, useMemo } from 'react';
import MaxTokenNumber from '@/components/max-token-number';
import { IKnowledgeFileParserConfig } from '@/interfaces/database/knowledge';
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
import { MinusCircleOutlined, PlusOutlined } from '@ant-design/icons';
import omit from 'lodash/omit';
import {} from 'module';
import { useFetchParserListOnMount } from './hooks';
import styles from './index.less';
@@ -12,41 +25,74 @@ const { CheckableTag } = Tag;
interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> {
loading: boolean;
onOk: (parserId: string) => void;
onOk: (
parserId: string,
parserConfig: IChangeParserConfigRequestBody,
) => void;
showModal?(): void;
parser_id: string;
parserId: string;
parserConfig: IKnowledgeFileParserConfig;
documentType: string;
}
// Chunk methods for which the page-range settings are hidden; consulted by
// `showPages` in ChunkMethodModal.
const hidePagesChunkMethods = ['qa', 'table', 'picture', 'resume', 'one'];
const ChunkMethodModal: React.FC<IProps> = ({
parser_id,
parserId,
onOk,
hideModal,
visible,
documentType,
parserConfig,
}) => {
const [selectedTag, setSelectedTag] = useState('');
const parserList = useSelectParserList();
useFetchTenantInfo();
useEffect(() => {
setSelectedTag(parser_id);
}, [parser_id]);
const { parserList, handleChange, selectedTag } =
useFetchParserListOnMount(parserId);
const [form] = Form.useForm();
/**
 * Confirm handler of the modal: validates the form, then reports the selected
 * chunk method together with its parser configuration through `onOk`.
 */
const handleOk = async () => {
  // Validate before notifying the caller so `onOk` is invoked exactly once,
  // and only with a configuration that passed the form rules.
  const values = await form.validateFields();
  const parser_config = {
    ...values.parser_config,
    // The form stores each range as { from, to }; `onOk` receives [from, to] pairs.
    pages:
      values.pages?.map((x: { from: number; to: number }) => [x.from, x.to]) ??
      [],
  };
  onOk(selectedTag, parser_config);
};
const handleChange = (tag: string, checked: boolean) => {
const nextSelectedTag = checked ? tag : selectedTag;
setSelectedTag(nextSelectedTag);
// Page-range settings are shown only for PDF documents whose selected chunk
// method is not listed in `hidePagesChunkMethods`.
const showPages = useMemo(() => {
  const isPdf = documentType === 'pdf';
  return isPdf && !hidePagesChunkMethods.includes(selectedTag);
}, [documentType, selectedTag]);

// The layout-recognize switch is shown whenever the page settings are shown,
// and additionally for the 'one' chunk method.
const showOne = useMemo(
  () => selectedTag === 'one' || showPages,
  [showPages, selectedTag],
);
// Passed to the Modal's `afterClose`: resets every form field.
const afterClose = () => {
  form.resetFields();
};

// While the modal is visible, seed the form from the document's parser config.
useEffect(() => {
  if (!visible) {
    return;
  }
  // Stored ranges are [from, to] pairs; the form edits them as { from, to }.
  const storedPages = (parserConfig.pages ?? []).map((range) => ({
    from: range[0],
    to: range[1],
  }));
  const hasStoredPages = storedPages.length > 0;
  form.setFieldsValue({
    parser_config: omit(parserConfig, 'pages'),
    // Fall back to a single default range when nothing has been stored.
    pages: hasStoredPages ? storedPages : [{ from: 1, to: 1024 }],
  });
}, [form, parserConfig, visible]);
return (
<Modal
title="Chunk Method"
open={visible}
onOk={handleOk}
onCancel={hideModal}
afterClose={afterClose}
>
<Space size={[0, 8]} wrap>
<div className={styles.tags}>
@@ -63,6 +109,138 @@ const ChunkMethodModal: React.FC<IProps> = ({
})}
</div>
</Space>
<Divider></Divider>
{
<Form name="dynamic_form_nest_item" autoComplete="off" form={form}>
{showPages && (
<>
<Form.List name="pages">
{(fields, { add, remove }) => (
<>
{fields.map(({ key, name, ...restField }) => (
<Space
key={key}
style={{
display: 'flex',
}}
align="baseline"
>
<Form.Item
  {...restField}
  name={[name, 'from']}
  dependencies={
    // `dependencies` is a list of complete field paths. A flat
    // [name - 1, 'to'] would be read as two unrelated paths, so the path to
    // the previous row's `to` is wrapped in its own array.
    name > 0 ? [['pages', name - 1, 'to']] : []
  }
  rules={[
    {
      required: true,
      message: 'Missing start page number',
    },
    ({ getFieldValue }) => ({
      // Passes for the first row, for a falsy value (the `required` rule
      // reports a missing one), or when this `from` is greater than the
      // previous row's `to`.
      validator(_, value) {
        if (
          name === 0 ||
          !value ||
          getFieldValue(['pages', name - 1, 'to']) < value
        ) {
          return Promise.resolve();
        }
        return Promise.reject(
          new Error(
            'The current value must be greater than the previous to!',
          ),
        );
      },
    }),
  ]}
>
  <InputNumber
    placeholder="from"
    min={0}
    precision={0}
    className={styles.pageInputNumber}
  />
</Form.Item>
<Form.Item
  {...restField}
  name={[name, 'to']}
  dependencies={
    // `dependencies` is a list of complete field paths. A flat
    // [name, 'from'] would be read as two unrelated paths, so the path to
    // this row's `from` is wrapped in its own array.
    [['pages', name, 'from']]
  }
  rules={[
    {
      required: true,
      message: 'Missing end page number(excluding)',
    },
    ({ getFieldValue }) => ({
      // Passes for a falsy value (the `required` rule reports a missing
      // one) or when `to` is greater than this row's `from`.
      validator(_, value) {
        if (
          !value ||
          getFieldValue(['pages', name, 'from']) < value
        ) {
          return Promise.resolve();
        }
        return Promise.reject(
          // The value is compared against this row's `from`, so name that field.
          new Error(
            'The current value must be greater than from!',
          ),
        );
      },
    }),
  ]}
>
  <InputNumber
    placeholder="to"
    min={0}
    precision={0}
    className={styles.pageInputNumber}
  />
</Form.Item>
{name > 0 && (
<MinusCircleOutlined onClick={() => remove(name)} />
)}
</Space>
))}
<Form.Item>
<Button
type="dashed"
onClick={() => add()}
block
icon={<PlusOutlined />}
>
Add page
</Button>
</Form.Item>
</>
)}
</Form.List>
<Form.Item
name={['parser_config', 'task_page_size']}
label="Task page size"
tooltip={'coming soon'}
initialValue={2}
rules={[
{
required: true,
message: 'Please input your task page size!',
},
]}
>
<InputNumber min={1} max={128} />
</Form.Item>
</>
)}
{showOne && (
<Form.Item
name={['parser_config', 'layout_recognize']}
label="Layout recognize"
initialValue={true}
valuePropName="checked"
tooltip={'coming soon'}
>
<Switch />
</Form.Item>
)}
{selectedTag === 'naive' && <MaxTokenNumber></MaxTokenNumber>}
</Form>
}
</Modal>
);
};

View File

@@ -7,9 +7,13 @@ import {
} from '@/hooks/documentHooks';
import { useGetKnowledgeSearchParams } from '@/hooks/routeHook';
import { useOneNamespaceEffectsLoading } from '@/hooks/storeHooks';
import { useFetchTenantInfo } from '@/hooks/userSettingHook';
import {
useFetchTenantInfo,
useSelectParserList,
} from '@/hooks/userSettingHook';
import { Pagination } from '@/interfaces/common';
import { IKnowledgeFile } from '@/interfaces/database/knowledge';
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
import { PaginationProps } from 'antd';
import { useCallback, useEffect, useMemo, useState } from 'react';
import { useDispatch, useNavigate, useSelector } from 'umi';
@@ -222,8 +226,8 @@ export const useChangeDocumentParser = (documentId: string) => {
]);
const onChangeParserOk = useCallback(
async (parserId: string) => {
const ret = await setDocumentParser(parserId, documentId);
async (parserId: string, parserConfig: IChangeParserConfigRequestBody) => {
const ret = await setDocumentParser(parserId, documentId, parserConfig);
if (ret === 0) {
hideChangeParserModal();
}
@@ -239,3 +243,21 @@ export const useChangeDocumentParser = (documentId: string) => {
showChangeParserModal,
};
};
/**
 * Hook backing the chunk-method tag picker.
 *
 * Keeps the currently selected tag in local state, re-synchronised with
 * `parserId` whenever that prop changes, and exposes the parser list from
 * `useSelectParserList`. It also calls `useFetchTenantInfo` (presumably to
 * load the data behind the parser list; confirm against that hook).
 *
 * @param parserId Parser id the selection should follow.
 * @returns The parser list, the selected tag and a tag change handler.
 */
export const useFetchParserListOnMount = (parserId: string) => {
  const [selectedTag, setSelectedTag] = useState('');
  const parserList = useSelectParserList();
  useFetchTenantInfo();

  // Mirror the incoming parser id into local state.
  useEffect(() => {
    setSelectedTag(parserId);
  }, [parserId]);

  // Checking a tag selects it; unchecking keeps the current selection.
  function handleChange(tag: string, checked: boolean) {
    setSelectedTag(checked ? tag : selectedTag);
  }

  return { selectedTag, parserList, handleChange };
};

View File

@@ -34,3 +34,7 @@
.tochunks {
cursor: pointer;
}
// Fixed width applied to the "from"/"to" page-number inputs of the chunk method modal.
.pageInputNumber {
width: 220px;
}

View File

@@ -224,7 +224,9 @@ const KnowledgeFile = () => {
onOk={onCreateOk}
/>
<ChunkMethodModal
parser_id={currentRecord.parser_id}
parserId={currentRecord.parser_id}
parserConfig={currentRecord.parser_config}
documentType={currentRecord.type}
onOk={onChangeParserOk}
visible={changeParserVisible}
hideModal={hideChangeParserModal}

View File

@@ -7,10 +7,6 @@ import pick from 'lodash/pick';
import { DvaModel } from 'umi';
export interface KFModelState extends BaseState {
isShowCEFwModal: boolean;
isShowTntModal: boolean;
isShowSegmentSetModal: boolean;
isShowRenameModal: boolean;
tenantIfo: any;
data: IKnowledgeFile[];
total: number;
@@ -21,10 +17,6 @@ export interface KFModelState extends BaseState {
const model: DvaModel<KFModelState> = {
namespace: 'kFModel',
state: {
isShowCEFwModal: false,
isShowTntModal: false,
isShowSegmentSetModal: false,
isShowRenameModal: false,
tenantIfo: {},
data: [],
total: 0,
@@ -43,9 +35,7 @@ const model: DvaModel<KFModelState> = {
...payload,
};
},
setIsShowRenameModal(state, { payload }) {
return { ...state, isShowRenameModal: payload };
},
setCurrentRecord(state, { payload }) {
return { ...state, currentRecord: payload };
},
@@ -120,7 +110,7 @@ const model: DvaModel<KFModelState> = {
const { retcode } = data;
if (retcode === 0) {
message.success('Modified!');
put({
yield put({
type: 'getKfList',
payload: { kb_id: payload.kb_id },
});
@@ -148,10 +138,7 @@ const model: DvaModel<KFModelState> = {
const { retcode } = data;
if (retcode === 0) {
message.success('rename success');
yield put({
type: 'setIsShowRenameModal',
payload: false,
});
yield put({
type: 'getKfList',
payload: { kb_id: payload.kb_id },
@@ -164,16 +151,11 @@ const model: DvaModel<KFModelState> = {
const { data } = yield call(kbService.document_create, payload);
const { retcode } = data;
if (retcode === 0) {
put({
yield put({
type: 'getKfList',
payload: { kb_id: payload.kb_id },
});
put({
type: 'kFModel/updateState',
payload: {
isShowCEFwModal: false,
},
});
message.success('Created!');
}
return retcode;
@@ -202,16 +184,11 @@ const model: DvaModel<KFModelState> = {
);
const { retcode } = data;
if (retcode === 0) {
put({
yield put({
type: 'getKfList',
payload: { kb_id: payload.kb_id },
});
put({
type: 'updateState',
payload: {
isShowSegmentSetModal: false,
},
});
message.success('Modified!');
}
return retcode;

View File

@@ -1,22 +1,12 @@
import { normFile } from '@/utils/fileUtil';
import { PlusOutlined } from '@ant-design/icons';
import {
Button,
Flex,
Form,
Input,
InputNumber,
Radio,
Select,
Slider,
Space,
Upload,
} from 'antd';
import { Button, Form, Input, Radio, Select, Space, Upload } from 'antd';
import {
useFetchKnowledgeConfigurationOnMount,
useSubmitKnowledgeConfiguration,
} from './hooks';
import MaxTokenNumber from '@/components/max-token-number';
import { FormInstance } from 'antd/lib';
import styles from './index.less';
@@ -121,35 +111,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
const parserId = getFieldValue('parser_id');
if (parserId === 'naive') {
return (
<Form.Item label="Token number" tooltip="It determine the token number of a chunk approximately.">
<Flex gap={20} align="center">
<Flex flex={1}>
<Form.Item
name={['parser_config', 'chunk_token_num']}
noStyle
initialValue={128}
rules={[
{ required: true, message: 'Province is required' },
]}
>
<Slider className={styles.variableSlider} max={2048} />
</Form.Item>
</Flex>
<Form.Item
name={['parser_config', 'chunk_token_num']}
noStyle
rules={[{ required: true, message: 'Street is required' }]}
>
<InputNumber
className={styles.sliderInputNumber}
max={2048}
min={0}
/>
</Form.Item>
</Flex>
</Form.Item>
);
return <MaxTokenNumber></MaxTokenNumber>;
}
return null;
}}

View File

@@ -5,7 +5,6 @@ import { DvaModel } from 'umi';
export interface KSModelState {
isShowPSwModal: boolean;
isShowTntModal: boolean;
tenantIfo: any;
knowledgeDetails: IKnowledge;
}
@@ -14,7 +13,6 @@ const model: DvaModel<KSModelState> = {
namespace: 'kSModel',
state: {
isShowPSwModal: false,
isShowTntModal: false,
tenantIfo: {},
knowledgeDetails: {} as any,
},

View File

@@ -5,9 +5,9 @@ const getImageName = (prefix: string, length: number) =>
export const ImageMap = {
book: getImageName('book', 4),
laws: getImageName('law', 4),
laws: getImageName('law', 2),
manual: getImageName('manual', 4),
picture: getImageName('picture', 2),
picture: getImageName('media', 2),
naive: getImageName('naive', 2),
paper: getImageName('paper', 2),
presentation: getImageName('presentation', 2),
@@ -32,10 +32,13 @@ export const TextMap = {
The chunk granularity is consistent with 'ARTICLE', and all the upper level text will be included in the chunk.
</p>`,
},
manual: { title: '', description: `<p>Only <b>PDF</b> is supported.</p><p>
manual: {
title: '',
description: `<p>Only <b>PDF</b> is supported.</p><p>
We assume manual has hierarchical section structure. We use the lowest section titles as pivots to slice documents.
So, the figures and tables in the same section will not be sliced apart, and chunk size might be large.
</p>` },
</p>`,
},
naive: {
title: '',
description: `<p>Supported file formats are <b>DOCX, EXCEL, PPT, IMAGE, PDF, TXT</b>.</p>
@@ -100,19 +103,19 @@ export const TextMap = {
</li>
<li>Every row in table will be treated as a chunk.</li>
</ul>`,
},
picture: {
title: '',
description: `
},
picture: {
title: '',
description: `
<p>Image files are supported. Video is coming soon.</p><p>
If the picture has text in it, OCR is applied to extract the text as its text description.
</p><p>
If the text extracted by OCR is not enough, visual LLM is used to get the descriptions.
</p>`,
},
one: {
title: '',
description: `
one: {
title: '',
description: `
<p>Supported file formats are <b>DOCX, EXCEL, PDF, TXT</b>.
</p><p>
For a document, it will be treated as an entire chunk, no split at all.