### What problem does this PR solve?

Implements RAPTOR for better chunking #882

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@@ -265,6 +265,26 @@ export default {
|
||||
</p><p>
|
||||
If you want to summarize something that needs all the context of an article and the selected LLM's context length covers the document length, you can try this method.
|
||||
</p>`,
|
||||
useRaptor: 'Use RAPTOR to enhance retrieval',
|
||||
useRaptorTip:
|
||||
'Recursive Abstractive Processing for Tree-Organized Retrieval, please refer to https://huggingface.co/papers/2401.18059',
|
||||
prompt: 'Prompt',
|
||||
promptTip: 'LLM prompt used for summarization.',
|
||||
promptMessage: 'Prompt is required',
|
||||
promptText: `Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:
|
||||
{cluster_content}
|
||||
The above is the content you need to summarize.`,
|
||||
maxToken: 'Max token',
|
||||
maxTokenTip: 'Maximum token number for summarization.',
|
||||
maxTokenMessage: 'Max token is required',
|
||||
threshold: 'Threshold',
|
||||
thresholdTip: 'The larger the threshold, the fewer clusters there will be.',
|
||||
thresholdMessage: 'Threshold is required',
|
||||
maxCluster: 'Max cluster',
|
||||
maxClusterTip: 'Maximum cluster number.',
|
||||
maxClusterMessage: 'Max cluster is required',
|
||||
randomSeed: 'Random seed',
|
||||
randomSeedMessage: 'Random seed is required',
|
||||
},
|
||||
chunk: {
|
||||
chunk: 'Chunk',
|
||||
|
||||
@@ -238,6 +238,25 @@ export default {
|
||||
</p><p>
|
||||
如果你要總結的東西需要一篇文章的全部上下文,並且所選LLM的上下文長度覆蓋了文檔長度,你可以嘗試這種方法。
|
||||
</p>`,
|
||||
useRaptor: '使用RAPTOR文件增強策略',
|
||||
useRaptorTip: '請參考 https://huggingface.co/papers/2401.18059',
|
||||
prompt: '提示詞',
|
||||
promptMessage: '提示詞是必填項',
|
||||
promptText: `請總結以下段落。 小心數字,不要編造。 段落如下:
|
||||
{cluster_content}
|
||||
以上就是你需要總結的內容。`,
|
||||
maxToken: '最大token數',
|
||||
maxTokenMessage: '最大token數是必填項',
|
||||
threshold: '臨界點',
|
||||
thresholdMessage: '臨界點是必填項',
|
||||
maxCluster: '最大聚類數',
|
||||
maxClusterMessage: '最大聚類數是必填項',
|
||||
randomSeed: '隨機種子',
|
||||
randomSeedMessage: '隨機種子是必填項',
|
||||
promptTip: 'LLM提示用於總結。',
|
||||
maxTokenTip: '用於匯總的最大token數。',
|
||||
thresholdTip: '閾值越大,聚類越少。',
|
||||
maxClusterTip: '最大聚類數。',
|
||||
},
|
||||
chunk: {
|
||||
chunk: '解析塊',
|
||||
|
||||
@@ -255,6 +255,25 @@ export default {
|
||||
</p><p>
|
||||
如果你要总结的东西需要一篇文章的全部上下文,并且所选LLM的上下文长度覆盖了文档长度,你可以尝试这种方法。
|
||||
</p>`,
|
||||
useRaptor: '使用召回增强RAPTOR策略',
|
||||
useRaptorTip: '请参考 https://huggingface.co/papers/2401.18059',
|
||||
prompt: '提示词',
|
||||
promptMessage: '提示词是必填项',
|
||||
promptText: `请总结以下段落。 小心数字,不要编造。 段落如下:
|
||||
{cluster_content}
|
||||
以上就是你需要总结的内容。`,
|
||||
maxToken: '最大token数',
|
||||
maxTokenMessage: '最大token数是必填项',
|
||||
threshold: '临界点',
|
||||
thresholdMessage: '临界点是必填项',
|
||||
maxCluster: '最大聚类数',
|
||||
maxClusterMessage: '最大聚类数是必填项',
|
||||
randomSeed: '随机种子',
|
||||
randomSeedMessage: '随机种子是必填项',
|
||||
promptTip: 'LLM提示用于总结。',
|
||||
maxTokenTip: '用于汇总的最大token数。',
|
||||
thresholdTip: '阈值越大,聚类越少。',
|
||||
maxClusterTip: '最大聚类数。',
|
||||
},
|
||||
chunk: {
|
||||
chunk: '解析块',
|
||||
|
||||
Reference in New Issue
Block a user