说明文档往往有搜索框,可以根据关键字检索文档内容。我有时疑惑这种检索是后端DB检索还是其它的呢?
拿bootstrap-table的说明文档看下:是由algolia提供的检索服务api
那普通的内容页面有没有可能实现前端检索呢?
查资料,找到了flexsearch这个js检索组件,试用下:
说明及常用方法
有三种类型的索引:
Index是一个扁平的高性能索引,用于存储id内容对。
Worker/WorkerIndex也是一个扁平索引,同样存储id内容对,但在后台作为专用工作线程运行。
Document是一个多字段索引,可以存储复杂的JSON文档(其中也可以包含worker索引)。
Worker继承自Index类型,不继承自Document类型。因此,WorkerIndex的工作原理基本上类似于标准的FlexSearch索引。Document索引若要使用Worker,需要在创建时传入相应的选项来启用:{ worker: true }。
在Worker索引上调用的每个方法都被视为异步方法:调用会返回一个Promise,也可以把回调函数作为最后一个参数传入。
查看代码
// Synopsis of the three FlexSearch index types and the call signatures used with each.
// This is a reference listing: id, text, doc, limit, options and callback are placeholders.
// NOTE(review): in a browser page script the name `document` collides with the built-in
// global of the same name — rename it before running this listing as-is.
const index = new FlexSearch.Index();
const document = new FlexSearch.Document();
const worker = new FlexSearch.Worker();
// Index: store an id/content pair, then query it.
index.add(id, text);
index.search(text);
index.search(text, limit);
index.search(text, options);
index.search(text, limit, options);
index.search(options);
// Document: the document can be added on its own or together with an explicit id.
document.add(doc);
document.add(id, doc);
document.search(text);
document.search(text, limit);
document.search(text, options);
document.search(text, limit, options);
document.search(options);
// Worker: same call shapes as Index, plus an optional trailing callback.
worker.add(id, text);
worker.search(text);
worker.search(text, limit);
worker.search(text, options);
worker.search(text, limit, options);
worker.search(text, limit, options, callback);
worker.search(options);
简单使用
<script src="../plugins/flexsearch.bundle.min.js"></script>
<script>
// English search with a default-configured index.
const movie_en = new FlexSearch.Index();
[
  [1, "明天"],
  [2, "this today"],
  [3, "tomorow"],
  [4, "hello world"],
].forEach(([id, text]) => {
  movie_en.add(id, text);
});
// Exercise the mutation API: extend entry 2, replace entry 3, delete entry 4.
movie_en.append(2, "some appended content");
movie_en.update(3, "tomorow is anthor day");
movie_en.remove(4);
// Neither a fragment from the middle of an English word nor Chinese text is matched here.
const arr = ["today", "天", "row", "content", "world"];
arr.forEach((term) => {
  console.log(term, movie_en.search(term));
});
console.log("-----------------");
</script>
中文检索
查看代码
// Chinese search: a custom encoder that emits one token per character.
// ASCII characters are stripped before splitting, so English text is not searchable here.
const movie_cn = new FlexSearch.Index({
  // Array.from splits by code point, so characters outside the BMP (e.g. rare CJK
  // extension ideographs) stay whole instead of being cut into two surrogate halves.
  encode: (str) => Array.from(str.replace(/[\x00-\x7F]/g, "")),
});
movie_cn.add(1, "明天是新的一天,出去游玩");
movie_cn.add(2, "西游记");
movie_cn.add(3, "红楼梦");
movie_cn.add(4, "hello world");
const arr1 = ["天", "游记", "world"];
for (const str of arr1) {
  console.log(str, movie_cn.search(str));
}
console.log("-----------------");
中英文混合检索
分词算法来自于 https://liaoxuefeng.com/blogs/all/2024-01-05-js-full-text-search/index.html
查看代码
// Inclusive [first, last] character-code ranges for scripts whose letters are
// written in runs (tested by isAlphabet).
const ALPHABETS = [
  [0x0030, 0x0039], // digits 0-9
  [0x0041, 0x005a], // uppercase A-Z
  [0x0061, 0x007a], // lowercase a-z
  [0x00c0, 0x02af], // part of Latin-1 supplement / Latin extended A/B / IPA
  [0x0370, 0x052f], // Greek / Cyrillic / Cyrillic supplement
];
// Inclusive [first, last] character-code ranges for scripts where every character
// is treated on its own (tested by isSingleChar).
const SINGLE_CHARS = [
  [0x0e00, 0x0e5b], // Thai
  [0x3040, 0x309f], // Hiragana
  [0x4e00, 0x9fff], // CJK
  [0xac00, 0xd7af], // Hangul syllables
];
/**
 * Reports whether a character code lies inside any of the ALPHABETS ranges.
 * @param {number} n - Character code, as produced by String.prototype.charCodeAt.
 * @returns {boolean} true when n is within at least one inclusive range.
 */
function isAlphabet(n) {
  return ALPHABETS.some(([low, high]) => low <= n && n <= high);
}
/**
 * Reports whether a character code lies inside any of the SINGLE_CHARS ranges.
 * @param {number} n - Character code, as produced by String.prototype.charCodeAt.
 * @returns {boolean} true when n is within at least one inclusive range.
 */
function isSingleChar(n) {
  return SINGLE_CHARS.some(([low, high]) => low <= n && n <= high);
}
/**
 * Splits text into search tokens.
 * Characters accepted by isSingleChar become one token each; consecutive characters
 * accepted by isAlphabet are joined into one lower-cased token; anything else only
 * acts as a separator. Alphabetic runs of a single character are dropped.
 * @param {string} str - Text to tokenize.
 * @returns {string[]} Tokens in order of appearance.
 */
function tokenizer(str) {
  const tokens = [];
  let run = "";

  // Emit the pending alphabetic run (if longer than one character) and reset it.
  const flushRun = () => {
    if (run.length > 1) {
      tokens.push(run.toLowerCase());
    }
    run = "";
  };

  for (let pos = 0; pos < str.length; pos += 1) {
    const ch = str[pos];
    const code = str.charCodeAt(pos);
    if (isSingleChar(code)) {
      flushRun();
      tokens.push(ch);
    } else if (isAlphabet(code)) {
      run += ch;
    } else {
      flushRun();
    }
  }
  flushRun();
  return tokens;
}
// Mixed Chinese/English index built with the custom tokenizer as its encoder.
const movie = new FlexSearch.Index({ encode: tokenizer });
[
  [1, "明天,又是新的一天"],
  [2, "The Lock Artist"],
  [3, "明天,The Lock Artist"],
  [4, "天空很蓝"],
].forEach(([id, text]) => {
  movie.add(id, text);
});
// Query with a single character, a two-character word and an English word.
const arr2 = ["天", "明天", "artist"];
arr2.forEach((term) => {
  console.log(term, movie.search(term));
});
使用全唐诗测试
查看代码
<?php
// Load every poem from MySQL and flatten each row into one "title - author - content" line.
try {
    // utf8mb4 instead of utf8: MySQL's "utf8" is the 3-byte utf8mb3 subset, which cannot
    // carry 4-byte characters such as rare ideographs outside the Basic Multilingual Plane.
    $dsn = "mysql:host=127.0.0.1;port=3306;dbname=test;charset=utf8mb4";
    $user = "root";
    $password = "";
    $pdo = new PDO($dsn, $user, $password, [PDO::ATTR_PERSISTENT => true]);
    $pdo->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_ASSOC);
    $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
} catch (Exception $error) {
    // Keep the details out of the page, but record them so the failure can be diagnosed.
    error_log('poem search: database connection failed: ' . $error->getMessage());
    die('connect error');
}
$sql = "select title,author,content from tb_chinese_poems";
$stmt = $pdo->prepare($sql);
$stmt->execute();
$data = [];
while ($row = $stmt->fetch()) {
    // Strip line breaks so each poem becomes a single line of text.
    $data[] = str_replace(["\n", "\r"], '', $row['title'] . ' - ' . $row['author'] . ' - ' . $row['content']);
}
?>
<script type="text/javascript" src="../plugins/alpine.min.js"></script>
<script src="../plugins/flexsearch.bundle.min.js"></script>
<script>
// Character classification tables and predicates used by tokenizer().
const word = {
  // Inclusive [first, last] ranges for scripts whose letters are written in runs.
  ALPHABETS: [
    [0x0030, 0x0039], // digits 0-9
    [0x0041, 0x005a], // uppercase A-Z
    [0x0061, 0x007a], // lowercase a-z
    [0x00c0, 0x02af], // part of Latin-1 supplement / Latin extended A/B / IPA
    [0x0370, 0x052f], // Greek / Cyrillic / Cyrillic supplement
  ],
  // Inclusive [first, last] ranges for scripts where each character is handled on its own.
  SINGLE_CHARS: [
    [0x0e00, 0x0e5b], // Thai
    [0x3040, 0x309f], // Hiragana
    [0x4e00, 0x9fff], // CJK
    [0xac00, 0xd7af], // Hangul syllables
  ],
  /**
   * @param {number} n - Character code.
   * @returns {boolean} true when n falls inside any ALPHABETS range.
   */
  isAlphabet(n) {
    return this.ALPHABETS.some(([low, high]) => low <= n && n <= high);
  },
  /**
   * @param {number} n - Character code.
   * @returns {boolean} true when n falls inside any SINGLE_CHARS range.
   */
  isSingleChar(n) {
    return this.SINGLE_CHARS.some(([low, high]) => low <= n && n <= high);
  },
};
/**
 * Splits text into search tokens using the `word` classifier.
 * Characters accepted by word.isSingleChar become one token each; consecutive characters
 * accepted by word.isAlphabet are joined into one lower-cased token; anything else only
 * separates tokens. Alphabetic runs of a single character are dropped.
 * @param {string} str - Text to tokenize.
 * @returns {string[]} Tokens in order of appearance.
 */
function tokenizer(str) {
  const tokens = [];
  let pending = "";

  // Emit the buffered alphabetic run when it is longer than one character, then clear it.
  const emitPending = () => {
    if (pending.length > 1) {
      tokens.push(pending.toLowerCase());
    }
    pending = "";
  };

  for (let idx = 0; idx < str.length; idx += 1) {
    const unit = str.charCodeAt(idx);
    if (word.isSingleChar(unit)) {
      emitPending();
      tokens.push(str[idx]);
      continue;
    }
    if (word.isAlphabet(unit)) {
      pending += str[idx];
      continue;
    }
    emitPending();
  }
  emitPending();
  return tokens;
}
// The PHP array is serialized to JSON and embedded in a single-quoted JS string literal.
// That literal is unescaped by the JS parser before JSON.parse sees it, so the JSON text
// is passed through addslashes() first: every backslash and quote is escaped once more and
// JSON.parse receives exactly what json_encode produced. Without it, any poem containing
// a quote or a backslash breaks the page. JSON_HEX_TAG keeps angle brackets out of the
// inline script.
const documents = JSON.parse('<?= addslashes(json_encode($data, JSON_HEX_TAG)) ?>');
const doc_index = new FlexSearch.Index({
  encode: tokenizer,
});
// Index each poem under its array position, so result ids map straight back to `documents`.
documents.forEach((text, position) => {
  doc_index.add(position, text);
});
</script>
<script>
/**
 * Builds the Alpine.js component state for the search box (used as x-data="search()").
 *
 * - keyword: current query text.
 * - items: the full list of documents.
 * - num: number of ids returned by the last query.
 * - filteredItems: getter returning the documents matched by `keyword`, in list order.
 *
 * @returns {{keyword: string, items: string[], num: number, filteredItems: string[]}}
 */
function search() {
  return {
    keyword: "",
    items: documents,
    num: 0,
    get filteredItems() {
      // Declared locally: the original assigned to an undeclared `res`, which leaks a
      // global in sloppy mode and throws a ReferenceError in strict mode / ES modules.
      const res = doc_index.search(this.keyword);
      this.num = res.length;
      // Set lookup keeps the filter linear in the number of documents.
      const matchedIds = new Set(res);
      return this.items.filter((item, i) => matchedIds.has(i));
    },
  };
}
</script>
<!-- Search UI: the input is bound to `keyword`, the counter shows `num`, and the list renders `filteredItems` from search(). -->
<div x-data="search()">
<input x-model="keyword" placeholder="Search..." />
<p>共检索到<span x-text="num"></span>条</p>
<ul>
<template x-for="(item,index) in filteredItems" :key="index">
<li x-text="item"></li>
</template>
</ul>
</div>
前端检索的速度很快。
标签:search,last,movie,js,add,str,text,组件,flexsearch From: https://www.cnblogs.com/caroline2016/p/18588999