【百度云搜索,搜各種資料:http://bdy.lqkweb.com】
【搜網盤,搜各種資料:http://www.swpan.cn】
elasticsearch(搜索引擎)提供了自動補全接口
官方說明:https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-completion.html
1、創建搜索自動補全字段suggest
自動補全需要用到一個字段名稱為suggest類型為Completion類型的一個字段
所以我們需要在前面用elasticsearch-dsl操作elasticsearch(搜索引擎)的代碼里,增加一個類型為Completion的suggest字段
注意:因為elasticsearch-dsl源碼問題,設置字段為Completion類型指定分詞器時會報錯,所以我們需要重寫CustomAnalyzer類
只有Completion類型才需要這樣處理,其他類型不用,其他類型直接指定分詞器即可
#!/usr/bin/env python
from datetime import datetime
from elasticsearch_dsl import DocType, Date, Nested, Boolean, \
analyzer, InnerObjectWrapper, Completion, Keyword, Text, Integer
# 更多字段類型見第三百六十四節elasticsearch(搜索引擎)的mapping映射管理
from elasticsearch_dsl.analysis import CustomAnalyzer as _CustomAnalyzer #導入CustomAnalyzer類
from elasticsearch_dsl.connections import connections # 導入連接elasticsearch(搜索引擎)服務器方法
# Register the default connection to the local Elasticsearch node.
connections.create_connection(hosts=['127.0.0.1'])


class CustomAnalyzer(_CustomAnalyzer):
    """Analyzer that reports an empty analysis definition.

    Per the accompanying article, this override exists so that a
    ``Completion`` field can be given a named analyzer without
    elasticsearch-dsl raising an error; other field types can simply pass
    the analyzer name.
    """

    def get_analysis_definition(self):
        # No custom analysis settings are contributed for this analyzer.
        return {}


# The "ik_max_word" analyzer combined with a lowercase filter.
ik_analyzer = CustomAnalyzer("ik_max_word", filter=["lowercase"])
class lagouType(DocType):
    """Document mapping for crawled pages stored in the "lagou" index."""

    # Completion field that backs the search autocomplete.
    suggest = Completion(analyzer=ik_analyzer)
    # Text fields are tokenized with the Chinese analyzer "ik_max_word"
    # and indexed for full-text search.
    title = Text(analyzer="ik_max_word")
    description = Text(analyzer="ik_max_word")
    keywords = Text(analyzer="ik_max_word")
    # Keyword is a plain string that is not tokenized.
    url = Keyword()
    # Date field.
    riqi = Date()

    class Meta:
        index = "lagou"  # index name (comparable to a database name)
        doc_type = 'biao'  # document type name (comparable to a table name)
if __name__ == "__main__":
    # Runs only when this file is executed directly, not when imported:
    # create the index, type and field mapping in Elasticsearch.
    lagouType.init()

# Usage:
# Import this module wherever Elasticsearch needs to be written to, then:
# lagou = lagouType()           # instantiate the document class
# lagou.title = 'value'         # field = value
# lagou.description = 'value'
# lagou.keywords = 'value'
# lagou.url = 'value'
# lagou.riqi = 'value'
# lagou.save()                  # write the document to Elasticsearch
2、搜索自動補全字段suggest寫入數據
搜索自動補全字段suggest接收的是要搜索的字段的分詞數據,詳情見下面的自定義分詞函數
elasticsearch-dsl操作elasticsearch(搜索引擎)
#!/usr/bin/env python
# -*- coding:utf8 -*-
#!/usr/bin/env python
from datetime import datetime
from elasticsearch_dsl import DocType, Date, Nested, Boolean, \
analyzer, InnerObjectWrapper, Completion, Keyword, Text, Integer
from elasticsearch_dsl.connections import connections # 導入連接elasticsearch(搜索引擎)服務器方法
# 更多字段類型見第三百六十四節elasticsearch(搜索引擎)的mapping映射管理
from elasticsearch_dsl.analysis import CustomAnalyzer as _CustomAnalyzer #導入CustomAnalyzer類
# Open the default connection to the Elasticsearch node on localhost.
connections.create_connection(hosts=['127.0.0.1'])


class CustomAnalyzer(_CustomAnalyzer):
    """Subclass of elasticsearch-dsl's CustomAnalyzer with an empty definition.

    The article introduces it as a workaround: assigning an analyzer to a
    ``Completion`` field otherwise fails inside elasticsearch-dsl.
    """

    def get_analysis_definition(self):
        # Always report an empty analysis definition.
        return {}


# Analyzer handle: "ik_max_word" tokenization plus a lowercase filter.
ik_analyzer = CustomAnalyzer("ik_max_word", filter=["lowercase"])
class lagouType(DocType):
    """Elasticsearch document describing one crawled page."""

    # Autocomplete data; filled from gen_suggest() output.
    suggest = Completion(analyzer=ik_analyzer)
    # Text = tokenized string; "ik_max_word" is the Chinese analyzer.
    title = Text(analyzer="ik_max_word")
    description = Text(analyzer="ik_max_word")
    keywords = Text(analyzer="ik_max_word")
    # Keyword = ordinary string, stored without tokenization.
    url = Keyword()
    # Date = date/time value.
    riqi = Date()

    class Meta:
        index = "lagou"  # index name (comparable to a database name)
        doc_type = 'biao'  # document type name (comparable to a table name)
def gen_suggest(index, info_tuple, es=None):
    """Build the value for the ``suggest`` completion field.

    Every non-empty text is tokenized through the Elasticsearch analyze API
    with the ``ik_max_word`` analyzer. Tokens longer than one character
    become suggestion inputs carrying that text's weight. A token is emitted
    only once, under the first ``(text, weight)`` pair that yields it, so
    callers should list the highest-weighted text first.

    Args:
        index: Name of the index whose analyzers are used, usually
            ``lagouType._doc_type.index``.
        info_tuple: Iterable of ``(text, weight)`` pairs, e.g.
            ``(('some text', 10), ('other text', 8))``.
        es: Optional client exposing ``indices.analyze``. When omitted, the
            connection already registered for ``lagouType`` is reused.

    Returns:
        A list of dicts such as
        ``[{'input': ['token', ...], 'weight': 10}, {'input': [...], 'weight': 8}]``.
        Pairs that contribute no new token are left out.
    """
    if es is None:
        # Reuse the registered connection rather than opening a new,
        # default-configured one on every call.
        es = connections.get_connection(lagouType._doc_type.using)

    used_words = set()
    suggests = []
    for text, weight in info_tuple:
        if not text:
            continue
        # Ask Elasticsearch to tokenize the text.
        words = es.indices.analyze(index=index, analyzer="ik_max_word", params={'filter': ["lowercase"]}, body=text)
        analyzed_words = {r["token"] for r in words["tokens"] if len(r["token"]) > 1}
        new_words = analyzed_words - used_words
        if new_words:
            # Record the tokens so later, lower-weighted texts do not repeat them.
            used_words |= new_words
            suggests.append({"input": list(new_words), "weight": weight})
    return suggests
if __name__ == "__main__":
    # Executed only when this file is the entry script: create the index,
    # type and field mapping in Elasticsearch.
    lagouType.init()

# How to use:
# Import this module in the code that writes to Elasticsearch, then:
# lagou = lagouType()           # create a document instance
# lagou.title = 'value'         # assign each field
# lagou.description = 'value'
# lagou.keywords = 'value'
# lagou.url = 'value'
# lagou.riqi = 'value'
# lagou.save()                  # persist the document to Elasticsearch
suggest字段寫入數據
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
# items.py文件專門用于接收爬蟲獲取到的數據信息,相當于容器文件
import scrapy
from scrapy.loader.processors import MapCompose, TakeFirst
from scrapy.loader import ItemLoader # 導入ItemLoader類也就加載items容器類填充數(shù)據(jù)
from adc.models.elasticsearch_orm import lagouType, gen_suggest # 導入elasticsearch操作模塊
class LagouItemLoader(ItemLoader):
    """Item loader the spider uses to populate item fields."""

    # Loaded values are collected as lists; TakeFirst() is used as the
    # default output processor so each field yields a single value.
    default_output_processor = TakeFirst()
def tianjia(value):
    """Pre-processing hook for scraped values; returns the value unchanged."""
    return value
class LagouItem(scrapy.Item):
    """Container for the data scraped from one page."""

    title = scrapy.Field(
        # MapCompose passes each scraped title value through tianjia().
        input_processor=MapCompose(tianjia),
    )
    description = scrapy.Field()
    keywords = scrapy.Field()
    url = scrapy.Field()
    riqi = scrapy.Field()

    def save_to_es(self):
        """Copy this item's fields into a ``lagouType`` document and save it.

        Raises:
            KeyError: if any of title/description/keywords/url/riqi was not
                populated on the item.
        """
        lagou = lagouType()
        lagou.title = self['title']
        lagou.description = self['description']
        lagou.keywords = self['keywords']
        lagou.url = self['url']
        lagou.riqi = self['riqi']
        # Tokenize title and keywords (title weighted higher) and store the
        # result in the completion field used for autocomplete.
        lagou.suggest = gen_suggest(lagouType._doc_type.index, ((lagou.title, 10), (lagou.keywords, 8)))
        lagou.save()
寫入elasticsearch(搜索引擎)后的情況
{
"_index": "lagou",
"_type": "biao",
"_id": "AV5MDu0NXJs9MkF5tFxW",
"_version": 1,
"_score": 1,
"_source": {
"title": "LED光催化滅蚊燈廣告錄音_廣告錄音網-火紅廣告錄音_叫賣錄音下載_語音廣告制作",
"keywords": "各類小商品,廣告錄音,叫賣錄音,火紅廣告錄音",
"url": "http://www.luyin.org/post/2486.html",
"suggest": [
{
"input": [
"廣告"
,
"火紅"
,
"制作"
,
"叫賣"
,
"滅蚊燈"
,
"語音"
,
"下載"
,
"led"
,
"錄音"
,
"滅蚊"
,
"光催化"
,
"催化"
],
"weight": 10
}
,
{
"input": [
"小商品"
,
"廣告"
,
"各類"
,
"火紅"
,
"叫賣"
,
"商品"
,
"小商"
,
"錄音"
],
"weight": 8
}
],
"riqi": "2017-09-04T16:43:20",
"description": "LED光催化滅蚊燈廣告錄音 是廣告錄音網-火紅廣告錄音中一篇關於 各類小商品 的文章,歡迎您閱讀和評論,專業叫賣錄音-廣告錄音-語音廣告制作"
}
}
用Django實現搜索的自動補全功能說明
1.將搜索框綁定一個事件,每輸入一個字觸發這個事件,獲取到輸入框里的內容,用ajax將輸入的詞請求到Django的邏輯處理函數。
2.在邏輯處理函數里,將請求詞用elasticsearch(搜索引擎)的fuzzy模糊查詢,查詢suggest字段里存在請求詞的數據,將查詢到的數據添加到自動補全
html代碼:
<!DOCTYPE html >
<html xmlns="http://www.w3.org/1999/xhtml">
{#引入靜態(tài)文件路徑#}
{% load staticfiles %}
<head>
<meta http-equiv="X-UA-Compatible" content="IE=emulateIE7" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>lcv-search 搜索引擎</title>
<link href="{% static 'css/style.css'%}" rel="stylesheet" type="text/css" />
<link href="{% static 'css/index.css'%}" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="container">
<div id="bd">
<div id="main">
<h1 class="title">
<div class="logo large"></div>
</h1>
<div class="nav ue-clear">
<ul class="searchList">
<li class="searchItem current" data-type="article">文章</li>
<li class="searchItem" data-type="question">問答</li>
<li class="searchItem" data-type="job">職位</li>
</ul>
</div>
<div class="inputArea">
{% csrf_token %}
<input type="text" class="searchInput" />
<input type="button" class="searchButton" onclick="add_search()" />
<ul class="dataList">
<li>如何學好設計</li>
<li>界面設計</li>
<li>UI設計培訓要多少錢</li>
<li>設計師學習</li>
<li>哪里有好的網站</li>
</ul>
</div>
<div class="historyArea">
<p class="history">
<label>熱門搜索:</label>
</p>
<p class="history mysearch">
<label>我的搜索:</label>
<span class="all-search">
<a href="javascript:;">專注界面設計網站</a>
<a href="javascript:;">用戶體驗</a>
<a href="javascript:;">互聯網</a>
<a href="javascript:;">資費套餐</a>
</span>
</p>
</div>
</div><!-- End of main -->
</div><!--End of bd-->
<div class="foot">
<div class="wrap">
<div class="copyright">Copyright ©uimaker.com 版權所有 E-mail:admin@uimaker.com</div>
</div>
</div>
</div>
</body>
<script type="text/javascript" src="{% static 'js/jquery.js'%}"></script>
<script type="text/javascript" src="{% static 'js/global.js'%}"></script>
<script type="text/javascript">
// Endpoints for autocomplete requests and for the result page.
var suggest_url = "/suggest/";
var search_url = "/search/";
// Clicking a category marks it as the current one and unmarks the others.
$('.searchList').on('click', '.searchItem', function () {
    var $allItems = $('.searchList .searchItem');
    $allItems.removeClass('current');
    $(this).addClass('current');
});
// Remove, in place, the first element of arr that loosely equals (==) val.
function removeByValue(arr, val) {
    var idx = 0;
    while (idx < arr.length) {
        if (arr[idx] == val) {
            arr.splice(idx, 1);
            return;
        }
        idx += 1;
    }
}
// 搜索建議
// Search suggestions: on every change of the search box, request
// completions for the typed text and list them under the input.
$(function () {
    $('.searchInput').on('input propertychange', function () {
        var searchText = $(this).val();
        $.ajax({
            cache: false,
            type: 'get',
            dataType: 'json',
            url: suggest_url,
            // Passing the parameters as an object lets jQuery URL-encode them.
            data: {
                s: searchText,
                s_type: $(".searchItem.current").attr('data-type')
            },
            async: true,
            success: function (data) {
                var $list = $(".dataList").empty();
                for (var i = 0; i < data.length; i++) {
                    // Suggestions are titles of crawled pages, i.e. untrusted
                    // text: set them via attr()/text(), never as raw HTML.
                    var $link = $('<a></a>')
                        .attr('href', search_url + '?q=' + encodeURIComponent(data[i]))
                        .text(data[i]);
                    $list.append($('<li></li>').append($link));
                }
                // Show the list only when there is something to suggest.
                if (data.length == 0) {
                    $list.hide();
                } else {
                    $list.show();
                }
            }
        });
    });
});
hideElement($('.dataList'), $('.searchInput'));
</script>
<script>
// Search history lives in localStorage as one comma-joined string; turn it
// back into an array, or start with an empty array when nothing is stored.
var searchArr = localStorage.search ? localStorage.search.split(",") : [];
// Render the stored history.
MapSearchArr();
// Search button handler: record the query in the history (when it has at
// least two characters) and navigate to the result page.
function add_search() {
    var val = $(".searchInput").val();
    if (val.length >= 2) {
        // Move the query to the front of the history without duplicates.
        KillRepeat(val);
        // Persist the history; the array is stored as a comma-joined string.
        localStorage.search = searchArr;
        // Refresh the displayed history.
        MapSearchArr();
    }
    // Encode the parameters so characters such as & or # in the query do not
    // corrupt the URL.
    window.location.href = search_url + '?q=' + encodeURIComponent(val) +
        "&s_type=" + encodeURIComponent($(".searchItem.current").attr('data-type'));
}
// Render up to the five most recent history entries as links.
function MapSearchArr() {
    var $box = $(".mysearch .all-search").empty();
    var shown = Math.min(searchArr.length, 5);
    for (var i = 0; i < shown; i++) {
        // History entries are free text typed by the user: build the link
        // with attr()/text() and an encoded query instead of raw HTML.
        $('<a></a>')
            .attr('href', search_url + '?q=' + encodeURIComponent(searchArr[i]))
            .text(searchArr[i])
            .appendTo($box);
    }
}
//去重
// Put val at the front of the search history, first removing one earlier
// occurrence if the history already contains it.
function KillRepeat(val) {
    var alreadyStored = searchArr.indexOf(val) !== -1;
    if (alreadyStored) {
        removeByValue(searchArr, val);
    }
    searchArr.unshift(val);
}
</script>
</html>
Django路由映射
"""pachong URL Configuration
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/1.10/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: url(r'^$', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.conf.urls import url, include
2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls'))
"""
from django.conf.urls import url
from django.contrib import admin
from app1 import views
urlpatterns = [
    url(r'^admin/', admin.site.urls),
    # Site root and /index/ both render the search page.
    url(r'^$', views.indexluoji),
    url(r'^index/', views.indexluoji),
    # Autocomplete requests from the search box.
    url(r'^suggest/$', views.suggestluoji, name="suggest"),
]
Django靜態文件配置
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.10/howto/static-files/
# URL prefix under which static files are served
STATIC_URL = '/static/'
# Directories that hold the project's static files
STATICFILES_DIRS = [os.path.join(BASE_DIR, 'static')]
備注:搜索自動補全fuzzy查詢
#搜索自動補全fuzzy查詢
POST lagou/biao/_search?pretty
{
"suggest":{ #字段名稱
"my_suggest":{ #自定義變量
"text":"廣告", #搜索詞
"completion":{
"field":"suggest", #搜索字段
"fuzzy":{
"fuzziness":1 #編輯距離
}
}
}
},
"_source":"title"
}
Django邏輯處理文件
from django.shortcuts import render
# Create your views here.
from django.shortcuts import render,HttpResponse
from django.views.generic.base import View
from app1.models import lagouType #導入操作elasticsearch(搜索引擎)類
import json
def indexluoji(request):
    """Render the search home page (``index.html``)."""
    print(request.method)  # debug output: the HTTP method of the request
    return render(request, 'index.html')
def suggestluoji(request):
    """Autocomplete endpoint: return suggested titles as a JSON list.

    Reads the typed text from the ``s`` query parameter and runs a fuzzy
    completion suggest against the ``suggest`` field. An empty ``s`` yields
    an empty JSON list without querying Elasticsearch.
    """
    key_words = request.GET.get('s', '')
    re_datas = []
    if key_words:
        s = lagouType.search()
        s = s.suggest('my_suggest', key_words, completion={
            "field": "suggest",
            "fuzzy": {
                "fuzziness": 2,  # maximum edit distance
            },
            "size": 5,  # at most five suggestions
        })
        suggestions = s.execute_suggest()
        # Collect the title of every matching document.
        for match in suggestions.my_suggest[0].options:
            source = match._source
            re_datas.append(source["title"])
    return HttpResponse(json.dumps(re_datas), content_type="application/json")
最終完成