AI LLM與搜尋引擎的結合 - 搜尋功能實作

前一篇文章提到AI可結合搜尋引擎,進而利用LLM的特性來把問題拆解為關鍵字,本篇文章則著重在如何實作出簡單的搜尋引擎。
這裡使用的套件是Lucene.Net(本文使用4.8版),雖然與Java版的Lucene有不少差距,但基本功能都有。下列是各步驟的程式碼,首先是搜尋管理類別的欄位與建構式:
/// <summary>
/// In-memory search manager built on Lucene.Net.
/// Owns a <see cref="RAMDirectory"/>, the analyzer, and the <see cref="IndexWriter"/>
/// that writes into that directory, and releases all three when disposed.
/// </summary>
public class LBSearchManager : ILBSearchManager, System.IDisposable
{
    // Kept so the analyzer handed to the writer can be released in Dispose().
    private readonly Analyzer _analyzer;

    // NOTE: the field name "_writet" (sic) is kept as-is because other members of
    // this class refer to it by that name.
    private readonly IndexWriter _writet;
    private readonly RAMDirectory _directory;
    private const LuceneVersion lv = LuceneVersion.LUCENE_48;
    private bool _disposed;

    /// <summary>
    /// Creates the in-memory directory and opens an index writer on it,
    /// using a <see cref="StandardAnalyzer"/> for tokenization.
    /// </summary>
    public LBSearchManager()
    {
        _analyzer = new StandardAnalyzer(lv);
        _directory = new RAMDirectory();
        var config = new IndexWriterConfig(lv, _analyzer);
        _writet = new IndexWriter(_directory, config);
    }

    /// <summary>
    /// Releases the index writer, the analyzer and the in-memory directory.
    /// Calling it more than once is a no-op.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        // Dispose the writer first: it still needs the directory while shutting down.
        _writet.Dispose();
        _analyzer.Dispose();
        _directory.Dispose();
        System.GC.SuppressFinalize(this);
    }
}
上面先把必要的物件宣告並初始化好,接下來要替資料建立索引。先定義資料的結構:
/// <summary>
/// Sample record that gets indexed by the search manager.
/// </summary>
public class TestData
{
    /// <summary>Short text value; indexed and stored under the field name "TextField1".</summary>
    public string TextField1 { get; set; } = string.Empty;

    /// <summary>
    /// Numeric value attached to the record. The sample data and the indexing code
    /// both use this property, so it must be declared here.
    /// </summary>
    public int NumField1 { get; set; }

    /// <summary>Long text value; indexed and stored under the field name "LongTextField".</summary>
    public string LongTextField { get; set; } = string.Empty;
}
然後建假資料並建立索引:
/// <summary>
/// Builds the fixed list of sample records used to populate the index.
/// Each record uses the same text for both TextField1 and LongTextField.
/// </summary>
/// <returns>A new list containing six sample records.</returns>
private List<TestData> CreateTestDatas()
{
    // Small factory: the title is copied into both text fields.
    TestData Make(string title, int number)
    {
        var item = new TestData();
        item.TextField1 = title;
        item.NumField1 = number;
        item.LongTextField = title;
        return item;
    }

    var samples = new List<TestData>();
    samples.Add(Make("C#課程", 1));
    samples.Add(Make("從零開始認識C#", 2));
    samples.Add(Make("進階C#教材", 3));
    samples.Add(Make("21天學會C#程式語言", 4));
    samples.Add(Make("21天學會Java程式語言", 4));
    samples.Add(Make("Python入門", 4));
    return samples;
}
/// <summary>
/// Converts every sample record into a Lucene document, adds all documents
/// to the index writer in one call, and commits the writer.
/// </summary>
public void CreateIndex()
{
    var documents = new List<Document>();

    foreach (TestData record in CreateTestDatas())
    {
        var doc = new Document();
        // Both text fields are analyzed and stored so their values can be read back from hits.
        doc.Add(new TextField(nameof(TestData.TextField1), record.TextField1, Field.Store.YES));
        doc.Add(new NumericDocValuesField(nameof(TestData.NumField1), record.NumField1));
        doc.Add(new TextField(nameof(TestData.LongTextField), record.LongTextField, Field.Store.YES));
        documents.Add(doc);
    }

    _writet.AddDocuments(documents);
    _writet.Commit();
}
接下來是搜尋的method:
/// <summary>
/// Searches the "TextField1" field of the in-memory index and returns the stored
/// "TextField1" value of every hit (at most 1000 hits).
/// </summary>
/// <param name="keyword">
/// Query text in Lucene classic query-parser syntax. Leading and trailing whitespace
/// is trimmed. A null, empty or whitespace-only value yields an empty result.
/// </param>
/// <returns>The stored "TextField1" values of the matching documents, in hit order.</returns>
/// <remarks>
/// The query parser may throw if <paramref name="keyword"/> is not valid query syntax;
/// that exception is not caught here.
/// </remarks>
public List<string> Search(string keyword)
{
    const int MaxHits = 1000;
    var results = new List<string>();

    // Nothing to search for: return early instead of handing the parser an empty query.
    if (string.IsNullOrWhiteSpace(keyword))
    {
        return results;
    }

    // `using` guarantees the analyzer and reader are released even if parsing or searching throws.
    using Analyzer analyzer = new StandardAnalyzer(lv);
    using DirectoryReader dirReader = DirectoryReader.Open(_directory);

    var searcher = new IndexSearcher(dirReader);
    string[] fieldNames = { "TextField1" };
    var parser = new MultiFieldQueryParser(lv, fieldNames, analyzer);
    Query query = parser.Parse(keyword.Trim());

    foreach (ScoreDoc hit in searcher.Search(query, MaxHits).ScoreDocs)
    {
        Document doc = searcher.Doc(hit.Doc);
        results.Add(doc.Get("TextField1"));
    }

    return results;
}
再來就是直接跟AI結合:
// Demo flow: ask the LLM to turn the user's question into search keywords,
// run those keywords through the Lucene-based search manager, then ask the LLM
// to answer the question based on the search results.
var builder = Microsoft.Extensions.Hosting.Host.CreateApplicationBuilder();
string model = _modelName;
builder.Services.AddChatClient(new OllamaSharp.OllamaApiClient(new Uri("http://localhost:11434"), model));
using var app = builder.Build();
var chatClient = app.Services.GetRequiredService<IChatClient>();

string question = "請幫我搜尋C#的入門教材";
string prompt = $"你現在是一個搜尋引擎專家,請把使用者的問題轉成搜尋引擎使用的關鍵字,並以下列JSON格式表示:\n" +
"{\"Query\": \"使用者的問題\"}\n" +
$"使用者的問題如下:{question}";

// Step 1: let the model produce the keyword JSON.
var chatCompletion = await chatClient.GetResponseAsync(prompt);
List<string> args = new List<string>();
Console.WriteLine("Arg:");
foreach (var message in chatCompletion.Messages)
{
    var queryMessage = System.Text.Json.JsonSerializer.Deserialize<QueryMessage>(message.Text);
    var query = queryMessage?.Query;

    // Deserialize can return null (e.g. for the JSON literal "null"), and the model
    // may leave Query empty; only keep usable keywords.
    if (!string.IsNullOrWhiteSpace(query))
    {
        args.Add(query);
    }

    Console.Write(message.Text);
}
Console.WriteLine();

// Step 2: search. The index must be built and committed before the first search,
// otherwise there is no index in the directory for the reader to open.
LBSearchManager searchManager = new LBSearchManager();
searchManager.CreateIndex();
var ans = args.Count > 0
    ? searchManager.Search(string.Join(',', args))
    : new List<string>();

// Step 3: hand the search results back to the model and print its answer.
string prompt2 = $"根據以下搜尋結果,回答使用者的問題:{question}\n" +
"搜尋結果內容如下:\n" + string.Join('\n', ans);
chatCompletion = await chatClient.GetResponseAsync(prompt2);
Console.WriteLine("Final Ans:");
foreach (var message in chatCompletion.Messages)
{
    Console.Write(message.Text);
}
Console.WriteLine();
上面的程式碼,就是單純把搜尋引擎的結果塞給AI,請它做整理並回答。