前一篇文章提到AI可結合搜尋引擎,進而利用LLM的特性來把問題拆解為關鍵字,本篇文章則著重在如何實作出簡單的搜尋引擎。

在這裡,會用到的套件為Lucene.Net,雖然跟Java版本差了不少,但基本功能都有,下列是各步驟的程式碼:

/// <summary>
/// Minimal in-memory search engine built on Lucene.Net.
/// Owns a <see cref="RAMDirectory"/> and the <see cref="IndexWriter"/> that writes to it.
/// </summary>
public class LBSearchManager : ILBSearchManager, System.IDisposable
{
	// Both fields are assigned exactly once in the constructor, hence readonly.
	private readonly IndexWriter _writet;
	private readonly RAMDirectory _directory;
	private const LuceneVersion lv = LuceneVersion.LUCENE_48;

	/// <summary>
	/// Creates the in-memory directory and an index writer configured with a
	/// <see cref="StandardAnalyzer"/>. No documents are indexed at this point.
	/// </summary>
	public LBSearchManager()
	{
		Analyzer a = new StandardAnalyzer(lv);
		_directory = new RAMDirectory();
		var config = new IndexWriterConfig(lv, a);
		_writet = new IndexWriter(_directory, config);
	}

	/// <summary>
	/// Releases the index writer and then the directory it writes to.
	/// </summary>
	public void Dispose()
	{
		// Dispose the writer before the directory it depends on.
		_writet.Dispose();
		_directory.Dispose();
	}
}

上面先把必要的物件都宣告好,接下來要替資料建立索引,因此先定義資料的格式:

/// <summary>
/// Sample record that gets indexed by the search engine.
/// </summary>
public class TestData
{
	/// <summary>Short text field; indexed and returned by searches.</summary>
	public string TextField1 { get; set; } = string.Empty;

	/// <summary>
	/// Numeric field. It is assigned by the sample data and read when the index is built,
	/// so it must be declared here for those snippets to compile.
	/// </summary>
	public int NumField1 { get; set; }

	/// <summary>Long text field; indexed alongside <see cref="TextField1"/>.</summary>
	public string LongTextField { get; set; } = string.Empty;
}

然後建假資料並建立索引:

/// <summary>
/// Builds the fixed set of sample records used to populate the index.
/// </summary>
/// <returns>A new list with six <see cref="TestData"/> entries.</returns>
private List<TestData> CreateTestDatas()
{
	// Every sample uses the same text for TextField1 and LongTextField,
	// so only the text and the number need to be listed.
	var samples = new (string Text, int Number)[]
	{
		("C#課程", 1),
		("從零開始認識C#", 2),
		("進階C#教材", 3),
		("21天學會C#程式語言", 4),
		("21天學會Java程式語言", 4),
		("Python入門", 4),
	};

	var records = new List<TestData>();
	foreach (var (text, number) in samples)
	{
		var record = new TestData();
		record.TextField1 = text;
		record.NumField1 = number;
		record.LongTextField = text;
		records.Add(record);
	}

	return records;
}

/// <summary>
/// Converts every sample record into a Lucene document, adds them all to the
/// index in a single call, and commits the writer.
/// </summary>
public void CreateIndex()
{
	var documents = new List<Document>();

	foreach (TestData item in CreateTestDatas())
	{
		var document = new Document();
		// Field names are the property names of TestData.
		document.Add(new TextField(nameof(TestData.TextField1), item.TextField1, Field.Store.YES));
		document.Add(new NumericDocValuesField(nameof(TestData.NumField1), item.NumField1));
		document.Add(new TextField(nameof(TestData.LongTextField), item.LongTextField, Field.Store.YES));
		documents.Add(document);
	}

	_writet.AddDocuments(documents);
	_writet.Commit();
}

接下來是搜尋的method:

/// <summary>
/// Searches the TextField1 field of the index for the given keyword.
/// </summary>
/// <param name="keyword">
/// Query text handed to the query parser after trimming. A null, empty or
/// whitespace-only value yields an empty result instead of throwing.
/// </param>
/// <returns>
/// The stored TextField1 value of each matching document (at most 1000),
/// in the order the searcher returned them.
/// </returns>
public List<string> Search(string keyword)
{
	const string fieldName = nameof(TestData.TextField1);
	var results = new List<string>();

	// Nothing to search for: avoids calling Trim() on null and handing the parser an empty query.
	if (string.IsNullOrWhiteSpace(keyword))
	{
		return results;
	}

	// using declarations release the analyzer and the reader even when parsing or searching throws.
	using Analyzer a = new StandardAnalyzer(lv);
	using var dirReader = DirectoryReader.Open(_directory);
	var searcher = new IndexSearcher(dirReader);

	string[] fnames = { fieldName };
	var multiFieldQP = new MultiFieldQueryParser(lv, fnames, a);
	Query query = multiFieldQP.Parse(keyword.Trim());
	ScoreDoc[] docs = searcher.Search(query, null, 1000).ScoreDocs;

	foreach (ScoreDoc hit in docs)
	{
		Document d = searcher.Doc(hit.Doc);
		results.Add(d.Get(fieldName));
	}

	return results;
}

再來就是直接跟AI結合:

// Wire up a chat client that talks to a local Ollama server.
var builder = Microsoft.Extensions.Hosting.Host.CreateApplicationBuilder();
string model = _modelName;
builder.Services.AddChatClient(new OllamaSharp.OllamaApiClient(new Uri("http://localhost:11434"), model));
var app = builder.Build();
var chatClient = app.Services.GetRequiredService<IChatClient>();
string question = "請幫我搜尋C#的入門教材";

// Step 1: ask the model to turn the question into search keywords, returned as JSON.
string prompt = $"你現在是一個搜尋引擎專家,請把使用者的問題轉成搜尋引擎使用的關鍵字,並以下列JSON格式表示:\n" +
    "{\"Query\": \"使用者的問題\"}\n" +
    $"使用者的問題如下:{question}";
var chatCompletion = await chatClient.GetResponseAsync(prompt);

List<string> args = new List<string>();
Console.WriteLine("Arg:");
foreach (var message in chatCompletion.Messages)
{
    Console.Write(message.Text);
    try
    {
        var queryMessage = System.Text.Json.JsonSerializer.Deserialize<QueryMessage>(message.Text);
        // Deserialize may return null (e.g. for the JSON literal "null"), so do not dereference blindly.
        var extracted = queryMessage?.Query;
        if (!string.IsNullOrWhiteSpace(extracted))
        {
            args.Add(extracted);
        }
    }
    catch (System.Text.Json.JsonException ex)
    {
        // The model is not guaranteed to answer with valid JSON; skip this message and keep going.
        Console.Error.WriteLine($"Could not parse the model output as JSON: {ex.Message}");
    }
}
Console.WriteLine();

// If no usable keyword was extracted, fall back to searching with the original question.
string keyword = args.Count > 0 ? string.Join(',', args) : question;

LBSearchManager searchManager = new LBSearchManager();
// A new LBSearchManager holds no documents; the index must be built before it is searched.
searchManager.CreateIndex();
var ans = searchManager.Search(keyword);

// Step 2: give the search results back to the model and let it answer the question.
string prompt2 = $"根據以下搜尋結果,回答使用者的問題:{question}\n" +
    "搜尋結果內容如下:\n" + string.Join('\n', ans);
chatCompletion = await chatClient.GetResponseAsync(prompt2);
Console.WriteLine("Final Ans:");
foreach (var message in chatCompletion.Messages)
{
    Console.Write(message.Text);
}
Console.WriteLine();

上面的程式碼,就是單純把搜尋引擎的結果塞給AI,請它做整理並回答。