デスクトップスクリーンショットとLlavaを使った簡単なAIチャットC#プログラム

2025年4月11日 2025年4月15日

犬マンマ(doghouse)

前々回のデスクトップスクリーンショットとLlavaを組み合わせたチャットプログラムです。

Nuget情報

Program.cs(2025/4/12更新)
デスクトップを見ながらAIとチャットするプログラムです。
※2025/4/14 一回目に読み込んだ画像しか認識していないようです。改善案を模索中。
※2025/4/15 ChatHistoryをやめてテキストにしました。システムプロンプトもコメントで止めています。LLAVAが言うことを聞いてくれない…

using LLama.Common;
using LLama;
using LLama.Native;
using LLama.Sampling;
using System.Drawing;
using System.Drawing.Imaging;
using Spectre.Console;
using System.Runtime.InteropServices;

namespace ChatProgram
{
    public class Program
    {
        /// <summary>
        /// Program entry point: runs the interactive chat loop until it finishes.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static async Task Main(string[] args)
        {
            // Await rather than block with Task.Wait(): no thread sits blocked on the
            // task and a failure surfaces as the original exception instead of being
            // wrapped in an AggregateException.
            await MainAsync();
        }

        /// <summary>
        /// Runs the desktop chat loop: loads the LLM weights and the LLaVA projector,
        /// then repeatedly captures the desktop, previews it in the console, reads a
        /// line from the user and streams the model's reply for that screenshot.
        /// The loop ends when the user types <c>exit</c> or standard input is closed.
        /// </summary>
        /// <returns>A task that completes when the chat loop has ended.</returns>
        /// <remarks>
        /// Any exception is caught here and only its message is written to the console.
        /// </remarks>
        public static async Task MainAsync()
        {
            try
            {
                // Location of the LLM model files (strMdl = weights, strClp = projector).
                // The commented-out pairs below are alternative models kept for quick switching.
                //string strMdl = @"E:\lm-studio\cjpais\llava-1.6-mistral-7b-gguf\llava-1.6-mistral-7b.Q8_0.gguf";
                //string strClp = @"E:\lm-studio\cjpais\llava-1.6-mistral-7b-gguf\mmproj-model-f16.gguf";

                //string strMdl = @"E:\lm-studio\unsloth\gemma-3-12b-it-GGUF\gemma-3-12b-it-Q8_0.gguf";
                //string strClp = @"E:\lm-studio\unsloth\gemma-3-12b-it-GGUF\mmproj-BF16.gguf";
                //string strMdl = @"E:\lm-studio\xtuner\llava-llama-3-8b-v1_1-gguf\llava-llama-3-8b-v1_1-int4.gguf";
                //string strClp = @"E:\lm-studio\xtuner\llava-llama-3-8b-v1_1-gguf\llava-llama-3-8b-v1_1-mmproj-f16.gguf";
                string strMdl = @"E:\lm-studio\cjpais\llava-v1.6-vicuna-13b-gguf\llava-v1.6-vicuna-13b.Q8_0.gguf";
                string strClp = @"E:\lm-studio\cjpais\llava-v1.6-vicuna-13b-gguf\mmproj-model-f16.gguf";
                //string strMdl = @"E:\lm-studio\cjpais\llava-v1.6-34B-gguf\llava-v1.6-34b.Q4_K_M.gguf";
                //string strClp = @"E:\lm-studio\cjpais\llava-v1.6-34B-gguf\mmproj-model-f16.gguf";

                Console.ForegroundColor = ConsoleColor.Green;

                // Load the LLM model and configure its parameters
                ModelParams modPara = new(strMdl)
                {
                    ContextSize = 4096,
                    GpuLayerCount = 60,
                };

                using LLamaWeights llmWeit = LLamaWeights.LoadFromFile(modPara);
                using LLamaContext llmContx = llmWeit.CreateContext(modPara);
                using var clipModel = LLavaWeights.LoadFromFile(strClp);
                InteractiveExecutor itrEx = new(llmContx, clipModel);

                // System prompt + steering of the chat (currently disabled).
                // The disabled lines below are kept verbatim, including the prompt text.
                //string strHis = "";
                // Initially the chat log is seeded with the character setup and the opening exchange
                //strHis +=  "<image>\nSYSTEM: あなたは、ユーザーと楽しく会話する女性を演じてね。" +
                //            "名前は、マイだよ。" +
                //            "マイは、明るくて、やさしい性格の女性なの。年齢は24歳なんだよ。" +
                //            "マイは、誰にでもタメ口で話すの。" +
                //            "マイは、一人称をわたしと言うよ。" +
                //            "マイは、恥ずかしがりやだけどエッチに興味があるのね。" +
                //            "マイは、応答の長さはできるだけ短く答えるよ。" +
                //            "マイは、好奇心旺盛だからユーザーに色々質問するの。";
                //strHis += "\nUSER: はじめまして、オレの名前は犬マンマです。呼びにくいと思うからマー君と呼んで。";
                //strHis += "\nASSISTANT: はじめまして、わたしの名前はマイです。わかったマー君。";

                InferenceParams infPara = new()
                {
                    SamplingPipeline = new DefaultSamplingPipeline()
                    {
                        Temperature = 0.6f,
                        Seed = 1337,
                        FrequencyPenalty = 1,
                    },
                    // Stop generating as soon as the model starts writing the user's next turn.
                    AntiPrompts = new List<string> { "\nUSER:", "\nUser:" },
                    MaxTokens = 256,
                };

                while (true)
                {
                    // User's turn
                    Console.ForegroundColor = ConsoleColor.White;

                    // Capture the desktop and encode it as PNG exactly once. The GDI+ image
                    // and the stream are released as soon as the bytes have been extracted,
                    // so nothing leaks on 'exit', on end of input, or when an exception is thrown.
                    byte[] pngBytes;
                    using (Image img = CaptureScreen())
                    using (MemoryStream ms = new MemoryStream())
                    {
                        img.Save(ms, ImageFormat.Png);
                        pngBytes = ms.ToArray();
                    }

                    // Show a small preview of what the model is about to see.
                    CanvasImage consoleImage = new CanvasImage(pngBytes);
                    consoleImage.MaxWidth = 50;
                    AnsiConsole.Write(consoleImage);

                    Console.Write("\nUser: ");
                    var line = Console.ReadLine();
                    // Typing 'exit' ends the chat. A null line means standard input was
                    // closed; treating it as empty input would make the loop spin forever.
                    if (line is null || line == "exit")
                    {
                        break;
                    }

                    string strInput = "\nUSER:<image>\n" + line + " \nASSISTANT: ";
                    //strHis += strInput;

                    // Attach the image.
                    //itrEx.Images.RemoveAll(itrEx.Images.Contains);
                    // NOTE(review): presumably removes the cached tokens of sequence 0 so the
                    // new screenshot is evaluated from a clean context — confirm against LLamaSharp.
                    itrEx.Context.NativeHandle.KvCacheRemove(LLamaSeqId.Zero, -1, -1);
                    // Drop screenshots queued on earlier turns so only the current capture is
                    // attached and the list does not grow by one image per turn.
                    // NOTE(review): whether the executor empties Images itself is not visible
                    // here — confirm against LLamaSharp.
                    itrEx.Images.Clear();
                    itrEx.Images.Add(pngBytes);

                    // AI's turn
                    Console.ForegroundColor = ConsoleColor.Yellow;
                    // Collected reply; only needed by the disabled chat-history code below.
                    string strAns = "";
                    await foreach (var text in itrEx.InferAsync(strInput, infPara))
                    {
                        Console.Write(text);
                        strAns += text;
                    }
                    //strHis += strAns;
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
            finally
            {
                // Do not leave the terminal in the chat colours after the program ends.
                Console.ResetColor();
            }

        }
        /// <summary>
        /// Captures the window returned by <c>GetDesktopWindow</c> by delegating to
        /// <see cref="CaptureWindow(IntPtr)"/>.
        /// </summary>
        /// <returns>The image produced by <see cref="CaptureWindow(IntPtr)"/>.</returns>
        private static Image CaptureScreen() => CaptureWindow(GetDesktopWindow());

        /// <summary>
        /// Copies the current contents of a window into a new GDI+ image using BitBlt.
        /// </summary>
        /// <param name="handle">Handle of the window to capture.</param>
        /// <returns>A new <see cref="Image"/>; the caller is responsible for disposing it.</returns>
        /// <exception cref="InvalidOperationException">
        /// A native call failed or the window rectangle is empty.
        /// </exception>
        private static Image CaptureWindow(IntPtr handle)
        {
            IntPtr hdcSrc = GetWindowDC(handle);
            if (hdcSrc == IntPtr.Zero)
            {
                throw new InvalidOperationException("GetWindowDC failed: no device context for the window.");
            }

            IntPtr hdcDest = IntPtr.Zero;
            IntPtr hBitmap = IntPtr.Zero;
            try
            {
                // The struct starts zeroed, so a failed GetWindowRect shows up as an empty size.
                RECT windowRect = new RECT();
                GetWindowRect(handle, ref windowRect);
                int width = windowRect.right - windowRect.left;
                int height = windowRect.bottom - windowRect.top;
                if (width <= 0 || height <= 0)
                {
                    throw new InvalidOperationException("GetWindowRect failed or the window has an empty rectangle.");
                }

                hdcDest = CreateCompatibleDC(hdcSrc);
                if (hdcDest == IntPtr.Zero)
                {
                    throw new InvalidOperationException("CreateCompatibleDC failed.");
                }

                hBitmap = CreateCompatibleBitmap(hdcSrc, width, height);
                if (hBitmap == IntPtr.Zero)
                {
                    throw new InvalidOperationException("CreateCompatibleBitmap failed.");
                }

                // Select the bitmap into the memory DC, copy the pixels, then restore the
                // previous object so the bitmap is no longer selected when it is converted.
                IntPtr hOld = SelectObject(hdcDest, hBitmap);
                bool copied = BitBlt(hdcDest, 0, 0, width, height, hdcSrc, 0, 0, SRCCOPY);
                SelectObject(hdcDest, hOld);
                if (!copied)
                {
                    throw new InvalidOperationException("BitBlt failed: the window contents could not be copied.");
                }

                return Image.FromHbitmap(hBitmap);
            }
            finally
            {
                // Release every native handle on all paths, including when
                // Image.FromHbitmap or one of the checks above throws.
                if (hBitmap != IntPtr.Zero)
                {
                    DeleteObject(hBitmap);
                }

                if (hdcDest != IntPtr.Zero)
                {
                    DeleteDC(hdcDest);
                }

                ReleaseDC(handle, hdcSrc);
            }
        }

        /// <summary>
        /// Rectangle filled in by <c>GetWindowRect</c>: four sequential 32-bit integers
        /// giving the left, top, right and bottom edges.
        /// </summary>
        [StructLayout(LayoutKind.Sequential)]
        private struct RECT
        {
            public int left;
            public int top;
            public int right;
            public int bottom;
        }

        // Raster-operation code passed as BitBlt's dwRop parameter.
        private const int SRCCOPY = 0x00CC0020;

        // --- gdi32.dll ---

        [DllImport("gdi32.dll")]
        private static extern bool BitBlt(IntPtr hObject, int nXDest, int nYDest,
            int nWidth, int nHeight, IntPtr hObjectSource,
            int nXSrc, int nYSrc, int dwRop);
        [DllImport("gdi32.dll")]
        private static extern IntPtr CreateCompatibleBitmap(IntPtr hDC, int nWidth,
            int nHeight);
        [DllImport("gdi32.dll")]
        private static extern IntPtr CreateCompatibleDC(IntPtr hDC);
        [DllImport("gdi32.dll")]
        private static extern bool DeleteDC(IntPtr hDC);
        [DllImport("gdi32.dll")]
        private static extern bool DeleteObject(IntPtr hObject);
        [DllImport("gdi32.dll")]
        private static extern IntPtr SelectObject(IntPtr hDC, IntPtr hObject);

        // --- user32.dll ---

        [DllImport("user32.dll")]
        private static extern IntPtr GetDesktopWindow();
        [DllImport("user32.dll")]
        private static extern IntPtr GetWindowDC(IntPtr hWnd);
        // ReleaseDC returns a 32-bit int (1 = released, 0 = not released), not a pointer.
        [DllImport("user32.dll")]
        private static extern int ReleaseDC(IntPtr hWnd, IntPtr hDC);
        // GetWindowRect returns a Win32 BOOL, not a pointer.
        [DllImport("user32.dll")]
        [return: MarshalAs(UnmanagedType.Bool)]
        private static extern bool GetWindowRect(IntPtr hWnd, ref RECT rect);
        [DllImport("user32.dll", CharSet = CharSet.Auto)]
        private static extern IntPtr FindWindow(string lpClassName, string lpWindowName);

    }
}

実行結果
マルチモニターなのでサブモニターで実行してメインモニターにネットで見つけた馬の写真をブラウザに写しています。
ちゃんとウェブページだというのを認識しているようです。