LLMでプロンプト文字列を作成してstable diffusionで画像生成する(StableDiffusion.NET 3.1対応)

2024年9月1日 2024年9月1日

犬マンマ(doghouse)

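StableDiffusion.NETがFlux.1に対応し、コーディング方法が少し変わったため、サンプルを修正しました。
主な変更点は、モデルの読み込み方法(StableDiffusionModelのコンストラクタ → ModelBuilderによるDiffusionModelの生成)と、パラメータのクラス名(StableDiffusionParameter → DiffusionParameter)です。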

//【旧】
StableDiffusionModel sdModel = await Task.Run(() => new StableDiffusionModel(ModelPath, new ModelParameter { VaePath = VaePath, Schedule = Schedule.Karras, EmbeddingsDirectory = EmbedPath }));

//【新】
DiffusionModel sdModel = await Task.Run(() => ModelBuilder.StableDiffusion(ModelPath).WithMultithreading().WithVae(VaePath).WithSchedule(Schedule.Karras).WithEmbeddingSupport(EmbedPath).Build());

//【旧】
                      objImage = await Task.Run(() => sdModel?.ImageToImage(strPrompt, Image2ImageSource, new StableDiffusionParameter
                        {
                            NegativePrompt = strAntiPrompt,
                            Width = intWidth,
                            Height = intHeight,
                            CfgScale = 7f,
                            SampleSteps = 100,
                            Seed = lngSeed,
                            SampleMethod = Sampler.DPMPP2Mv2,
                            Strength = 0.2f,
                        }));
//【新】
                        objImage = await Task.Run(() => sdModel?.ImageToImage(strPrompt, Image2ImageSource, new DiffusionParameter
                        {
                            NegativePrompt = strAntiPrompt,
                            Width = intWidth,
                            Height = intHeight,
                            CfgScale = 7f,
                            SampleSteps = 100,
                            Seed = lngSeed,
                            SampleMethod = Sampler.DPMPP2Mv2,
                            Strength = 0.25f,
                        }));

NuGet情報

概要
gemma2に日本語で指示してStableDiffusion用のプロンプトを作成し画像生成する。
モデルはBlessing Mixを使用。

using LLama.Common;
using LLama;
using System.Text.RegularExpressions;
using HPPH;
using HPPH.System.Drawing;
using StableDiffusion.NET;

namespace ChatProgram
{
    /// <summary>
    /// Console chat loop: the user describes an image, a local LLM (loaded through LLamaSharp)
    /// rewrites the request as a Stable Diffusion prompt wrapped in 【】, and the prompt is then
    /// rendered several times with StableDiffusion.NET. Each image is saved as a PNG file.
    /// </summary>
    public class Program
    {
        // Image generation settings.
        private const int ImageWidth = 768;
        private const int ImageHeight = 1280;
        private const float CfgScale = 7f;
        private const int SampleSteps = 42;
        private const int ImagesPerPrompt = 10;

        // Negative prompt sent with every generation request.
        private const string NegativePrompt = "(worst quality:2) , (low quality:2) , (normal quality:2) , lowres, ugly face, unclear eyes, bad mouth, bad tooth, bad anatomy, extra legs, (bad fingers, bad hands, missing fingers), negative_hand-neg";

        // Finds the first 【...】 span in the LLM reply; built once instead of once per turn.
        private static readonly Regex PromptPattern = new("【(.+?)】");

        /// <summary>
        /// Process entry point. Awaits <see cref="MainAsync"/> instead of blocking on the task.
        /// </summary>
        /// <param name="args">Command line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            await MainAsync();
        }

        /// <summary>
        /// Loads the LLM and the diffusion model, then runs the interactive loop until the user
        /// types <c>exit</c> or standard input is closed. Any exception is printed in red and
        /// ends the session.
        /// </summary>
        /// <returns>A task that completes when the session ends.</returns>
        public static async Task MainAsync()
        {
            // Both roots come from user-level environment variables. A missing variable falls
            // back to "" so the paths become relative, exactly as plain concatenation did.
            string modelRoot = Environment.GetEnvironmentVariable("LLMPATH", System.EnvironmentVariableTarget.User) ?? "";
            string outputRoot = Environment.GetEnvironmentVariable("TESTDATA", System.EnvironmentVariableTarget.User) ?? "";

            // Path.Combine inserts a separator only when the root does not already end with one.
            string strPath = Path.Combine(modelRoot, @"dahara1\gemma-2-27b-it-gguf-japanese-imatrix\gemma-2-27b-it.f16.Q6_k.gguf");
            string ModelPath = Path.Combine(modelRoot, @"stable-diffusion\blessingMixAkaBracing_v1VAE.safetensors");
            string VaePath = Path.Combine(modelRoot, @"stable-diffusion\vae-ft-mse-840000-ema-pruned.safetensors");
            string EmbedPath = Path.Combine(modelRoot, @"stable-diffusion\negative_hand-neg.pt");

            Console.ForegroundColor = ConsoleColor.Blue;
            try
            {
                // LLM setup.
                ModelParams modPara = new(strPath)
                {
                    ContextSize = 1024,
                    Seed = 1337
                };
                using LLamaWeights llmWeit = LLamaWeights.LoadFromFile(modPara);
                using LLamaContext llmContx = llmWeit.CreateContext(modPara);
                InteractiveExecutor itrEx = new(llmContx);

                // NOTE(review): "absurbres" in the example prompt below looks like a typo of the
                // "absurdres" tag — confirm before changing, since the LLM copies this example.
                ChatHistory chtHis = new();
                chtHis.AddMessage(AuthorRole.System, "あなたは優秀なAI画像生成プロンプトエンジニアです。ユーザーが要求した内容を忠実にstable diffusion用のプロンプトに変換してください。出力の形式は、例題のようにプロンプト文字列を【】で囲ってください。 例）【(best quality, masterpiece, absurbres, super-resolution), (photorealistic,realistic:1.4)】");
                ChatSession chtSess = new(itrEx, chtHis);
                var varHidewd = new LLamaTransforms.KeywordTextOutputStreamTransform(["User:", "Assistant:"]);
                chtSess.WithOutputTransform(varHidewd);
                InferenceParams infPara = new()
                {
                    Temperature = 0.1f,
                    AntiPrompts = new List<string> { "User:" }
                };

                // Stable Diffusion setup. The model is disposed together with the LLM objects
                // when this try block is left.
                using DiffusionModel sdModel = await Task.Run(() => ModelBuilder.StableDiffusion(ModelPath).WithMultithreading().WithVae(VaePath).WithSchedule(Schedule.Karras).WithEmbeddingSupport(EmbedPath).Build());

                while (true)
                {
                    // User's turn.
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.Write("\nUser: ");
                    string? strInput = Console.ReadLine();

                    // null means standard input was closed; treat it like "exit" instead of
                    // looping forever on empty messages.
                    if (strInput is null || strInput == "exit")
                    {
                        break;
                    }
                    ChatHistory.Message msg = new(AuthorRole.User, strInput);

                    // AI's turn: stream the reply to the console while collecting it.
                    Console.ForegroundColor = ConsoleColor.Yellow;
                    string strMsg = "";
                    await foreach (string strAns in chtSess.ChatAsync(msg, infPara))
                    {
                        Console.Write(strAns);
                        strMsg += strAns;
                    }

                    string strPrompt = ExtractPrompt(strMsg);
                    if (strPrompt == "")
                    {
                        // The reply had no usable 【...】 section; ask the user again rather
                        // than failing the whole session.
                        Console.WriteLine("\n**プロンプトを抽出できませんでした**");
                        continue;
                    }

                    for (int i = 0; i < ImagesPerPrompt; i++)
                    {
                        Console.WriteLine($"**{i+1}枚目画像生成 開始**");

                        // A fresh random seed per image so every picture differs.
                        long lngSeed = Random.Shared.Next(0, int.MaxValue);
                        IImage? objImage = await Task.Run(() => sdModel.TextToImage(strPrompt, new DiffusionParameter
                        {
                            NegativePrompt = NegativePrompt,
                            Width = ImageWidth,
                            Height = ImageHeight,
                            CfgScale = CfgScale,
                            SampleSteps = SampleSteps,
                            Seed = lngSeed,
                            SampleMethod = Sampler.DPMPP2Mv2
                        }));

                        if (objImage is null)
                        {
                            // Nothing to save; report it and move on to the next image.
                            Console.WriteLine($"**{i+1}枚目画像生成 失敗**");
                            continue;
                        }

                        // "HH" (24-hour clock) keeps file names unique and sortable across AM/PM.
                        string savePath = Path.Combine(outputRoot, $"Sd{DateTime.Now.ToString("yyyyMMddHHmmssfff")}.png");
                        File.WriteAllBytes(savePath, objImage.ToPng());
                        Console.WriteLine($"**{i+1}枚目画像生成 終了**");
                    }
                }
            }
            catch (Exception ex)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine(ex.ToString());
            }
        }

        /// <summary>
        /// Extracts the text of the first 【...】 span in <paramref name="reply"/>.
        /// </summary>
        /// <param name="reply">Complete text returned by the LLM for one turn.</param>
        /// <returns>
        /// The trimmed text with all 【 and 】 characters removed, or an empty string when the
        /// reply contains no such span.
        /// </returns>
        private static string ExtractPrompt(string reply)
        {
            Match match = PromptPattern.Match(reply);
            if (!match.Success)
            {
                return "";
            }

            return match.Value.Replace("【", "").Replace("】", "").Trim();
        }
    }
}

実行結果
「（ビキニを着た胸が大きい女性）と（太った中年の男性）が腕を組んでいる画像を生成してください。」