Skip to content

Delphi中的AI图像处理:从基础到高级应用

随着人工智能技术的快速发展,AI图像处理已经成为现代应用程序的重要组成部分。本文将探讨如何在Delphi应用中集成各种AI图像处理技术,从基础的图像预处理,到图像分类,再到高级的图像生成。

为什么在Delphi中使用AI图像处理?

Delphi作为一个成熟的开发平台,拥有强大的GUI开发能力和快速的编译速度,非常适合开发图像处理应用。将AI图像处理技术集成到Delphi应用中,可以带来以下优势:

  1. 增强用户体验:通过智能图像处理功能提升应用的用户体验
  2. 自动化图像处理流程:减少手动操作,提高效率
  3. 实现高级功能:如人脸识别、对象检测、图像分类等
  4. 保持竞争力:为传统应用注入现代AI能力

技术准备

在开始之前,你需要准备以下内容:

  1. Delphi开发环境:建议使用Delphi 10.4或更高版本
  2. 图像处理库:如VCL自带的Vcl.Imaging单元(jpeg、pngimage);如需读取EXIF元数据,可配合CCR Exif
  3. AI模型接口:TensorFlow、ONNX Runtime或云服务API
  4. 基础图像处理知识:了解像素操作、滤镜和图像格式

基础:图像预处理

在应用AI模型之前,通常需要对图像进行预处理。以下是一个简单的图像预处理类:

unit ImagePreprocessor;

interface

uses
  System.SysUtils, System.Classes, Vcl.Graphics, Vcl.Imaging.jpeg,
  Vcl.Imaging.pngimage, System.Math;

type
  // Owns one TBitmap and applies preprocessing steps to it in place.
  // Typical use: LoadFromFile, call one or more preprocessing methods,
  // then read Image or call SaveToFile.
  TImagePreprocessor = class
  private
    FImage: TBitmap; // working bitmap; created in Create, released in Destroy
  public
    constructor Create;
    destructor Destroy; override;

    // JPEG and PNG files go through TJPEGImage / TPngImage; any other
    // extension is handed to TBitmap directly.
    procedure LoadFromFile(const FileName: string);
    procedure SaveToFile(const FileName: string);

    // Preprocessing operations (each one modifies the owned bitmap)
    procedure Resize(Width, Height: Integer);
    procedure Normalize; // forces pf24bit; channel values stay 0..255 bytes
    procedure GrayscaleConversion;
    procedure AdjustBrightness(Value: Integer);
    procedure AdjustContrast(Value: Double);

    // Read-only access to the working bitmap; the preprocessor keeps ownership.
    property Image: TBitmap read FImage;
  end;

implementation

{ TImagePreprocessor }

// Creates the preprocessor together with the (initially empty) bitmap it owns.
constructor TImagePreprocessor.Create;
begin
  inherited Create;
  FImage := TBitmap.Create;
end;

// Releases the owned bitmap before the inherited destructor runs.
destructor TImagePreprocessor.Destroy;
begin
  FreeAndNil(FImage);
  inherited Destroy;
end;

// Loads FileName into the working bitmap.
//
// The decoder is chosen from the (case-insensitive) file extension:
//   .jpg / .jpeg -> TJPEGImage
//   .png         -> TPngImage
//   anything else is passed to TBitmap.LoadFromFile.
// Exceptions raised by the decoders or by file access propagate to the caller;
// in that case the working bitmap is left as it was.
procedure TImagePreprocessor.LoadFromFile(const FileName: string);
var
  Ext: string;
  JPG: TJPEGImage;
  PNG: TPngImage;
begin
  Ext := LowerCase(ExtractFileExt(FileName));

  // '.jpeg' is just as common as '.jpg'; without it such files would be
  // handed to the BMP loader and rejected.
  if (Ext = '.jpg') or (Ext = '.jpeg') then
  begin
    JPG := TJPEGImage.Create;
    try
      JPG.LoadFromFile(FileName);
      FImage.Assign(JPG);
    finally
      JPG.Free;
    end;
  end
  else if Ext = '.png' then
  begin
    PNG := TPngImage.Create;
    try
      PNG.LoadFromFile(FileName);
      FImage.Assign(PNG);
    finally
      PNG.Free;
    end;
  end
  else
    FImage.LoadFromFile(FileName);
end;

// Writes the working bitmap to FileName.
//
// The encoder is chosen from the (case-insensitive) file extension:
//   .jpg / .jpeg -> TJPEGImage
//   .png         -> TPngImage
//   anything else is written by TBitmap.SaveToFile (BMP data).
// Exceptions raised by the encoders or by file access propagate to the caller.
procedure TImagePreprocessor.SaveToFile(const FileName: string);
var
  Ext: string;
  JPG: TJPEGImage;
  PNG: TPngImage;
begin
  Ext := LowerCase(ExtractFileExt(FileName));

  // Treat '.jpeg' like '.jpg'; otherwise BMP bytes would be written under a
  // JPEG file name.
  if (Ext = '.jpg') or (Ext = '.jpeg') then
  begin
    JPG := TJPEGImage.Create;
    try
      JPG.Assign(FImage);
      JPG.SaveToFile(FileName);
    finally
      JPG.Free;
    end;
  end
  else if Ext = '.png' then
  begin
    PNG := TPngImage.Create;
    try
      PNG.Assign(FImage);
      PNG.SaveToFile(FileName);
    finally
      PNG.Free;
    end;
  end
  else
    FImage.SaveToFile(FileName);
end;

// Rescales the working bitmap to Width x Height pixels.
// The current image is stretch-drawn onto a scratch bitmap of the requested
// size, which then replaces the working bitmap's content.
procedure TImagePreprocessor.Resize(Width, Height: Integer);
var
  Scaled: TBitmap;
begin
  Scaled := TBitmap.Create;
  try
    // Target surface first, then draw the old image across all of it.
    Scaled.SetSize(Width, Height);
    Scaled.Canvas.StretchDraw(Rect(0, 0, Width, Height), FImage);

    // Copy the scaled result back; the scratch bitmap is freed below.
    FImage.Assign(Scaled);
  finally
    Scaled.Free;
  end;
end;

// Brings the working bitmap into the canonical 24-bit layout.
//
// A TBitmap stores each channel as a byte, so a 0..1 floating-point range
// cannot be represented inside the bitmap itself. The previous implementation
// walked every pixel computing Round(v / 255 * 255), which yields v again for
// every byte value; that full-image pass changed nothing and has been removed.
// The only observable effect - the conversion to pf24bit - is kept.
// Scaling to a floating-point range belongs where the pixel data is copied
// into a float buffer for the model.
procedure TImagePreprocessor.Normalize;
begin
  FImage.PixelFormat := pf24bit;
end;

// Converts the working bitmap to gray in place.
// The bitmap is switched to pf24bit and every pixel's three channels are
// replaced by the weighted sum 0.299*R + 0.587*G + 0.114*B.
procedure TImagePreprocessor.GrayscaleConversion;
var
  Row, Col: Integer;
  Px: PByte;
  Luma: Byte;
begin
  FImage.PixelFormat := pf24bit;

  for Row := 0 to FImage.Height - 1 do
  begin
    // A pf24bit scan line is a run of 3-byte pixels stored blue, green, red.
    Px := FImage.ScanLine[Row];
    for Col := 0 to FImage.Width - 1 do
    begin
      // Px[2] = red, Px[1] = green, Px[0] = blue
      Luma := Round(0.299 * Px[2] + 0.587 * Px[1] + 0.114 * Px[0]);
      Px[2] := Luma;
      Px[1] := Luma;
      Px[0] := Luma;
      Inc(Px, 3); // advance to the next pixel
    end;
  end;
end;

// Adds Value to every colour channel of the working bitmap, saturating at
// 0 and 255. The bitmap is converted to pf24bit first.
//
// Value: brightness offset; positive brightens, negative darkens. Offsets
//        beyond +/-255 already saturate every channel, so the offset is
//        clamped to that range. The clamp also prevents the Integer overflow
//        that "channel + Value" could hit for extreme arguments.
procedure TImagePreprocessor.AdjustBrightness(Value: Integer);
var
  Lut: array[Byte] of Byte;
  Delta, Level, X, Y, RowBytes: Integer;
  Row: PByte;
begin
  FImage.PixelFormat := pf24bit;

  Delta := EnsureRange(Value, -255, 255);

  // All three channels get the same mapping, so compute it once per possible
  // byte value instead of once per channel per pixel.
  for Level := 0 to 255 do
    Lut[Level] := EnsureRange(Level + Delta, 0, 255);

  // pf24bit rows hold Width pixels of 3 bytes each.
  RowBytes := FImage.Width * 3;
  for Y := 0 to FImage.Height - 1 do
  begin
    Row := FImage.ScanLine[Y];
    for X := 0 to RowBytes - 1 do
      Row[X] := Lut[Row[X]];
  end;
end;

// Changes the contrast of the working bitmap around the mid-gray level 128.
// The bitmap is converted to pf24bit first.
//
// Value: contrast amount. It is clamped to -255..255:
//          0    leaves the image unchanged (factor 1),
//          -255 collapses every channel to 128 (factor 0),
//          255  gives the strongest boost.
//        Without the clamp the factor formula divides by zero at Value = 259
//        and turns negative beyond it, which would invert the image.
procedure TImagePreprocessor.AdjustContrast(Value: Double);
var
  Lut: array[Byte] of Byte;
  Amount, Factor: Double;
  Level, X, Y, RowBytes: Integer;
  Row: PByte;
begin
  FImage.PixelFormat := pf24bit;

  Amount := Value;
  if Amount > 255 then
    Amount := 255
  else if Amount < -255 then
    Amount := -255;

  Factor := (259 * (Amount + 255)) / (255 * (259 - Amount));

  // Every channel uses the same mapping; evaluate it once per byte value.
  for Level := 0 to 255 do
    Lut[Level] := EnsureRange(Round(Factor * (Level - 128) + 128), 0, 255);

  // pf24bit rows hold Width pixels of 3 bytes each.
  RowBytes := FImage.Width * 3;
  for Y := 0 to FImage.Height - 1 do
  begin
    Row := FImage.ScanLine[Y];
    for X := 0 to RowBytes - 1 do
      Row[X] := Lut[Row[X]];
  end;
end;

end.

中级:集成TensorFlow Lite进行图像分类

TensorFlow Lite是一个轻量级的机器学习框架,适合在移动和嵌入式设备上运行。以下是在Delphi中集成TensorFlow Lite进行图像分类的示例框架(其中推理部分用随机数模拟,实际使用时需替换为所选绑定库的调用):

unit TensorFlowLiteClassifier;

interface

uses
  System.SysUtils, System.Classes, System.Math, System.IOUtils,
  System.Generics.Defaults, System.Generics.Collections,
  System.Net.HttpClient, Vcl.Graphics;

type
  // One classification outcome: a class name and the score given to it.
  TClassificationResult = record
    // "label" is a reserved word in Delphi, so the declaration needs the
    // ampersand escape. The field's name is still "Label".
    &Label: string;
    Confidence: Double;
  end;

  // Result list returned by the classifier.
  TClassificationResults = TArray<TClassificationResult>;

  // Image classifier skeleton: loads a label list, converts a bitmap into a
  // normalized float buffer and returns the top-scoring labels.
  TTensorFlowLiteClassifier = class
  private
    FModelPath: string;     // model file path as passed to Create
    FLabelsPath: string;    // label file path; read by LoadLabels
    FInputWidth: Integer;   // width the image is resized to before conversion
    FInputHeight: Integer;  // height the image is resized to before conversion
    FLabels: TStringList;   // label list; owned, freed in Destroy

    // Fills FLabels from FLabelsPath when that file exists.
    procedure LoadLabels;
    // Resizes Image to FInputWidth x FInputHeight and returns its channels as
    // floats in the range [-1, 1].
    function PreprocessImage(const Image: TBitmap): TArray<Single>;
  public
    constructor Create(const ModelPath, LabelsPath: string; InputWidth, InputHeight: Integer);
    destructor Destroy; override;

    // Returns at most TopK results, highest score first.
    function Classify(const Image: TBitmap; TopK: Integer = 5): TClassificationResults;
  end;

implementation

// 注意:这里需要使用第三方库来调用TensorFlow Lite
// 可以使用DelphiTensorFlow或其他TensorFlow Lite绑定库
// 以下代码仅为示例框架,实际实现需要根据所选库进行调整

{ TTensorFlowLiteClassifier }

// Stores the model/label paths and the input size, creates the label list
// and fills it through LoadLabels.
constructor TTensorFlowLiteClassifier.Create(const ModelPath, LabelsPath: string;
  InputWidth, InputHeight: Integer);
begin
  inherited Create;

  FLabels := TStringList.Create;

  FInputWidth := InputWidth;
  FInputHeight := InputHeight;
  FModelPath := ModelPath;
  FLabelsPath := LabelsPath;

  LoadLabels;

  // The TensorFlow Lite model itself would be initialised here; the actual
  // calls depend on the binding library that is used.
end;

// Frees the label list. TensorFlow Lite resources, once a binding library is
// wired in, would be released here as well.
destructor TTensorFlowLiteClassifier.Destroy;
begin
  FreeAndNil(FLabels);
  inherited Destroy;
end;

// Reads the label file into FLabels, one label per line.
// When the file does not exist the list is left untouched.
procedure TTensorFlowLiteClassifier.LoadLabels;
begin
  if not FileExists(FLabelsPath) then
    Exit;

  FLabels.LoadFromFile(FLabelsPath);
end;

// Converts Image into the float buffer fed to the model.
//
// The image is stretch-drawn onto a pf24bit bitmap of FInputWidth x
// FInputHeight pixels. The result holds FInputWidth * FInputHeight * 3
// values, row by row, each pixel as red, green, blue, with every channel
// mapped from 0..255 to [-1, 1].
//
// Raises EArgumentNilException when Image is nil (StretchDraw would otherwise
// fail with an access violation) and EArgumentOutOfRangeException when the
// configured input size is not positive.
function TTensorFlowLiteClassifier.PreprocessImage(const Image: TBitmap): TArray<Single>;
const
  ChannelCount = 3;
  HalfRange = 127.5; // 255 / 2: maps 0..255 onto 0..2 before the shift by -1
var
  Resized: TBitmap;
  Row: PByte;
  X, Y, Index: Integer;
begin
  if Image = nil then
    raise EArgumentNilException.Create('Image must not be nil');
  if (FInputWidth <= 0) or (FInputHeight <= 0) then
    raise EArgumentOutOfRangeException.Create('Model input size must be positive');

  Resized := TBitmap.Create;
  try
    Resized.PixelFormat := pf24bit;
    Resized.SetSize(FInputWidth, FInputHeight);
    Resized.Canvas.StretchDraw(Rect(0, 0, FInputWidth, FInputHeight), Image);

    SetLength(Result, FInputWidth * FInputHeight * ChannelCount);
    Index := 0;

    for Y := 0 to FInputHeight - 1 do
    begin
      // Plain byte access keeps this routine independent of Windows-specific
      // pixel record types. pf24bit stores each pixel as blue, green, red.
      Row := Resized.ScanLine[Y];
      for X := 0 to FInputWidth - 1 do
      begin
        Result[Index] := (Row[2] / HalfRange) - 1.0;     // red
        Result[Index + 1] := (Row[1] / HalfRange) - 1.0; // green
        Result[Index + 2] := (Row[0] / HalfRange) - 1.0; // blue
        Inc(Index, ChannelCount);
        Inc(Row, ChannelCount);
      end;
    end;
  finally
    Resized.Free;
  end;
end;

// SortOutputs is implemented further down in this unit; Delphi needs a
// declaration before the first call.
function SortOutputs(const Data: TArray<Single>): TArray<Integer>; forward;

// Classifies Image and returns the best-scoring labels.
//
// Image: bitmap to classify; it is resized and normalized by PreprocessImage.
// TopK:  maximum number of results; values below 1 give an empty result.
// Returns the results ordered by descending confidence.
//
// NOTE: inference is still simulated with random scores. Replace the marked
// section with a call into the chosen TensorFlow Lite binding.
function TTensorFlowLiteClassifier.Classify(const Image: TBitmap; TopK: Integer): TClassificationResults;
var
  InputData: TArray<Single>;
  OutputData: TArray<Single>;
  SortedIndices: TArray<Integer>;
  I, ResultCount, ClassIndex: Integer;
begin
  // Prepared model input; unused until real inference replaces the
  // simulation below.
  InputData := PreprocessImage(Image);

  // --- simulated inference -------------------------------------------------
  // Real code: OutputData := RunInference(InputData);
  SetLength(OutputData, FLabels.Count);
  for I := 0 to High(OutputData) do
    OutputData[I] := Random;
  // -------------------------------------------------------------------------

  SortedIndices := SortOutputs(OutputData);

  // Max(0, ...) keeps a negative TopK from reaching SetLength.
  ResultCount := Max(0, Min(TopK, Length(SortedIndices)));

  SetLength(Result, ResultCount);
  for I := 0 to ResultCount - 1 do
  begin
    ClassIndex := SortedIndices[I];

    // A real model may emit more scores than there are labels.
    if ClassIndex < FLabels.Count then
      Result[I].&Label := FLabels[ClassIndex]
    else
      Result[I].&Label := 'Unknown';

    Result[I].Confidence := OutputData[ClassIndex];
  end;
end;

// Returns the indices of Data ordered by descending value.
//
// Data:   scores, one per class.
// Result: a permutation of 0..Length(Data)-1; Result[0] is the index of the
//         largest score. Equal scores keep ascending index order, so the
//         outcome is deterministic. An empty input gives an empty result.
//
// The earlier hand-written bubble sort stored a Single score in an Integer
// temporary while swapping; sorting an index array with the library sort
// avoids the swap code altogether and runs in O(n log n).
function SortOutputs(const Data: TArray<Single>): TArray<Integer>;
var
  Scores: TArray<Single>;
  Order: TArray<Integer>;
  I: Integer;
begin
  // Local reference for the comparer closure (no element copy is made).
  Scores := Data;

  SetLength(Order, Length(Scores));
  for I := 0 to High(Order) do
    Order[I] := I;

  TArray.Sort<Integer>(Order, TComparer<Integer>.Construct(
    function(const Left, Right: Integer): Integer
    begin
      if Scores[Left] > Scores[Right] then
        Result := -1
      else if Scores[Left] < Scores[Right] then
        Result := 1
      else
        Result := Left - Right; // tie: lower index first
    end));

  Result := Order;
end;

end.

高级:使用OpenAI DALL-E API生成图像

OpenAI的DALL-E API允许通过文本描述生成图像。以下是如何在Delphi中集成DALL-E API:

unit DALLEImageGenerator;

interface

uses
  System.SysUtils, System.Classes, System.Net.HttpClient, System.Net.URLClient,
  System.JSON, Vcl.Graphics, Vcl.Imaging.jpeg, System.NetEncoding;

type
  // Image dimensions that can be requested; GetImageSizeStr turns each value
  // into the 'WxH' text placed in the request.
  TImageSize = (is256x256, is512x512, is1024x1024);

  // Client for the image generation endpoint: posts a prompt, downloads the
  // generated picture and hands it back as a bitmap or writes it to a file.
  TDALLEImageGenerator = class
  private
    FApiKey: string;          // sent as "Bearer <key>" in the Authorization header
    FHttpClient: THTTPClient; // owned; created in Create, freed in Destroy

    // Text form of Size as used in the request body.
    function GetImageSizeStr(Size: TImageSize): string;
  public
    constructor Create(const ApiKey: string);
    destructor Destroy; override;

    // Returns a new bitmap that the caller must free, or nil when no image
    // was obtained; raises Exception when the API answers with a non-200 status.
    function GenerateImage(const Prompt: string; Size: TImageSize = is512x512): TBitmap;
    // Generates an image and saves it to FileName; returns False when
    // GenerateImage produced no bitmap.
    function GenerateImageToFile(const Prompt, FileName: string; Size: TImageSize = is512x512): Boolean;
  end;

implementation

{ TDALLEImageGenerator }

// Remembers the API key and creates the HTTP client used for all requests.
constructor TDALLEImageGenerator.Create(const ApiKey: string);
begin
  inherited Create;
  FHttpClient := THTTPClient.Create;
  FApiKey := ApiKey;
end;

// Releases the owned HTTP client.
destructor TDALLEImageGenerator.Destroy;
begin
  FreeAndNil(FHttpClient);
  inherited Destroy;
end;

// Maps a TImageSize value to the 'WxH' text used in the request body.
// Any value outside the declared enumeration falls back to '512x512'.
function TDALLEImageGenerator.GetImageSizeStr(Size: TImageSize): string;
const
  FallbackSize = '512x512';
  SizeNames: array[TImageSize] of string = ('256x256', '512x512', '1024x1024');
begin
  // The range test guards the table lookup against out-of-range ordinals
  // (for example a value produced by a type cast).
  if (Size >= Low(TImageSize)) and (Size <= High(TImageSize)) then
    Result := SizeNames[Size]
  else
    Result := FallbackSize;
end;

// Requests one generated image for Prompt and returns it as a bitmap.
//
// Prompt: text description of the desired picture.
// Size:   requested dimensions.
// Returns a newly created TBitmap owned by the caller, or nil when the
// response carries no image URL or the image download does not return 200.
// Raises Exception when the generation endpoint answers with a status other
// than 200; network and decoding errors propagate as raised.
//
// Fixes compared with the earlier version:
//   * the request body stream and the decoder object are freed,
//   * the Authorization header is sent only with the API request, not stored
//     on the shared client where it would also go to the download host,
//   * the download is decoded with TWICImage, which detects the format from
//     the data (the service delivers PNG; a JPEG-only decoder rejects it),
//   * a malformed or empty JSON answer yields nil instead of an access
//     violation.
function TDALLEImageGenerator.GenerateImage(const Prompt: string; Size: TImageSize): TBitmap;
const
  EndpointURL = 'https://api.openai.com/v1/images/generations';
var
  RequestObj: TJSONObject;
  RequestBody: TStringStream;
  Response: IHTTPResponse;
  ResponseContent: string;
  ResponseValue: TJSONValue;
  DataArray: TJSONArray;
  ImageURL: string;
  ImageResponse: IHTTPResponse;
  ImageStream: TMemoryStream;
  Decoded: TWICImage;
begin
  Result := nil;
  ImageURL := '';

  // Build and send the request; both helper objects are released afterwards.
  RequestObj := TJSONObject.Create;
  try
    RequestObj.AddPair('prompt', Prompt);
    RequestObj.AddPair('n', TJSONNumber.Create(1));
    RequestObj.AddPair('size', GetImageSizeStr(Size));

    RequestBody := TStringStream.Create(RequestObj.ToJSON, TEncoding.UTF8);
    try
      // Per-request headers keep the API key away from any other host.
      Response := FHttpClient.Post(EndpointURL, RequestBody, nil,
        [TNetHeader.Create('Authorization', 'Bearer ' + FApiKey),
         TNetHeader.Create('Content-Type', 'application/json')]);
    finally
      RequestBody.Free;
    end;
  finally
    RequestObj.Free;
  end;

  ResponseContent := Response.ContentAsString;
  if Response.StatusCode <> 200 then
    raise Exception.CreateFmt('API错误: %d - %s', [Response.StatusCode, ResponseContent]);

  // Extract data[0].url; any missing piece leaves ImageURL empty.
  ResponseValue := TJSONObject.ParseJSONValue(ResponseContent);
  try
    if (ResponseValue is TJSONObject) and
       TJSONObject(ResponseValue).TryGetValue<TJSONArray>('data', DataArray) and
       (DataArray.Count > 0) then
      DataArray.Items[0].TryGetValue<string>('url', ImageURL);
  finally
    ResponseValue.Free;
  end;

  if ImageURL = '' then
    Exit;

  // Download and decode the picture.
  ImageStream := TMemoryStream.Create;
  try
    ImageResponse := FHttpClient.Get(ImageURL, ImageStream);
    if ImageResponse.StatusCode <> 200 then
      Exit;

    ImageStream.Position := 0;
    Decoded := TWICImage.Create;
    try
      Decoded.LoadFromStream(ImageStream);

      Result := TBitmap.Create;
      try
        Result.Assign(Decoded);
      except
        // Do not hand a half-initialised bitmap to the caller.
        FreeAndNil(Result);
        raise;
      end;
    finally
      Decoded.Free;
    end;
  finally
    ImageStream.Free;
  end;
end;

// Generates an image for Prompt and writes it to FileName.
//
// The output format follows the (case-insensitive) file extension:
//   .jpg / .jpeg -> JPEG via TJPEGImage
//   .png         -> PNG via TWICImage
//   anything else is written by TBitmap.SaveToFile (BMP data).
// Returns True once the file has been written, False when GenerateImage
// returned no bitmap. Exceptions from generation or saving propagate.
//
// PNG output uses TWICImage from Vcl.Graphics, so this unit does not need the
// separate PNG imaging unit.
function TDALLEImageGenerator.GenerateImageToFile(const Prompt, FileName: string; Size: TImageSize): Boolean;
var
  Bitmap: TBitmap;
  JPG: TJPEGImage;
  PNG: TWICImage;
  Ext: string;
begin
  Result := False;

  Bitmap := GenerateImage(Prompt, Size);
  if Bitmap = nil then
    Exit;

  try
    Ext := LowerCase(ExtractFileExt(FileName));

    if (Ext = '.jpg') or (Ext = '.jpeg') then
    begin
      JPG := TJPEGImage.Create;
      try
        JPG.Assign(Bitmap);
        JPG.SaveToFile(FileName);
      finally
        JPG.Free;
      end;
    end
    else if Ext = '.png' then
    begin
      PNG := TWICImage.Create;
      try
        PNG.Assign(Bitmap);
        PNG.ImageFormat := wifPng; // encode as PNG when saving
        PNG.SaveToFile(FileName);
      finally
        PNG.Free;
      end;
    end
    else
      Bitmap.SaveToFile(FileName);

    // Reached only when the chosen writer raised nothing.
    Result := True;
  finally
    Bitmap.Free;
  end;
end;

end.

实际应用案例

1. 智能照片编辑器

// Photo editor: applies the enhancements the user ticked to the current image
// file and shows the outcome in imgResult.
procedure TPhotoEditorForm.EnhanceImage;
var
  Pipeline: TImagePreprocessor;
begin
  Pipeline := TImagePreprocessor.Create;
  try
    Pipeline.LoadFromFile(CurrentImageFile);

    // Classic adjustments, driven by the check boxes and track bars.
    if cbBrightness.Checked then
      Pipeline.AdjustBrightness(tbBrightness.Position);
    if cbContrast.Checked then
      Pipeline.AdjustContrast(tbContrast.Position / 10);

    // AI enhancement; a concrete model has to be integrated behind this call.
    if cbAIEnhance.Checked then
      ApplyAIEnhancement(Pipeline.Image);

    // Present the processed bitmap.
    imgResult.Picture.Assign(Pipeline.Image);
  finally
    Pipeline.Free;
  end;
end;

2. 内容审核系统

// Content moderation: classifies ImageFile and reports whether it passes.
//
// Returns False as soon as one of the ten best results carries an
// inappropriate label with a confidence above the threshold (the hit is
// logged through LogModeration); returns True otherwise.
// Exceptions from loading the image or from the classifier propagate.
//
// Each object is protected by its own try/finally, so the bitmap is released
// even when constructing the classifier raises.
// NOTE(review): TBitmap.LoadFromFile reads bitmap files only; confirm what
// formats callers pass in.
function TContentModerationSystem.IsAppropriateImage(const ImageFile: string): Boolean;
const
  InappropriateThreshold = 0.7; // confidence threshold
var
  Bitmap: TBitmap;
  Classifier: TTensorFlowLiteClassifier;
  Results: TClassificationResults;
  I: Integer;
  InappropriateLabels: TArray<string>;
begin
  Result := True; // passes by default
  InappropriateLabels := ['explicit', 'violence', 'gore', 'drugs'];

  Bitmap := TBitmap.Create;
  try
    Classifier := TTensorFlowLiteClassifier.Create(
      'models/content_moderation.tflite',
      'models/content_labels.txt',
      224, 224);
    try
      Bitmap.LoadFromFile(ImageFile);
      Results := Classifier.Classify(Bitmap, 10);

      // Look for a confident match against the blocked labels.
      for I := 0 to High(Results) do
      begin
        if (Results[I].Confidence > InappropriateThreshold) and
           (IndexStr(Results[I].&Label, InappropriateLabels) >= 0) then
        begin
          Result := False;
          LogModeration(ImageFile, Results[I].&Label, Results[I].Confidence);
          Break;
        end;
      end;
    finally
      Classifier.Free;
    end;
  finally
    Bitmap.Free;
  end;
end;

最佳实践与注意事项

  1. 性能优化:图像处理是计算密集型任务,考虑使用多线程或GPU加速
  2. 内存管理:处理大图像时注意内存使用,及时释放资源
  3. 错误处理:实现完善的错误处理机制,特别是在调用外部API时
  4. 用户体验:添加进度指示器和取消选项,提升用户体验
  5. 模型选择:根据应用需求选择合适的模型,平衡准确性和性能
  6. 隐私考虑:处理用户图像时注意隐私保护,明确数据使用政策

结论

通过在Delphi应用中集成AI图像处理技术,我们可以显著提升应用的功能和用户体验。从基础的图像预处理,到图像分类,再到高级的图像生成,AI技术为传统图像处理应用带来了革命性的变化。

随着AI技术的不断发展,我们可以期待更多创新的图像处理功能出现在Delphi应用中。通过持续学习和实践,Delphi开发者可以充分利用这些技术,创建更智能、更有价值的应用程序。


关于作者:付乙,资深Delphi开发者,专注于将现代技术与传统应用相结合,提升软件价值和用户体验。