Commit bdcf2de

fix: move processing implementation to a separate unit
1 parent 10c6501 commit bdcf2de

3 files changed, +233 -214 lines changed

examples/cli/main.cpp (+1, -1)

@@ -6,9 +6,9 @@
 #include <string>
 #include <vector>
 
-// #include "preprocessing.hpp"
 #include "flux.hpp"
 #include "stable-diffusion.h"
+#include "preprocessing.hpp"
 
 #define STB_IMAGE_IMPLEMENTATION
 #define STB_IMAGE_STATIC

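For the new #include "preprocessing.hpp" to compile, a declaration of the entry point defined in preprocessing.cpp has to be visible from that header. The header itself is not part of the hunks shown on this page; as an assumption reconstructed from the definition below, the declaration the CLI now relies on would look like:

    // Assumed declaration (not shown in this diff); it mirrors the definition in preprocessing.cpp below.
    uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_threshold,
                              float low_threshold, float weak, float strong, bool inverse);
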
preprocessing.cpp (new file, +222 lines)

#include "preprocessing.hpp"
#define M_PI_ 3.14159265358979323846

// Run a single 2-D convolution on the CPU and copy the result into `output`.
void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) {
    struct ggml_init_params params;
    params.mem_size   = 20 * 1024 * 1024;  // 20 MB
    params.mem_buffer = NULL;
    params.no_alloc   = false;
    struct ggml_context* ctx0       = ggml_init(params);
    struct ggml_tensor* kernel_fp16 = ggml_new_tensor_4d(ctx0, GGML_TYPE_F16, kernel->ne[0], kernel->ne[1], 1, 1);
    ggml_fp32_to_fp16_row((float*)kernel->data, (ggml_fp16_t*)kernel_fp16->data, ggml_nelements(kernel));
    ggml_tensor* h  = ggml_conv_2d(ctx0, kernel_fp16, input, 1, 1, padding, padding, 1, 1);
    ggml_cgraph* gf = ggml_new_graph(ctx0);
    ggml_build_forward_expand(gf, ggml_cpy(ctx0, h, output));
    ggml_graph_compute_with_ctx(ctx0, gf, 1);
    ggml_free(ctx0);
}

void gaussian_kernel(struct ggml_tensor* kernel) {
    int ks_mid   = kernel->ne[0] / 2;
    float sigma  = 1.4f;
    float normal = 1.f / (2.0f * M_PI_ * powf(sigma, 2.0f));
    for (int y = 0; y < kernel->ne[0]; y++) {
        float gx = -ks_mid + y;
        for (int x = 0; x < kernel->ne[1]; x++) {
            float gy = -ks_mid + x;
            float k_ = expf(-((gx * gx + gy * gy) / (2.0f * powf(sigma, 2.0f)))) * normal;
            ggml_tensor_set_f32(kernel, k_, x, y);
        }
    }
}

void grayscale(struct ggml_tensor* rgb_img, struct ggml_tensor* grayscale) {
    for (int iy = 0; iy < rgb_img->ne[1]; iy++) {
        for (int ix = 0; ix < rgb_img->ne[0]; ix++) {
            float r    = ggml_tensor_get_f32(rgb_img, ix, iy);
            float g    = ggml_tensor_get_f32(rgb_img, ix, iy, 1);
            float b    = ggml_tensor_get_f32(rgb_img, ix, iy, 2);
            float gray = 0.2989f * r + 0.5870f * g + 0.1140f * b;
            ggml_tensor_set_f32(grayscale, gray, ix, iy);
        }
    }
}

void prop_hypot(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) {
    int n_elements = ggml_nelements(h);
    float* dx      = (float*)x->data;
    float* dy      = (float*)y->data;
    float* dh      = (float*)h->data;
    for (int i = 0; i < n_elements; i++) {
        dh[i] = sqrtf(dx[i] * dx[i] + dy[i] * dy[i]);
    }
}

void prop_arctan2(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) {
    int n_elements = ggml_nelements(h);
    float* dx      = (float*)x->data;
    float* dy      = (float*)y->data;
    float* dh      = (float*)h->data;
    for (int i = 0; i < n_elements; i++) {
        dh[i] = atan2f(dy[i], dx[i]);
    }
}

void normalize_tensor(struct ggml_tensor* g) {
    int n_elements = ggml_nelements(g);
    float* dg      = (float*)g->data;
    float max      = -INFINITY;
    for (int i = 0; i < n_elements; i++) {
        max = dg[i] > max ? dg[i] : max;
    }
    max = 1.0f / max;
    for (int i = 0; i < n_elements; i++) {
        dg[i] *= max;
    }
}

void non_max_supression(struct ggml_tensor* result, struct ggml_tensor* G, struct ggml_tensor* D) {
    for (int iy = 1; iy < result->ne[1] - 1; iy++) {
        for (int ix = 1; ix < result->ne[0] - 1; ix++) {
            float angle = ggml_tensor_get_f32(D, ix, iy) * 180.0f / M_PI_;
            if (angle < 0.0f) {
                angle += 180.0f;
            }
            float q = 1.0f;
            float r = 1.0f;

            // angle 0
            if ((0.0f <= angle && angle < 22.5f) || (157.5f <= angle && angle <= 180.0f)) {
                q = ggml_tensor_get_f32(G, ix, iy + 1);
                r = ggml_tensor_get_f32(G, ix, iy - 1);
            }
            // angle 45
            else if (22.5f <= angle && angle < 67.5f) {
                q = ggml_tensor_get_f32(G, ix + 1, iy - 1);
                r = ggml_tensor_get_f32(G, ix - 1, iy + 1);
            }
            // angle 90
            else if (67.5f <= angle && angle < 112.5f) {
                q = ggml_tensor_get_f32(G, ix + 1, iy);
                r = ggml_tensor_get_f32(G, ix - 1, iy);
            }
            // angle 135
            else if (112.5f <= angle && angle < 157.5f) {
                q = ggml_tensor_get_f32(G, ix - 1, iy - 1);
                r = ggml_tensor_get_f32(G, ix + 1, iy + 1);
            }

            // keep the pixel only if it is a local maximum along the gradient direction
            float cur = ggml_tensor_get_f32(G, ix, iy);
            if ((cur >= q) && (cur >= r)) {
                ggml_tensor_set_f32(result, cur, ix, iy);
            } else {
                ggml_tensor_set_f32(result, 0.0f, ix, iy);
            }
        }
    }
}

void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float low_threshold, float weak, float strong) {
    int n_elements = ggml_nelements(img);
    float* imd     = (float*)img->data;
    float max      = -INFINITY;
    for (int i = 0; i < n_elements; i++) {
        max = imd[i] > max ? imd[i] : max;
    }
    float ht = max * high_threshold;
    float lt = ht * low_threshold;
    for (int i = 0; i < n_elements; i++) {
        float img_v = imd[i];
        if (img_v >= ht) {  // strong pixel
            imd[i] = strong;
        } else if (img_v <= ht && img_v >= lt) {  // weak pixel
            imd[i] = weak;
        }
    }

    // zero out a 3-pixel border around the image
    for (int iy = 0; iy < img->ne[1]; iy++) {
        for (int ix = 0; ix < img->ne[0]; ix++) {
            if (ix >= 3 && ix <= img->ne[0] - 3 && iy >= 3 && iy <= img->ne[1] - 3) {
                ggml_tensor_set_f32(img, ggml_tensor_get_f32(img, ix, iy), ix, iy);
            } else {
                ggml_tensor_set_f32(img, 0.0f, ix, iy);
            }
        }
    }

    // hysteresis: promote weak pixels that touch a strong pixel, drop the rest
    for (int iy = 1; iy < img->ne[1] - 1; iy++) {
        for (int ix = 1; ix < img->ne[0] - 1; ix++) {
            float imd_v = ggml_tensor_get_f32(img, ix, iy);
            if (imd_v == weak) {
                if (ggml_tensor_get_f32(img, ix + 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix + 1, iy) == strong ||
                    ggml_tensor_get_f32(img, ix, iy - 1) == strong || ggml_tensor_get_f32(img, ix, iy + 1) == strong ||
                    ggml_tensor_get_f32(img, ix - 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix - 1, iy) == strong) {
                    ggml_tensor_set_f32(img, strong, ix, iy);
                } else {
                    ggml_tensor_set_f32(img, 0.0f, ix, iy);
                }
            }
        }
    }
}

uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
    struct ggml_init_params params;
    params.mem_size   = static_cast<size_t>(10 * 1024 * 1024);  // 10 MB
    params.mem_buffer = NULL;
    params.no_alloc   = false;
    struct ggml_context* work_ctx = ggml_init(params);

    if (!work_ctx) {
        LOG_ERROR("ggml_init() failed");
        return NULL;
    }

    // Sobel kernels
    float kX[9] = {
        -1, 0, 1,
        -2, 0, 2,
        -1, 0, 1};

    float kY[9] = {
        1, 2, 1,
        0, 0, 0,
        -1, -2, -1};

    // generate gaussian kernel
    int kernel_size = 5;
    struct ggml_tensor* gkernel = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, kernel_size, kernel_size, 1, 1);
    struct ggml_tensor* sf_kx   = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
    memcpy(sf_kx->data, kX, ggml_nbytes(sf_kx));
    struct ggml_tensor* sf_ky = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
    memcpy(sf_ky->data, kY, ggml_nbytes(sf_ky));
    gaussian_kernel(gkernel);
    struct ggml_tensor* image      = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
    struct ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 1, 1);
    struct ggml_tensor* iX         = ggml_dup_tensor(work_ctx, image_gray);
    struct ggml_tensor* iY         = ggml_dup_tensor(work_ctx, image_gray);
    struct ggml_tensor* G          = ggml_dup_tensor(work_ctx, image_gray);
    struct ggml_tensor* tetha      = ggml_dup_tensor(work_ctx, image_gray);
    // grayscale -> gaussian blur -> sobel gradients -> magnitude/direction -> NMS -> hysteresis
    sd_image_to_tensor(img, image);
    grayscale(image, image_gray);
    convolve(image_gray, image_gray, gkernel, 2);
    convolve(image_gray, iX, sf_kx, 1);
    convolve(image_gray, iY, sf_ky, 1);
    prop_hypot(iX, iY, G);
    normalize_tensor(G);
    prop_arctan2(iX, iY, tetha);
    non_max_supression(image_gray, G, tetha);
    threshold_hystersis(image_gray, high_threshold, low_threshold, weak, strong);
    // to RGB channels
    for (int iy = 0; iy < height; iy++) {
        for (int ix = 0; ix < width; ix++) {
            float gray = ggml_tensor_get_f32(image_gray, ix, iy);
            gray       = inverse ? 1.0f - gray : gray;
            ggml_tensor_set_f32(image, gray, ix, iy);
            ggml_tensor_set_f32(image, gray, ix, iy, 1);
            ggml_tensor_set_f32(image, gray, ix, iy, 2);
        }
    }
    free(img);
    uint8_t* output = sd_tensor_to_image(image);
    ggml_free(work_ctx);
    return output;
}
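
A minimal usage sketch of the entry point above, assuming the caller loads an RGB image with stb_image (which examples/cli/main.cpp already compiles in via STB_IMAGE_IMPLEMENTATION). The file path, the wrapper function name, and the threshold/weak/strong values are illustrative assumptions, not taken from this commit:

    // Hypothetical helper, not part of this commit.
    static uint8_t* load_canny_hint(const char* path, int* width, int* height) {
        int c = 0;
        uint8_t* rgb = (uint8_t*)stbi_load(path, width, height, &c, 3);  // force 3 channels
        if (rgb == NULL) {
            return NULL;
        }
        // preprocess_canny() takes ownership of `rgb` (it calls free() on it) and
        // returns a newly allocated RGB edge map of the same width and height.
        return preprocess_canny(rgb, *width, *height, 0.08f, 0.08f, 0.8f, 1.0f, false);
    }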
