#include "preprocessing.hpp"

#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>

#define M_PI_ 3.14159265358979323846

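// 2D convolution helper: converts `kernel` to fp16, builds a one-node ggml
// graph on a scratch context, and copies the convolved `input` into `output`.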
void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) {
    struct ggml_init_params params;
    params.mem_size = 20 * 1024 * 1024;  // 20 MB scratch buffer
    params.mem_buffer = NULL;
    params.no_alloc = false;
    struct ggml_context* ctx0 = ggml_init(params);
    struct ggml_tensor* kernel_fp16 = ggml_new_tensor_4d(ctx0, GGML_TYPE_F16, kernel->ne[0], kernel->ne[1], 1, 1);
    ggml_fp32_to_fp16_row((float*)kernel->data, (ggml_fp16_t*)kernel_fp16->data, ggml_nelements(kernel));
    ggml_tensor* h = ggml_conv_2d(ctx0, kernel_fp16, input, 1, 1, padding, padding, 1, 1);
    ggml_cgraph* gf = ggml_new_graph(ctx0);
    ggml_build_forward_expand(gf, ggml_cpy(ctx0, h, output));
    ggml_graph_compute_with_ctx(ctx0, gf, 1);
    ggml_free(ctx0);
}

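// Fills `kernel` with a 2D Gaussian (sigma = 1.4), used to blur the image
// before the gradients are computed.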
void gaussian_kernel(struct ggml_tensor* kernel) {
    int ks_mid = kernel->ne[0] / 2;
    float sigma = 1.4f;
    float normal = 1.f / (2.0f * M_PI_ * powf(sigma, 2.0f));
    for (int y = 0; y < kernel->ne[1]; y++) {
        float gy = -ks_mid + y;
        for (int x = 0; x < kernel->ne[0]; x++) {
            float gx = -ks_mid + x;
            float k_ = expf(-((gx * gx + gy * gy) / (2.0f * powf(sigma, 2.0f)))) * normal;
            ggml_tensor_set_f32(kernel, k_, x, y);
        }
    }
}

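// Converts a 3-channel RGB tensor to a single-channel luma image.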
void grayscale(struct ggml_tensor* rgb_img, struct ggml_tensor* gray_img) {
    for (int iy = 0; iy < rgb_img->ne[1]; iy++) {
        for (int ix = 0; ix < rgb_img->ne[0]; ix++) {
            float r = ggml_tensor_get_f32(rgb_img, ix, iy);
            float g = ggml_tensor_get_f32(rgb_img, ix, iy, 1);
            float b = ggml_tensor_get_f32(rgb_img, ix, iy, 2);
            // ITU-R BT.601 luma weights
            float gray = 0.2989f * r + 0.5870f * g + 0.1140f * b;
            ggml_tensor_set_f32(gray_img, gray, ix, iy);
        }
    }
}

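// Element-wise gradient magnitude: h = sqrt(x^2 + y^2).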
void prop_hypot(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) {
    int n_elements = ggml_nelements(h);
    float* dx = (float*)x->data;
    float* dy = (float*)y->data;
    float* dh = (float*)h->data;
    for (int i = 0; i < n_elements; i++) {
        dh[i] = sqrtf(dx[i] * dx[i] + dy[i] * dy[i]);
    }
}

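// Element-wise gradient direction: h = atan2(y, x), in radians.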
void prop_arctan2(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) {
    int n_elements = ggml_nelements(h);
    float* dx = (float*)x->data;
    float* dy = (float*)y->data;
    float* dh = (float*)h->data;
    for (int i = 0; i < n_elements; i++) {
        dh[i] = atan2f(dy[i], dx[i]);
    }
}

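// Scales the tensor in place so its maximum element becomes 1.0.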
void normalize_tensor(struct ggml_tensor* g) {
    int n_elements = ggml_nelements(g);
    float* dg = (float*)g->data;
    float max = -INFINITY;
    for (int i = 0; i < n_elements; i++) {
        max = dg[i] > max ? dg[i] : max;
    }
    max = 1.0f / max;  // multiply by the reciprocal instead of dividing per element
    for (int i = 0; i < n_elements; i++) {
        dg[i] *= max;
    }
}

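// Canny non-maximum suppression: a pixel of the gradient magnitude G survives
// only if it is a local maximum along its gradient direction D, quantized to
// 0/45/90/135 degrees.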
void non_max_suppression(struct ggml_tensor* result, struct ggml_tensor* G, struct ggml_tensor* D) {
    for (int iy = 1; iy < result->ne[1] - 1; iy++) {
        for (int ix = 1; ix < result->ne[0] - 1; ix++) {
            float angle = ggml_tensor_get_f32(D, ix, iy) * 180.0f / M_PI_;
            if (angle < 0.0f) {
                angle += 180.0f;
            }
            // q and r are the two neighbors along the gradient direction
            float q = 1.0f;
            float r = 1.0f;

            // angle 0: horizontal gradient, compare left/right neighbors
            if ((angle >= 0.0f && angle < 22.5f) || (angle >= 157.5f && angle <= 180.0f)) {
                q = ggml_tensor_get_f32(G, ix + 1, iy);
                r = ggml_tensor_get_f32(G, ix - 1, iy);
            }
            // angle 45
            else if (angle >= 22.5f && angle < 67.5f) {
                q = ggml_tensor_get_f32(G, ix + 1, iy - 1);
                r = ggml_tensor_get_f32(G, ix - 1, iy + 1);
            }
            // angle 90: vertical gradient, compare up/down neighbors
            else if (angle >= 67.5f && angle < 112.5f) {
                q = ggml_tensor_get_f32(G, ix, iy + 1);
                r = ggml_tensor_get_f32(G, ix, iy - 1);
            }
            // angle 135
            else if (angle >= 112.5f && angle < 157.5f) {
                q = ggml_tensor_get_f32(G, ix - 1, iy - 1);
                r = ggml_tensor_get_f32(G, ix + 1, iy + 1);
            }

            float cur = ggml_tensor_get_f32(G, ix, iy);
            if ((cur >= q) && (cur >= r)) {
                ggml_tensor_set_f32(result, cur, ix, iy);
            } else {
                ggml_tensor_set_f32(result, 0.0f, ix, iy);
            }
        }
    }
}

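// Double threshold + hysteresis: marks pixels as strong or weak relative to
// the image maximum, zeroes a margin around the border, then keeps weak
// pixels only if they are 8-connected to a strong pixel.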
void threshold_hysteresis(struct ggml_tensor* img, float high_threshold, float low_threshold, float weak, float strong) {
    int n_elements = ggml_nelements(img);
    float* imd = (float*)img->data;
    float max = -INFINITY;
    for (int i = 0; i < n_elements; i++) {
        max = imd[i] > max ? imd[i] : max;
    }
    float ht = max * high_threshold;
    float lt = ht * low_threshold;
    for (int i = 0; i < n_elements; i++) {
        float img_v = imd[i];
        if (img_v >= ht) {  // strong pixel
            imd[i] = strong;
        } else if (img_v >= lt) {  // weak pixel
            imd[i] = weak;
        } else {  // suppress everything below the low threshold
            imd[i] = 0.0f;
        }
    }

    // zero out a 3-pixel margin around the border
    for (int iy = 0; iy < img->ne[1]; iy++) {
        for (int ix = 0; ix < img->ne[0]; ix++) {
            if (ix < 3 || ix > img->ne[0] - 3 || iy < 3 || iy > img->ne[1] - 3) {
                ggml_tensor_set_f32(img, 0.0f, ix, iy);
            }
        }
    }

    // hysteresis: promote weak pixels that touch a strong pixel, drop the rest
    for (int iy = 1; iy < img->ne[1] - 1; iy++) {
        for (int ix = 1; ix < img->ne[0] - 1; ix++) {
            float imd_v = ggml_tensor_get_f32(img, ix, iy);
            if (imd_v == weak) {
                if (ggml_tensor_get_f32(img, ix + 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix + 1, iy) == strong ||
                    ggml_tensor_get_f32(img, ix + 1, iy + 1) == strong || ggml_tensor_get_f32(img, ix, iy - 1) == strong ||
                    ggml_tensor_get_f32(img, ix, iy + 1) == strong || ggml_tensor_get_f32(img, ix - 1, iy - 1) == strong ||
                    ggml_tensor_get_f32(img, ix - 1, iy) == strong || ggml_tensor_get_f32(img, ix - 1, iy + 1) == strong) {
                    ggml_tensor_set_f32(img, strong, ix, iy);
                } else {
                    ggml_tensor_set_f32(img, 0.0f, ix, iy);
                }
            }
        }
    }
}

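// Full Canny edge-detection pipeline: grayscale -> Gaussian blur -> Sobel
// gradients -> non-maximum suppression -> double threshold with hysteresis.
// Takes ownership of `img` and returns a newly allocated RGB edge map.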
uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
    struct ggml_init_params params;
    params.mem_size = static_cast<size_t>(10 * 1024 * 1024);  // 10 MB
    params.mem_buffer = NULL;
    params.no_alloc = false;
    struct ggml_context* work_ctx = ggml_init(params);

    if (!work_ctx) {
        LOG_ERROR("ggml_init() failed");
        return NULL;
    }

    // Sobel kernels for the horizontal (kX) and vertical (kY) gradients
    float kX[9] = {
        -1, 0, 1,
        -2, 0, 2,
        -1, 0, 1};

    float kY[9] = {
        1, 2, 1,
        0, 0, 0,
        -1, -2, -1};

    // generate the 5x5 Gaussian blur kernel
    int kernel_size = 5;
    struct ggml_tensor* gkernel = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, kernel_size, kernel_size, 1, 1);
    struct ggml_tensor* sf_kx = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
    memcpy(sf_kx->data, kX, ggml_nbytes(sf_kx));
    struct ggml_tensor* sf_ky = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
    memcpy(sf_ky->data, kY, ggml_nbytes(sf_ky));
    gaussian_kernel(gkernel);
    struct ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
    struct ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 1, 1);
    struct ggml_tensor* iX = ggml_dup_tensor(work_ctx, image_gray);
    struct ggml_tensor* iY = ggml_dup_tensor(work_ctx, image_gray);
    struct ggml_tensor* G = ggml_dup_tensor(work_ctx, image_gray);
    struct ggml_tensor* theta = ggml_dup_tensor(work_ctx, image_gray);
    sd_image_to_tensor(img, image);
    grayscale(image, image_gray);
    convolve(image_gray, image_gray, gkernel, 2);  // Gaussian blur
    convolve(image_gray, iX, sf_kx, 1);            // horizontal gradient
    convolve(image_gray, iY, sf_ky, 1);            // vertical gradient
    prop_hypot(iX, iY, G);                         // gradient magnitude
    normalize_tensor(G);
    prop_arctan2(iX, iY, theta);                   // gradient direction
    non_max_suppression(image_gray, G, theta);
    threshold_hysteresis(image_gray, high_threshold, low_threshold, weak, strong);
    // broadcast the single-channel result to all three RGB channels
    for (int iy = 0; iy < height; iy++) {
        for (int ix = 0; ix < width; ix++) {
            float gray = ggml_tensor_get_f32(image_gray, ix, iy);
            gray = inverse ? 1.0f - gray : gray;
            ggml_tensor_set_f32(image, gray, ix, iy);
            ggml_tensor_set_f32(image, gray, ix, iy, 1);
            ggml_tensor_set_f32(image, gray, ix, iy, 2);
        }
    }
    free(img);  // the input buffer is owned by this function
    uint8_t* output = sd_tensor_to_image(image);
    ggml_free(work_ctx);
    return output;
}