[Impeller] Fix GLES gaussian implementation. (#55329)
Fixes https://github.com/flutter/flutter/issues/142355
Problems: no uniform structs, no int uniforms, and buffer bindings don't work when the struct type doesn't match the uniform name.
diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc
index 1b595ca..4c7af0b 100644
--- a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc
+++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc
@@ -12,6 +12,7 @@
#include "impeller/entity/texture_downsample.frag.h"
#include "impeller/entity/texture_fill.frag.h"
#include "impeller/entity/texture_fill.vert.h"
+#include "impeller/geometry/color.h"
#include "impeller/renderer/render_pass.h"
#include "impeller/renderer/vertex_buffer_builder.h"
@@ -316,7 +317,7 @@
fml::StatusOr<RenderTarget> MakeDownsampleSubpass(
const ContentContext& renderer,
const std::shared_ptr<CommandBuffer>& command_buffer,
- std::shared_ptr<Texture> input_texture,
+ const std::shared_ptr<Texture>& input_texture,
const SamplerDescriptor& sampler_descriptor,
const DownsamplePassArgs& pass_args,
Entity::TileMode tile_mode) {
@@ -338,7 +339,8 @@
TextureFillVertexShader::FrameInfo frame_info;
frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1));
- frame_info.texture_sampler_y_coord_scale = 1.0;
+ frame_info.texture_sampler_y_coord_scale =
+ input_texture->GetYCoordScale();
TextureFillFragmentShader::FragInfo frag_info;
frag_info.alpha = 1.0;
@@ -391,7 +393,8 @@
TextureFillVertexShader::FrameInfo frame_info;
frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1));
- frame_info.texture_sampler_y_coord_scale = 1.0;
+ frame_info.texture_sampler_y_coord_scale =
+ input_texture->GetYCoordScale();
TextureDownsampleFragmentShader::FragInfo frag_info;
frag_info.edge = edge;
@@ -442,16 +445,18 @@
return input_pass;
}
- std::shared_ptr<Texture> input_texture = input_pass.GetRenderTargetTexture();
+ const std::shared_ptr<Texture>& input_texture =
+ input_pass.GetRenderTargetTexture();
// TODO(gaaclarke): This blurs the whole image, but because we know the clip
// region we could focus on just blurring that.
ISize subpass_size = input_texture->GetSize();
ContentContext::SubpassCallback subpass_callback =
[&](const ContentContext& renderer, RenderPass& pass) {
- GaussianBlurVertexShader::FrameInfo frame_info{
- .mvp = Matrix::MakeOrthographic(ISize(1, 1)),
- .texture_sampler_y_coord_scale = 1.0};
+ GaussianBlurVertexShader::FrameInfo frame_info;
+ frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1)),
+ frame_info.texture_sampler_y_coord_scale =
+ input_texture->GetYCoordScale();
HostBuffer& host_buffer = renderer.GetTransientsBuffer();
@@ -476,11 +481,9 @@
linear_sampler_descriptor));
GaussianBlurVertexShader::BindFrameInfo(
pass, host_buffer.EmplaceUniform(frame_info));
- GaussianBlurPipeline::FragmentShader::KernelSamples kernel_samples =
- LerpHackKernelSamples(GenerateBlurInfo(blur_info));
- FML_CHECK(kernel_samples.sample_count <= kGaussianBlurMaxKernelSize);
GaussianBlurFragmentShader::BindKernelSamples(
- pass, host_buffer.EmplaceUniform(kernel_samples));
+ pass, host_buffer.EmplaceUniform(
+ LerpHackKernelSamples(GenerateBlurInfo(blur_info))));
return pass.Draw().ok();
};
if (destination_target.has_value()) {
@@ -893,7 +896,7 @@
Scalar tally = 0.0f;
for (int i = 0; i < result.sample_count; ++i) {
int x = x_offset + (i * parameters.step_size) - parameters.blur_radius;
- result.samples[i] = GaussianBlurPipeline::FragmentShader::KernelSample{
+ result.samples[i] = KernelSample{
.uv_offset = parameters.blur_uv_offset * x,
.coefficient = expf(-0.5f * (x * x) /
(parameters.blur_sigma * parameters.blur_sigma)) /
@@ -914,25 +917,31 @@
// between the samples.
GaussianBlurPipeline::FragmentShader::KernelSamples LerpHackKernelSamples(
KernelSamples parameters) {
- GaussianBlurPipeline::FragmentShader::KernelSamples result;
+ GaussianBlurPipeline::FragmentShader::KernelSamples result = {};
result.sample_count = ((parameters.sample_count - 1) / 2) + 1;
int32_t middle = result.sample_count / 2;
int32_t j = 0;
FML_DCHECK(result.sample_count <= kGaussianBlurMaxKernelSize);
+ static_assert(sizeof(result.sample_data) ==
+ sizeof(std::array<Vector4, kGaussianBlurMaxKernelSize>));
+
for (int i = 0; i < result.sample_count; i++) {
if (i == middle) {
- result.samples[i] = parameters.samples[j++];
+ result.sample_data[i].x = parameters.samples[j].uv_offset.x;
+ result.sample_data[i].y = parameters.samples[j].uv_offset.y;
+ result.sample_data[i].z = parameters.samples[j].coefficient;
+ j++;
} else {
- GaussianBlurPipeline::FragmentShader::KernelSample left =
- parameters.samples[j];
- GaussianBlurPipeline::FragmentShader::KernelSample right =
- parameters.samples[j + 1];
- result.samples[i] = GaussianBlurPipeline::FragmentShader::KernelSample{
- .uv_offset = (left.uv_offset * left.coefficient +
- right.uv_offset * right.coefficient) /
- (left.coefficient + right.coefficient),
- .coefficient = left.coefficient + right.coefficient,
- };
+ KernelSample left = parameters.samples[j];
+ KernelSample right = parameters.samples[j + 1];
+
+ result.sample_data[i].z = left.coefficient + right.coefficient;
+
+ Point uv = (left.uv_offset * left.coefficient +
+ right.uv_offset * right.coefficient) /
+ (left.coefficient + right.coefficient);
+ result.sample_data[i].x = uv.x;
+ result.sample_data[i].y = uv.y;
j += 2;
}
}
diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents.h b/impeller/entity/contents/filters/gaussian_blur_filter_contents.h
index 4408858..b24c87f 100644
--- a/impeller/entity/contents/filters/gaussian_blur_filter_contents.h
+++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents.h
@@ -9,12 +9,16 @@
#include "impeller/entity/contents/content_context.h"
#include "impeller/entity/contents/filters/filter_contents.h"
#include "impeller/entity/geometry/geometry.h"
+#include "impeller/geometry/color.h"
namespace impeller {
// Comes from gaussian.frag.
static constexpr int32_t kGaussianBlurMaxKernelSize = 50;
+static_assert(sizeof(GaussianBlurPipeline::FragmentShader::KernelSamples) ==
+ sizeof(Vector4) * kGaussianBlurMaxKernelSize + sizeof(Vector4));
+
struct BlurParameters {
Point blur_uv_offset;
Scalar blur_sigma;
@@ -22,6 +26,11 @@
int step_size;
};
+struct KernelSample {
+ Vector2 uv_offset;
+ float coefficient;
+};
+
/// A larger mirror of GaussianBlurPipeline::FragmentShader::KernelSamples.
///
/// This is a mirror of GaussianBlurPipeline::FragmentShader::KernelSamples that
@@ -30,7 +39,7 @@
struct KernelSamples {
static constexpr int kMaxKernelSize = kGaussianBlurMaxKernelSize * 2;
int sample_count;
- GaussianBlurPipeline::FragmentShader::KernelSample samples[kMaxKernelSize];
+ KernelSample samples[kMaxKernelSize];
};
KernelSamples GenerateBlurInfo(BlurParameters parameters);
diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc
index 59618db..26449a2 100644
--- a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc
+++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc
@@ -9,6 +9,7 @@
#include "impeller/entity/contents/filters/gaussian_blur_filter_contents.h"
#include "impeller/entity/contents/texture_contents.h"
#include "impeller/entity/entity_playground.h"
+#include "impeller/geometry/color.h"
#include "impeller/geometry/geometry_asserts.h"
#include "impeller/renderer/testing/mocks.h"
@@ -51,6 +52,14 @@
return x;
}
+Scalar GetCoefficient(const Vector4& vec) {
+ return vec.z;
+}
+
+Vector2 GetUVOffset(const Vector4& vec) {
+ return vec.xy();
+}
+
fml::StatusOr<Scalar> CalculateSigmaForBlurRadius(
Scalar radius,
const Matrix& effect_transform) {
@@ -508,27 +517,24 @@
},
};
- GaussianBlurPipeline::FragmentShader::KernelSamples fast_kernel_samples =
+ GaussianBlurPipeline::FragmentShader::KernelSamples blur_info =
LerpHackKernelSamples(kernel_samples);
- EXPECT_EQ(fast_kernel_samples.sample_count, 3);
+ EXPECT_EQ(blur_info.sample_count, 3);
- GaussianBlurPipeline::FragmentShader::KernelSample* samples =
- kernel_samples.samples;
- GaussianBlurPipeline::FragmentShader::KernelSample* fast_samples =
- fast_kernel_samples.samples;
+ KernelSample* samples = kernel_samples.samples;
//////////////////////////////////////////////////////////////////////////////
// Check output kernel.
- EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.x, -1.3333333);
- EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.y, 0);
- EXPECT_FLOAT_EQ(fast_samples[0].coefficient, 0.3);
- EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.x, 0);
- EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.y, 0);
- EXPECT_FLOAT_EQ(fast_samples[1].coefficient, 0.4);
- EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.x, 1.3333333);
- EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.y, 0);
- EXPECT_FLOAT_EQ(fast_samples[2].coefficient, 0.3);
+ EXPECT_POINT_NEAR(GetUVOffset(blur_info.sample_data[0]),
+ Point(-1.3333333, 0));
+ EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[0]), 0.3);
+
+ EXPECT_POINT_NEAR(GetUVOffset(blur_info.sample_data[1]), Point(0, 0));
+ EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[1]), 0.4);
+
+ EXPECT_POINT_NEAR(GetUVOffset(blur_info.sample_data[2]), Point(1.333333, 0));
+ EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[2]), 0.3);
//////////////////////////////////////////////////////////////////////////////
// Check output of fast kernel versus original kernel.
@@ -549,11 +555,11 @@
}
};
Scalar fast_output =
- /*1st*/ lerp(fast_samples[0].uv_offset, data[0], data[1]) *
- fast_samples[0].coefficient +
- /*2nd*/ data[2] * fast_samples[1].coefficient +
- /*3rd*/ lerp(fast_samples[2].uv_offset, data[3], data[4]) *
- fast_samples[2].coefficient;
+ /*1st*/ lerp(GetUVOffset(blur_info.sample_data[0]), data[0], data[1]) *
+ GetCoefficient(blur_info.sample_data[0]) +
+ /*2nd*/ data[2] * GetCoefficient(blur_info.sample_data[1]) +
+ /*3rd*/ lerp(GetUVOffset(blur_info.sample_data[2]), data[3], data[4]) *
+ GetCoefficient(blur_info.sample_data[2]);
EXPECT_NEAR(original_output, fast_output, 0.01);
}
@@ -604,9 +610,9 @@
}
Scalar fast_output = 0.0;
- for (int i = 0; i < fast_kernel_samples.sample_count; ++i) {
- auto sample = fast_kernel_samples.samples[i];
- fast_output += sample.coefficient * sampler(sample.uv_offset);
+ for (int i = 0; i < fast_kernel_samples.sample_count; i++) {
+ fast_output += GetCoefficient(fast_kernel_samples.sample_data[i]) *
+ sampler(GetUVOffset(fast_kernel_samples.sample_data[i]));
}
EXPECT_NEAR(output, fast_output, 0.1);
diff --git a/impeller/entity/shaders/filters/gaussian.frag b/impeller/entity/shaders/filters/gaussian.frag
index f83a599..a6d58f8 100644
--- a/impeller/entity/shaders/filters/gaussian.frag
+++ b/impeller/entity/shaders/filters/gaussian.frag
@@ -11,16 +11,13 @@
layout(constant_id = 0) const float supports_decal = 1.0;
-struct KernelSample {
- vec2 uv_offset;
- float coefficient;
-};
-
uniform KernelSamples {
- int sample_count;
- KernelSample samples[50];
+ float sample_count;
+
+ // X, Y are uv offset and Z is Coefficient. W is padding.
+ vec4 sample_data[50];
}
-blur_info;
+kernel_samples;
f16vec4 Sample(f16sampler2D tex, vec2 coords) {
if (supports_decal == 1.0) {
@@ -36,11 +33,11 @@
void main() {
f16vec4 total_color = f16vec4(0.0hf);
- for (int i = 0; i < blur_info.sample_count; ++i) {
- float16_t coefficient = float16_t(blur_info.samples[i].coefficient);
- total_color +=
- coefficient * Sample(texture_sampler,
- v_texture_coords + blur_info.samples[i].uv_offset);
+ for (int i = 0; i < int(kernel_samples.sample_count); i++) {
+ float16_t coefficient = float16_t(kernel_samples.sample_data[i].z);
+ total_color += coefficient *
+ Sample(texture_sampler,
+ v_texture_coords + kernel_samples.sample_data[i].xy);
}
frag_color = total_color;
diff --git a/impeller/geometry/vector.h b/impeller/geometry/vector.h
index 31e894c..d1358bf 100644
--- a/impeller/geometry/vector.h
+++ b/impeller/geometry/vector.h
@@ -310,6 +310,8 @@
return *this + (v - *this) * t;
}
+ constexpr Vector2 xy() const { return Vector2(x, y); }
+
std::string ToString() const;
};
diff --git a/impeller/renderer/backend/gles/buffer_bindings_gles.cc b/impeller/renderer/backend/gles/buffer_bindings_gles.cc
index 4067498..71d186f 100644
--- a/impeller/renderer/backend/gles/buffer_bindings_gles.cc
+++ b/impeller/renderer/backend/gles/buffer_bindings_gles.cc
@@ -279,20 +279,20 @@
auto* buffer_data =
reinterpret_cast<const GLfloat*>(buffer_ptr + member.offset);
- std::vector<uint8_t> array_element_buffer;
- if (element_count > 1) {
- // When binding uniform arrays, the elements must be contiguous. Copy
- // the uniforms to a temp buffer to eliminate any padding needed by the
- // other backends.
- array_element_buffer.resize(member.size * element_count);
+ // When binding uniform arrays, the elements must be contiguous. Copy
+ // the uniforms to a temp buffer to eliminate any padding needed by the
+ // other backends if the array elements have padding.
+ std::vector<uint8_t> array_element_buffer_;
+ if (element_count > 1 && element_stride != member.size) {
+ array_element_buffer_.resize(member.size * element_count);
for (size_t element_i = 0; element_i < element_count; element_i++) {
- std::memcpy(array_element_buffer.data() + element_i * member.size,
+ std::memcpy(array_element_buffer_.data() + element_i * member.size,
reinterpret_cast<const char*>(buffer_data) +
element_i * element_stride,
member.size);
}
buffer_data =
- reinterpret_cast<const GLfloat*>(array_element_buffer.data());
+ reinterpret_cast<const GLfloat*>(array_element_buffer_.data());
}
switch (member.type) {
diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json
index c6e8eb4..65d3b54 100644
--- a/impeller/tools/malioc.json
+++ b/impeller/tools/malioc.json
@@ -2581,9 +2581,9 @@
"arith_cvt"
],
"shortest_path_cycles": [
- 0.109375,
+ 0.09375,
0.0,
- 0.109375,
+ 0.09375,
0.0,
0.0,
0.0,
@@ -2593,11 +2593,11 @@
"load_store"
],
"total_cycles": [
- 0.3125,
+ 0.265625,
0.09375,
- 0.3125,
+ 0.265625,
0.0,
- 2.0,
+ 1.0,
0.25,
0.25
]
@@ -2641,10 +2641,11 @@
0.0
],
"total_bound_pipelines": [
+ "arithmetic",
"load_store"
],
"total_cycles": [
- 1.6666666269302368,
+ 2.0,
2.0,
1.0
]