在Android端使用OpenGL的compute shader加速計算

compute shader的介紹和使用請參看博客《使用compute shader進行通用計算及示例》

在Android端使用compute shader需要OpenGL ES 3.1,即Android 5.0(API 21)以上的平臺。可能是OpenGL ES的原因,在Android上使用compute shader有幾個注意要點:

  • 生成texture時不能使用glTexImage2D, 需使用glTexStorage2D,然後使用glTexSubImage2D將數據賦予texture
  • 在寫shader時,輸入輸出image2D需要顯式地用限定符readonly或writeonly限定其讀寫權限,不然編譯shader程序會失敗
  • 注意生成texture時的level值需要與數據格式對應
// Image uniforms declared with explicit readonly / writeonly access qualifiers.
layout(binding = 0, rgba32f) readonly uniform  image2D input_image;
layout(binding = 1, rgba32f) writeonly uniform  image2D output_image;

在Android上使用OpenGL最方便的做法就是使用GLSurfaceView生成EGL環境,具體用法不清楚的話可以參看網上教程,有很多,這裏不再詳述。這個例子仍然是生成模擬數據,然後通過compute shader對數據做一些加法後再讀回。

先在Activity中設置EGL環境

/**
 * Activity that hosts the {@link GLSurfaceView} and installs a {@code ComputeRender}
 * as its renderer, so the renderer's callbacks run with an OpenGL ES 3.x context.
 */
public class ComputeActivity extends Activity {

    private GLSurfaceView glsv;

    @Override
    protected void onCreate(@Nullable Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_compute);
        glsv = findViewById(R.id.glsv);
        // Must be called before setRenderer(): selects the ES major version of the context.
        glsv.setEGLContextClientVersion(3);
        glsv.setRenderer(new ComputeRender(this));
        // Only render when the surface is created or a render is explicitly requested.
        glsv.setRenderMode(GLSurfaceView.RENDERMODE_WHEN_DIRTY);
    }

    /** Forwards the resume event so the view can restart its rendering thread. */
    @Override
    protected void onResume() {
        super.onResume();
        glsv.onResume();
    }

    /** Forwards the pause event so the view can pause its rendering thread. */
    @Override
    protected void onPause() {
        super.onPause();
        glsv.onPause();
    }
}

在ComputeRender中生成模擬數據

    /**
     * Builds the simulated input data: a heap {@link FloatBuffer} of {@code mSize}
     * elements holding 0, 1, 2, ... in order.
     *
     * @return the filled buffer, positioned at its first element
     */
    private FloatBuffer createInputBuffer() {
        final FloatBuffer buffer = FloatBuffer.allocate(mSize);
        int next = 0;
        // Fill every slot with its own index.
        while (buffer.hasRemaining()) {
            buffer.put((float) next);
            next++;
        }
        buffer.rewind();
        return buffer;
    }

生成FrameBuffer和Texture

    /**
     * Creates one framebuffer object and three immutable-storage RGBA32F textures of
     * {@code mWidth} x {@code mHeight}, and attaches texture {@code i} to colour
     * attachment {@code i} of the framebuffer.
     *
     * <p>The framebuffer is left bound to {@code GL_FRAMEBUFFER} on return. The generated
     * names are stored in {@code fFrame[0]} and {@code fTexture[0..2]}.
     */
    public void createEnvi() {
        GLES31.glGenFramebuffers(1, fFrame, 0);
        GLES31.glBindFramebuffer(GLES31.GL_FRAMEBUFFER, fFrame[0]);
        GLES31.glGenTextures(3, fTexture, 0);
        for (int i = 0; i < 3; i++) {
            GLES31.glBindTexture(GLES31.GL_TEXTURE_2D, fTexture[i]);
            // Immutable storage with a single mip level.
            GLES31.glTexStorage2D(GLES31.GL_TEXTURE_2D, 1, GLES31.GL_RGBA32F, mWidth, mHeight);
            // 32-bit float formats are not texture-filterable in core ES 3.1, so use
            // NEAREST to keep the texture complete.
            GLES31.glTexParameteri(GLES31.GL_TEXTURE_2D, GLES31.GL_TEXTURE_MIN_FILTER, GLES31.GL_NEAREST);
            GLES31.glTexParameteri(GLES31.GL_TEXTURE_2D, GLES31.GL_TEXTURE_MAG_FILTER, GLES31.GL_NEAREST);
            GLES31.glTexParameteri(GLES31.GL_TEXTURE_2D, GLES31.GL_TEXTURE_WRAP_S, GLES31.GL_CLAMP_TO_EDGE);
            GLES31.glTexParameteri(GLES31.GL_TEXTURE_2D, GLES31.GL_TEXTURE_WRAP_T, GLES31.GL_CLAMP_TO_EDGE);
            GLES31.glBindTexture(GLES31.GL_TEXTURE_2D, 0);
            // GL_COLOR_ATTACHMENTi is defined as GL_COLOR_ATTACHMENT0 + i.
            GLES31.glFramebufferTexture2D(GLES31.GL_FRAMEBUFFER, GLES31.GL_COLOR_ATTACHMENT0 + i,
                    GLES31.GL_TEXTURE_2D, fTexture[i], 0);
        }
    }

綁定數據和Texture

    /**
     * Uploads {@code data} as RGBA float texels into mip level 0 of the given texture,
     * covering the region from (0, 0) with size {@code mWidth} x {@code mHeight}.
     * The texture is left bound to {@code GL_TEXTURE_2D}.
     *
     * @param data  source texel data
     * @param texID name of the texture to fill
     */
    private void transferToTexture(Buffer data, int texID) {
        final int target = GLES31.GL_TEXTURE_2D;
        final int mipLevel = 0;
        final int originX = 0;
        final int originY = 0;

        GLES31.glBindTexture(target, texID);
        GLES31.glTexSubImage2D(target, mipLevel, originX, originY, mWidth, mHeight,
                GLES31.GL_RGBA, GLES31.GL_FLOAT, data);
    }

創建並鏈接shader程序

#version 310 es

// Each work group processes one 32x32 tile of texels.
layout (local_size_x = 32, local_size_y = 32, local_size_z = 1) in;

uniform float v[1000];
// Image types have no default precision in GLSL ES, so it is stated explicitly.
layout(binding = 0, rgba32f) readonly uniform highp image2D input_image;
layout(binding = 1, rgba32f) writeonly uniform highp image2D output_image;

// Per-work-group staging area, one slot per invocation of the tile.
shared vec4 scanline[32][32];

// Copies each texel from input_image to output_image, adding v[999] to the red channel.
void main(void)
{
    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
    // The shared array is sized for one work group, so it must be indexed with the
    // local ID; the global ID would run past its bounds for any group other than (0, 0).
    ivec2 tile = ivec2(gl_LocalInvocationID.xy);
    scanline[tile.x][tile.y] = imageLoad(input_image, pos);
    barrier();
    vec4 data = scanline[tile.x][tile.y];
    data.r = data.r + v[999];
    imageStore(output_image, pos, data);
}
    /**
     * Creates the compute program from the {@code compute.cs} asset and links it.
     * The program name is stored in {@code mComputeProg}. A link failure is reported
     * to the log together with the driver's info log.
     */
    private void initGLSL() {
        mComputeProg = GLES31.glCreateProgram();
        String source = ShaderUtils.loadFromAssetsFile("compute.cs", mContext.getResources());
        // NOTE(review): presumably compiles the source as a compute shader and attaches it
        // to the program; confirm against ShaderUtils.
        ShaderUtils.vglAttachShaderSource(mComputeProg, GLES31.GL_COMPUTE_SHADER, source);
        GLES31.glLinkProgram(mComputeProg);

        // glLinkProgram does not report failure by itself; query the status explicitly.
        int[] linkStatus = new int[1];
        GLES31.glGetProgramiv(mComputeProg, GLES31.GL_LINK_STATUS, linkStatus, 0);
        if (linkStatus[0] != GLES31.GL_TRUE) {
            Log.e(TAG, "compute program link failed: " + GLES31.glGetProgramInfoLog(mComputeProg));
        }
    }

執行計算

    /**
     * Runs the compute program once, reading from one texture and writing to another.
     *
     * @param inputTeture   texture bound read-only to image unit 0
     * @param outputTexture texture bound write-only to image unit 1
     */
    private void performCompute(int inputTeture, int outputTexture) {
        GLES31.glUseProgram(mComputeProg);
        GLES31.glUniform1fv(GLES31.glGetUniformLocation(mComputeProg, "v"), mValueSize, mValueBuffer);

        // Level 0, non-layered, RGBA32F, matching the image uniforms at bindings 0 and 1.
        GLES31.glBindImageTexture(0, inputTeture, 0, false, 0, GLES31.GL_READ_ONLY, GLES31.GL_RGBA32F);
        GLES31.glBindImageTexture(1, outputTexture, 0, false, 0, GLES31.GL_WRITE_ONLY, GLES31.GL_RGBA32F);

        // A single work group is launched.
        GLES31.glDispatchCompute(1, 1, 1);
        // The image-access bit orders these writes before image loads in a later dispatch;
        // the framebuffer bit orders them before reads through framebuffer attachments
        // (e.g. glReadPixels).
        GLES31.glMemoryBarrier(GLES31.GL_SHADER_IMAGE_ACCESS_BARRIER_BIT
                | GLES31.GL_FRAMEBUFFER_BARRIER_BIT);
    }

讀回數據

    /**
     * Runs the whole demo once per frame: creates the GL objects, uploads the input data
     * to texture 0, chains two compute passes (0 -> 1, 1 -> 2), reads all three textures
     * back through the framebuffer attachments, and then releases the GL objects.
     *
     * @param gl unused legacy GL10 handle supplied by GLSurfaceView
     */
    @Override
    public void onDrawFrame(GL10 gl) {
        createEnvi();
        transferToTexture(mInputBuffer, fTexture[0]);
        FloatBuffer a0 = FloatBuffer.allocate(mSize);
        FloatBuffer a1 = FloatBuffer.allocate(mSize);
        FloatBuffer a2 = FloatBuffer.allocate(mSize);

        // Monotonic clock: wall-clock time may jump while measuring an interval.
        long begin = System.nanoTime();

        performCompute(fTexture[0], fTexture[1]);
        performCompute(fTexture[1], fTexture[2]);
        // GL commands are queued asynchronously; wait for them to finish so that the
        // measured time covers execution and not just command submission.
        GLES31.glFinish();

        Log.w(TAG, "total compute spent:" + (System.nanoTime() - begin) / 1000000L);
        GLES31.glReadBuffer(GLES31.GL_COLOR_ATTACHMENT0);
        GLES31.glReadPixels(0, 0, mWidth, mHeight, GLES31.GL_RGBA, GLES31.GL_FLOAT, a0);
        GLES31.glReadBuffer(GLES31.GL_COLOR_ATTACHMENT1);
        GLES31.glReadPixels(0, 0, mWidth, mHeight, GLES31.GL_RGBA, GLES31.GL_FLOAT, a1);
        GLES31.glReadBuffer(GLES31.GL_COLOR_ATTACHMENT2);
        GLES31.glReadPixels(0, 0, mWidth, mHeight, GLES31.GL_RGBA, GLES31.GL_FLOAT, a2);
        // Kept as locals so the results can be inspected in a debugger.
        float[] o1 = a0.array();
        float[] o2 = a1.array();
        float[] o3 = a2.array();

        // createEnvi() allocates new GL objects on every call; release them here so that
        // repeated frames do not leak a framebuffer and three textures each time.
        GLES31.glDeleteFramebuffers(1, fFrame, 0);
        GLES31.glDeleteTextures(3, fTexture, 0);
    }

最後可以觀察o1,o2,o3三個數據是否正確。經測試,通過compute shader運行200次計算,也僅耗時5~7ms。因此用來做移動端深度學習加速完全可行。

全部代碼

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章