xbox上的Pixel Shader性能

| 我有一个与XNA一起使用的pixelshader(下图)。在我的笔记本电脑(cr脚的图形卡)上,它有点生涩,但是还可以。我刚刚尝试在xbox上运行它,这太可怕了! 游戏没有任何内容(它只是一个分形渲染器),因此它必须是引起问题的像素着色器。我还认为这是PS代码,因为我降低了迭代次数,没关系。我也检查过了,GC增量为零。 在xbox上有没有禁止的HLSL函数?我在这里一定做错了,性能不会那么差!
#include \"FractalBase.fxh\"

float ZPower;

float3 Colour;
float3 ColourScale;

float ComAbs(float2 Arg)
{
    return sqrt(Arg.x * Arg.x + Arg.y * Arg.y);
}

float2 ComPow(float2 Arg, float Power)
{
    float Mod = pow(Arg.x * Arg.x + Arg.y * Arg.y, Power / 2);
    float Ang = atan2(Arg.y, Arg.x) * Power;

    return float2(Mod * cos(Ang), Mod * sin(Ang));
}

float4 FractalPixelShader(float2 texCoord : TEXCOORD0, uniform float Iterations) : COLOR0
{
    float2 c = texCoord.xy;
    float2 z = 0;

    float i;

    float oldBailoutTest = 0;
    float bailoutTest = 0;

    for(i = 0; i < Iterations; i++)
    {
        z = ComPow(z, ZPower) + c;

        bailoutTest = z.x * z.x + z.y * z.y;

        if(bailoutTest >= ZPower * ZPower)
        {
            break;
        }

        oldBailoutTest = bailoutTest;
    }

    float normalisedIterations = i / Iterations;
    float factor = (bailoutTest - oldBailoutTest) / (ZPower * ZPower - oldBailoutTest);

    float4 Result = normalisedIterations + (1 / factor / Iterations);

    Result = (i >= Iterations - 1) ? float4(0.0, 0.0, 0.0, 1.0) : float4(Result.x * Colour.r * ColourScale.x, Result.y * Colour.g * ColourScale.y, Result.z * Colour.b * ColourScale.z, 1);

    return Result;
}

technique Technique1
{
    pass
    {
        VertexShader = compile vs_3_0 SpriteVertexShader();
        PixelShader = compile ps_3_0 FractalPixelShader(128);
    }
}
以下是FractalBase.fxh:
float4x4 MatrixTransform : register(vs, c0);

float2 Pan;
float Zoom;
float Aspect;

void SpriteVertexShader(inout float4 Colour    : COLOR0,
                        inout float2 texCoord : TEXCOORD0,
                        inout float4 position : SV_Position)
{
    position = mul(position, MatrixTransform);

    // Convert the position into from screen space into complex coordinates
    texCoord = (position) * Zoom * float2(1, Aspect) - float2(Pan.x, -Pan.y);
}
编辑我确实尝试通过使用大量的removing来去除条件,但是当我这样做时,我得到了很多文物(而不是“属于博物馆的那种”!)。我改变了周围的情况,并修复了一些逻辑错误,但是关键是将GreaterThan结果乘以1 + epsilon,以解决仅使0.9999 = 0(整数)的舍入错误。请参阅下面的固定代码:
#include \"FractalBase.fxh\"

float ZPower;

float3 Colour;
float3 ColourScale;

float ComAbs(float2 Arg)
{
    return sqrt(Arg.x * Arg.x + Arg.y * Arg.y);
}

float2 ComPow(float2 Arg, float Power)
{
    float Mod = pow(Arg.x * Arg.x + Arg.y * Arg.y, Power / 2);
    float Ang = atan2(Arg.y, Arg.x) * Power;

    return float2(Mod * cos(Ang), Mod * sin(Ang));
}

float GreaterThan(float x, float y)
{
    return ((x - y) / (2 * abs(x - y)) + 0.5) * 1.001;
}

float4 FractalPixelShader(float2 texCoord : TEXCOORD0, uniform float Iterations) : COLOR0
{
    float2 c = texCoord.xy;
    float2 z = 0;

    int i;

    float oldBailoutTest = 0;
    float bailoutTest = 0;

    int KeepGoing = 1;

    int DoneIterations = Iterations;

    int Bailout = 0;

    for(i = 0; i < Iterations; i++)
    {
        z = lerp(z, ComPow(z, ZPower) + c, KeepGoing);

        bailoutTest = lerp(bailoutTest, z.x * z.x + z.y * z.y, KeepGoing);

        Bailout = lerp(Bailout, GreaterThan(bailoutTest, ZPower * ZPower), -abs(Bailout) + 1);

        KeepGoing = lerp(KeepGoing, 0.0, Bailout);
        DoneIterations = lerp(DoneIterations, min(i, DoneIterations), Bailout);

        oldBailoutTest = lerp(oldBailoutTest, bailoutTest, KeepGoing);
    }

    float normalisedIterations = DoneIterations / Iterations;
    float factor = (bailoutTest - oldBailoutTest) / (ZPower * ZPower - oldBailoutTest);

    float4 Result = normalisedIterations + (1 / factor / Iterations);

    Result = (DoneIterations >= Iterations - 1) ? float4(0.0, 0.0, 0.0, 1.0) : float4(Result.x * Colour.r * ColourScale.x, Result.y * Colour.g * ColourScale.y, Result.z * Colour.b * ColourScale.z, 1);

    return Result;
}

technique Technique1
{
    pass
    {
        VertexShader = compile vs_3_0 SpriteVertexShader();
        PixelShader = compile ps_3_0 FractalPixelShader(128);
    }
}
    
已邀请:
xbox的块大小非常大,因此xbox上的分支并不总是那么大。同样,编译器并非总是最有效地发出代码似乎使用的动态分支。 查看分支属性:http://msdn.microsoft.com/zh-cn/library/bb313972%28v=xnagamestudio.31%29.aspx 另外,如果您提早进行了救助,PC是否会与Xbox更加相似? 请记住,现代图形卡实际上实际上比Xenon单元要快得多。     

要回复问题请先登录注册