[ardour-dev] AMD64 SSE optimisation
John Rigg
ardev at sound-man.co.uk
Fri Dec 16 05:00:51 PST 2005
Hi
Here's a patch to allow Ardour's SSE optimising code to be used on x86_64.
I've got it running on my AMD64 system but haven't done any performance tests
yet.
If anyone notices any glaring errors or wrong assumptions in the patch,
please let me know. BTW this is just a basic patch and will break
compilation on 32-bit systems.
John
-------------- next part --------------
diff -uprN ardour-0.99/libs/ardour/globals.cc ardour-sse64/libs/ardour/globals.cc
--- ardour-0.99/libs/ardour/globals.cc 2005-09-22 04:26:06.000000000 +0100
+++ ardour-sse64/libs/ardour/globals.cc 2005-12-16 11:32:46.000000000 +0000
@@ -193,15 +193,15 @@ ARDOUR::init (AudioEngine& engine, bool
unsigned int use_sse = 0;
asm volatile (
- "mov $1, %%eax\n"
- "pushl %%ebx\n"
+ "movq $1, %%rax\n"
+ "pushq %%rbx\n"
"cpuid\n"
- "popl %%ebx\n"
- "andl $33554432, %%edx\n"
- "movl %%edx, %0\n"
+ "popq %%rbx\n"
+ "andq $33554432, %%rdx\n"
+ "movq %%rdx, %0\n"
: "=m" (use_sse)
:
- : "%eax", "%ecx", "%edx", "memory");
+ : "%rax", "%rcx", "%rdx", "memory");
if (use_sse) {
cerr << "Enabling SSE optimized routines" << endl;
diff -uprN ardour-0.99/libs/ardour/mix.cc ardour-sse64/libs/ardour/mix.cc
--- ardour-0.99/libs/ardour/mix.cc 2005-09-22 04:26:06.000000000 +0100
+++ ardour-sse64/libs/ardour/mix.cc 2005-12-16 11:32:28.000000000 +0000
@@ -31,7 +31,7 @@
float
debug_compute_peak (ARDOUR::Sample *buf, jack_nframes_t nsamples, float current)
{
- if ( ((int)buf % 16) != 0) {
+ if ( ((long int)buf % 16) != 0) {
cerr << "compute_peak(): buffer unaligned!" << endl;
}
@@ -41,7 +41,7 @@ debug_compute_peak (ARDOUR::Sample *buf,
void
debug_apply_gain_to_buffer (ARDOUR::Sample *buf, jack_nframes_t nframes, float gain)
{
- if ( ((int)buf % 16) != 0) {
+ if ( ((long int)buf % 16) != 0) {
cerr << "apply_gain_to_buffer(): buffer unaligned!" << endl;
}
@@ -51,11 +51,11 @@ debug_apply_gain_to_buffer (ARDOUR::Samp
void
debug_mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, jack_nframes_t nframes, float gain)
{
- if ( ((int)dst & 15) != 0) {
+ if ( ((long int)dst & 15) != 0) {
cerr << "mix_buffers_with_gain(): dst unaligned!" << endl;
}
- if ( ((int)dst & 15) != ((int)src & 15) ) {
+ if ( ((long int)dst & 15) != ((long int)src & 15) ) {
cerr << "mix_buffers_with_gain(): dst & src don't have the same alignment!" << endl;
mix_buffers_with_gain(dst, src, nframes, gain);
} else {
@@ -66,11 +66,11 @@ debug_mix_buffers_with_gain (ARDOUR::Sam
void
debug_mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, jack_nframes_t nframes)
{
- if ( ((int)dst & 15) != 0) {
+ if ( ((long int)dst & 15) != 0) {
cerr << "mix_buffers_no_gain(): dst unaligned!" << endl;
}
- if ( ((int)dst & 15) != ((int)src & 15) ) {
+ if ( ((long int)dst & 15) != ((long int)src & 15) ) {
cerr << "mix_buffers_no_gain(): dst & src don't have the same alignment!" << endl;
mix_buffers_no_gain(dst, src, nframes);
} else {
diff -uprN ardour-0.99/libs/ardour/sse_functions.s ardour-sse64/libs/ardour/sse_functions.s
--- ardour-0.99/libs/ardour/sse_functions.s 2005-09-08 20:41:11.000000000 +0100
+++ ardour-sse64/libs/ardour/sse_functions.s 2005-12-16 11:33:10.000000000 +0000
@@ -25,95 +25,95 @@
.type x86_sse_mix_buffers_with_gain,@function
x86_sse_mix_buffers_with_gain:
-#; 8(%ebp) = float *dst = %edi
-#; 12(%ebp) = float *src = %esi
-#; 16(%ebp) = long nframes = %ecx
-#; 20(%ebp) = float gain = st(0)
+#; 8(%rbp) = float *dst = %rdi
+#; 12(%rbp) = float *src = %rsi
+#; 16(%rbp) = long nframes = %rcx
+#; 20(%rbp) = float gain = %st(0)
- pushl %ebp
- movl %esp, %ebp
+ pushq %rbp
+ movq %rsp, %rbp
#; save the registers
-#; pushl %eax
- pushl %ebx
-#; pushl %ecx
- pushl %edi
- pushl %esi
+#; pushq %rax
+ pushq %rbx
+#; pushq %rcx
+ pushq %rdi
+ pushq %rsi
#; if nframes == 0, go to end
- movl 16(%ebp), %ecx #; nframes
- cmp $0, %ecx
+ movq 16(%rbp), %rcx #; nframes
+ cmp $0, %rcx
je .MBWG_END
#; Check for alignment
- movl 8(%ebp), %edi #; dst
- movl 12(%ebp), %esi #; src
+ movq 8(%rbp), %rdi #; dst
+ movq 12(%rbp), %rsi #; src
- movl %edi, %eax
- andl $12, %eax #; mask alignemnt offset
+ movq %rdi, %rax
+ andq $12, %rax #; mask alignment offset
- movl %esi, %ebx
- andl $12, %ebx #; mask alignment offset
+ movq %rsi, %rbx
+ andq $12, %rbx #; mask alignment offset
- cmp %eax, %ebx
+ cmp %rax, %rbx
jne .MBWG_NONALIGN #; if not aligned, calculate manually
#; if we are aligned
- cmp $0, %ebx
+ cmp $0, %rbx
jz .MBWG_SSE
#; Pre-loop, we need to run 1-3 frames "manually" without
#; SSE instructions
- movss 20(%ebp), %xmm1 #; xmm1
+ movss 20(%rbp), %xmm1 #; xmm1
.MBWG_PRELOOP:
- movss (%esi), %xmm0
+ movss (%rsi), %xmm0
mulss %xmm1, %xmm0
- addss (%edi), %xmm0
- movss %xmm0, (%edi)
+ addss (%rdi), %xmm0
+ movss %xmm0, (%rdi)
- addl $4, %edi #; dst++
- addl $4, %esi #; src++
- decl %ecx #; nframes--
+ addq $4, %rdi #; dst++
+ addq $4, %rsi #; src++
+ decq %rcx #; nframes--
jz .MBWG_END
-#; cmp $0, %ecx
+#; cmp $0, %rcx
#; je .MBWG_END #; if we run out of frames, go to end
- addl $4, %ebx
+ addq $4, %rbx
- cmp $16, %ebx #; test if we've reached 16 byte alignment
+ cmp $16, %rbx #; test if we've reached 16 byte alignment
jne .MBWG_PRELOOP
.MBWG_SSE:
- cmp $4, %ecx #; we know it's not zero, but if it's not >=4, then
+ cmp $4, %rcx #; we know it's not zero, but if it's not >=4, then
jnge .MBWG_NONALIGN #; we jump straight to the "normal" code
#; copy gain to fill %xmm1
- movss 20(%ebp), %xmm1
+ movss 20(%rbp), %xmm1
shufps $0x00, %xmm1, %xmm1
.MBWG_SSELOOP:
- movaps (%esi), %xmm0 #; source => xmm0
+ movaps (%rsi), %xmm0 #; source => xmm0
mulps %xmm1, %xmm0 #; apply gain to source
- addps (%edi), %xmm0 #; mix with destination
- movaps %xmm0, (%edi) #; copy result to destination
+ addps (%rdi), %xmm0 #; mix with destination
+ movaps %xmm0, (%rdi) #; copy result to destination
- addl $16, %edi #; dst+=4
- addl $16, %esi #; src+=4
+ addq $16, %rdi #; dst+=4
+ addq $16, %rsi #; src+=4
- subl $4, %ecx #; nframes-=4
- cmp $4, %ecx
+ subq $4, %rcx #; nframes-=4
+ cmp $4, %rcx
jge .MBWG_SSELOOP
- cmp $0, %ecx
+ cmp $0, %rcx
je .MBWG_END
#; if there are remaining frames, the nonalign code will do nicely
@@ -122,28 +122,28 @@ x86_sse_mix_buffers_with_gain:
.MBWG_NONALIGN:
#; not aligned!
- movss 20(%ebp), %xmm1 #; gain => xmm1
+ movss 20(%rbp), %xmm1 #; gain => xmm1
.MBWG_NONALIGNLOOP:
- movss (%esi), %xmm0
+ movss (%rsi), %xmm0
mulss %xmm1, %xmm0
- addss (%edi), %xmm0
- movss %xmm0, (%edi)
+ addss (%rdi), %xmm0
+ movss %xmm0, (%rdi)
- addl $4, %edi
- addl $4, %esi
+ addq $4, %rdi
+ addq $4, %rsi
- decl %ecx
+ decq %rcx
jnz .MBWG_NONALIGNLOOP
.MBWG_END:
- popl %esi
- popl %edi
-#; popl %ecx
- popl %ebx
-#; popl %eax
+ popq %rsi
+ popq %rdi
+#; popq %rcx
+ popq %rbx
+#; popq %rax
#; return
leave
@@ -160,42 +160,42 @@ x86_sse_mix_buffers_with_gain:
.type x86_sse_mix_buffers_no_gain,@function
x86_sse_mix_buffers_no_gain:
-#; 8(%ebp) = float *dst = %edi
-#; 12(%ebp) = float *src = %esi
-#; 16(%ebp) = long nframes = %ecx
+#; 8(%rbp) = float *dst = %rdi
+#; 12(%rbp) = float *src = %rsi
+#; 16(%rbp) = long nframes = %rcx
- pushl %ebp
- movl %esp, %ebp
+ pushq %rbp
+ movq %rsp, %rbp
#; save the registers
-#; pushl %eax
- pushl %ebx
-#; pushl %ecx
- pushl %edi
- pushl %esi
+#; pushq %rax
+ pushq %rbx
+#; pushq %rcx
+ pushq %rdi
+ pushq %rsi
#; the real function
#; if nframes == 0, go to end
- movl 16(%ebp), %ecx #; nframes
- cmp $0, %ecx
+ movq 16(%rbp), %rcx #; nframes
+ cmp $0, %rcx
je .MBNG_END
#; Check for alignment
- movl 8(%ebp), %edi #; dst
- movl 12(%ebp), %esi #; src
+ movq 8(%rbp), %rdi #; dst
+ movq 12(%rbp), %rsi #; src
- movl %edi, %eax
- andl $12, %eax #; mask alignemnt offset
+ movq %rdi, %rax
+ andq $12, %rax #; mask alignment offset
- movl %esi, %ebx
- andl $12, %ebx #; mask alignment offset
+ movq %rsi, %rbx
+ andq $12, %rbx #; mask alignment offset
- cmp %eax, %ebx
+ cmp %rax, %rbx
jne .MBNG_NONALIGN #; if not aligned, calculate manually
- cmp $0, %ebx
+ cmp $0, %rbx
je .MBNG_SSE
#; Pre-loop, we need to run 1-3 frames "manually" without
@@ -203,38 +203,38 @@ x86_sse_mix_buffers_no_gain:
.MBNG_PRELOOP:
- movss (%esi), %xmm0
- addss (%edi), %xmm0
- movss %xmm0, (%edi)
-
- addl $4, %edi #; dst++
- addl $4, %esi #; src++
- decl %ecx #; nframes--
+ movss (%rsi), %xmm0
+ addss (%rdi), %xmm0
+ movss %xmm0, (%rdi)
+
+ addq $4, %rdi #; dst++
+ addq $4, %rsi #; src++
+ decq %rcx #; nframes--
jz .MBNG_END
- addl $4, %ebx
+ addq $4, %rbx
- cmp $16, %ebx #; test if we've reached 16 byte alignment
+ cmp $16, %rbx #; test if we've reached 16 byte alignment
jne .MBNG_PRELOOP
.MBNG_SSE:
- cmp $4, %ecx #; if there are frames left, but less than 4
+ cmp $4, %rcx #; if there are frames left, but less than 4
jnge .MBNG_NONALIGN #; we can't run SSE
.MBNG_SSELOOP:
- movaps (%esi), %xmm0 #; source => xmm0
- addps (%edi), %xmm0 #; mix with destination
- movaps %xmm0, (%edi) #; copy result to destination
+ movaps (%rsi), %xmm0 #; source => xmm0
+ addps (%rdi), %xmm0 #; mix with destination
+ movaps %xmm0, (%rdi) #; copy result to destination
- addl $16, %edi #; dst+=4
- addl $16, %esi #; src+=4
+ addq $16, %rdi #; dst+=4
+ addq $16, %rsi #; src+=4
- subl $4, %ecx #; nframes-=4
- cmp $4, %ecx
+ subq $4, %rcx #; nframes-=4
+ cmp $4, %rcx
jge .MBNG_SSELOOP
- cmp $0, %ecx
+ cmp $0, %rcx
je .MBNG_END
#; if there are remaining frames, the nonalign code will do nicely
@@ -243,23 +243,23 @@ x86_sse_mix_buffers_no_gain:
.MBNG_NONALIGN:
#; not aligned!
- movss (%esi), %xmm0 #; src => xmm0
- addss (%edi), %xmm0 #; xmm0 += dst
- movss %xmm0, (%edi) #; xmm0 => dst
+ movss (%rsi), %xmm0 #; src => xmm0
+ addss (%rdi), %xmm0 #; xmm0 += dst
+ movss %xmm0, (%rdi) #; xmm0 => dst
- addl $4, %edi
- addl $4, %esi
+ addq $4, %rdi
+ addq $4, %rsi
- decl %ecx
+ decq %rcx
jnz .MBNG_NONALIGN
.MBNG_END:
- popl %esi
- popl %edi
-#; popl %ecx
- popl %ebx
-#; popl %eax
+ popq %rsi
+ popq %rdi
+#; popq %rcx
+ popq %rbx
+#; popq %rax
#; return
leave
@@ -276,110 +276,110 @@ x86_sse_mix_buffers_no_gain:
.type x86_sse_apply_gain_to_buffer,@function
x86_sse_apply_gain_to_buffer:
-#; 8(%ebp) = float *buf = %edi
-#; 12(%ebp) = long nframes = %ecx
-#; 16(%ebp) = float gain = st(0)
+#; 8(%rbp) = float *buf = %rdi
+#; 12(%rbp) = long nframes = %rcx
+#; 16(%rbp) = float gain = st(0)
- pushl %ebp
- movl %esp, %ebp
+ pushq %rbp
+ movq %rsp, %rbp
- #; save %edi
- pushl %edi
+ #; save %rdi
+ pushq %rdi
#; the real function
#; if nframes == 0, go to end
- movl 12(%ebp), %ecx #; nframes
- cmp $0, %ecx
+ movq 12(%rbp), %rcx #; nframes
+ cmp $0, %rcx
je .AG_END
#; create the gain buffer in %xmm1
- movss 16(%ebp), %xmm1
+ movss 16(%rbp), %xmm1
shufps $0x00, %xmm1, %xmm1
#; Check for alignment
- movl 8(%ebp), %edi #; buf
- movl %edi, %edx #; buf => %edx
- andl $12, %edx #; mask bits 1 & 2, result = 0, 4, 8 or 12
+ movq 8(%rbp), %rdi #; buf
+ movq %rdi, %rdx #; buf => %rdx
+ andq $12, %rdx #; mask bits 1 & 2, result = 0, 4, 8 or 12
jz .AG_SSE #; if buffer IS aligned
#; PRE-LOOP
#; we iterate 1-3 times, doing normal x87 float comparison
- #; so we reach a 16 byte aligned "buf" (=%edi) value
+ #; so we reach a 16 byte aligned "buf" (=%rdi) value
.AGLP_START:
#; Load next value from the buffer
- movss (%edi), %xmm0
+ movss (%rdi), %xmm0
mulss %xmm1, %xmm0
- movss %xmm0, (%edi)
+ movss %xmm0, (%rdi)
#; increment buffer, decrement counter
- addl $4, %edi #; buf++;
+ addq $4, %rdi #; buf++;
- decl %ecx #; nframes--
+ decq %rcx #; nframes--
jz .AG_END #; if we run out of frames, we go to the end
- addl $4, %edx #; one non-aligned byte less
- cmp $16, %edx
+ addq $4, %rdx #; one non-aligned byte less
+ cmp $16, %rdx
jne .AGLP_START #; if more non-aligned frames exist, we do a do-over
.AG_SSE:
- #; We have reached the 16 byte aligned "buf" ("edi") value
+ #; We have reached the 16 byte aligned "buf" ("rdi") value
#; Figure out how many loops we should do
- movl %ecx, %eax #; copy remaining nframes to %eax for division
- movl $0, %edx #; 0 the edx register
+ movq %rcx, %rax #; copy remaining nframes to %rax for division
+ movq $0, %rdx #; 0 the rdx register
- pushl %edi
- movl $4, %edi
- divl %edi #; %edx = remainder == 0
- popl %edi
+ pushq %rdi
+ movq $4, %rdi
+ divq %rdi #; %rdx = remainder == 0
+ popq %rdi
- #; %eax = SSE iterations
- cmp $0, %eax
+ #; %rax = SSE iterations
+ cmp $0, %rax
je .AGPOST_START
.AGLP_SSE:
- movaps (%edi), %xmm0
+ movaps (%rdi), %xmm0
mulps %xmm1, %xmm0
- movaps %xmm0, (%edi)
+ movaps %xmm0, (%rdi)
- addl $16, %edi
-#; subl $4, %ecx #; nframes-=4
+ addq $16, %rdi
+#; subq $4, %rcx #; nframes-=4
- decl %eax
+ decq %rax
jnz .AGLP_SSE
#; Next we need to post-process all remaining frames
- #; the remaining frame count is in %ecx
+ #; the remaining frame count is in %rcx
#; if no remaining frames, jump to the end
-#; cmp $0, %ecx
- andl $3, %ecx #; nframes % 4
+#; cmp $0, %rcx
+ andq $3, %rcx #; nframes % 4
je .AG_END
.AGPOST_START:
- movss (%edi), %xmm0
+ movss (%rdi), %xmm0
mulss %xmm1, %xmm0
- movss %xmm0, (%edi)
+ movss %xmm0, (%rdi)
#; increment buffer, decrement counter
- addl $4, %edi #; buf++;
+ addq $4, %rdi #; buf++;
- decl %ecx #; nframes--
+ decq %rcx #; nframes--
jnz .AGPOST_START #; if we run out of frames, we go to the end
.AG_END:
- popl %edi
+ popq %rdi
#; return
leave
@@ -400,24 +400,24 @@ abs_mask:
x86_sse_compute_peak:
-#; 8(%ebp) = float *buf = %edi
-#; 12(%ebp) = long nframes = %ecx
-#; 16(%ebp) = float current = st(0)
+#; 8(%rbp) = float *buf = %rdi
+#; 12(%rbp) = long nframes = %rcx
+#; 16(%rbp) = float current = st(0)
- pushl %ebp
- movl %esp, %ebp
+ pushq %rbp
+ movq %rsp, %rbp
- #; save %edi
- pushl %edi
+ #; save %rdi
+ pushq %rdi
#; the real function
#; Load "current" in xmm0
- movss 16(%ebp), %xmm0
+ movss 16(%rbp), %xmm0
#; if nframes == 0, go to end
- movl 12(%ebp), %ecx #; nframes
- cmp $0, %ecx
+ movq 12(%rbp), %rcx #; nframes
+ cmp $0, %rcx
je .CP_END
#; create the "abs" mask in %xmm2
@@ -426,58 +426,58 @@ x86_sse_compute_peak:
#; Check for alignment
- movl 8(%ebp), %edi #; buf
- movl %edi, %edx #; buf => %edx
- andl $12, %edx #; mask bits 1 & 2, result = 0, 4, 8 or 12
+ movq 8(%rbp), %rdi #; buf
+ movq %rdi, %rdx #; buf => %rdx
+ andq $12, %rdx #; mask bits 1 & 2, result = 0, 4, 8 or 12
jz .CP_SSE #; if buffer IS aligned
#; PRE-LOOP
#; we iterate 1-3 times, doing normal x87 float comparison
- #; so we reach a 16 byte aligned "buf" (=%edi) value
+ #; so we reach a 16 byte aligned "buf" (=%rdi) value
.LP_START:
#; Load next value from the buffer
- movss (%edi), %xmm1
+ movss (%rdi), %xmm1
andps %xmm2, %xmm1
maxss %xmm1, %xmm0
#; increment buffer, decrement counter
- addl $4, %edi #; buf++;
+ addq $4, %rdi #; buf++;
- decl %ecx #; nframes--
+ decq %rcx #; nframes--
jz .CP_END #; if we run out of frames, we go to the end
- addl $4, %edx #; one non-aligned byte less
- cmp $16, %edx
+ addq $4, %rdx #; one non-aligned byte less
+ cmp $16, %rdx
jne .LP_START #; if more non-aligned frames exist, we do a do-over
.CP_SSE:
- #; We have reached the 16 byte aligned "buf" ("edi") value
+ #; We have reached the 16 byte aligned "buf" ("rdi") value
#; Figure out how many loops we should do
- movl %ecx, %eax #; copy remaining nframes to %eax for division
+ movq %rcx, %rax #; copy remaining nframes to %rax for division
- shr $2,%eax #; unsigned divide by 4
+ shr $2,%rax #; unsigned divide by 4
jz .POST_START
- #; %eax = SSE iterations
+ #; %rax = SSE iterations
#; current maximum is at %xmm0, but we need to ..
shufps $0x00, %xmm0, %xmm0 #; shuffle "current" to all 4 FP's
- #;prefetcht0 16(%edi)
+ #;prefetcht0 16(%rdi)
.LP_SSE:
- movaps (%edi), %xmm1
+ movaps (%rdi), %xmm1
andps %xmm2, %xmm1
maxps %xmm1, %xmm0
- addl $16, %edi
+ addq $16, %rdi
- decl %eax
+ decq %rax
jnz .LP_SSE
#; Calculate the maximum value contained in the 4 FP's in %xmm0
@@ -491,31 +491,31 @@ x86_sse_compute_peak:
#; now every float in %xmm0 is the same value, current maximum value
#; Next we need to post-process all remaining frames
- #; the remaining frame count is in %ecx
+ #; the remaining frame count is in %rcx
#; if no remaining frames, jump to the end
- andl $3, %ecx #; nframes % 4
+ andq $3, %rcx #; nframes % 4
jz .CP_END
.POST_START:
- movss (%edi), %xmm1
+ movss (%rdi), %xmm1
andps %xmm2, %xmm1
maxss %xmm1, %xmm0
- addl $4, %edi #; buf++;
+ addq $4, %rdi #; buf++;
- decl %ecx #; nframes--;
+ decq %rcx #; nframes--;
jnz .POST_START
.CP_END:
#; Load the value from xmm0 to the float stack for returning
- movss %xmm0, 16(%ebp)
- flds 16(%ebp)
+ movss %xmm0, 16(%rbp)
+ flds 16(%rbp)
- popl %edi
+ popq %rdi
#; return
leave
diff -uprN ardour-0.99/SConstruct ardour-sse64/SConstruct
--- ardour-0.99/SConstruct 2005-09-24 03:53:13.000000000 +0100
+++ ardour-sse64/SConstruct 2005-12-16 11:32:00.000000000 +0000
@@ -36,8 +36,8 @@ opts.AddOptions(
PathOption('PREFIX', 'Set the install "prefix"', '/usr/local'),
BoolOption('VST', 'Compile with support for VST', 0),
BoolOption('VERSIONED', 'Add version information to ardour/gtk executable name inside the build directory', 0),
- BoolOption('USE_SSE_EVERYWHERE', 'Ask the compiler to use x86/SSE instructions and also our hand-written x86/SSE optimizations when possible (off by default)', 0),
- BoolOption('BUILD_SSE_OPTIMIZATIONS', 'Use our hand-written x86/SSE optimizations when possible (off by default)', 0)
+ BoolOption('USE_SSE_EVERYWHERE', 'Ask the compiler to use x86/SSE instructions and also our hand-written x86/SSE optimizations when possible (off by default)', 1),
+ BoolOption('BUILD_SSE_OPTIMIZATIONS', 'Use our hand-written x86/SSE optimizations when possible (off by default)', 1)
)
More information about the Ardour-Dev
mailing list