Есть вот такая функция для тестирования FPU:
float __attribute__ ((noinline)) testfpu(float a) {
	return __builtin_sqrtf(__builtin_sqrtf(__builtin_sqrtf(a)));
}
/ (fcn) sym.testfpu (thumb) 80
|           ; CALL XREF from 0x08001cb6 (sym.testfpu)
|           0x08001c50      b1eec07a       vsqrt.f32 s14, s0
|           0x08001c54      08b5           push {r3, lr}
|           0x08001c56      b4ee477a       vcmp.f32 s14, s14
|           0x08001c5a      f1ee10fa       vmrs apsr_nzcv, fpscr
|       ,=< 0x08001c5e      0ed1           bne 0x8001c7e
|       |   ; JMP XREF from 0x08001c86 (sym.testfpu)
|      .--> 0x08001c60      f1eec77a       vsqrt.f32 s15, s14
|      ||   0x08001c64      f4ee677a       vcmp.f32 s15, s15
|      ||   0x08001c68      f1ee10fa       vmrs apsr_nzcv, fpscr
|     ,===< 0x08001c6c      11d1           bne 0x8001c92
|     |||   ; JMP XREF from 0x08001c9e (sym.testfpu)
|    .----> 0x08001c6e      b1eee70a       vsqrt.f32 s0, s15
|    ||||   0x08001c72      b4ee400a       vcmp.f32 s0, s0
|    ||||   0x08001c76      f1ee10fa       vmrs apsr_nzcv, fpscr
|   ,=====< 0x08001c7a      05d1           bne 0x8001c88
|   |||||   ; JMP XREF from 0x08001c90 (sym.testfpu)
|  .------> 0x08001c7c      08bd           pop {r3, pc}
|  |||||`-> 0x08001c7e      08f067f9       bl sym.sqrtf
|  |||||    0x08001c82      b0ee407a       vmov.f32 s14, s0
|  ||||`==< 0x08001c86      ebe7           b 0x8001c60
|  |`-----> 0x08001c88      b0ee670a       vmov.f32 s0, s15
|  | ||     0x08001c8c      08f060f9       bl sym.sqrtf
|  `======< 0x08001c90      f4e7           b 0x8001c7c
|    |`---> 0x08001c92      b0ee470a       vmov.f32 s0, s14
|    |      0x08001c96      08f05bf9       bl sym.sqrtf
|    |      0x08001c9a      f0ee407a       vmov.f32 s15, s0
\    `====< 0x08001c9e      e6e7           b 0x8001c6e
Флаги компиляции: -O2 -mfpu=vfpv4-d16 -mfloat-abi=hard
PS: Забавно, что без использования FPU код получается куда проще:
/ (fcn) sym.testfpu (thumb) 18
|           ; CALL XREF from 0x08001b30 (sym.testfpu)
|           0x08001b0c      08b5           push {r3, lr}
|           0x08001b0e      08f0fff8       bl sym.sqrtf
|           0x08001b12      08f0fdf8       bl sym.sqrtf
|           0x08001b16      bde80840       pop.w {r3, lr}
\       ,=< 0x08001b1a      08f0f9b8       b.w sym.sqrtf

