jsimd_dspr2_asm.h 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. /*
  2. * MIPS DSPr2 optimizations for libjpeg-turbo
  3. *
  4. * Copyright (C) 2013, MIPS Technologies, Inc., California.
  5. * Copyright (C) 2018, Matthieu Darbois.
  6. * All Rights Reserved.
  7. * Authors: Teodora Novkovic (teodora.novkovic@imgtec.com)
  8. * Darko Laus (darko.laus@imgtec.com)
  9. * This software is provided 'as-is', without any express or implied
  10. * warranty. In no event will the authors be held liable for any damages
  11. * arising from the use of this software.
  12. *
  13. * Permission is granted to anyone to use this software for any purpose,
  14. * including commercial applications, and to alter it and redistribute it
  15. * freely, subject to the following restrictions:
  16. *
  17. * 1. The origin of this software must not be misrepresented; you must not
  18. * claim that you wrote the original software. If you use this software
  19. * in a product, an acknowledgment in the product documentation would be
  20. * appreciated but is not required.
  21. * 2. Altered source versions must be plainly marked as such, and must not be
  22. * misrepresented as being the original software.
  23. * 3. This notice may not be removed or altered from any source distribution.
  24. */
  /*
   * Symbolic names for the general-purpose registers $0-$31.
   * fp and s8 are two names for the same register ($30).
   */
  #define zero  $0
  #define AT    $1

  #define v0    $2
  #define v1    $3

  #define a0    $4
  #define a1    $5
  #define a2    $6
  #define a3    $7

  #define t0    $8
  #define t1    $9
  #define t2    $10
  #define t3    $11
  #define t4    $12
  #define t5    $13
  #define t6    $14
  #define t7    $15

  #define s0    $16
  #define s1    $17
  #define s2    $18
  #define s3    $19
  #define s4    $20
  #define s5    $21
  #define s6    $22
  #define s7    $23

  #define t8    $24
  #define t9    $25

  #define k0    $26
  #define k1    $27

  #define gp    $28
  #define sp    $29
  #define fp    $30
  #define s8    $30
  #define ra    $31

  /* Symbolic names for the floating-point registers $f0-$f31. */
  #define f0    $f0
  #define f1    $f1
  #define f2    $f2
  #define f3    $f3
  #define f4    $f4
  #define f5    $f5
  #define f6    $f6
  #define f7    $f7
  #define f8    $f8
  #define f9    $f9
  #define f10   $f10
  #define f11   $f11
  #define f12   $f12
  #define f13   $f13
  #define f14   $f14
  #define f15   $f15
  #define f16   $f16
  #define f17   $f17
  #define f18   $f18
  #define f19   $f19
  #define f20   $f20
  #define f21   $f21
  #define f22   $f22
  #define f23   $f23
  #define f24   $f24
  #define f25   $f25
  #define f26   $f26
  #define f27   $f27
  #define f28   $f28
  #define f29   $f29
  #define f30   $f30
  #define f31   $f31
  /*
   * HIDDEN_SYMBOL(symbol) - expands to a ".hidden symbol;" directive when
   * __ELF__ is defined, and to nothing otherwise.
   */
  #if defined(__ELF__)
  #  define HIDDEN_SYMBOL(symbol)  .hidden symbol;
  #else
  #  define HIDDEN_SYMBOL(symbol)
  #endif
  /*
   * LEAF_MIPS32R2(symbol) - open the definition of a leaf routine for MIPS32r2
   *
   * Emits, in order: the global (and, via HIDDEN_SYMBOL, possibly hidden)
   * declaration of `symbol`, 4-byte alignment, the function type and .ent
   * markers, the label itself and a zero-sized .frame description.  The
   * current assembler options are then pushed and the body is assembled with
   * arch=mips32r2, noreorder and noat.  The matching END(symbol) pops those
   * options again.
   */
  #define LEAF_MIPS32R2(symbol)            \
          .globl  symbol;                  \
          HIDDEN_SYMBOL(symbol)            \
          .align  2;                       \
          .type   symbol, @function;       \
          .ent    symbol, 0;               \
  symbol: .frame  sp, 0, ra;               \
          .set    push;                    \
          .set    arch=mips32r2;           \
          .set    noreorder;               \
          .set    noat;
  /*
   * LEAF_DSPR2(symbol) - open the definition of a leaf routine for MIPS DSPr2
   *
   * Identical to LEAF_MIPS32R2(symbol), with ".set dspr2" appended so that
   * the routine body is assembled with the dspr2 option set.
   */
  #define LEAF_DSPR2(symbol)               \
          LEAF_MIPS32R2(symbol)            \
          .set    dspr2;
  /*
   * END(function) - close a routine opened with one of the LEAF_* macros
   *
   * Pops the assembler options pushed by the prologue, emits the .end marker
   * and records the symbol size as the distance from the function label to
   * the current location.
   */
  #define END(function)                    \
          .set    pop;                     \
          .end    function;                \
          .size   function, .-function
  /*
   * CHECK_STACK_OFFSET regs_num, stack_offset
   *
   * Assembly-time check that stack_offset is big enough for storing/restoring
   * regs_num registers to/from the stack.  Each register takes 4 bytes, so
   * regs_num * 4 bytes are needed.  Since the MIPS ABI allows usage of the
   * first 16 bytes of the stack frame (reserved for the input arguments of
   * the function, which are already held in a0-a3), that space is counted as
   * available too.  The requirement is therefore:
   *
   *     stack_offset >= regs_num * 4 - 16
   *
   * Assembly stops with an error when the requirement is not met.
   *
   * Both arguments are parenthesized on expansion, so either of them may be
   * an expression (e.g. 2+3) without changing how the comparison is grouped.
   */
  .macro CHECK_STACK_OFFSET regs_num, stack_offset
    .if (\stack_offset) < ((\regs_num) * 4 - 16)
      .error "Stack offset too small."
    .endif
  .endm
  /*
   * SAVE_REGS_ON_STACK stack_offset, r1 [, r2, ..., r14]
   *
   * Saves a set of registers on the stack.  sp is first decremented by
   * stack_offset bytes (nothing is emitted when stack_offset is 0), then the
   * listed registers are stored in consecutive 4-byte slots starting at
   * 0(sp).  The maximum number of registers that can be saved is limited to
   * 14 (a0-a3, v0-v1 and s0-s7).  r1 is mandatory; any later register
   * argument left at its default of 0 is skipped.
   *
   * stack_offset must be non-negative and a multiple of 4, and for the fifth
   * and every later register it must be big enough as described by the
   * CHECK_STACK_OFFSET macro; otherwise assembly stops with an error.  The
   * first four slots are not checked because they need at most 16 bytes,
   * which CHECK_STACK_OFFSET accepts even for a stack_offset of 0.
   *
   * stack_offset is parenthesized wherever it is expanded, so an expression
   * such as 8+4 is validated and negated as a whole.
   *
   * This macro is intended to be used in combination with the
   * RESTORE_REGS_FROM_STACK macro, called with the same arguments.  Example:
   *     SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
   *     RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
   */
  .macro SAVE_REGS_ON_STACK stack_offset=0, r1, \
                            r2=0, r3=0, r4=0, r5=0, \
                            r6=0, r7=0, r8=0, r9=0, \
                            r10=0, r11=0, r12=0, r13=0, \
                            r14=0
    /* Reject negative offsets and offsets that are not a multiple of 4. */
    .if ((\stack_offset) < 0) || ((\stack_offset) & 3)
      .error "Stack offset must be non-negative and a multiple of 4."
    .endif
    /* Allocate the frame; the whole argument is negated, not its first term. */
    .if (\stack_offset) != 0
      addiu   sp, sp, -(\stack_offset)
    .endif
    /* Slots 1-4: always fit, no size check required. */
    sw      \r1, 0(sp)
    .if \r2 != 0
      sw      \r2, 4(sp)
    .endif
    .if \r3 != 0
      sw      \r3, 8(sp)
    .endif
    .if \r4 != 0
      sw      \r4, 12(sp)
    .endif
    /* Slots 5-14: verify the frame size before every store. */
    .if \r5 != 0
      CHECK_STACK_OFFSET 5, \stack_offset
      sw      \r5, 16(sp)
    .endif
    .if \r6 != 0
      CHECK_STACK_OFFSET 6, \stack_offset
      sw      \r6, 20(sp)
    .endif
    .if \r7 != 0
      CHECK_STACK_OFFSET 7, \stack_offset
      sw      \r7, 24(sp)
    .endif
    .if \r8 != 0
      CHECK_STACK_OFFSET 8, \stack_offset
      sw      \r8, 28(sp)
    .endif
    .if \r9 != 0
      CHECK_STACK_OFFSET 9, \stack_offset
      sw      \r9, 32(sp)
    .endif
    .if \r10 != 0
      CHECK_STACK_OFFSET 10, \stack_offset
      sw      \r10, 36(sp)
    .endif
    .if \r11 != 0
      CHECK_STACK_OFFSET 11, \stack_offset
      sw      \r11, 40(sp)
    .endif
    .if \r12 != 0
      CHECK_STACK_OFFSET 12, \stack_offset
      sw      \r12, 44(sp)
    .endif
    .if \r13 != 0
      CHECK_STACK_OFFSET 13, \stack_offset
      sw      \r13, 48(sp)
    .endif
    .if \r14 != 0
      CHECK_STACK_OFFSET 14, \stack_offset
      sw      \r14, 52(sp)
    .endif
  .endm
  /*
   * RESTORE_REGS_FROM_STACK stack_offset, r1 [, r2, ..., r14]
   *
   * Restores a set of registers from the stack.  The listed registers are
   * loaded from consecutive 4-byte slots starting at 0(sp), then
   * stack_offset bytes are added back to sp (nothing is emitted when
   * stack_offset is 0).  The maximum number of registers that can be
   * restored is limited to 14 (a0-a3, v0-v1 and s0-s7).  r1 is mandatory;
   * any later register argument left at its default of 0 is skipped.
   *
   * stack_offset must be non-negative and a multiple of 4, and for the fifth
   * and every later register it must be big enough as described by the
   * CHECK_STACK_OFFSET macro; otherwise assembly stops with an error.
   *
   * stack_offset is parenthesized wherever it is expanded, so an expression
   * such as 8+4 is validated and applied as a whole.
   *
   * This macro is intended to be used in combination with the
   * SAVE_REGS_ON_STACK macro, called with the same arguments.  Example:
   *     SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
   *     RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
   */
  .macro RESTORE_REGS_FROM_STACK stack_offset=0, r1, \
                                 r2=0, r3=0, r4=0, r5=0, \
                                 r6=0, r7=0, r8=0, r9=0, \
                                 r10=0, r11=0, r12=0, r13=0, \
                                 r14=0
    /* Reject negative offsets and offsets that are not a multiple of 4. */
    .if ((\stack_offset) < 0) || ((\stack_offset) & 3)
      .error "Stack offset must be non-negative and a multiple of 4."
    .endif
    /* Slots 1-4: always fit, no size check required. */
    lw      \r1, 0(sp)
    .if \r2 != 0
      lw      \r2, 4(sp)
    .endif
    .if \r3 != 0
      lw      \r3, 8(sp)
    .endif
    .if \r4 != 0
      lw      \r4, 12(sp)
    .endif
    /* Slots 5-14: verify the frame size before every load. */
    .if \r5 != 0
      CHECK_STACK_OFFSET 5, \stack_offset
      lw      \r5, 16(sp)
    .endif
    .if \r6 != 0
      CHECK_STACK_OFFSET 6, \stack_offset
      lw      \r6, 20(sp)
    .endif
    .if \r7 != 0
      CHECK_STACK_OFFSET 7, \stack_offset
      lw      \r7, 24(sp)
    .endif
    .if \r8 != 0
      CHECK_STACK_OFFSET 8, \stack_offset
      lw      \r8, 28(sp)
    .endif
    .if \r9 != 0
      CHECK_STACK_OFFSET 9, \stack_offset
      lw      \r9, 32(sp)
    .endif
    .if \r10 != 0
      CHECK_STACK_OFFSET 10, \stack_offset
      lw      \r10, 36(sp)
    .endif
    .if \r11 != 0
      CHECK_STACK_OFFSET 11, \stack_offset
      lw      \r11, 40(sp)
    .endif
    .if \r12 != 0
      CHECK_STACK_OFFSET 12, \stack_offset
      lw      \r12, 44(sp)
    .endif
    .if \r13 != 0
      CHECK_STACK_OFFSET 13, \stack_offset
      lw      \r13, 48(sp)
    .endif
    .if \r14 != 0
      CHECK_STACK_OFFSET 14, \stack_offset
      lw      \r14, 52(sp)
    .endif
    /* Release the frame only after every register has been reloaded. */
    .if (\stack_offset) != 0
      addiu   sp, sp, (\stack_offset)
    .endif
  .endm