;**************************************************************************
;*                     This file is part of the                           *
;*                      Mpxplay - audio player.                           *
;*                  The source code of Mpxplay is                         *
;*        (C) copyright 1998-2011 by PDSoft (Attila Padar)                *
;*                  http://mpxplay.sourceforge.net                        *
;*                    email: mpxplay@freemail.hu                          *
;**************************************************************************
;*  This program is distributed in the hope that it will be useful,       *
;*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
;*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *
;*  Please contact with the author (with me) if you want to use           *
;*  or modify this source.                                                *
;**************************************************************************
;mmx routines for APE decoder

.586p
.mmx
.model flat

		PUBLIC	apedec_mmx_getmmxavailable_
                PUBLIC  apedec_mmx_cnn_adapt_
                PUBLIC  apedec_mmx_cnn_dotprod_

_TEXTMMX	SEGMENT	DWORD PUBLIC USE32 'CODE'

;--------------------------------------------------------------------------
; apedec_mmx_getmmxavailable / apedec_mmx_getmmxavailable_
; Reports whether the CPU advertises MMX support.
; In:    nothing
; Out:   eax = 1 if CPUID exists and CPUID(1).EDX bit 23 is set, else 0
; Keeps: ebx, ecx, edx and the caller's EFLAGS (the ID-bit probe is undone
;        before returning)
;--------------------------------------------------------------------------
APEMMX_EFLAGS_ID	EQU	200000H		; EFLAGS bit 21: writable only if CPUID exists
APEMMX_CPUID_EDX_MMX	EQU	800000H		; CPUID function 1, EDX bit 23: MMX

apedec_mmx_getmmxavailable proc c public
apedec_mmx_getmmxavailable_:
 push ebx                       ; CPUID overwrites eax, ebx, ecx and edx
 push ecx
 push edx
 pushfd                         ; keep the caller's EFLAGS for the final popfd
 pushfd
 pop  eax
 mov  ecx,eax                   ; ecx = unmodified EFLAGS image
 xor  eax,APEMMX_EFLAGS_ID
 push eax
 popfd                          ; try to invert the ID bit
 pushfd
 pop  eax
 xor  eax,ecx                   ; eax = 0 if the bit did not stick (no CPUID)
 jz   mmxprobe_done             ; result is already 0
  mov  eax,1
  CPUID                         ; edx = standard feature flags
  xor  eax,eax
  test edx,APEMMX_CPUID_EDX_MMX
  setnz al                      ; eax = 1 when the MMX feature bit is set
 mmxprobe_done:
 popfd                          ; undo the ID-bit toggle
 pop  edx
 pop  ecx
 pop  ebx
 ret
apedec_mmx_getmmxavailable endp

;--------------------------------------------------------------------------
; apedec_mmx_cnn_adapt_  (register-argument entry point)
; Adds or subtracts one array of 16-bit words to/from another, in place,
; 16 words (32 bytes) per loop pass, using wrapping paddw/psubw.
; In:    eax = destination array (read and written)
;        edx = source array (read only)
;        ebx = direction: >0 add, <0 subtract, 0 do nothing
;        ecx = word count; only ecx/16 whole groups are processed
; Out:   nothing
; Clobb: eax, edx (advanced past the processed data), ecx, mm0-mm3, flags
;--------------------------------------------------------------------------
apedec_mmx_cnn_adapt_:
 shr   ecx,4                    ; ecx = number of 16-word groups
 jz    cnnadapt_ret             ; less than one group: nothing to do
 test  ebx,ebx
 jz    cnnadapt_ret             ; direction 0: leave the data untouched
 js    cnnadapt_minus
 cnnadapt_plus:                 ; ebx > 0 : dest[i] += src[i]
  movq  mm0,[eax]
  paddw mm0,[edx]
  movq  [eax],mm0
  movq  mm1,[eax+8]
  paddw mm1,[edx+8]
  movq  [eax+8],mm1
  movq  mm2,[eax+16]
  paddw mm2,[edx+16]
  movq  [eax+16],mm2
  movq  mm3,[eax+24]
  paddw mm3,[edx+24]
  movq  [eax+24],mm3
  lea   eax,[eax+32]
  lea   edx,[edx+32]
  sub   ecx,1
  jnz   cnnadapt_plus
 emms                           ; leave MMX state before returning
 ret
 cnnadapt_minus:                ; ebx < 0 : dest[i] -= src[i]
  movq  mm0,[eax]
  psubw mm0,[edx]
  movq  [eax],mm0
  movq  mm1,[eax+8]
  psubw mm1,[edx+8]
  movq  [eax+8],mm1
  movq  mm2,[eax+16]
  psubw mm2,[edx+16]
  movq  [eax+16],mm2
  movq  mm3,[eax+24]
  psubw mm3,[edx+24]
  movq  [eax+24],mm3
  lea   eax,[eax+32]
  lea   edx,[edx+32]
  sub   ecx,1
  jnz   cnnadapt_minus
 emms                           ; leave MMX state before returning
 cnnadapt_ret:
 ret

;--------------------------------------------------------------------------
; apedec_mmx_cnn_dotprod_  (register-argument entry point)
; Dot product of two arrays of signed 16-bit words, 16 words (32 bytes)
; per loop pass. pmaddwd forms the pairwise products; they are summed in
; two 32-bit lanes of mm7, which are added together at the end.
; In:    eax = first array
;        edx = second array
;        ebx = word count; only ebx/16 whole groups are processed
; Out:   eax = 32-bit sum of products (0 when ebx < 16)
; Clobb: edx (advanced past the processed data), ebx, mm0-mm3, mm6, mm7,
;        flags
;--------------------------------------------------------------------------
apedec_mmx_cnn_dotprod_:
 shr  ebx,4                     ; ebx = number of 16-word groups
 jz   cnndot_empty
 pxor mm7,mm7                   ; two running 32-bit partial sums
 cnndot_group:
  movq    mm0,[eax]
  pmaddwd mm0,[edx]
  paddd   mm7,mm0
  movq    mm1,[eax+8]
  pmaddwd mm1,[edx+8]
  paddd   mm7,mm1
  movq    mm2,[eax+16]
  pmaddwd mm2,[edx+16]
  paddd   mm7,mm2
  movq    mm3,[eax+24]
  pmaddwd mm3,[edx+24]
  paddd   mm7,mm3
  lea     eax,[eax+32]
  lea     edx,[edx+32]
  sub     ebx,1
  jnz     cnndot_group
 movq  mm6,mm7
 psrlq mm7,32                   ; bring the high lane down
 paddd mm6,mm7                  ; low dword = low lane + high lane
 movd  eax,mm6
 emms                           ; leave MMX state before returning
 ret
 cnndot_empty:
 xor  eax,eax                   ; fewer than 16 words: result is 0
 ret

;--------------------------------------------------------------------------
; apedec_mmx_cnn_adapt  (stack-argument wrapper)
; Loads the four dword stack arguments into eax, edx, ebx, ecx (in that
; order) and calls apedec_mmx_cnn_adapt_.
; In:    [esp+4], [esp+8], [esp+12], [esp+16] = arguments 1..4
; Out:   nothing; eax, ecx, edx and ebx are returned unchanged
;--------------------------------------------------------------------------
apedec_mmx_cnn_adapt proc c public
 push ebp
 mov  ebp,esp                   ; [ebp+8] = first argument
 push eax
 push ecx
 push edx
 push ebx
 mov  eax,dword ptr [ebp+ 8]
 mov  edx,dword ptr [ebp+12]
 mov  ebx,dword ptr [ebp+16]
 mov  ecx,dword ptr [ebp+20]
 call apedec_mmx_cnn_adapt_
 pop  ebx
 pop  edx
 pop  ecx
 pop  eax
 pop  ebp
 ret
apedec_mmx_cnn_adapt endp

;--------------------------------------------------------------------------
; apedec_mmx_cnn_dotprod  (stack-argument wrapper)
; Loads the three dword stack arguments into eax, edx, ebx (in that order)
; and calls apedec_mmx_cnn_dotprod_.
; In:    [esp+4], [esp+8], [esp+12] = arguments 1..3
; Out:   eax = value returned by apedec_mmx_cnn_dotprod_
; Keeps: ebx, edx
;--------------------------------------------------------------------------
apedec_mmx_cnn_dotprod proc c public
 push ebx
 push edx
 ; two saved dwords + return address: first argument is at [esp+12]
 mov  ebx,dword ptr [esp+20]
 mov  edx,dword ptr [esp+16]
 mov  eax,dword ptr [esp+12]
 call apedec_mmx_cnn_dotprod_
 pop  edx
 pop  ebx
 ret                            ; eax = dot product
apedec_mmx_cnn_dotprod endp

_TEXTMMX	ENDS
		END
