/* neon_memsets.c
 *
 * Copyright (c) 2009, Code Aurora Forum. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Code Aurora nor
 *       the names of its contributors may be used to endorse or promote
 *       products derived from this software without specific prior written
 *       permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "msm-swblits.h"

/*
 * memset16 - store `value` into `count` consecutive 16-bit elements
 * starting at `dst`, using NEON stores for the bulk of the buffer.
 *
 * dst   : first halfword to write.
 * value : 16-bit pattern written to every element.
 * count : number of halfwords to write; nothing is written when it is
 *         zero or negative.
 *
 * The assembly advances dst and decrements count as it goes (both are
 * read/write operands), working down through block sizes of 64, 32, 16,
 * 8 and 4 halfwords and finishing with a 0-3 halfword tail.
 */
void memset16(uint16_t dst[], uint16_t value, int count)
{
    // Reject empty and negative requests before touching memory.
    if (count <= 0)
        return;

    asm volatile(
                 // Preload hint for the start of the destination.
                 "       pld        [%[dst], #0]                           \n"
                 // Fewer than 4 halfwords: go straight to the tail at 6.
                 "       cmp        %[count], #4                           \n"
                 "       blt        6f                                     \n"
                 // If either of the low two address bits is set, store one
                 // halfword first and account for it.  For a halfword-aligned
                 // dst this leaves dst on a 4-byte boundary.
                 "       tst        %[dst], #0x3                           \n"
                 "       strneh     %[value], [%[dst]], #2                 \n"
                 "       subne      %[count], %[count], #1                 \n"
                 // Replicate value into every 16-bit lane of q8, then copy to q9.
                 "       vdup.u16   q8, %[value]                           \n"
                 "       vmov       q9, q8                                 \n"
                 // Enter the ladder at the largest block size that fits.
                 "       cmp        %[count], #64                          \n"
                 "       bge        0f                                     \n"
                 "       cmp        %[count], #32                          \n"
                 "       bge        2f                                     \n"
                 "       cmp        %[count], #16                          \n"
                 "       bge        3f                                     \n"
                 "       cmp        %[count], #8                           \n"
                 "       bge        4f                                     \n"
                 "       b          5f                                     \n"
                 // 0/1: main loop.  r12 = count / 64 iterations; each
                 // iteration issues four 32-byte stores (64 halfwords).
                 "0:                                                       \n"
                 "       mov        r12, %[count], lsr #6                  \n"
                 "1:                                                       \n"
                 "       vst1.16    {q8, q9}, [%[dst]]!                    \n"
                 "       vst1.16    {q8, q9}, [%[dst]]!                    \n"
                 "       vst1.16    {q8, q9}, [%[dst]]!                    \n"
                 "       vst1.16    {q8, q9}, [%[dst]]!                    \n"
                 "       subs       r12, r12, #1                           \n"
                 "       bne        1b                                     \n"
                 // Keep only the remainder (count % 64); finished if it is zero.
                 "       ands       %[count], %[count], #0x3f              \n"
                 "       beq        7f                                     \n"
                 // 2: one block of 32 halfwords (two 32-byte stores).
                 "2:                                                       \n"
                 "       cmp        %[count], #32                          \n"
                 "       blt        3f                                     \n"
                 "       vst1.16    {q8, q9}, [%[dst]]!                    \n"
                 "       vst1.16    {q8, q9}, [%[dst]]!                    \n"
                 "       subs       %[count], %[count], #32                \n"
                 "       beq        7f                                     \n"
                 // 3: one block of 16 halfwords (one 32-byte store).
                 "3:                                                       \n"
                 "       cmp        %[count], #16                          \n"
                 "       blt        4f                                     \n"
                 "       vst1.16    {q8, q9}, [%[dst]]!                    \n"
                 "       subs       %[count], %[count], #16                \n"
                 "       beq        7f                                     \n"
                 // 4: one block of 8 halfwords (one 16-byte store).
                 "4:                                                       \n"
                 "       cmp        %[count], #8                           \n"
                 "       blt        5f                                     \n"
                 "       vst1.16    {q8}, [%[dst]]!                        \n"
                 "       subs       %[count], %[count], #8                 \n"
                 "       beq        7f                                     \n"
                 // 5: one block of 4 halfwords (d16 is the low half of q8).
                 "5:                                                       \n"
                 "       cmp        %[count], #4                           \n"
                 "       blt        6f                                     \n"
                 "       vst1.16    {d16}, [%[dst]]!                       \n"
                 "       subs       %[count], %[count], #4                 \n"
                 "       beq        7f                                     \n"
                 // 6: tail of 0-3 halfwords.  A negative count skips to the end.
                 // lsls #31 moves bit 0 of count into N and bit 1 into C, so
                 // the "mi" store writes one halfword when bit 0 is set and the
                 // two "cs" stores write two halfwords when bit 1 is set.
                 "6:                                                       \n"
                 "       cmp        %[count], #0                           \n"
                 "       blt        7f                                     \n"
                 "       lsls       %[count], #31                          \n"
                 "       strmih     %[value], [%[dst]], #2                 \n"
                 "       strcsh     %[value], [%[dst]], #2                 \n"
                 "       strcsh     %[value], [%[dst]], #2                 \n"
                 // 7: common exit.
                 "7:                                                       \n"
                 // Read/write operands: dst is post-incremented by the stores
                 // and count is consumed as blocks are written.
                 : [dst] "+r" (dst), [count] "+r" (count)
                 // Read-only input: the fill pattern.
                 : [value] "r" (value)
                 // Clobbers: NEON scratch registers, the loop counter r12,
                 // the condition flags, and memory (the buffer being filled).
                 : "q8", "q9", "r12", "cc", "memory"
                 );
}

/*
 * memset32 - store `value` into `count` consecutive 32-bit words starting
 * at `dst`, using NEON stores for the bulk of the buffer.
 *
 * dst   : first word to write.
 * value : 32-bit pattern written to every element.
 * count : number of words to write; nothing is written when it is zero
 *         or negative.
 *
 * The assembly advances dst and decrements count as it goes (both are
 * read/write operands), working down through block sizes of 32, 16, 8
 * and 4 words and finishing with a 0-3 word tail.
 */
void memset32(uint32_t dst[], uint32_t value, int count)
{
    // Reject empty and negative requests before touching memory.  The tail
    // code at label 5 only filters out count == 0 and then decodes the low
    // two bits of count, so a negative count reaching it would store up to
    // three words (e.g. count == -1 has both low bits set).
    if (count <= 0)
        return;

    asm volatile(
                 // Preload hint for the start of the destination.
                 "       pld        [%[dst], #0]                           \n"
                 // Fewer than 4 words: go straight to the tail at 5.
                 "       cmp        %[count], #4                           \n"
                 "       blt        5f                                     \n"
                 // Replicate value into every 32-bit lane of q8, then copy to q9.
                 "       vdup.u32   q8, %[value]                           \n"
                 "       vmov       q9, q8                                 \n"
                 // Enter the ladder at the largest block size that fits.
                 "       cmp        %[count], #32                          \n"
                 "       bge        0f                                     \n"
                 "       cmp        %[count], #16                          \n"
                 "       bge        2f                                     \n"
                 "       cmp        %[count], #8                           \n"
                 "       bge        3f                                     \n"
                 "       b          4f                                     \n"
                 // 0/1: main loop.  r12 = count / 32 iterations; each
                 // iteration issues four 32-byte stores (32 words) followed
                 // by a preload hint at the updated destination.
                 "0:                                                       \n"
                 "       mov        r12, %[count], lsr #5                  \n"
                 "1:                                                       \n"
                 "       vst1.32    {q8, q9}, [%[dst]]!                    \n"
                 "       vst1.32    {q8, q9}, [%[dst]]!                    \n"
                 "       vst1.32    {q8, q9}, [%[dst]]!                    \n"
                 "       vst1.32    {q8, q9}, [%[dst]]!                    \n"
                 "       pld        [%[dst], #0]                           \n"
                 "       subs       r12, r12, #1                           \n"
                 "       bne        1b                                     \n"
                 // Keep only the remainder (count % 32); finished if it is zero.
                 "       ands       %[count], %[count], #0x1f              \n"
                 "       beq        6f                                     \n"
                 // 2: one block of 16 words (two 32-byte stores).
                 "2:                                                       \n"
                 "       cmp        %[count], #16                          \n"
                 "       blt        3f                                     \n"
                 "       vst1.32    {q8, q9}, [%[dst]]!                    \n"
                 "       vst1.32    {q8, q9}, [%[dst]]!                    \n"
                 "       subs       %[count], %[count], #16                \n"
                 "       beq        6f                                     \n"
                 // 3: one block of 8 words (one 32-byte store).
                 "3:                                                       \n"
                 "       cmp        %[count], #8                           \n"
                 "       blt        4f                                     \n"
                 "       vst1.32    {q8, q9}, [%[dst]]!                    \n"
                 "       subs       %[count], %[count], #8                 \n"
                 "       beq        6f                                     \n"
                 // 4: one block of 4 words (one 16-byte store).
                 "4:                                                       \n"
                 "       cmp        %[count], #4                           \n"
                 "       blt        5f                                     \n"
                 "       vst1.32    {q8}, [%[dst]]!                        \n"
                 "       subs       %[count], %[count], #4                 \n"
                 "       beq        6f                                     \n"
                 // 5: tail of 0-3 words.  lsls #31 moves bit 0 of count into
                 // N and bit 1 into C, so the "mi" store writes one word when
                 // bit 0 is set and the two "cs" stores write two words when
                 // bit 1 is set.
                 "5:                                                       \n"
                 "       cmp        %[count], #0                           \n"
                 "       beq        6f                                     \n"
                 "       lsls       %[count], #31                          \n"
                 "       strmi      %[value], [%[dst]], #4                 \n"
                 "       strcs      %[value], [%[dst]], #4                 \n"
                 "       strcs      %[value], [%[dst]], #4                 \n"
                 // 6: common exit.
                 "6: @end                                                  \n"
                 // Read/write operands: dst is post-incremented by the stores
                 // and count is consumed as blocks are written.
                 : [dst] "+r" (dst), [count] "+r" (count)
                 // Read-only input: the fill pattern.
                 : [value] "r" (value)
                 // Clobbers: NEON scratch registers, the loop counter r12,
                 // the condition flags, and memory (the buffer being filled).
                 : "q8", "q9", "r12", "cc", "memory"
                 );
}
