unit WStrUtils; {.$WARN SYMBOL_DEPRECATED OFF} interface uses Windows, Classes; const WideNull = WideChar(#0); function StrLenW(Str: PWideChar): Cardinal; function StrEndW(Str: PWideChar): PWideChar; function StrMoveW(Dest, Source: PWideChar; Count: Cardinal): PWideChar; function StrCopyW(Dest, Source: PWideChar): PWideChar; function StrECopyW(Dest, Source: PWideChar): PWideChar; function StrLCopyW(Dest, Source: PWideChar; MaxLen: Cardinal): PWideChar; function StrPCopyW(Dest: PWideChar; const Source: string): PWideChar; function StrPLCopyW(Dest: PWideChar; const Source: string; MaxLen: Cardinal): PWideChar; function StrCatW(Dest, Source: PWideChar): PWideChar; function StrLCatW(Dest, Source: PWideChar; MaxLen: Cardinal): PWideChar; function StrCompW(Str1, Str2: PWideChar): Integer; function StrICompW(Str1, Str2: PWideChar): Integer; function StrLCompW(Str1, Str2: PWideChar; MaxLen: Cardinal): Integer; function StrLICompW(Str1, Str2: PWideChar; MaxLen: Cardinal): Integer; function StrNScanW(S1, S2: PWideChar): Integer; function StrRNScanW(S1, S2: PWideChar): Integer; function StrScanW(Str: PWideChar; Chr: WideChar): PWideChar; overload; function StrScanW(Str: PWideChar; Chr: WideChar; StrLen: Cardinal): PWideChar; overload; function StrRScanW(Str: PWideChar; Chr: WideChar): PWideChar; function StrPosW(Str, SubStr: PWideChar): PWideChar; function StrAllocW(Size: Cardinal): PWideChar; function StrBufSizeW(Str: PWideChar): Cardinal; function StrNewW(Str: PWideChar): PWideChar; procedure StrDisposeW(Str: PWideChar); procedure StrSwapByteOrder(Str: PWideChar); implementation function StrLenW(Str: PWideChar): Cardinal; // returns number of characters in a string excluding the null terminator asm MOV EDX, EDI MOV EDI, EAX MOV ECX, 0FFFFFFFFH XOR AX, AX REPNE SCASW MOV EAX, 0FFFFFFFEH SUB EAX, ECX MOV EDI, EDX end; //---------------------------------------------------------------------------------------------------------------------- function StrEndW(Str: PWideChar): PWideChar; // returns a pointer to the end of a null terminated string asm MOV EDX, EDI MOV EDI, EAX MOV ECX, 0FFFFFFFFH XOR AX, AX REPNE SCASW LEA EAX, [EDI - 2] MOV EDI, EDX end; //---------------------------------------------------------------------------------------------------------------------- function StrMoveW(Dest, Source: PWideChar; Count: Cardinal): PWideChar; // Copies the specified number of characters to the destination string and returns Dest // also as result. Dest must have enough room to store at least Count characters. asm PUSH ESI PUSH EDI MOV ESI, EDX MOV EDI, EAX MOV EDX, ECX CMP EDI, ESI JG @@1 JE @@2 SHR ECX, 1 REP MOVSD MOV ECX, EDX AND ECX, 1 REP MOVSW JMP @@2 @@1: LEA ESI, [ESI + 2 * ECX - 2] LEA EDI, [EDI + 2 * ECX - 2] STD AND ECX, 1 REP MOVSW SUB EDI, 2 SUB ESI, 2 MOV ECX, EDX SHR ECX, 1 REP MOVSD CLD @@2: POP EDI POP ESI end; //---------------------------------------------------------------------------------------------------------------------- function StrCopyW(Dest, Source: PWideChar): PWideChar; // copies Source to Dest and returns Dest asm PUSH EDI PUSH ESI MOV ESI, EAX MOV EDI, EDX MOV ECX, 0FFFFFFFFH XOR AX, AX REPNE SCASW NOT ECX MOV EDI, ESI MOV ESI, EDX MOV EDX, ECX MOV EAX, EDI SHR ECX, 1 REP MOVSD MOV ECX, EDX AND ECX, 1 REP MOVSW POP ESI POP EDI end; //---------------------------------------------------------------------------------------------------------------------- function StrECopyW(Dest, Source: PWideChar): PWideChar; // copies Source to Dest and returns a pointer to the null character ending the string asm PUSH EDI PUSH ESI MOV ESI, EAX MOV EDI, EDX MOV ECX, 0FFFFFFFFH XOR AX, AX REPNE SCASW NOT ECX MOV EDI, ESI MOV ESI, EDX MOV EDX, ECX SHR ECX, 1 REP MOVSD MOV ECX, EDX AND ECX, 1 REP MOVSW LEA EAX, [EDI - 2] POP ESI POP EDI end; //---------------------------------------------------------------------------------------------------------------------- function StrLCopyW(Dest, Source: PWideChar; MaxLen: Cardinal): PWideChar; // copies a specified maximum number of characters from Source to Dest asm PUSH EDI PUSH ESI PUSH EBX MOV ESI, EAX MOV EDI, EDX MOV EBX, ECX XOR AX, AX TEST ECX, ECX JZ @@1 REPNE SCASW JNE @@1 INC ECX @@1: SUB EBX, ECX MOV EDI, ESI MOV ESI, EDX MOV EDX, EDI MOV ECX, EBX SHR ECX, 1 REP MOVSD MOV ECX, EBX AND ECX, 1 REP MOVSW STOSW MOV EAX, EDX POP EBX POP ESI POP EDI end; //---------------------------------------------------------------------------------------------------------------------- function StrPCopyW(Dest: PWideChar; const Source: string): PWideChar; // copies a Pascal-style string to a null-terminated wide string begin Result := StrPLCopyW(Dest, Source, Length(Source)); Result[Length(Source)] := WideNull; end; //---------------------------------------------------------------------------------------------------------------------- function StrPLCopyW(Dest: PWideChar; const Source: string; MaxLen: Cardinal): PWideChar; // copies characters from a Pascal-style string into a null-terminated wide string asm PUSH EDI PUSH ESI MOV EDI, EAX MOV ESI, EDX MOV EDX, EAX XOR AX, AX @@1: LODSB STOSW DEC ECX JNZ @@1 MOV EAX, EDX POP ESI POP EDI end; //---------------------------------------------------------------------------------------------------------------------- function StrCatW(Dest, Source: PWideChar): PWideChar; // appends a copy of Source to the end of Dest and returns the concatenated string begin StrCopyW(StrEndW(Dest), Source); Result := Dest; end; //---------------------------------------------------------------------------------------------------------------------- function StrLCatW(Dest, Source: PWideChar; MaxLen: Cardinal): PWideChar; // appends a specified maximum number of WideCharacters to string asm PUSH EDI PUSH ESI PUSH EBX MOV EDI, Dest MOV ESI, Source MOV EBX, MaxLen SHL EBX, 1 CALL StrEndW MOV ECX, EDI ADD ECX, EBX SUB ECX, EAX JBE @@1 MOV EDX, ESI SHR ECX, 1 CALL StrLCopyW @@1: MOV EAX, EDI POP EBX POP ESI POP EDI end; //---------------------------------------------------------------------------------------------------------------------- function StrCompW(Str1, Str2: PWideChar): Integer; // compares Str1 to Str2 (binary comparation) // Note: There's also an extended comparation function which uses a given language to // compare unicode strings. asm PUSH EDI PUSH ESI MOV EDI, EDX MOV ESI, EAX MOV ECX, 0FFFFFFFFH XOR EAX, EAX REPNE SCASW NOT ECX MOV EDI, EDX XOR EDX, EDX REPE CMPSW MOV AX, [ESI - 2] MOV DX, [EDI - 2] SUB EAX, EDX POP ESI POP EDI end; //---------------------------------------------------------------------------------------------------------------------- function StrICompW(Str1, Str2: PWideChar): Integer; // compares Str1 to Str2 without case sensitivity (binary comparation), // Note: only ANSI characters are compared case insensitively asm PUSH EDI PUSH ESI MOV EDI, EDX MOV ESI, EAX MOV ECX, 0FFFFFFFFH XOR EAX, EAX REPNE SCASW NOT ECX MOV EDI, EDX XOR EDX, EDX @@1: REPE CMPSW JE @@4 MOV AX, [ESI - 2] CMP AX, 'a' JB @@2 CMP AX, 'z' JA @@2 SUB AL, 20H @@2: MOV DX, [EDI - 2] CMP DX, 'a' JB @@3 CMP DX, 'z' JA @@3 SUB DX, 20H @@3: SUB EAX, EDX JE @@1 @@4: POP ESI POP EDI end; //---------------------------------------------------------------------------------------------------------------------- function StrLCompW(Str1, Str2: PWideChar; MaxLen: Cardinal): Integer; // compares a specified maximum number of charaters in two strings asm PUSH EDI PUSH ESI PUSH EBX MOV EDI, EDX MOV ESI, EAX MOV EBX, ECX XOR EAX, EAX OR ECX, ECX JE @@1 REPNE SCASW SUB EBX, ECX MOV ECX, EBX MOV EDI, EDX XOR EDX, EDX REPE CMPSW MOV AX, [ESI - 2] MOV DX, [EDI - 2] SUB EAX, EDX @@1: POP EBX POP ESI POP EDI end; //---------------------------------------------------------------------------------------------------------------------- function StrLICompW(Str1, Str2: PWideChar; MaxLen: Cardinal): Integer; // compares strings up to a specified maximum number of characters, not case sensitive // Note: only ANSI characters are compared case insensitively asm PUSH EDI PUSH ESI PUSH EBX MOV EDI, EDX MOV ESI, EAX MOV EBX, ECX XOR EAX, EAX OR ECX, ECX JE @@4 REPNE SCASW SUB EBX, ECX MOV ECX, EBX MOV EDI, EDX XOR EDX, EDX @@1: REPE CMPSW JE @@4 MOV AX, [ESI - 2] CMP AX, 'a' JB @@2 CMP AX, 'z' JA @@2 SUB AX, 20H @@2: MOV DX, [EDI - 2] CMP DX, 'a' JB @@3 CMP DX, 'z' JA @@3 SUB DX, 20H @@3: SUB EAX, EDX JE @@1 @@4: POP EBX POP ESI POP EDI end; //---------------------------------------------------------------------------------------------------------------------- function StrNScanW(S1, S2: PWideChar): Integer; // determines where (in S1) the first time one of the characters of S2 appear. // The result is the length of a string part of S1 where none of the characters of // S2 do appear (not counting the trailing #0 and starting with position 0 in S1). var Run: PWideChar; begin Result := -1; if Assigned(S1) and Assigned(S2) then begin Run := S1; while (Run^ <> #0) do begin if StrScanW(S2, Run^) <> nil then Break; Inc(Run); end; Result := Run - S1; end; end; //---------------------------------------------------------------------------------------------------------------------- function StrRNScanW(S1, S2: PWideChar): Integer; // This function does the same as StrRNScanW but uses S1 in reverse order. This means S1 points to the last // character of a string, is traveresed reversely and terminates with a starting #0. // This is useful for parsing strings stored in reversed macro buffers etc. var Run: PWideChar; begin Result := -1; if Assigned(S1) and Assigned(S2) then begin Run := S1; while (Run^ <> #0) do begin if StrScanW(S2, Run^) <> nil then Break; Dec(Run); end; Result := S1 - Run; end; end; //---------------------------------------------------------------------------------------------------------------------- function StrScanW(Str: PWideChar; Chr: WideChar): PWideChar; // returns a pointer to first occurrence of a specified character in a string asm PUSH EDI PUSH EAX MOV EDI, Str MOV ECX, 0FFFFFFFFH XOR AX, AX REPNE SCASW NOT ECX POP EDI MOV AX, Chr REPNE SCASW MOV EAX, 0 JNE @@1 MOV EAX, EDI SUB EAX, 2 @@1: POP EDI end; //---------------------------------------------------------------------------------------------------------------------- function StrScanW(Str: PWideChar; Chr: WideChar; StrLen: Cardinal): PWideChar; // returns a pointer to first occurrence of a specified character in a string // or nil if not found // Note: this is just a binary search for the specified character and there's no check for // a terminating null. Instead at most StrLen characters are searched. This makes // this function extremly fast. // // on enter EAX contains Str, EDX contains Chr and ECX StrLen // on exit EAX contains result pointer or nil asm TEST EAX, EAX JZ @@Exit // get out if the string is nil or StrLen is 0 JCXZ @@Exit @@Loop: CMP [EAX], DX // this unrolled loop is actually faster on modern processors JE @@Exit // than REP SCASW INC EAX DEC ECX JNZ @@Loop XOR EAX, EAX @@Exit: end; //---------------------------------------------------------------------------------------------------------------------- function StrRScanW(Str: PWideChar; Chr: WideChar): PWideChar; // returns a pointer to the last occurance of Chr in Str asm PUSH EDI MOV EDI, Str MOV ECX, 0FFFFFFFFH XOR AX, AX REPNE SCASW NOT ECX STD SUB EDI, 2 MOV AX, Chr REPNE SCASW MOV EAX, 0 JNE @@1 MOV EAX, EDI ADD EAX, 2 @@1: CLD POP EDI end; //---------------------------------------------------------------------------------------------------------------------- function StrPosW(Str, SubStr: PWideChar): PWideChar; // returns a pointer to the first occurance of SubStr in Str asm PUSH EDI PUSH ESI PUSH EBX OR EAX, EAX JZ @@2 OR EDX, EDX JZ @@2 MOV EBX, EAX MOV EDI, EDX XOR AX, AX MOV ECX, 0FFFFFFFFH REPNE SCASW NOT ECX DEC ECX JZ @@2 MOV ESI, ECX MOV EDI, EBX MOV ECX, 0FFFFFFFFH REPNE SCASW NOT ECX SUB ECX, ESI JBE @@2 MOV EDI, EBX LEA EBX, [ESI - 1] // Note: 2 would be wrong here, we are dealing with numbers not an address @@1: MOV ESI, EDX LODSW REPNE SCASW JNE @@2 MOV EAX, ECX PUSH EDI MOV ECX, EBX REPE CMPSW POP EDI MOV ECX, EAX JNE @@1 LEA EAX, [EDI - 2] JMP @@3 @@2: XOR EAX, EAX @@3: POP EBX POP ESI POP EDI end; //---------------------------------------------------------------------------------------------------------------------- function StrAllocW(Size: Cardinal): PWideChar; // Allocates a buffer for a null-terminated wide string and returns a pointer // to the first character of the string. begin Size := SizeOf(WideChar) * Size + SizeOf(Cardinal); GetMem(Result, Size); FillChar(Result^, Size, 0); Cardinal(Pointer(Result)^) := Size; Inc(Result, SizeOf(Cardinal) div SizeOf(WideChar)); end; //---------------------------------------------------------------------------------------------------------------------- function StrBufSizeW(Str: PWideChar): Cardinal; // Returns max number of wide characters that can be stored in a buffer allocated by StrAllocW. begin Dec(Str, SizeOf(Cardinal) div SizeOf(WideChar)); Result := (Cardinal(Pointer(Str)^) - SizeOf(Cardinal)) div 2; end; //---------------------------------------------------------------------------------------------------------------------- function StrNewW(Str: PWideChar): PWideChar; // Duplicates the given string (if not nil) and returns the address of the new string. var Size: Cardinal; begin if Str = nil then Result := nil else begin Size := StrLenW(Str) + 1; Result := StrMoveW(StrAllocW(Size), Str, Size); end; end; //---------------------------------------------------------------------------------------------------------------------- procedure StrDisposeW(Str: PWideChar); // releases a string allocated with StrNew. begin if Str <> nil then begin Dec(Str, SizeOf(Cardinal) div SizeOf(WideChar)); FreeMem(Str, Cardinal(Pointer(Str)^)); end; end; //---------------------------------------------------------------------------------------------------------------------- procedure StrSwapByteOrder(Str: PWideChar); // exchanges in each character of the given string the low order and high order // byte to go from LSB to MSB and vice versa. // EAX contains address of string asm PUSH ESI PUSH EDI MOV ESI, EAX MOV EDI, ESI XOR EAX, EAX // clear high order byte to be able to use 32bit operand below @@1: LODSW OR EAX, EAX JZ @@2 XCHG AL, AH STOSW JMP @@1 @@2: POP EDI POP ESI end; end.