Home > SQL Script > CompareStrings_DataAccuracy – loop through characters to compare two strings

CompareStrings_DataAccuracy – loop through characters to compare two strings

USE [YourDatabaseName]
GO
/****** Object:  UserDefinedFunction [dbo].[CompareStrings_DataAccuracyTest]    Script Date: 10/25/2007 16:16:28 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO

CREATE FUNCTION [dbo].[CompareStrings_DataAccuracyTest] (@FirstString varchar(max), @SecondString varchar(max)) 
RETURNS @strCompare TABLE
(
    — columns returned by the function
    FirstStringlength int NOT NULL,
    num_of_errors int NOT NULL,
    accuracy decimal(3,2) NOT NULL,
    errorType nvarchar(255) null
)
AS 

BEGIN
    –First of all, remove all spaces within the string   
    set @FirstString = replace(@FirstString,’  ‘,”)
    set @SecondString = replace(@SecondString,’  ‘,”)
    set @FirstString = replace(@FirstString,’ ‘,”)
    set @SecondString = replace(@SecondString,’ ‘,”)

    DECLARE @Difference AS INT,
            @MaxLength AS INT,
            @Position AS INT
    declare @accuracy as decimal(3,2)
    Declare @errorType as nvarchar(255)   

    If isnumeric(@FirstString)=1 and isnumeric(@SecondString)=1
    –Start of Check if the answer is nermeric
    –if it is nemeric, check whether 2 parameters are equal. if equal, write to tem db and return
    –if not equal, continue next block of code to compare them with String Mode.
    Begin
        declare @tempFirstString as money
        declare @tempSecondString as money
        set @tempFirstString=convert(money,@FirstString)
        set @tempSecondString=convert(money,@SecondString)

        if @tempFirstString = @tempSecondString
        Begin
            set @Difference=0
            set    @accuracy=1
            set    @errorType=’Unknown’           
       
            — copy the required columns to the result of the function
            INSERT @strCompare
            SELECT 0, @Difference, @accuracy,@errorType
            RETURN   
        End   
    End
    –End of Check if the answer is nermeric

    Begin –Start of not nermeric

    /*
    Returns a count of the number of characters different in the 2 strings passed.

    If each string contains the same number of each letters and there are 2 differences,
    this will be counted as 1 difference as it is likely a typo: EG ‘String’ and ‘Strnig’
    */
   

    –Create Temporary table to hold the count of letters in words
    DECLARE @LettersInString TABLE
        (
        Letter CHAR(1) NOT NULL,
        FirstStringCount INT NOT NULL DEFAULT 0,
        SecondStringCount INT NOT NULL DEFAULT 0
        )

    –Difference is the counter of characters that are different in the 2 strings
    SET @Difference = 0
    –Position is the position of the character in the string that is being compared
    SET @Position = 1

    –determine the length of the longest string
    IF len(@FirstString) < len(@SecondString)
        SET @MaxLength = len(@SecondString)
    ELSE
        SET @MaxLength = len(@FirstString)

    –loop through each string until the maximum length is reached
    WHILE @Position <= @MaxLength
        BEGIN
        –check to see if the character in the first string is in the temp table. If not add
        IF NOT EXISTS (SELECT Letter FROM @LettersInString WHERE Letter = SUBSTRING(@FirstString, @Position, 1))
            INSERT INTO @LettersInString SELECT SUBSTRING(@FirstString, @Position, 1), 0, 0
       
        –add 1 to the count of the character in the temp table
        UPDATE @LettersInString
        SET FirstStringCount = FirstStringCount + 1
        WHERE Letter = SUBSTRING(@FirstString, @Position, 1)

        –check to see if the character in the second string is in the temp table. If not add
        IF NOT EXISTS (SELECT Letter FROM @LettersInString WHERE Letter = SUBSTRING(@SecondString, @Position, 1))
            INSERT INTO @LettersInString SELECT SUBSTRING(@SecondString, @Position, 1), 0, 0

        –add 1 to the count of the character in the temp table
        UPDATE @LettersInString
        SET SecondStringCount = SecondStringCount + 1
        WHERE Letter = SUBSTRING(@SecondString, @Position, 1)

        –check to see if the same position in each string holds the same character   
        IF SUBSTRING(@FirstString, @Position, 1) <> SUBSTRING(@SecondString, @Position, 1)
            SET @Difference = @Difference + 1

        –move to the next position
        SET @Position = @Position + 1
        END

        –If each string contains the same number of each letters and there are 2 differences, this will be counted as 1 difference as it is likely a typo: EG ‘String’ and ‘Strnig’
        –IF (SELECT COUNT(Letter) FROM @LettersInString WHERE FirstStringCount <> SecondStringCount) = 0 AND @Difference = 2
        —    SET @Difference = 1
       
       
        set @accuracy = 1 – cast(@Difference as decimal) / cast(len(@FirstString) as decimal)

       
        set @errorType=’Unknown’
        /*
        set @errorType=
            case
                when len(@FirstString) < len(@SecondString) Then ‘Type Extra Characters’
                when len(@FirstString) > len(@SecondString) Then ‘Missing Characters’
            else
                ‘Others’
            end
        */

        — copy the required columns to the result of the function
        INSERT @strCompare
        SELECT len(@FirstString), @Difference, @accuracy,@errorType
        RETURN             
    End  –end of not nermeric

END

Advertisements
Categories: SQL Script
  1. No comments yet.
  1. No trackbacks yet.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: