Search This Blog

SQL Server Removing unwanted characters


Sometimes when you import data from other system, you may want to clean up data by removing unwanted characters
You can use Replace function. Suppose you want to remove the character ^ from the string
declare @str varchar(100)set @str='test^ string'select @str=replace(@str,'^','')
Select @str
But what if you have lot of characters to be removed?
You can have a seperate 
table that has the set of characters to be removed and use a function
--Create test data
create table #data (data varchar(100))
insert #data select 'tes^@&t %stri)-n!g' data union all
select
 '))aaer***********)' union all
select
 '&^&&hsdf()' union all
select
 'jj&wk' union all
select
 ')hw*pa' union all
select
 'No&@#$53*24,Mai()$n R--!oad'
--Create table that has all set of characters to be removed
create table clean_chars (char_id int identity(1,1),chars char(1))
insert into clean_chars (chars)select '^' as repl union all select ')' union all
select
 '(' union all
select
 '&' union all
select
 '*' union all
select
 '%' union all
select
 '@' union all
select
 '-' union all
select
 ',' union all
select
 '#' union all
select
 '$' union all
select
 '#' union all
select
 '!'


--Create a function that removes all those characters from data
create function clean_data(@data varchar(100)
)
returns varchar(1000)
as
begin        declare @char_id varchar(1000),@clean_data varchar(1000)        set @clean_data=@data        select @char_id=min(char_id) from clean_chars
        while @char_id is not null        begin                select @clean_data=replace(@clean_data,chars,'') from clean_chars where char_id=@char_id                select @char_id= min(char_id) from clean_chars where char_id>@char_id        end
return @clean_data
end
--Run the query

select data,dbo.clean_data(d.data) as clean_data from #data as d

Removing unwanted characters - Part 2


Here is another method that uses Dynamic SQL
declare @data table (data varchar(100))
-- table that has source data
insert @data select 'tes^@&t %stri)-n!g' data union all
select
 '))aaer***********)' union all
select
 '&^&&hsdf()' union all
select
 'jj&wk' union all
select
 ')hw*pa' union all
select
 'No&@#$53*24,Mai()$n R--!oad'
-- table that has unwanted characters
declare @clean_chars table (char_id int identity(1,1),chars char(1))
insert into @clean_chars (chars)select '^' as repl union all select ')' union all
select
 '(' union all
select
 '&' union all
select
 '*' union all
select
 '%' union all
select
 '@' union all
select
 '-' union all
select
 ',' union allselect '$' union all
select
 '#' union all
select
 '!'
--T-sql code that removes them
declare @replace varchar(8000),@chars varchar(8000),@sql varchar(8000)
select @chars='',@replace='',@sql=''
select @replace = @replace+'replace(', @chars = @chars+''''+chars+''',''''),' from @clean_chars
set @replace=@replace+'''~!@#'','
set @replace=@replace+substring(@chars,1,LEN(@chars)-1)
select @sql=@sql+'select '''+data+''' as old_data, '+replace(@replace,'~!@#',data)+' as new_data union all ' from @data
set @sql=substring(@sql ,1,LEN(@sql)-10)
exec(@sql)