2017-11-15 3 views
2
Need help with extracting the firstname, middlename and lastname from a 
freetext fullname. How to extract them out with all these formats? 
Need to figure out how to handle format 2,5,9,7 

--fullname sample data 
DECLARE @name TABLE 
(fullname VARCHAR(100)) 
INSERT INTO @name SELECT 
'Malone,Susan M' UNION ALL SELECT --1 
'Conn,Chris G' UNION ALL SELECT --2 
'Van Pess,Wen B' UNION ALL SELECT --3 
'DESHPANDE, ANN W.' UNION ALL SELECT --4 
'Asif,LEE' UNION ALL SELECT --5 
'CERVANTES MANDY'UNION ALL SELECT --6 
'Bill, Dave' UNION ALL SELECT --7 
'SMITH,ANN M' UNION ALL SELECT --8 
'BHULLER, MATT' UNION ALL SELECT --9 
'KIM (DAUM), GAIL' UNION ALL SELECT --10 
'John.Mills'--11 

DECLARE @DELIMITER1 varchar(5), @DELIMITER2 varchar(5), @DELIMITER3 
varchar(5),@MAX_LENGTH int 
SET @DELIMITER1 = ',' 
SET @DELIMITER2 = ' ' 
SET @MAX_LENGTH = 50 


--LastName 
SELECT fullname, 
case when 
CHARINDEX(@DELIMITER2, fullname) >=1 
then replace(SUBSTRING(fullname, 1, CHARINDEX(@DELIMITER2, fullname) 
),',','')--replace to empty string if contains a "," 
when 
CHARINDEX(@DELIMITER2, fullname) =0 
then replace(SUBSTRING(fullname, 1, CHARINDEX(@DELIMITER1, fullname) 
),',','')--replace to empty string if contains a "," 
else null 
end as Lastname, 


--Middle Name 
CASE 
-- Middle fullname follows two-fullname first fullnames like Mary Ann 
    WHEN LEN(SUBSTRING(fullname, CHARINDEX(@DELIMITER1,fullname)+ 
2,@MAX_LENGTH)) - LEN(REPLACE(SUBSTRING(fullname, 
CHARINDEX(@DELIMITER1,fullname)+ 2,@MAX_LENGTH), @DELIMITER2, '')) > 0 
--when len is greater than 0 
    THEN SUBSTRING(fullname, LEN(fullname) - CHARINDEX(@DELIMITER2, 
REVERSE(fullname))+2, @MAX_LENGTH) 
     ELSE NULL 
END AS Middlefullname, 


--First Name 
CASE 
-- Count the number of @DELIMITER2. Choose the string between the 

    WHEN LEN(SUBSTRING(fullname, CHARINDEX(@DELIMITER1,fullname)+ 
2,@MAX_LENGTH)) - LEN(REPLACE(SUBSTRING(fullname, 
CHARINDEX(@DELIMITER1,fullname)+ 2,@MAX_LENGTH), @DELIMITER2, '')) > 0 -- 
--when len is greater than 0 
    Then replace(ltrim(SUBSTRING(fullname, CHARINDEX(@DELIMITER1,fullname)+ 
1, 
---need help here 
(LEN(SUBSTRING(fullname, CHARINDEX(@DELIMITER1,fullname)+ 2,@MAX_LENGTH))- 
LEN(SUBSTRING(fullname, LEN(fullname) - CHARINDEX(@DELIMITER2, 
REVERSE(fullname))+2, @MAX_LENGTH))))),'-','') --replace the "-" to empty 
string 
     ELSE ltrim(SUBSTRING(fullname,CHARINDEX(@DELIMITER1,fullname)+ 
    1,@MAX_LENGTH))--trimmed leading spaces 
END AS Firstname 
FROM @name 
order by fullname 
+0

Speichern Sie die Daten so in der Datenbank? Es erfordert "Entpacken" und ist nicht "atomare" Daten. Das Brechen einer Regel verletzt die erste Normalform. – Zorkolot

Antwort

0

Erste Logik zum Verketten der "Namensteile" mit Punkten. Beachten Sie meine Kommentare.

--fullname sample data 
DECLARE @name TABLE 
(nameid int identity, fullname VARCHAR(100)) 
INSERT INTO @name SELECT 
'Malone,Susan M' UNION ALL SELECT --1 
'Conn,Chris G' UNION ALL SELECT --2 
'Van Pess,Wen B' UNION ALL SELECT --3 
'DESHPANDE, ANN W.' UNION ALL SELECT --4 
'Asif,LEE' UNION ALL SELECT --5 
'CERVANTES MANDY'UNION ALL SELECT --6 
'Bill, Dave' UNION ALL SELECT --7 
'SMITH,ANN M' UNION ALL SELECT --8 
'BHULLER, MATT' UNION ALL SELECT --9 
'KIM (DAUM), GAIL' UNION ALL SELECT --10 
'John.Mills';--11 


select original = fullname, prepped = dotted.fn, total.spaces 
from @name 
cross apply (values (patindex('%(%),%',fullname),fullname)) prep1(x,fn) -- check for parentheses: 
cross apply (values (-- remove parentheses if they exist, replace commas w/ dots, dots w/ spaces: 
    case 
    when prep1.x > 1 then substring(fn,1,x-1) + substring(fn,charindex(',',fn,x)+1,8000) 
    else replace(replace(fn, ',', '.'),'.',' ') 
    end)) prep(fn) 
cross apply (values (replace(rtrim(ltrim(replace(prep.fn,' ',' '))),' ',' '))) clean(fn) 
cross apply (values (len(clean.fn)-len(replace(clean.fn,' ','')))) total(spaces) -- count spaces 
cross apply (values (replace(clean.fn, ' ','.'))) dotted(fn); -- replace spaces with dots 

Das gibt

original     prepped    spaces 
------------------------ -------------------- ------- 
Malone,Susan M   Malone.Susan.M  2 
Conn,Chris G    Conn.Chris.G   2 
Van Pess,Wen B   Van.Pess.Wen.B  3 
DESHPANDE, ANN W.  DESHPANDE.ANN.W  2 
Asif,LEE     Asif.LEE    1 
CERVANTES MANDY   CERVANTES.MANDY  1 
Bill, Dave    Bill.Dave   1 
SMITH,ANN M    SMITH.ANN.M   2 
BHULLER, MATT   BHULLER.MATT   1 
KIM (DAUM), GAIL   KIM.GAIL    1 
John.Mills    John.Mills   1 

Der Rest kann getan werden ParseName mit etwa so:

--fullname sample data 
DECLARE @name TABLE 
(nameid int identity, fullname VARCHAR(100)) 
INSERT INTO @name SELECT 
'Malone,Susan M' UNION ALL SELECT --1 
'Conn,Chris G' UNION ALL SELECT --2 
'Van Pess,Wen B' UNION ALL SELECT --3 
'DESHPANDE, ANN W.' UNION ALL SELECT --4 
'Asif,LEE' UNION ALL SELECT --5 
'CERVANTES MANDY'UNION ALL SELECT --6 
'Bill, Dave' UNION ALL SELECT --7 
'SMITH,ANN M' UNION ALL SELECT --8 
'BHULLER, MATT' UNION ALL SELECT --9 
'KIM (DAUM), GAIL' UNION ALL SELECT --10 
'John.Mills';--11 

with clean as 
(
    select original = fullname, prepped = dotted.fn, total.spaces 
    from @name 
    cross apply (values (patindex('%(%),%',fullname),fullname)) prep1(x,fn) -- check for parentheses: 
    cross apply (values (-- remove parentheses if they exist, replace commas w/ dots, dots w/ spaces: 
    case 
     when prep1.x > 1 then substring(fn,1,x-1) + substring(fn,charindex(',',fn,x)+1,8000) 
     else replace(replace(fn, ',', '.'),'.',' ') 
    end)) prep(fn) 
    cross apply (values (replace(rtrim(ltrim(replace(prep.fn,' ',' '))),' ',' '))) clean(fn) 
    cross apply (values (len(clean.fn)-len(replace(clean.fn,' ','')))) total(spaces) -- count spaces 
    cross apply (values (replace(clean.fn, ' ','.'))) dotted(fn) 
) 
select original, cleaned = 
    case spaces 
    when 1 then parsename(prepped,1)+' '+parsename(prepped,2) 
    when 2 then parsename(prepped,2)+' '+parsename(prepped,1)+' '+parsename(prepped,3) 
    when 3 then parsename(prepped,2)+' '+parsename(prepped,1)+' '+parsename(prepped,3)+ 
      ' '+parsename(prepped,4) 
    end 
from clean 

Returns:

original    cleaned 
-------------------- ------------------ 
Malone,Susan M  Susan M Malone 
Conn,Chris G   Chris G Conn 
Van Pess,Wen B  Wen B Pess Van 
DESHPANDE, ANN W. ANN W DESHPANDE 
Asif,LEE    LEE Asif 
CERVANTES MANDY  MANDY CERVANTES 
Bill, Dave   Dave Bill 
SMITH,ANN M   ANN M SMITH 
BHULLER, MATT  MATT BHULLER 
KIM (DAUM), GAIL  GAIL KIM 
John.Mills   Mills John 

Alternativ können Sie eine regex clr (mdq.regexreplace) verwenden. Beachten Sie meine post on SSC vor ein paar Jahren.

+0

Hey Alan danke für die Hilfe! Was soll ich tun, wenn meine Daten bei Verwendung des Parsenamens mehr als 4 Punkte haben? – Jason312

+0

Wenn Sie mehr als 3 Teile haben, was ich gepostet habe, wird nicht funktionieren. Ich werde versuchen, heute/heute Abend zu schreiben, wie ich mit mehr als 3 Teilen umgehen soll. –