Resource Logic Client/Server Development Services

Bulk Import Split into Individual Records


  
The code below loads the 125Mb file into internal variable @allrecords. This is then split into records by searching for the 'next' chr(10), which is the end-of-record character.  Once a record has been split off, the offset is set to the position beyond the last end of record marker and the process is repeated, until the length of the incoming text file is exceeded.
  
From there, it is simple to substring the record into individual columns.  If a particular column has no value (the Code fields) they are left null.
  
Two updates are run, one to replace a sequence that is a space and open corner bracket, and an ascii 26 is converted to an 'n', under the presmption the original character was Spanish. Without these fixes, SQL queries that export these records as XML will fail.  It is possible that one or both of these minor defects prevent SSIS from completing a normal import.
  
CREATE procedure [dbo].[ParseFromBulkLoad]

as

declare @allrecords varchar(max);
declare @onerecord varchar(255);
declare @base int;
declare @offset int;
declare @maxcount int;

set nocount on

--create table EOPubTextRecords
--(
-- Id int IDENTITY(1,1) NOT NULL,
-- EOTextRecord varchar(255) NULL,
-- Scratchpad varchar(255) NULL,
-- OrganizationName varchar(106) NULL,
-- City varchar(31) NULL,
-- StateCode varchar(3) NULL,
-- Code1 varchar(1) NULL,
-- Code2 varchar(1) NULL,
-- Code3 varchar(1) NULL,
-- Code4 varchar(1) NULL,
-- Code5 varchar(1) NULL,
-- Code6 varchar(1) NULL
--)

truncate table EOPubTextRecords

select @allrecords = b.IncomingContents from EOPubBulkLoad b,
    (select MAX(bulkloadid) as maxid from EOPubBulkLoad) m
        where BulkLoadId = m.maxid

set @base = 1
set @maxcount = 1000000

recordgroup:
    set @offset = CHARINDEX(CHAR(10), @allrecords, @base)
    if @offset > 1
        begin
            set @onerecord = SUBSTRING(@allrecords, @base, @offset - @base)
            insert into EOPubTextRecords (EOTextRecord) values (@onerecord)
            set @base = @offset + 1
        select @maxcount = @maxcount - 1
    end
    else
        set @maxcount = 0
    if @maxcount > 0 goto recordgroup

update EOPubTextRecords set OrganizationName = rtrim(SUBSTRING(EOTextRecord, 1, 106))
update EOPubTextRecords set City = rtrim(SUBSTRING(EOTextRecord, 107, 31))
update EOPubTextRecords set StateCode = rtrim(SUBSTRING(EOTextRecord, 138, 3))
update EOPubTextRecords set Code1 = '1' where SUBSTRING(EOTextRecord, 141, 7) like '%1%'
update EOPubTextRecords set Code2 = '2' where SUBSTRING(EOTextRecord, 141, 7) like '%2%'
update EOPubTextRecords set Code3 = '3' where SUBSTRING(EOTextRecord, 141, 7) like '%3%'
update EOPubTextRecords set Code4 = '4' where SUBSTRING(EOTextRecord, 141, 7) like '%4%'
update EOPubTextRecords set Code5 = '5' where SUBSTRING(EOTextRecord, 141, 7) like '%5%'
update EOPubTextRecords set Code6 = '6' where SUBSTRING(EOTextRecord, 141, 7) like '%6%'
update EOPubTextRecords set OrganizationName = REPLACE(OrganizationName, ' <', '') where OrganizationName like '% <%'
update EOPubTextRecords set OrganizationName = REPLACE(OrganizationName, CHAR(26), 'n') where OrganizationName like '%' + CHAR(26) + '%';
  
  
Each of these stored procedures runs in about five minutes.