|
|
|
Resource Logic Client/Server Development Services
Bulk Import Split into Individual Records
|
|
|
The code below loads the 125Mb file into internal variable @allrecords. This is
then split into records by searching for the 'next' chr(10), which is the
end-of-record character. Once a record has been split off, the offset is
set to the position beyond the last end of record marker and the process is
repeated, until the length of the incoming text file is exceeded. |
|
From there, it is simple to substring the record into individual columns.
If a particular column has no value (the Code fields) they are left null.
|
|
Two updates are run, one to replace a sequence that is a space and open corner
bracket, and an ascii 26 is converted to an 'n', under the presmption the
original character was Spanish. Without these fixes, SQL queries that export
these records as XML will fail. It is possible that one or both of these
minor defects prevent SSIS from completing a normal import. |
|
|
CREATE procedure [dbo].[ParseFromBulkLoad]
as
declare @allrecords varchar(max);
declare @onerecord varchar(255);
declare @base int;
declare @offset int;
declare @maxcount int;
set nocount on
--create table EOPubTextRecords
--(
-- Id int IDENTITY(1,1) NOT NULL,
-- EOTextRecord varchar(255) NULL,
-- Scratchpad varchar(255) NULL,
-- OrganizationName varchar(106) NULL,
-- City varchar(31) NULL,
-- StateCode varchar(3) NULL,
-- Code1 varchar(1) NULL,
-- Code2 varchar(1) NULL,
-- Code3 varchar(1) NULL,
-- Code4 varchar(1) NULL,
-- Code5 varchar(1) NULL,
-- Code6 varchar(1) NULL
--)
truncate table EOPubTextRecords
select @allrecords = b.IncomingContents from EOPubBulkLoad b,
(select MAX(bulkloadid) as maxid from EOPubBulkLoad) m
where BulkLoadId = m.maxid
set @base = 1
set @maxcount = 1000000
recordgroup:
set @offset = CHARINDEX(CHAR(10), @allrecords, @base)
if @offset > 1
begin
set @onerecord = SUBSTRING(@allrecords, @base, @offset - @base)
insert into EOPubTextRecords (EOTextRecord) values (@onerecord)
set @base = @offset + 1
select @maxcount = @maxcount - 1
end
else
set @maxcount = 0
if @maxcount > 0 goto recordgroup
update EOPubTextRecords set OrganizationName = rtrim(SUBSTRING(EOTextRecord, 1,
106))
update EOPubTextRecords set City = rtrim(SUBSTRING(EOTextRecord, 107, 31))
update EOPubTextRecords set StateCode = rtrim(SUBSTRING(EOTextRecord, 138, 3))
update EOPubTextRecords set Code1 = '1' where SUBSTRING(EOTextRecord, 141, 7)
like '%1%'
update EOPubTextRecords set Code2 = '2' where SUBSTRING(EOTextRecord, 141, 7)
like '%2%'
update EOPubTextRecords set Code3 = '3' where SUBSTRING(EOTextRecord, 141, 7)
like '%3%'
update EOPubTextRecords set Code4 = '4' where SUBSTRING(EOTextRecord, 141, 7)
like '%4%'
update EOPubTextRecords set Code5 = '5' where SUBSTRING(EOTextRecord, 141, 7)
like '%5%'
update EOPubTextRecords set Code6 = '6' where SUBSTRING(EOTextRecord, 141, 7)
like '%6%'
update EOPubTextRecords set OrganizationName = REPLACE(OrganizationName, ' <',
'') where OrganizationName like '% <%'
update EOPubTextRecords set OrganizationName = REPLACE(OrganizationName, CHAR(26), 'n') where OrganizationName like '%' + CHAR(26) + '%';
|
|
|
|
Each of these stored procedures runs in about five minutes.
|
|
|
|
|