LassoSoft Inc. > Home

[parse_csv]

Link: parse_csv
Author: Johan Solve
Category: File
Version: 8.x
License: Public Domain
Posted: 25 Mar 2012
Updated: 25 Mar 2012
More by this author...

Description

Custom tag to parse text files with any field separator, for example tab, comma (the default), or semicolon. It uses a state machine so that it can handle optional field delimiters, for example when a CSV file only has quotes around a field value when that value contains a comma. Outputs either an array of arrays or an array of maps. Note: to output an array of maps, -firstrowfieldnames must be specified and the first row of data must contain the field names.

Sample Usage

// Parse a CRLF-terminated CSV file into an array of maps keyed by the
// field names found in the first row. -optionaldelimiters allows field
// values that are not wrapped in the field delimiter (quote) character.
parse_csv(file_read('filename.csv'),
	-linebreak='\r\n',
	-fieldseparator=',',
	-fielddelimiter='"',
	-escapecharacter='\\',
	-firstrowfieldnames,
	-rowformat='map',
	-optionaldelimiters);

Source Code

Click the "Download" button below to retrieve a copy of this tag, including the complete documentation and sample usage shown on this page. Place the downloaded ".inc" file in your LassoStartup folder, restart Lasso, and you can begin using this tag immediately.

/*
	parse_csv
	Parses delimited text (CSV, TSV, ...) with a small state machine.

	Parameters
		input                 (required) The text to parse.
		-linebreak            Row terminator, default '\n'. May be longer than one
		                      character, e.g. '\r\n'.
		-fielddelimiter       Single character wrapped around field values, default '"'.
		-fieldseparator       Single character between fields, default ','.
		-escapecharacter      Single character that makes the following character
		                      literal inside a field, default '\\'.
		-optionaldelimiters   When set, field values do not have to be wrapped in the
		                      field delimiter. When not set, characters outside
		                      delimiters are ignored.
		-firstrowfieldnames   When set, the first row is taken as field names and is
		                      not included in the output.
		-rowformat            'array' (default) or 'map'. 'map' is only honoured
		                      together with -firstrowfieldnames.

	Returns
		An array with one element per row; each row is an array of field values, or
		a map of field name = field value when the row format is 'map'.
*/
define_tag('parse_csv', -required='input'
	,	-optional='linebreak', -type='string', -copy
	,	-optional='fielddelimiter', -type='string', -copy
	,	-optional='fieldseparator', -type='string', -copy
	,	-optional='escapecharacter', -type='string', -copy
	,	-optional='optionaldelimiters', -copy
	,	-optional='firstrowfieldnames', -copy
	,	-optional='rowformat', -type='string', -copy
);

	local(	'state'='out'
		,	'output'=array
		,	'row'=array
		,	'field'=string
		,	'char'=string
		,	'token'=string
		,	'escaped'=false
		,	'unquoted'=false
		,	'fieldnames'=array
		,	'text'=string
		,	'lbsize'=0
		,	'lbfirst'=string
		,	'skip'=0
		,	'endfield'=false
		,	'endrow'=false
	);

	!local_defined('linebreak') ? local('linebreak'='\n');
	!local_defined('fielddelimiter') ? local('fielddelimiter'='"');
	!local_defined('fieldseparator') ? local('fieldseparator'=',');
	!local_defined('escapecharacter') ? local('escapecharacter'='\\');
	!local_defined('rowformat') ? local('rowformat'='array');
	local('optionaldelimiters'=(local_defined('optionaldelimiters') && #optionaldelimiters != false));
	local('firstrowfieldnames'=(local_defined('firstrowfieldnames') && #firstrowfieldnames != false));

	// Maps need field names as keys, so without a header row fall back to arrays.
	// Decided once, before any row is built, so no row is ever started as a map
	// and then filled as an array.
	if(!#firstrowfieldnames);
		#rowformat = 'array';
	/if;

	// Work on a private string copy so the look-ahead below can use substring.
	#text = string(#input);
	#lbsize = #linebreak -> size;
	if(#lbsize > 1);
		#lbfirst = #linebreak -> substring(1, 1);
	/if;

	iterate(#text -> split(''), #char);
		if(#skip > 0);
			// This character is the tail of a multi-character line break that
			// was already consumed as one token.
			#skip -= 1;
		else;
			// Tokenize: a multi-character line break is handled as ONE token so
			// it can be compared with #linebreak just like a single character.
			#token = #char;
			if(#lbsize > 1 && #char == #lbfirst && #text -> substring(loop_count, #lbsize) == #linebreak);
				#token = #linebreak;
				#skip = #lbsize - 1;
			/if;

			#endfield = false;
			#endrow = false;

			select(#state);
			case('out');
				// Outside a field value.
				if(#token == #fielddelimiter);
					#state = 'in';
				else(#token == #fieldseparator);
					#endfield = true;
				else(#token == #linebreak);
					#endfield = true;
					#endrow = true;
				else(#optionaldelimiters);
					// First character of an undelimited field value.
					#state = 'in';
					#unquoted = true;
					#field += #token;
				/if;
			case('in');
				// Inside a field value (delimited, or undelimited when #unquoted).
				if(!#escaped && #token == #escapecharacter);
					#escaped = true;
				else(#token == #fielddelimiter && !#escaped && !#unquoted);
					#state = 'out';
				else(#token == #fieldseparator && #optionaldelimiters && #unquoted && !#escaped);
					#state = 'out';
					#unquoted = false;
					#endfield = true;
				else(#token == #linebreak && #optionaldelimiters && #unquoted && !#escaped);
					#state = 'out';
					#unquoted = false;
					#endfield = true;
					#endrow = true;
				else;
					#field += #token;
					#escaped = false;
				/if;
			/select;

			if(#endfield);
				// Store the finished field: keyed by the matching field name when
				// building maps (surplus fields beyond the known names are dropped),
				// otherwise appended to the row array.
				if(#firstrowfieldnames && #rowformat=='map' && #fieldnames -> size);
					if(#fieldnames -> size >= #row -> size + 1);
						#row -> insert(#fieldnames -> get(#row -> size + 1) = #field);
					/if;
				else;
					#row -> insert(#field);
				/if;
				#field = string;
			/if;

			if(#endrow);
				#output -> insert(#row);
				if(#rowformat == 'map');
					#row = map;
				else;
					#row = array;
				/if;
				// The first completed row supplies the field names and is not data.
				if(#firstrowfieldnames && #output -> size == 1 && #fieldnames -> size == 0);
					#fieldnames = #output -> get(1);
					#output -> remove(1);
				/if;
			/if;
		/if;
	/iterate;

	// Flush the last row when the input does not end with a line break.
	// A non-empty row means a separator was seen on this line, so an empty
	// pending field is a real (empty) trailing value and must be kept.
	if(#field -> size || #row -> size);
		if(#firstrowfieldnames && #rowformat=='map' && #fieldnames -> size);
			if(#fieldnames -> size >= #row -> size + 1);
				#row -> insert(#fieldnames -> get(#row -> size + 1) = #field);
			/if;
		else;
			#row -> insert(#field);
		/if;
	/if;
	if(#row -> size);
		#output -> insert(#row);
		// Header-only input without a trailing line break: the row just added
		// is the field-name row, not data.
		if(#firstrowfieldnames && #fieldnames -> size == 0 && #output -> size == 1);
			#output -> remove(1);
		/if;
	/if;

	return(#output);

/define_tag;

Related Tags

Comments

No comments

Please log in to comment

Subscribe to the LassoTalk mail list

LassoSoft Inc. > Home

 

 

©LassoSoft Inc 2015 | Web Development by Treefrog Inc | Privacy | Legal terms and Shipping | Contact LassoSoft