Loop files bug?
Hi all,
I'm trying to use a Loop Files operator and append a series of .csv files. However, when appending I get an error message that the attribute 'Telephone1' does not have the same data type in all my files, which is correct (in some it is nominal, while in others it is a real value). I therefore added the 'Parse Numbers' operator inside the Loop Files operator to change the data type of 'Telephone1', but when I do that, I get an error that the attribute does not exist.
-I've checked with breakpoints, but the attribute does exist in every uploaded .csv file
-I've checked whether there was somehow a whitespace after the attribute name which may have caused the problem; this is not the case (even if I copy the attribute name from my results view, I get the same error).
-I have tried other operators to see if they work: if I do a 'Select Attributes' with 'Telephone1', RM does not give an error message, the attribute is selected, and the output is valid. However, if I use the Branch operator (with Parse Numbers inside it), because I only want the numbers parsed if 'Telephone1' is recognised as a nominal value, it doesn't work either.
Does anyone have an idea what the issue might be? I've included my XML below.
<!-- Parse Numbers applied to the attribute subset Telephone2, Mobile2, Telephone1, Mobile1. -->
<operator
    activated="true"
    class="parse_numbers"
    compatibility="7.5.003"
    expanded="true"
    height="82"
    name="Parse Numbers"
    width="90"
    x="447"
    y="34">
  <parameter key="attribute_filter_type" value="subset"/>
  <parameter key="attributes" value="Telephone2|Mobile2|Telephone1|Mobile1"/>
</operator>
<connect from_port="file object" to_op="Read CSV" to_port="file"/>
<connect from_op="Read CSV" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_op="Parse Numbers" to_port="example set input"/>
<connect from_op="Parse Numbers" from_port="example set output" to_port="output 1"/>
<portSpacing port="source_file object" spacing="0"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
<description align="center" color="transparent" colored="false" width="126">Getting the data from SFTP of agency</description>
</operator>
<!-- Append operator; it has an "after" breakpoint set and no explicit parameters. -->
<operator
    activated="true"
    breakpoints="after"
    class="append"
    compatibility="7.5.003"
    expanded="true"
    height="82"
    name="Append"
    width="90"
    x="246"
    y="289"/>
<!-- Check uploaded: Select Subprocess with two alternative subprocesses.
     Subprocess 1 reads table DDstreetimport_results_handled, keeps the type/batch name columns,
     adds already_upload = "Y", left-joins that onto the incoming data on batch name and type name,
     keeps rows whose already_upload does not equal Y, and finally drops the already_upload attribute.
     Subprocess 2 forwards input 1 to output 1 unchanged.
     Quoted literals inside attribute values are written as quot entities and the line break in the
     description is escaped, so that the listing is well-formed XML.
     NOTE(review): the first Filter Examples value contains literal double quotes around
     DR csv Streetimport; confirm that the database column really stores them. -->
<operator activated="true" breakpoints="after" class="select_subprocess" compatibility="7.5.003" expanded="true" height="82" name="Check uploaded" width="90" x="380" y="289">
  <process expanded="true">
    <operator activated="true" class="jdbc_connectors:read_database" compatibility="7.5.003" expanded="true" height="68" name="Read Database" width="90" x="45" y="136">
      <parameter key="connection" value="DDmachine2"/>
      <parameter key="define_query" value="table name"/>
      <parameter key="table_name" value="DDstreetimport_results_handled"/>
      <enumeration key="parameters"/>
    </operator>
    <operator activated="true" class="filter_examples" compatibility="7.5.003" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="136">
      <list key="filters_list">
        <parameter key="filters_entry_key" value="DDstreetimport_datasource_typename.equals.&quot;DR csv Streetimport&quot;"/>
      </list>
    </operator>
    <operator activated="true" class="select_attributes" compatibility="7.5.003" expanded="true" height="82" name="Select Attributes" width="90" x="313" y="136">
      <parameter key="attribute_filter_type" value="subset"/>
      <parameter key="attributes" value="DDstreetimport_datasource_typename|DDstreetimport_datasource_batchname"/>
    </operator>
    <operator activated="true" class="generate_attributes" compatibility="7.5.003" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="447" y="136">
      <list key="function_descriptions">
        <parameter key="already_upload" value="&quot;Y&quot;"/>
      </list>
    </operator>
    <operator activated="true" class="join" compatibility="7.5.003" expanded="true" height="82" name="Join" width="90" x="581" y="34">
      <parameter key="join_type" value="left"/>
      <parameter key="use_id_attribute_as_key" value="false"/>
      <list key="key_attributes">
        <parameter key="ddstreetimport_datasource_batchname" value="DDstreetimport_datasource_batchname"/>
        <parameter key="ddstreetimport_datasource_typename" value="DDstreetimport_datasource_typename"/>
      </list>
    </operator>
    <operator activated="true" class="filter_examples" compatibility="7.5.003" expanded="true" height="103" name="Filter Examples (2)" width="90" x="715" y="34">
      <list key="filters_list">
        <parameter key="filters_entry_key" value="already_upload.does_not_equal.Y"/>
      </list>
    </operator>
    <operator activated="true" class="select_attributes" compatibility="7.5.003" expanded="true" height="82" name="Select Attributes (2)" width="90" x="849" y="34">
      <parameter key="attribute_filter_type" value="single"/>
      <parameter key="attribute" value="already_upload"/>
      <parameter key="invert_selection" value="true"/>
    </operator>
    <connect from_port="input 1" to_op="Join" to_port="left"/>
    <connect from_op="Read Database" from_port="output" to_op="Filter Examples" to_port="example set input"/>
    <connect from_op="Filter Examples" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
    <connect from_op="Generate Attributes (2)" from_port="original" to_op="Join" to_port="right"/>
    <connect from_op="Join" from_port="join" to_op="Filter Examples (2)" to_port="example set input"/>
    <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
    <connect from_op="Select Attributes (2)" from_port="example set output" to_port="output 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
  <process expanded="true">
    <connect from_port="input 1" to_port="output 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
  <description align="center" color="transparent" colored="false" width="126">1: check if already upload and take only not upload files.&lt;br&gt;2: do not check and take all files.</description>
</operator>
<!-- Cleaning data: Select Subprocess with two alternative subprocesses.
     Subprocess 1 chains three Subprocess operators (data cleaning, internal resultcodes,
     update contact_id); their inner processes hold no operators in this listing.
     Subprocess 2 forwards input 1 to output 1 unchanged. -->
<operator
    activated="true"
    class="select_subprocess"
    compatibility="7.5.003"
    expanded="true"
    height="82"
    name="Cleaning data"
    width="90"
    x="715"
    y="289">
  <process expanded="true">
    <operator activated="true" class="subprocess" compatibility="7.5.003" expanded="true" height="82" name="data cleaning" width="90" x="45" y="34">
      <process expanded="true">
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
      </process>
      <description align="center" color="transparent" colored="false" width="126">Cleansing some data issues as they come from the streetimport</description>
    </operator>
    <operator activated="true" class="subprocess" compatibility="7.5.003" expanded="true" height="82" name="internal resultcodes" width="90" x="179" y="34">
      <process expanded="true">
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
      </process>
    </operator>
    <operator activated="true" class="subprocess" compatibility="7.5.003" expanded="true" height="82" name="update contact_id" width="90" x="313" y="34">
      <process expanded="true">
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
      </process>
    </operator>
    <connect from_port="input 1" to_op="data cleaning" to_port="in 1"/>
    <connect from_op="data cleaning" from_port="out 1" to_op="internal resultcodes" to_port="in 1"/>
    <connect from_op="internal resultcodes" from_port="out 1" to_op="update contact_id" to_port="in 1"/>
    <connect from_op="update contact_id" from_port="out 1" to_port="output 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
  <process expanded="true">
    <connect from_port="input 1" to_port="output 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
  <description align="center" color="transparent" colored="false" width="126">cleans data from streetimport, only if it is new data</description>
</operator>
<connect from_port="input 1" to_op="Set Macros" to_port="through 1"/>
<connect from_op="Set Macros" from_port="through 1" to_op="Set Macro (2)" to_port="through 1"/>
<connect from_op="Loop Files" from_port="output 1" to_op="Append" to_port="example set 1"/>
<connect from_op="Append" from_port="merged set" to_op="Check uploaded" to_port="input 1"/>
<connect from_op="Check uploaded" from_port="output 1" to_op="Cleaning data" to_port="input 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<description align="center" color="blue" colored="true" height="50" resized="true" width="403" x="428" y="30">Getting and processing streetimport files we get from agency</description>
<description align="center" color="blue" colored="true" height="249" resized="true" width="166" x="10" y="254">Upload from agency</description>
<description align="center" color="green" colored="true" height="248" resized="true" width="654" x="179" y="254">Preprocessing</description>
<description align="center" color="orange" colored="true" height="252" resized="true" width="364" x="836" y="251">Loading to RMserver</description>
</process>
</operator>
</process>
Best Answer
-
sgenzer Administrator, Moderator, Employee-RapidMiner, RapidMiner Certified Analyst, Community Manager, Member, University Professor, PM Moderator Posts: 2,959 Community Manager
hi @lghansse - yes I see what's going on. The parameters on your Read CSV were left at their defaults, so RapidMiner looked at Telephone1, saw that its values were numerical, and made it Real. Parse Numbers only looks at nominal/polynominal attributes of course, so it says it cannot find the attribute.
The fix is to just set the metadata properly in the Read CSV operator. See my process below.
<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
<process expanded="true">
<!-- Set Macros: defines the macros agency_id (136133) and checkuploaded (1). -->
<operator
    activated="true"
    class="set_macros"
    compatibility="8.0.001"
    expanded="true"
    height="82"
    name="Set Macros"
    width="90"
    x="45"
    y="34">
  <list key="macros">
    <parameter key="agency_id" value="136133"/>
    <parameter key="checkuploaded" value="1"/>
  </list>
  <description align="center" color="transparent" colored="false" width="126">agency details</description>
</operator>
<!-- Set Macro (2): sets the macro storage_test to 0.
     The line break inside the description is escaped so that the element is well-formed XML. -->
<operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Macro (2)" width="90" x="179" y="34">
  <parameter key="macro" value="storage_test"/>
  <parameter key="value" value="0"/>
  <description align="center" color="transparent" colored="false" width="126">0: live data fully operational&lt;br&gt;1: historic data in tables, for sake of completnes that wasn't processed in the new system</description>
</operator>
<!-- Loop Files: loops over the files in the configured directory whose names match the regex
     .+Tablet_DAILY.+, with macros enabled and parallel execution disabled.
     Per file: Read CSV, then Generate Attributes, then Parse Numbers.
     Read CSV declares the column metadata explicitly; Telephone1 and Telephone2 are declared nominal
     and Mobile1 and Mobile2 polynominal, so Parse Numbers can find them in every file.
     Generate Attributes adds the batch name (from the file_name macro) and a fixed type name; the
     quoted string literal is written with quot entities so that the attribute value is well-formed XML. -->
<operator activated="true" class="concurrency:loop_files" compatibility="8.0.001" expanded="true" height="82" name="Loop Files" width="90" x="45" y="289">
  <parameter key="directory" value="/Users/GenzerConsulting/Desktop/Ighansse"/>
  <parameter key="filter_type" value="regex"/>
  <parameter key="filter_by_regex" value=".+Tablet_DAILY.+"/>
  <parameter key="enable_macros" value="true"/>
  <parameter key="enable_parallel_execution" value="false"/>
  <process expanded="true">
    <operator activated="true" class="read_csv" compatibility="8.0.001" expanded="true" height="68" name="Read CSV" width="90" x="112" y="34">
      <parameter key="csv_file" value="/Users/GenzerConsulting/Desktop/Ighansse/aTablet_DAILY4.csv"/>
      <parameter key="first_row_as_names" value="false"/>
      <list key="annotations">
        <parameter key="0" value="Name"/>
      </list>
      <parameter key="encoding" value="UTF-8"/>
      <list key="data_set_meta_data_information">
        <parameter key="0" value="Loading_type.true.integer.attribute"/>
        <parameter key="1" value="Recruiting_organization_id.true.integer.attribute"/>
        <parameter key="2" value="Recruiter_id.true.polynominal.attribute"/>
        <parameter key="3" value="Recruiter_First_name.true.attribute_value.attribute"/>
        <parameter key="4" value="Recruiter_Last_name.true.attribute_value.attribute"/>
        <parameter key="5" value="Recruiter_prefix.true.attribute_value.attribute"/>
        <parameter key="6" value="Organization_yesno.true.polynominal.attribute"/>
        <parameter key="7" value="campaign_id.true.integer.attribute"/>
        <parameter key="8" value="Prefix.true.polynominal.attribute"/>
        <parameter key="9" value="Donor_id.true.integer.attribute"/>
        <parameter key="10" value="First_name.true.polynominal.attribute"/>
        <parameter key="11" value="Last_name.true.polynominal.attribute"/>
        <parameter key="12" value="Street_name.true.polynominal.attribute"/>
        <parameter key="13" value="Street_Number.true.integer.attribute"/>
        <parameter key="14" value="Street_Unit.true.polynominal.attribute"/>
        <parameter key="15" value="Postal_code.true.integer.attribute"/>
        <parameter key="16" value="City.true.polynominal.attribute"/>
        <parameter key="17" value="Country.true.polynominal.attribute"/>
        <parameter key="18" value="Birth_date.true.polynominal.attribute"/>
        <parameter key="19" value="Telephone1.true.nominal.attribute"/>
        <parameter key="20" value="Telephone2.true.nominal.attribute"/>
        <parameter key="21" value="Mobile1.true.polynominal.attribute"/>
        <parameter key="22" value="Mobile2.true.polynominal.attribute"/>
        <parameter key="23" value="Email.true.attribute_value.attribute"/>
        <parameter key="24" value="IBAN.true.attribute_value.attribute"/>
        <parameter key="25" value="Bic.true.attribute_value.attribute"/>
        <parameter key="26" value="Bank_name.true.attribute_value.attribute"/>
        <parameter key="27" value="Recruitment_date.true.attribute_value.attribute"/>
        <parameter key="28" value="Import_date.true.attribute_value.attribute"/>
        <parameter key="29" value="Newsletter.true.attribute_value.attribute"/>
        <parameter key="30" value="Member.true.attribute_value.attribute"/>
        <parameter key="31" value="Interests.true.attribute_value.attribute"/>
        <parameter key="32" value="Recruitment_location.true.attribute_value.attribute"/>
        <parameter key="33" value="Notes.true.attribute_value.attribute"/>
        <parameter key="34" value="Mandate_reference.true.attribute_value.attribute"/>
        <parameter key="35" value="Amount.true.attribute_value.attribute"/>
        <parameter key="36" value="Frequency_unit.true.attribute_value.attribute"/>
        <parameter key="37" value="Frequency_interval.true.attribute_value.attribute"/>
        <parameter key="38" value="Start_date.true.attribute_value.attribute"/>
        <parameter key="39" value="End_date.true.attribute_value.attribute"/>
        <parameter key="40" value="Cancellation.true.attribute_value.attribute"/>
        <parameter key="41" value="Followup_call.true.attribute_value.attribute"/>
      </list>
    </operator>
    <operator activated="true" class="generate_attributes" compatibility="8.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="34">
      <list key="function_descriptions">
        <parameter key="ddstreetimport_datasource_batchname" value="%{file_name}"/>
        <parameter key="ddstreetimport_datasource_typename" value="&quot;DR csv Streetimport&quot;"/>
      </list>
    </operator>
    <operator activated="true" class="parse_numbers" compatibility="8.0.001" expanded="true" height="82" name="Parse Numbers" width="90" x="447" y="34">
      <parameter key="attribute_filter_type" value="subset"/>
      <parameter key="attributes" value="Telephone2|Mobile2|Telephone1|Mobile1"/>
    </operator>
    <connect from_port="file object" to_op="Read CSV" to_port="file"/>
    <connect from_op="Read CSV" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="example set output" to_op="Parse Numbers" to_port="example set input"/>
    <connect from_op="Parse Numbers" from_port="example set output" to_port="output 1"/>
    <portSpacing port="source_file object" spacing="0"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
  <description align="center" color="transparent" colored="false" width="126">Getting the data from SFTP of agency</description>
</operator>
<!-- Append operator; it has an "after" breakpoint set and no explicit parameters. -->
<operator
    activated="true"
    breakpoints="after"
    class="append"
    compatibility="8.0.001"
    expanded="true"
    height="82"
    name="Append"
    width="90"
    x="246"
    y="289"/>
<!-- Check uploaded: Select Subprocess with two alternative subprocesses.
     Subprocess 1 reads table DDstreetimport_results_handled, keeps the type/batch name columns,
     adds already_upload = "Y", left-joins that onto the incoming data on batch name and type name,
     keeps rows whose already_upload does not equal Y, and finally drops the already_upload attribute.
     Subprocess 2 forwards input 1 to output 1 unchanged.
     Quoted literals inside attribute values are written as quot entities and the line break in the
     description is escaped, so that the listing is well-formed XML.
     NOTE(review): the first Filter Examples value contains literal double quotes around
     DR csv Streetimport; confirm that the database column really stores them. -->
<operator activated="true" breakpoints="after" class="select_subprocess" compatibility="8.0.001" expanded="true" height="82" name="Check uploaded" width="90" x="380" y="289">
  <process expanded="true">
    <operator activated="true" class="jdbc_connectors:read_database" compatibility="8.0.001" expanded="true" height="68" name="Read Database" width="90" x="45" y="136">
      <parameter key="connection" value="DDmachine2"/>
      <parameter key="define_query" value="table name"/>
      <parameter key="table_name" value="DDstreetimport_results_handled"/>
      <enumeration key="parameters"/>
    </operator>
    <operator activated="true" class="filter_examples" compatibility="8.0.001" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="136">
      <list key="filters_list">
        <parameter key="filters_entry_key" value="DDstreetimport_datasource_typename.equals.&quot;DR csv Streetimport&quot;"/>
      </list>
    </operator>
    <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="313" y="136">
      <parameter key="attribute_filter_type" value="subset"/>
      <parameter key="attributes" value="DDstreetimport_datasource_typename|DDstreetimport_datasource_batchname"/>
    </operator>
    <operator activated="true" class="generate_attributes" compatibility="8.0.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="447" y="136">
      <list key="function_descriptions">
        <parameter key="already_upload" value="&quot;Y&quot;"/>
      </list>
    </operator>
    <operator activated="true" class="join" compatibility="8.0.001" expanded="true" height="82" name="Join" width="90" x="581" y="34">
      <parameter key="join_type" value="left"/>
      <parameter key="use_id_attribute_as_key" value="false"/>
      <list key="key_attributes">
        <parameter key="ddstreetimport_datasource_batchname" value="DDstreetimport_datasource_batchname"/>
        <parameter key="ddstreetimport_datasource_typename" value="DDstreetimport_datasource_typename"/>
      </list>
    </operator>
    <operator activated="true" class="filter_examples" compatibility="8.0.001" expanded="true" height="103" name="Filter Examples (2)" width="90" x="715" y="34">
      <list key="filters_list">
        <parameter key="filters_entry_key" value="already_upload.does_not_equal.Y"/>
      </list>
    </operator>
    <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="849" y="34">
      <parameter key="attribute_filter_type" value="single"/>
      <parameter key="attribute" value="already_upload"/>
      <parameter key="invert_selection" value="true"/>
    </operator>
    <connect from_port="input 1" to_op="Join" to_port="left"/>
    <connect from_op="Read Database" from_port="output" to_op="Filter Examples" to_port="example set input"/>
    <connect from_op="Filter Examples" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
    <connect from_op="Generate Attributes (2)" from_port="original" to_op="Join" to_port="right"/>
    <connect from_op="Join" from_port="join" to_op="Filter Examples (2)" to_port="example set input"/>
    <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
    <connect from_op="Select Attributes (2)" from_port="example set output" to_port="output 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
  <process expanded="true">
    <connect from_port="input 1" to_port="output 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
  <description align="center" color="transparent" colored="false" width="126">1: check if already upload and take only not upload files.&lt;br&gt;2: do not check and take all files.</description>
</operator>
<!-- Cleaning data: Select Subprocess with two alternative subprocesses.
     Subprocess 1 chains three Subprocess operators (data cleaning, internal resultcodes,
     update contact_id); their inner processes hold no operators in this listing.
     Subprocess 2 forwards input 1 to output 1 unchanged. -->
<operator
    activated="true"
    class="select_subprocess"
    compatibility="8.0.001"
    expanded="true"
    height="82"
    name="Cleaning data"
    width="90"
    x="715"
    y="289">
  <process expanded="true">
    <operator activated="true" class="subprocess" compatibility="8.0.001" expanded="true" height="82" name="data cleaning" width="90" x="45" y="34">
      <process expanded="true">
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
      </process>
      <description align="center" color="transparent" colored="false" width="126">Cleansing some data issues as they come from the streetimport</description>
    </operator>
    <operator activated="true" class="subprocess" compatibility="8.0.001" expanded="true" height="82" name="internal resultcodes" width="90" x="179" y="34">
      <process expanded="true">
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
      </process>
    </operator>
    <operator activated="true" class="subprocess" compatibility="8.0.001" expanded="true" height="82" name="update contact_id" width="90" x="313" y="34">
      <process expanded="true">
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
      </process>
    </operator>
    <connect from_port="input 1" to_op="data cleaning" to_port="in 1"/>
    <connect from_op="data cleaning" from_port="out 1" to_op="internal resultcodes" to_port="in 1"/>
    <connect from_op="internal resultcodes" from_port="out 1" to_op="update contact_id" to_port="in 1"/>
    <connect from_op="update contact_id" from_port="out 1" to_port="output 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
  <process expanded="true">
    <connect from_port="input 1" to_port="output 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
  <description align="center" color="transparent" colored="false" width="126">cleans data from streetimport, only if it is new data</description>
</operator>
<connect from_port="input 1" to_op="Set Macros" to_port="through 1"/>
<connect from_op="Set Macros" from_port="through 1" to_op="Set Macro (2)" to_port="through 1"/>
<connect from_op="Loop Files" from_port="output 1" to_op="Append" to_port="example set 1"/>
<connect from_op="Append" from_port="merged set" to_op="Check uploaded" to_port="input 1"/>
<connect from_op="Check uploaded" from_port="output 1" to_op="Cleaning data" to_port="input 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<description align="center" color="blue" colored="true" height="50" resized="true" width="403" x="428" y="30">Getting and processing streetimport files we get from agency</description>
<description align="center" color="blue" colored="true" height="249" resized="true" width="166" x="10" y="254">Upload from agency</description>
<description align="center" color="green" colored="true" height="248" resized="true" width="654" x="179" y="254">Preprocessing</description>
<description align="center" color="orange" colored="true" height="252" resized="true" width="364" x="836" y="251">Loading to RMserver</description>
</process>
</operator>
</process>
Scott
1
Answers
Hi @lghansse,
Can you verify your XML code? It seems that some parts of the code are missing. To be sure to copy the whole XML code:
- click on the XML panel
- Control+A (to select the whole XML code), then Control+C
Can you also share some of your .csv files?
Regards,
Lionel
Sorry, I missed part of the XML when copying. This should be correct. I can't really upload the .csv files since they contain personal data. I've included an anonymized example of one of my files.
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.5.003" expanded="true" name="Process">
<process expanded="true">
<!-- Set Macros: defines the macros agency_id (136133) and checkuploaded (1). -->
<operator
    activated="true"
    class="set_macros"
    compatibility="7.5.003"
    expanded="true"
    height="82"
    name="Set Macros"
    width="90"
    x="45"
    y="34">
  <list key="macros">
    <parameter key="agency_id" value="136133"/>
    <parameter key="checkuploaded" value="1"/>
  </list>
  <description align="center" color="transparent" colored="false" width="126">agency details</description>
</operator>
<!-- Set Macro (2): sets the macro storage_test to 0.
     The line break inside the description is escaped so that the element is well-formed XML. -->
<operator activated="true" class="set_macro" compatibility="7.5.003" expanded="true" height="82" name="Set Macro (2)" width="90" x="179" y="34">
  <parameter key="macro" value="storage_test"/>
  <parameter key="value" value="0"/>
  <description align="center" color="transparent" colored="false" width="126">0: live data fully operational&lt;br&gt;1: historic data in tables, for sake of completnes that wasn't processed in the new system</description>
</operator>
<!-- Loop Files: loops over the files in the configured directory whose names match the regex
     .+Tablet_DAILY.+, with macros enabled.
     Per file: Read CSV, then Generate Attributes, then Parse Numbers.
     Read CSV has empty annotation and metadata lists here, so no column types are declared
     explicitly; this is the configuration the question in this thread is about.
     Generate Attributes adds the batch name (from the file_name macro) and a fixed type name; the
     quoted string literal is written with quot entities so that the attribute value is well-formed XML. -->
<operator activated="true" class="concurrency:loop_files" compatibility="7.5.003" expanded="true" height="82" name="Loop Files" width="90" x="45" y="289">
  <parameter key="directory" value="C:\Users\lise.hanssens\Documents\Lise\Rapidminer\Data_voorRM\Test ddmachine2"/>
  <parameter key="filter_type" value="regex"/>
  <parameter key="filter_by_regex" value=".+Tablet_DAILY.+"/>
  <parameter key="enable_macros" value="true"/>
  <process expanded="true">
    <operator activated="true" class="read_csv" compatibility="7.5.003" expanded="true" height="68" name="Read CSV" width="90" x="112" y="34">
      <list key="annotations"/>
      <list key="data_set_meta_data_information"/>
    </operator>
    <operator activated="true" class="generate_attributes" compatibility="7.5.003" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="34">
      <list key="function_descriptions">
        <parameter key="ddstreetimport_datasource_batchname" value="%{file_name}"/>
        <parameter key="ddstreetimport_datasource_typename" value="&quot;DR csv Streetimport&quot;"/>
      </list>
    </operator>
    <operator activated="true" class="parse_numbers" compatibility="7.5.003" expanded="true" height="82" name="Parse Numbers" width="90" x="447" y="34">
      <parameter key="attribute_filter_type" value="subset"/>
      <parameter key="attributes" value="Telephone2|Mobile2|Telephone1|Mobile1"/>
    </operator>
    <connect from_port="file object" to_op="Read CSV" to_port="file"/>
    <connect from_op="Read CSV" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="example set output" to_op="Parse Numbers" to_port="example set input"/>
    <connect from_op="Parse Numbers" from_port="example set output" to_port="output 1"/>
    <portSpacing port="source_file object" spacing="0"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
  <description align="center" color="transparent" colored="false" width="126">Getting the data from SFTP of agency</description>
</operator>
<!-- Append operator; it has an "after" breakpoint set and no explicit parameters. -->
<operator
    activated="true"
    breakpoints="after"
    class="append"
    compatibility="7.5.003"
    expanded="true"
    height="82"
    name="Append"
    width="90"
    x="246"
    y="289"/>
<operator activated="true" breakpoints="after" class="select_subprocess" compatibility="7.5.003" expanded="true" height="82" name="Check uploaded" width="90" x="380" y="289">
<process expanded="true">
<operator activated="true" class="jdbc_connectors:read_database" compatibility="7.5.003" expanded="true" height="68" name="Read Database" width="90" x="45" y="136">
<parameter key="connection" value="DDmachine2"/>
<parameter key="define_query" value="table name"/>
<parameter key="table_name" value="DDstreetimport_results_handled"/>
<enumeration key="parameters"/>
</operator>
<operator activated="true" class="filter_examples" compatibility="7.5.003" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="136">
<list key="filters_list">
<parameter key="filters_entry_key" value="DDstreetimport_datasource_typename.equals."DR csv Streetimport""/>
</list>
</operator>
<operator activated="true" class="select_attributes" compatibility="7.5.003" expanded="true" height="82" name="Select Attributes" width="90" x="313" y="136">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="DDstreetimport_datasource_typename|DDstreetimport_datasource_batchname"/>
</operator>
<operator activated="true" class="generate_attributes" compatibility="7.5.003" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="447" y="136">
<list key="function_descriptions">
<parameter key="already_upload" value=""Y""/>
</list>
</operator>
<operator activated="true" class="join" compatibility="7.5.003" expanded="true" height="82" name="Join" width="90" x="581" y="34">
<parameter key="join_type" value="left"/>
<parameter key="use_id_attribute_as_key" value="false"/>
<list key="key_attributes">
<parameter key="ddstreetimport_datasource_batchname" value="DDstreetimport_datasource_batchname"/>
<parameter key="ddstreetimport_datasource_typename" value="DDstreetimport_datasource_typename"/>
</list>
</operator>
<operator activated="true" class="filter_examples" compatibility="7.5.003" expanded="true" height="103" name="Filter Examples (2)" width="90" x="715" y="34">
<list key="filters_list">
<parameter key="filters_entry_key" value="already_upload.does_not_equal.Y"/>
</list>
</operator>
<operator activated="true" class="select_attributes" compatibility="7.5.003" expanded="true" height="82" name="Select Attributes (2)" width="90" x="849" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="already_upload"/>
<parameter key="invert_selection" value="true"/>
</operator>
<!-- Wiring of this subprocess: input 1 goes to the left port of Join. The chain
     Read Database, Filter Examples, Select Attributes, Generate Attributes (2)
     supplies the right port. The join result passes through Filter Examples (2)
     and Select Attributes (2) before leaving on output 1. -->
<connect from_port="input 1" to_op="Join" to_port="left"/>
<connect from_op="Read Database" from_port="output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
<!-- NOTE(review): the right side of the join is taken from the "original" port
     of Generate Attributes (2), not from its "example set output" port. If
     "original" hands back the unmodified input, the generated already_upload
     attribute may never reach Join. TODO confirm which port is intended. -->
<connect from_op="Generate Attributes (2)" from_port="original" to_op="Join" to_port="right"/>
<connect from_op="Join" from_port="join" to_op="Filter Examples (2)" to_port="example set input"/>
<connect from_op="Filter Examples (2)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
<connect from_op="Select Attributes (2)" from_port="example set output" to_port="output 1"/>
<!-- Canvas layout only: spacing of the subprocess ports. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
<process expanded="true">
<!-- Pass-through alternative: input 1 is wired directly to output 1 with no
     operators in between. -->
<connect from_port="input 1" to_port="output 1"/>
<!-- Canvas layout only: spacing of the subprocess ports. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
<!-- Operator note. The line break in the note text is entity-escaped so it
     remains plain text content; a bare, unclosed br tag would make the
     document ill-formed. -->
<description align="center" color="transparent" colored="false" width="126">1: check if already upload and take only not upload files.&lt;br&gt;2: do not check and take all files.</description>
</operator>
<operator activated="true" class="select_subprocess" compatibility="7.5.003" expanded="true"
          height="82" name="Cleaning data" width="90" x="715" y="289">
  <!-- Holds two alternative subprocesses: a three-step chain
       (data cleaning, internal resultcodes, update contact_id) and a plain
       pass-through. -->
  <!-- NOTE(review): each of the three nested processes below contains only
       portSpacing entries and no connect from "in 1" to "out 1". Confirm
       whether their contents were removed before the process was shared. -->
  <process expanded="true">
    <operator activated="true" class="subprocess" compatibility="7.5.003" expanded="true"
              height="82" name="data cleaning" width="90" x="45" y="34">
      <process expanded="true">
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
      </process>
      <description align="center" color="transparent" colored="false" width="126">Cleansing some data issues as they come from the streetimport</description>
    </operator>
    <operator activated="true" class="subprocess" compatibility="7.5.003" expanded="true"
              height="82" name="internal resultcodes" width="90" x="179" y="34">
      <process expanded="true">
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
      </process>
    </operator>
    <operator activated="true" class="subprocess" compatibility="7.5.003" expanded="true"
              height="82" name="update contact_id" width="90" x="313" y="34">
      <process expanded="true">
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
      </process>
    </operator>
    <!-- Chain: input 1, data cleaning, internal resultcodes, update contact_id, output 1. -->
    <connect from_port="input 1" to_op="data cleaning" to_port="in 1"/>
    <connect from_op="data cleaning" from_port="out 1" to_op="internal resultcodes" to_port="in 1"/>
    <connect from_op="internal resultcodes" from_port="out 1" to_op="update contact_id" to_port="in 1"/>
    <connect from_op="update contact_id" from_port="out 1" to_port="output 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
  <!-- Pass-through alternative: input 1 is wired directly to output 1. -->
  <process expanded="true">
    <connect from_port="input 1" to_port="output 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
  <description align="center" color="transparent" colored="false" width="126">cleans data from streetimport, only if it is new data</description>
</operator>
<!-- Wiring of the enclosing process: input 1 runs through Set Macros and
     Set Macro (2); separately, Loop Files feeds Append, whose merged set goes
     to Check uploaded and then to Cleaning data. -->
<connect from_port="input 1" to_op="Set Macros" to_port="through 1"/>
<connect from_op="Set Macros" from_port="through 1" to_op="Set Macro (2)" to_port="through 1"/>
<connect from_op="Loop Files" from_port="output 1" to_op="Append" to_port="example set 1"/>
<connect from_op="Append" from_port="merged set" to_op="Check uploaded" to_port="input 1"/>
<connect from_op="Check uploaded" from_port="output 1" to_op="Cleaning data" to_port="input 1"/>
<!-- NOTE(review): none of the connects above targets "result 1", so the output
     of Cleaning data is not delivered anywhere in this listing. Confirm
     whether a final connection was dropped. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<!-- Coloured canvas notes that label the stages of the process. -->
<description align="center" color="blue" colored="true" height="50" resized="true" width="403" x="428" y="30">Getting and processing streetimport files we get from agency</description>
<description align="center" color="blue" colored="true" height="249" resized="true" width="166" x="10" y="254">Upload from agency</description>
<description align="center" color="green" colored="true" height="248" resized="true" width="654" x="179" y="254">Preprocessing</description>
<description align="center" color="orange" colored="true" height="252" resized="true" width="364" x="836" y="251">Loading to RMserver</description>
</process>
</operator>
</process>
hi @lghansse - welcome to the community. Just a friendly reminder to also use the </> button when you post code. It cleans up the post, and does not turn ; ) into smiley emojis.
Scott
Hi @lghansse,
I cannot reproduce your error.
I imported your csv file, choosing 'Telephone 1' as nominal, and then wrote it out as data2.csv.
When I launch the process, data1.csv (with 'Telephone 1' as real) and data2.csv (with 'Telephone 1' as nominal) are appended correctly, with no error.
A possible lead: try the Loop Files (Advanced) operator of the Jackhammer Extension.
Regards,
Lionel
@sgenzer Thanks for helping!