Concatenation duplication

asn4293 · March 2018

I want to remove duplication of the data after using Concat operator

Original Data

Id	name	skills	department	language	experience
1	John	x	Fin	FR	3
2	Mickeal	y	HR	ENG	5
3	Sean	y	HR	ENG	1
1	John	x	Fin	ENG	3

Should Look Like

Id	name	skills	department	language	experience
1	John	x	Fin	FR\|ENG	3
2	Mickeal	y	HR	ENG	5
3	Sean	y	HR	ENG	1

But After using Concat it is looking like

Id	name	skills	department	language	experience
1	John\|John	x\|x	Fin\|Fin	FR\|ENG	3
2	Mickeal	y	HR	ENG	5
3	Sean	y	HR	ENG	1

<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="8.0.001" expanded="true" height="68" name="Retrieve" width="90" x="112" y="85">
        <parameter key="repository_entry" value="//Local Repository/data/Employees"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="8.0.001" expanded="true" height="82" name="Set Role" width="90" x="313" y="85">
        <parameter key="attribute_name" value="Id"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="aggregate" compatibility="8.0.001" expanded="true" height="82" name="Aggregate" width="90" x="447" y="85">
        <list key="aggregation_attributes">
          <parameter key="department" value="concatenation"/>
          <parameter key="language" value="concatenation"/>
          <parameter key="name" value="concatenation"/>
          <parameter key="skills" value="concatenation"/>
        </list>
        <parameter key="group_by_attributes" value="Id"/>
      </operator>
      <connect from_op="Retrieve" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

This is file you can work on
https://drive.google.com/open?id=1evx7JGzVmx0f6VO1fNMFG5tBGmB-xKVN

Edin_Klapic · May 2018

Hi @asn4293,

For my tests, I created an Excel file only with the last table you posted which is the equivalent of your dataset but without the ID.

Below you find the XML for a working process with your dataset.

By using the Operator Work on Subset and only selecting the Mobility Region for that you can overcome the issue with the User Sys ID.

Happy Mining,

Edin

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
        <parameter key="excel_file" value="C:\Users\EdinKlapic\Desktop\MobilityConcat.xlsx"/>
        <parameter key="imported_cell_range" value="A1:B14065"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="User Sys ID.true.integer.id"/>
          <parameter key="1" value="Mobility Region.true.polynominal.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="loop_examples" compatibility="8.1.003" expanded="true" height="103" name="Loop Examples" width="90" x="246" y="34">
        <process expanded="true">
          <operator activated="false" breakpoints="after" class="remove_duplicates" compatibility="8.1.003" expanded="true" height="103" name="Remove Duplicates" width="90" x="581" y="595"/>
          <operator activated="false" class="numerical_to_polynominal" compatibility="8.1.003" expanded="true" height="82" name="Numerical to Polynominal" width="90" x="45" y="187">
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <operator activated="false" class="filter_example_range" compatibility="8.1.003" expanded="true" height="82" name="Filter Example Range" width="90" x="380" y="136">
            <parameter key="first_example" value="%{example}"/>
            <parameter key="last_example" value="%{example}"/>
          </operator>
          <operator activated="false" class="split" compatibility="8.1.003" expanded="true" height="82" name="Split" width="90" x="514" y="136">
            <parameter key="split_pattern" value="\|"/>
          </operator>
          <operator activated="false" class="de_pivot" compatibility="8.1.003" expanded="true" height="82" name="De-Pivot" width="90" x="648" y="136">
            <list key="attribute_name">
              <parameter key="Region" value=".*"/>
            </list>
            <parameter key="index_attribute" value="ID"/>
            <parameter key="create_nominal_index" value="true"/>
          </operator>
          <operator activated="false" class="remove_duplicates" compatibility="8.1.003" expanded="true" height="103" name="Remove Duplicates (2)" width="90" x="313" y="289">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Region"/>
          </operator>
          <operator activated="false" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="289">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Region"/>
          </operator>
          <operator activated="false" breakpoints="after" class="transpose" compatibility="8.1.003" expanded="true" height="82" name="Transpose" width="90" x="581" y="289"/>
          <operator activated="false" class="aggregate" compatibility="8.1.003" expanded="true" height="82" name="Aggregate" width="90" x="715" y="289">
            <list key="aggregation_attributes">
              <parameter key="att_1" value="concatenation"/>
            </list>
            <parameter key="only_distinct" value="true"/>
          </operator>
          <operator activated="true" class="filter_example_range" compatibility="8.1.003" expanded="true" height="82" name="Filter Example Range (3)" width="90" x="45" y="34">
            <parameter key="first_example" value="%{example}"/>
            <parameter key="last_example" value="%{example}"/>
          </operator>
          <operator activated="true" class="work_on_subset" compatibility="8.1.003" expanded="true" height="82" name="Work on Subset" width="90" x="179" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Mobility Region"/>
            <parameter key="deliver_inner_results" value="true"/>
            <process expanded="true">
              <operator activated="true" class="split" compatibility="8.1.003" expanded="true" height="82" name="Split (2)" width="90" x="45" y="34">
                <parameter key="split_pattern" value="\|"/>
              </operator>
              <operator activated="true" class="transpose" compatibility="8.1.003" expanded="true" height="82" name="Transpose (2)" width="90" x="179" y="34"/>
              <operator activated="true" class="remove_duplicates" compatibility="8.1.003" expanded="true" height="103" name="Remove Duplicates (3)" width="90" x="313" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="att_1"/>
                <parameter key="include_special_attributes" value="true"/>
              </operator>
              <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes (2)" width="90" x="447" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="id"/>
                <parameter key="invert_selection" value="true"/>
                <parameter key="include_special_attributes" value="true"/>
              </operator>
              <operator activated="true" class="aggregate" compatibility="8.1.003" expanded="true" height="82" name="Aggregate (2)" width="90" x="581" y="34">
                <list key="aggregation_attributes">
                  <parameter key="att_1" value="concatenation"/>
                </list>
                <parameter key="only_distinct" value="true"/>
              </operator>
              <operator activated="true" class="rename" compatibility="8.1.003" expanded="true" height="82" name="Rename" width="90" x="715" y="34">
                <parameter key="old_name" value="concat(att_1)"/>
                <parameter key="new_name" value="Mobility Region"/>
                <list key="rename_additional_attributes"/>
              </operator>
              <connect from_port="exampleSet" to_op="Split (2)" to_port="example set input"/>
              <connect from_op="Split (2)" from_port="example set output" to_op="Transpose (2)" to_port="example set input"/>
              <connect from_op="Transpose (2)" from_port="example set output" to_op="Remove Duplicates (3)" to_port="example set input"/>
              <connect from_op="Remove Duplicates (3)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
              <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Aggregate (2)" to_port="example set input"/>
              <connect from_op="Aggregate (2)" from_port="example set output" to_op="Rename" to_port="example set input"/>
              <connect from_op="Rename" from_port="example set output" to_port="example set"/>
              <portSpacing port="source_exampleSet" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
          </operator>
          <connect from_port="example set" to_op="Filter Example Range (3)" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Split" to_port="example set input"/>
          <connect from_op="Split" from_port="example set output" to_op="De-Pivot" to_port="example set input"/>
          <connect from_op="De-Pivot" from_port="example set output" to_op="Remove Duplicates (2)" to_port="example set input"/>
          <connect from_op="Remove Duplicates (2)" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Transpose" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Filter Example Range (3)" from_port="example set output" to_op="Work on Subset" to_port="example set"/>
          <connect from_op="Work on Subset" from_port="example set" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="append" compatibility="8.1.003" expanded="true" height="82" name="Append" width="90" x="447" y="34"/>
      <connect from_op="Read Excel" from_port="output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

kayman · March 2018

Use the replace operator and add following regex

^(.*?)\|.*$

and replace with :

$1

This will only keep the part from the beginning untill the first pipe symbol

asn4293 · March 2018

It did not work as it removed ENG also. It is looking like this

1	1.0	Fin	FR	x	John
2	2.0	HR	ENG	y	Mickeal
3	3.0	HR	ENG	y	Sean

Should Look Like

Id	name	skills	department	language	experience
1	John	x	Fin	FR\|ENG	3
2	Mickeal	y	HR	ENG	5
3	Sean	y	HR	ENG	1

Thomas_Ott · March 2018

@asn4293 does not the Aggregate operator work for you? You can Group by Name and Language?

asn4293 · March 2018

@Thomas_Ott
The data which is mentioned is sample set not the actual data.

Actual data is having more than 40 attributes and 100,000 rows.
When I use ConCat I don't even know except ID which attributes will have same values, So can not use other attributes using aggregate operator what will be the other values be like. for example in one row I have LogisticsI Logistics I Logistics It should have been like one Logistics as it is same.

Logistics|Logistics|Logistics

Logisztikai koordinátor|Disztribúciós koordinátor|Logisztikai asszisztens

Double Decket Kft.|Art Media Kft.|Gate Media International Kft.

Not Imported|Not Imported|Not Imported

Koordinátor|Asszisztens|Asszisztens

Logistics

Warehouse and Distribution Manager

N/A

Supply Chain Management / Logistics (SC)

Logistics Coordinator

Yes

Hungary

EMEA Europe/Middle East/Africa

English

The only Solution I could think of is to remove every duplication in each row rather than whole data. Would have been perfect if Aggregate removes all concat duplicates and keep only one.

Hope it helped you to understand it better.

BalazsBarany · March 2018

Hi!

You could use Aggregate with the default aggregation and group by ID.

Then it would give you the concatenated values (if you select that as the default aggregation function).

You would apply the regexp replace solution to just the fields you want (or use a variant that just removes identical repetitions).

Regards,

Balázs

asn4293 · May 2018

Americas|EMEA Europe/Middle East/Africa|APAC Asia/Pacific|EMEA Europe/Middle East/Africa

Currently not mobile

Americas

EMEA Europe/Middle East/Africa

Americas

These are the fields in the columns I want to restrict it to only unique values can it be done if so please help me.

Results Should Look Like This

Americas|APAC Asia/Pacific|EMEA Europe/Middle East/Africa

Currently not mobile

Americas

EMEA Europe/Middle East/Africa

Americas

EMEA Europe/Middle East/Africa|APAC Asia/Pacific|Americas

asn4293 · May 2018

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Read Excel" width="90" x="112" y="85">
        <parameter key="excel_file" value="C:\Users\shahida1\Desktop\Mobility.xlsx"/>
        <parameter key="imported_cell_range" value="A1:B18473"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="User Sys ID.true.integer.id"/>
          <parameter key="1" value="Region.true.polynominal.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role" width="90" x="246" y="85">
        <parameter key="attribute_name" value="User Sys ID"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="aggregate" compatibility="8.1.000" expanded="true" height="82" name="Aggregate" width="90" x="380" y="85">
        <parameter key="attribute_filter_type" value="regular_expression"/>
        <parameter key="regular_expression" value="\b(\w+)(?:,\s+\1\b)+."/>
        <parameter key="default_aggregation_function" value="concatenation"/>
        <list key="aggregation_attributes">
          <parameter key="Region" value="concatenation"/>
        </list>
        <parameter key="group_by_attributes" value="User Sys ID"/>
        <parameter key="only_distinct" value="true"/>
      </operator>
      <operator activated="true" class="replace" compatibility="8.1.000" expanded="true" height="82" name="Replace (2)" width="90" x="112" y="442">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="concat(Region)"/>
        <parameter key="replace_what" value="\b(\w+)(?:\s+\1\b)+"/>
        <parameter key="replace_by" value="$1"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role (3)" width="90" x="514" y="340">
        <parameter key="attribute_name" value="User Sys ID"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <connect from_op="Read Excel" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Replace (2)" to_port="example set input"/>
      <connect from_op="Replace (2)" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
      <connect from_op="Set Role (3)" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Hi @Thomas_Ott @lionelderkrikor @sgenzer,

Can you please help me with regex I copied from web to remove exact words in the same rows but it is not working code is attached with file plese help me.
https://drive.google.com/open?id=1EBFYfCG0y3UVNFNayqichdiW3Pf6wumU

Edin_Klapic · May 2018

Hi @asn4293,

This process produces the desired result. You may need to adapt it to your Attributes.

Happy Mining,

Edin

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="read_excel" compatibility="8.1.003" expanded="true" height="68" name="Read Excel" width="90" x="112" y="34">
        <parameter key="excel_file" value="C:\Users\EdinKlapic\Desktop\Duplicates test.xlsx"/>
        <parameter key="imported_cell_range" value="A1:A10"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="duplicates.true.polynominal.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="loop_examples" compatibility="8.1.003" expanded="true" height="103" name="Loop Examples" width="90" x="246" y="34">
        <process expanded="true">
          <operator activated="true" class="filter_example_range" compatibility="8.1.003" expanded="true" height="82" name="Filter Example Range" width="90" x="45" y="34">
            <parameter key="first_example" value="%{example}"/>
            <parameter key="last_example" value="%{example}"/>
          </operator>
          <operator activated="true" class="split" compatibility="8.1.003" expanded="true" height="82" name="Split" width="90" x="179" y="34">
            <parameter key="split_pattern" value="\|"/>
          </operator>
          <operator activated="true" class="transpose" compatibility="8.1.003" expanded="true" height="82" name="Transpose" width="90" x="313" y="34"/>
          <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="id"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <operator activated="true" class="remove_duplicates" compatibility="8.1.003" expanded="true" height="103" name="Remove Duplicates" width="90" x="581" y="34"/>
          <operator activated="true" class="aggregate" compatibility="8.1.003" expanded="true" height="82" name="Aggregate" width="90" x="715" y="34">
            <list key="aggregation_attributes">
              <parameter key="att_1" value="concatenation"/>
            </list>
            <parameter key="only_distinct" value="true"/>
          </operator>
          <connect from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Split" to_port="example set input"/>
          <connect from_op="Split" from_port="example set output" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Transpose" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Remove Duplicates" to_port="example set input"/>
          <connect from_op="Remove Duplicates" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="append" compatibility="8.1.003" expanded="true" height="82" name="Append" width="90" x="380" y="34"/>
      <connect from_op="Read Excel" from_port="output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

asn4293 · May 2018

@Edin_Klapic Thank you for reply ,

Can you please share file you used on this model or if possible please apply it on my file which is

Here https://drive.google.com/open?id=1EBFYfCG0y3UVNFNayqichdiW3Pf6wumU

asn4293 · May 2018

@Edin_Klapic Thank you I was able to figure it out but with ID it all mess things up can you please help. I tried to join ID later but was not able to do it.

Also, please have this link for this new file https://drive.google.com/open?id=1MBqi460DL79ssLGBzYalHSyIkwVWKbpL

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
        <parameter key="excel_file" value="C:\Users\shahida1\Desktop\MobilityConcat.xlsx"/>
        <parameter key="imported_cell_range" value="A1:B14065"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="User Sys ID.true.integer.id"/>
          <parameter key="1" value="Mobility Region.true.polynominal.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="112" y="238">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Mobility Region"/>
      </operator>
      <operator activated="true" class="loop_examples" compatibility="8.1.000" expanded="true" height="103" name="Loop Examples" width="90" x="246" y="34">
        <process expanded="true">
          <operator activated="true" class="filter_example_range" compatibility="8.1.000" expanded="true" height="82" name="Filter Example Range" width="90" x="45" y="34">
            <parameter key="first_example" value="%{example}"/>
            <parameter key="last_example" value="%{example}"/>
          </operator>
          <operator activated="true" class="split" compatibility="8.1.000" expanded="true" height="82" name="Split" width="90" x="179" y="34">
            <parameter key="split_pattern" value="\|"/>
          </operator>
          <operator activated="true" class="transpose" compatibility="8.1.000" expanded="true" height="82" name="Transpose" width="90" x="313" y="34"/>
          <operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="id"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <operator activated="true" class="remove_duplicates" compatibility="8.1.000" expanded="true" height="103" name="Remove Duplicates" width="90" x="581" y="34"/>
          <operator activated="true" class="aggregate" compatibility="8.1.000" expanded="true" height="82" name="Aggregate" width="90" x="715" y="34">
            <list key="aggregation_attributes">
              <parameter key="att_1" value="concatenation"/>
            </list>
            <parameter key="only_distinct" value="true"/>
          </operator>
          <connect from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Split" to_port="example set input"/>
          <connect from_op="Split" from_port="example set output" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Transpose" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Remove Duplicates" to_port="example set input"/>
          <connect from_op="Remove Duplicates" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="append" compatibility="8.1.000" expanded="true" height="82" name="Append" width="90" x="380" y="34"/>
      <connect from_op="Read Excel" from_port="output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Thank you and have a nice day.

asn4293 · May 2018

@Edin_Klapic Thank you so very much for contributing and helping me around this data

Howdy, Stranger!

Quick Links

Categories

Altair RapidMiner Community

GET HELP. LEARN BEST PRACTICES. NETWORK WITH YOUR PEERS.

Concatenation duplication

Best Answer

Answers