The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
Split data


in Help
Hi!
I have a CSV file that consists of an id, which is an unique movie, and the keywords for this movie. It looks something like this: 15602,"[{'id': 1495, 'name': 'fishing'}, {'id': 12392, 'name': 'best friend'}, {'id': 179431, 'name': 'duringcreditsstinger'}, {'id': 208510, 'name': 'old men'}]"
I want to split the data so every movie (the id) gets every keyword. But using read csv-file, it only gets me a column with the id and then one column with all the keywords, including keyword-id and 'name'. Is there any solution to only get the specific keyword?
I have a CSV file that consists of an id, which is an unique movie, and the keywords for this movie. It looks something like this: 15602,"[{'id': 1495, 'name': 'fishing'}, {'id': 12392, 'name': 'best friend'}, {'id': 179431, 'name': 'duringcreditsstinger'}, {'id': 208510, 'name': 'old men'}]"
I want to split the data so every movie (the id) gets every keyword. But using read csv-file, it only gets me a column with the id and then one column with all the keywords, including keyword-id and 'name'. Is there any solution to only get the specific keyword?
0
Best Answers
-
rjones13 Member Posts: 204
Unicorn
Hi,
Would the following process work for you? It's not the most elegant, but does the job. If you need it to automatically select the correct attributes, might need a little more fancy work.
Let me know how you get one.
Best,
Roland<?xml version="1.0" encoding="UTF-8"?><process version="10.2.000">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="10.2.000" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="UTF-8"/>
<process expanded="true">
<operator activated="true" class="utility:create_exampleset" compatibility="10.2.000" expanded="true" height="68" name="Create ExampleSet" width="90" x="45" y="34">
<parameter key="generator_type" value="comma separated text"/>
<parameter key="number_of_examples" value="100"/>
<parameter key="use_stepsize" value="false"/>
<list key="function_descriptions"/>
<parameter key="add_id_attribute" value="false"/>
<list key="numeric_series_configuration"/>
<list key="date_series_configuration"/>
<list key="date_series_configuration (interval)"/>
<parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
<parameter key="time_zone" value="SYSTEM"/>
<parameter key="input_csv_text" value="att1; att2 15602;[{'id': 1495, 'name': 'fishing'}, {'id': 12392, 'name': 'best friend'}, {'id': 179431, 'name': 'duringcreditsstinger'}, {'id': 208510, 'name': 'old men'}]"/>
<parameter key="column_separator" value=";"/>
<parameter key="parse_all_as_nominal" value="true"/>
<parameter key="decimal_point_character" value="."/>
<parameter key="trim_attribute_names" value="true"/>
</operator>
<operator activated="true" class="blending:set_role" compatibility="10.2.000" expanded="true" height="82" name="Set Role" width="90" x="179" y="34">
<list key="set_roles">
<parameter key="att1" value="id"/>
</list>
</operator>
<operator activated="true" class="replace" compatibility="10.2.000" expanded="true" height="82" name="Remove special characters" width="90" x="313" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="att2"/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="nominal"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="file_path"/>
<parameter key="block_type" value="single_value"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="single_value"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="replace_what" value="[\[\]\{\}\s\']"/>
<parameter key="replace_by" value=""/>
</operator>
<operator activated="true" class="split" compatibility="10.2.000" expanded="true" height="82" name="Split" width="90" x="447" y="34">
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="nominal"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="file_path"/>
<parameter key="block_type" value="single_value"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="single_value"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="split_pattern" value=","/>
<parameter key="split_mode" value="ordered_split"/>
</operator>
<operator activated="true" class="blending:select_attributes" compatibility="10.2.000" expanded="true" height="82" name="Select Attributes" width="90" x="581" y="34">
<parameter key="type" value="include attributes"/>
<parameter key="attribute_filter_type" value="a subset"/>
<parameter key="select_attribute" value=""/>
<parameter key="select_subset" value="att2_2␞att2_4␞att2_6␞att2_8"/>
<parameter key="also_apply_to_special_attributes_(id,_label..)" value="false"/>
</operator>
<operator activated="true" class="replace" compatibility="10.2.000" expanded="true" height="82" name="Remove name:" width="90" x="715" y="34">
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="nominal"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="file_path"/>
<parameter key="block_type" value="single_value"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="single_value"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="replace_what" value="name:"/>
</operator>
<connect from_op="Create ExampleSet" from_port="output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Remove special characters" to_port="example set input"/>
<connect from_op="Remove special characters" from_port="example set output" to_op="Split" to_port="example set input"/>
<connect from_op="Split" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Remove name:" to_port="example set input"/>
<connect from_op="Remove name:" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>0 -
CKönig Employee-RapidMiner, Member Posts: 70
RM Team Member
Hi @ostlundtheo,
your data structure in the CSV looks like JSON, so you would need to parse that first. Maybe even have a look at the process that is generating your csv file, if you can change that to make processing easier.0