The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is in read-only mode from October 28 to November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here.

How to split a sentence by conjunction words in sentence?

HeikoeWin786HeikoeWin786 Member Posts: 64 Contributor I
Hi,

I want to split a sentence using the Split operator, but not with a regular expression.
I want the sentence to be split at conjunction words, e.g. and, but, however, yet.
E.g. The staff is bad, and the room is not good, but the food is great.
This sentence should split as
The staff is bad
the room is not good
the food is great

Can anyone suggest how I can achieve this in RapidMiner?

thanks a lot

Answers

  • Robi_MeRobi_Me Member Posts: 32 Maven
    edited January 2021
    <?xml version="1.0" encoding="UTF-8"?><process version="9.7.002">
      <!--
        Splits a sentence at conjunction words without a hand-written regular expression:
          1. "Create ExampleSet" supplies a two-column dictionary (conjunction, replace).
          2. "Create ExampleSet (2)" supplies one example whose "text" attribute holds the sentence.
          3. "Replace (Dictionary)" prefixes every listed conjunction in "text" with the marker §.
          4. "Split" cuts "text" at every § marker.
        Because each replacement is "§" followed by the conjunction itself, the conjunction
        stays at the start of the piece that follows it.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.7.002" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Dictionary: one row per conjunction; "replace" is always § plus the same word. -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet" width="90" x="179" y="289">
            <parameter key="generator_type" value="comma separated text"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <!-- First CSV line is the header; every following line is "word,§word". -->
            <parameter key="input_csv_text" value="conjunction,replace&#10;also,§also&#10;besides,§besides&#10;furthermore,§furthermore&#10;likewise,§likewise&#10;moreover,§moreover&#10;however,§however&#10;nevertheless,§nevertheless&#10;nonetheless,§nonetheless&#10;still,§still&#10;conversely,§conversely&#10;instead,§instead&#10;otherwise,§otherwise&#10;rather,§rather&#10;accordingly,§accordingly&#10;consequently,§consequently&#10;hence,§hence&#10;meanwhile,§meanwhile&#10;then,§then&#10;therefore,§therefore&#10;thus,§thus&#10;and,§and&#10;but,§but&#10;yet,§yet"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- Sample input: a single example with the sentence in the "text" attribute. -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet (2)" width="90" x="179" y="136">
            <parameter key="generator_type" value="attribute functions"/>
            <parameter key="number_of_examples" value="1"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions">
              <parameter key="text" value="&quot;The staff is bad, and the room is not good, but the food is great.&quot;"/>
            </list>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="input_csv_text" value="text&#10;The staff is bad, and the room is not good, but the food is great."/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!--
            Inserts the § marker in front of every dictionary word found in "text".
            NOTE(review): with use_regular_expressions off the entries appear to be matched as
            plain substrings, so "and" would also match inside a word such as "band". Confirm,
            and switch to word-boundary patterns if that matters for the real data.
          -->
          <operator activated="true" class="replace_dictionary" compatibility="9.7.002" expanded="true" height="103" name="Replace (Dictionary)" width="90" x="380" y="187">
            <parameter key="return_preprocessing_model" value="false"/>
            <parameter key="create_view" value="false"/>
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="true"/>
            <parameter key="from_attribute" value="conjunction"/>
            <parameter key="to_attribute" value="replace"/>
            <parameter key="use_regular_expressions" value="false"/>
            <parameter key="convert_to_lowercase" value="false"/>
            <parameter key="first_match_only" value="false"/>
          </operator>
          <!-- Cuts "text" at each § marker inserted by the previous operator. -->
          <operator activated="true" class="split" compatibility="9.7.002" expanded="true" height="82" name="Split" width="90" x="514" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="split_pattern" value="§"/>
            <parameter key="split_mode" value="ordered_split"/>
          </operator>
          <!-- Wiring: dictionary and sentence feed Replace (Dictionary), whose output feeds Split. -->
          <connect from_op="Create ExampleSet" from_port="output" to_op="Replace (Dictionary)" to_port="dictionary"/>
          <connect from_op="Create ExampleSet (2)" from_port="output" to_op="Replace (Dictionary)" to_port="example set input"/>
          <connect from_op="Replace (Dictionary)" from_port="example set output" to_op="Split" to_port="example set input"/>
          <connect from_op="Split" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    


Sign In or Register to comment.