The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is in read-only mode from October 28 to November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here.
Information Extraction plugin help in RapidMiner
I'm trying to apply text mining to a text document with the help of the Information Extraction plugin. I don't know which operators to use or how to use them. Please help: how do I proceed further?
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process, version 5.3.015.
     Top-level chain: Read Document, then Process Documents, then
     SentenceTokenizer, then TextAnnotator, whose output goes to "result 1". -->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.3.015"
            expanded="true"
            name="Process">
    <process expanded="true">

      <!-- Step 1: load the text document from the file path given below. -->
      <operator activated="true"
                class="text:read_document"
                compatibility="5.3.002"
                expanded="true"
                height="60"
                name="Read Document"
                width="90"
                x="45"
                y="30">
        <parameter key="file" value="C:\Users\Shireen\Desktop\times manifesto.txt"/>
      </operator>

      <!-- Step 2: Process Documents. The inner subprocess contains no operators;
           it only wires the incoming "document" port straight to "document 1". -->
      <operator activated="true"
                class="text:process_documents"
                compatibility="5.3.002"
                expanded="true"
                height="94"
                name="Process Documents"
                width="90"
                x="179"
                y="75">
        <process expanded="true">
          <connect from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Step 3: sentence tokenizer from the information_extraction extension.
           NOTE(review): "optionalAttribute" is set to "and"; this looks like a
           placeholder rather than an attribute name. Confirm the intended value. -->
      <operator activated="true"
                class="information_extraction:sentence_tokenizer"
                compatibility="1.0.000"
                expanded="true"
                height="76"
                name="SentenceTokenizer"
                width="90"
                x="380"
                y="75">
        <parameter key="optionalAttribute" value="and"/>
      </operator>

      <!-- Step 4: text annotator from the information_extraction extension.
           NOTE(review): the values "and", "name" and "no" below may be
           placeholders. Verify them against the example set's attributes. -->
      <operator activated="true"
                class="information_extraction:text_annotator"
                compatibility="1.0.000"
                expanded="true"
                height="76"
                name="TextAnnotator"
                width="90"
                x="514"
                y="120">
        <parameter key="repository-entry" value="and"/>
        <parameter key="text-attribute" value="name"/>
        <parameter key="label-attribute" value="no"/>
      </operator>

      <!-- Wiring between the four operators and the process result port. -->
      <connect from_op="Read Document"
               from_port="output"
               to_op="Process Documents"
               to_port="documents 1"/>
      <connect from_op="Process Documents"
               from_port="example set"
               to_op="SentenceTokenizer"
               to_port="example set input"/>
      <connect from_op="SentenceTokenizer"
               from_port="example set output"
               to_op="TextAnnotator"
               to_port="example set input"/>
      <connect from_op="TextAnnotator"
               from_port="example set output"
               to_port="result 1"/>

      <!-- Layout-only port spacing entries. -->
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
0