The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
Extracting the Tokenized Result Data
Hi,
First, I read a txt file; afterwards I would like to do some simple text processing steps and then export the newly stemmed and processed data to a txt file.
I got all the steps working, but I just can't export the results. I don't know if it's clear, but I would like to have a txt file which contains the processed file as it is shown in the result table.
First, I read a txt file; afterwards I would like to do some simple text processing steps and then export the newly stemmed and processed data to a txt file.
I got all the steps working, but I just can't export the results. I don't know if it's clear, but I would like to have a txt file which contains the processed file as it is shown in the result table.
<!-- Read Document: loads the file C:\mystring.txt with content type "txt" and
     SYSTEM encoding; extract_text_only and use_file_extension_as_type are enabled. -->
<operator activated="true"
          class="text:read_document"
          compatibility="5.2.004"
          expanded="true"
          height="60"
          name="Read Document"
          width="90"
          x="45"
          y="120">
  <parameter key="file" value="C:\mystring.txt"/>
  <parameter key="extract_text_only" value="true"/>
  <parameter key="use_file_extension_as_type" value="true"/>
  <parameter key="content_type" value="txt"/>
  <parameter key="encoding" value="SYSTEM"/>
</operator>
<!-- Tokenize: splits the incoming document using mode "non letters".
     NOTE(review): characters, language and max_token_length are also set here;
     presumably they only apply to other tokenizer modes. Confirm against the
     Tokenize operator documentation. -->
<operator activated="true"
          class="text:tokenize"
          compatibility="5.2.004"
          expanded="true"
          height="60"
          name="Tokenize"
          width="90"
          x="196"
          y="138">
  <parameter key="mode" value="non letters"/>
  <parameter key="characters" value=".:"/>
  <parameter key="language" value="English"/>
  <parameter key="max_token_length" value="3"/>
</operator>
<!-- Transform Cases: configured with transform_to = "lower case". -->
<operator activated="true"
          class="text:transform_cases"
          compatibility="5.2.004"
          expanded="true"
          height="60"
          name="Transform Cases"
          width="90"
          x="311"
          y="69">
  <parameter key="transform_to" value="lower case"/>
</operator>
<!-- Filter Stopwords (German): uses the stop word list named "Standard". -->
<operator activated="true"
          class="text:filter_stopwords_german"
          compatibility="5.2.004"
          expanded="true"
          height="60"
          name="Filter Stopwords (German)"
          width="90"
          x="313"
          y="210">
  <parameter key="stop_word_list" value="Standard"/>
</operator>
<!-- Stem (German): stemming operator with no explicit parameters set. -->
<operator activated="true"
          class="text:stem_german"
          compatibility="5.2.004"
          expanded="true"
          height="60"
          name="Stem (German)"
          width="90"
          x="447"
          y="30"/>
<!-- Filter Tokens (by Length): configured with min_chars = 2 and max_chars = 25. -->
<operator activated="true"
          class="text:filter_by_length"
          compatibility="5.2.004"
          expanded="true"
          height="60"
          name="Filter Tokens (by Length)"
          width="90"
          x="447"
          y="210">
  <parameter key="min_chars" value="2"/>
  <parameter key="max_chars" value="25"/>
</operator>
<!-- Wiring: Read Document, Tokenize, Transform Cases, Filter Stopwords (German),
     Stem (German) and Filter Tokens (by Length) are chained in that order via
     their document ports. The last connection has no to_op and targets port
     "result 1" (presumably the enclosing process's result port; confirm).
     The portSpacing entries set spacing values for the listed ports. -->
<connect from_op="Read Document" from_port="output" to_op="Tokenize" to_port="document"/>
<connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
<connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (German)" to_port="document"/>
<connect from_op="Filter Stopwords (German)" from_port="document" to_op="Stem (German)" to_port="document"/>
<connect from_op="Stem (German)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
<connect from_op="Filter Tokens (by Length)" from_port="document" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="108"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
0
Answers