#!/usr/bin/perl
# Jussi Karlgren, April 2000
# Part of the PARA program package
# http://www.sics.se/jussi/Verktyg/Para
#
# Converts text into Prolog terms, sentence by sentence:
#     item(SerialNumber, Label, PrologListOfWords).
# Use the -l switch to choose the label of the Prolog terms.  Without -l,
# the name of the input file being read when the first line arrives is
# used, and it then stays in effect for all remaining input.

use strict;
use warnings;
use Getopt::Std;

our $opt_l;    # set by getopts('l:'): label placed in every item/3 term

# Abbreviation table (surface form => normalised form).
# It is not consulted anywhere in this script.
our %abbr = ("ABl.", "abl");

# Run as a program only when executed directly, so that the helper
# subroutines below can be loaded without consuming input.
main() unless caller;

# main()
# Parses the command line, prints the Prolog directives, then reads all
# input (<>) line by line and prints one item/3 term per sentence found.
# Sentences never span input lines: whatever words are pending at the end
# of a line are emitted as a sentence of their own.
sub main {
    getopts('l:');

    my $serial = 0;
    print ":- multifile item/3.\n:- dynamic item/3.\n";

    while (my $line = <>) {
        # Default the label to the current input file name; once set it
        # is kept for every following line and file.
        $opt_l = $ARGV unless $opt_l;

        for my $sentence (split_sentences($line)) {
            print format_item(++$serial, $opt_l, @{$sentence});
        }
    }
    return;
}

# tokenize($line)
# Lower-cases the line, pads delimiters with spaces, normalises sentence
# punctuation to a free-standing "." and returns the whitespace-separated
# words.  The first word may be the empty string when the line starts
# with whitespace (a property of split /\s+/).
sub tokenize {
    my ($text) = @_;

    # NOTE(review): without `use utf8` and decoded input this transliterates
    # bytes, which is only correct when the script and its input share a
    # single-byte encoding -- confirm the intended encoding.
    $text =~ tr/ÉČĀÂA-ZÅÄÖÜ/éčāâa-zåäöü/;

    $text =~ s{([()\[\]/])}{ $1 }g;    # space around brackets and slashes
    $text =~ s/n't/ not /g;            # n't -> not
    $text =~ s/'/  /g;                 # ' -> spaces (for 'll, l', d' etc.)
    $text =~ s/,/ , /g;                # space around commas
    $text =~ s/>/ > /g;                # space around closing angle brackets

    # Any of . ; : ! ? between two non-digits becomes a free-standing "."
    # (sentence break).  Both neighbours are consumed by the match, so
    # directly adjacent punctuation marks are not all rewritten.
    $text =~ s/(\D)[.;:!?](\D)/$1 . $2/g;

    # Drop a period directly after a digit (ordinals in many languages).
    $text =~ s/(\d)\.(\s)/$1$2/g;

    return split /\s+/, $text;
}

# split_sentences($line)
# Returns a list of array references, one per sentence in $line, each
# holding that sentence's words in order.  A "." word ends the current
# sentence when it has at least one word; a "." seen while the sentence is
# still empty is kept as an ordinary word.  Words consisting of a single
# backslash are dropped.
# NOTE(review): backslashes inside longer words are passed through as-is.
sub split_sentences {
    my ($line) = @_;

    my @sentences;
    my @current;
    for my $word (tokenize($line)) {
        next if $word eq '\\' or $word eq '';

        if (@current and $word eq '.') {
            push @sentences, [@current];
            @current = ();
        }
        else {
            push @current, $word;
        }
    }
    push @sentences, [@current] if @current;

    return @sentences;
}

# format_item($serial, $label, @words)
# Returns the text of one Prolog fact, newline-terminated:
#     item(Serial,'Label',['w1', 'w2', ...]).
# Single quotes in the label are doubled so that the quoted atom stays
# well-formed; words never contain single quotes after tokenize().
sub format_item {
    my ($serial, $label, @words) = @_;

    (my $atom = $label) =~ s/'/''/g;
    my $list = join ', ', map { "'$_'" } @words;

    return "item($serial,'$atom',[$list]).\n";
}

1;