% para.prolog % http://www.sics.se/jussi/Verktyg/ % Jussi Karlgren, maj 2001 % Parar två texter som är inlästa mening för mening i item(Löpnummer,Etikett,[L|StaAvOrd]). % Lämplig förprocessor är tex preprocess.perl som bör finnas i närheten. %==================================================================================== % deklarationer :- multifile item/3. :- dynamic item/3. %==================================================================================== % toppnivåpredikat align(Source,Target,_Alignment,I,J,DS,PS) :- clean, fix_find_points(I,J,Source,Target,DS,PS), assert(path_mem(I,J,0)), next_fixpoint(I,J,I0,J0,I1,J1,_DiffS,_DiffT,NullS,NullT,DS), assert(path_cache(I0,J0,path([],0,I0,J0,NullS-NullT))), explore_paths(I0,J0,I1,J1,Source,Target,DS,PS). clean :- retractall(fixpoint(_,_,_,_,_)), retractall(path_cache(_,_,_)), retractall(path_mem(_,_,_)), retractall(f_cache(_,_,_)). %==================================================================================== % Hjälppredikat % % Om filnamnen finns i filnamn/2 som nedan: % filnamn('franska.terms','engelska.terms'). % kan programmet köras med t/0 som sätter resultatet i filerna % franska.par och engelska.par. % filnamn('de.396D0645.txt.deabbr.presplit.terms', 'en_396D0645.html.fixed.merfixad.terms'). filnamn('de.399D0167.txt.deabbr.presplit.terms', 'en_399D0167.html.fixed.merfixad.terms'). filnamn('de.287A0720(02).txt.deabbr.presplit.terms', 'en_287A0720_02.html.fixed.merfixad.terms'). filnamn('de.297A0222(01).txt.deabbr.presplit.terms', 'en_297A0222_01.html.fixed.merfixad.terms'). filnamn('de.298A0319(01).txt.deabbr.presplit.terms', 'en_298A0319_01.html.fixed.merfixad.terms'). filnamn('de.298A0623(01).txt.deabbr.presplit.terms', 'en_298A0623_01.html.fixed.merfixad.terms'). filnamn('de.300D0400.txt.deabbr.presplit.terms', 'en_300D0400.html.fixed.merfixad.terms'). filnamn('de.300L0014.txt.deabbr.presplit.terms', 'en_300L0014.html.fixed.merfixad.terms'). filnamn('de.300L0032.txt.deabbr.presplit.terms', 'en_300L0032.html.fixed.merfixad.terms'). filnamn('de.300R2388.txt.deabbr.presplit.terms', 'en_300R2388.html.fixed.merfixad.terms'). filnamn('de.300R2848.txt.deabbr.presplit.terms', 'en_300R2848.html.fixed.merfixad.terms'). filnamn('de.300X0517.txt.deabbr.presplit.terms', 'en_300X0517.html.fixed.merfixad.terms'). filnamn('de.300X0776.txt.deabbr.presplit.terms', 'en_300X0776.html.fixed.merfixad.terms'). filnamn('de.300Y0330(01).txt.deabbr.presplit.terms', 'en_300Y0330_01.html.fixed.merfixad.terms'). filnamn('de.300Y0810(01).txt.deabbr.presplit.terms', 'en_300Y0810_01.html.fixed.merfixad.terms'). filnamn('de.301D0098.txt.deabbr.presplit.terms', 'en_301D0098.html.fixed.merfixad.terms'). filnamn('de.301D0264.txt.deabbr.presplit.terms', 'en_301D0264.html.fixed.merfixad.terms'). filnamn('de.301L0018.txt.deabbr.presplit.terms', 'en_301L0018.html.fixed.merfixad.terms'). filnamn('de.301Y0131(01).txt.deabbr.presplit.terms', 'en_301Y0131_01.html.fixed.merfixad.terms'). filnamn('de.372L0462.txt.deabbr.presplit.terms', 'en_372L0462.html.fixed.merfixad.terms'). filnamn('de.377L0388.txt.deabbr.presplit.terms', 'en_377L0388.html.fixed.merfixad.terms'). filnamn('de.382L0714.txt.deabbr.presplit.terms', 'en_382L0714.html.fixed.merfixad.terms'). filnamn('de.386S1566.txt.deabbr.presplit.terms', 'en_386S1566.html.fixed.merfixad.terms'). filnamn('de.391D0482.txt.deabbr.presplit.terms', 'en_391D0482.html.fixed.merfixad.terms'). filnamn('de.391L0497.txt.deabbr.presplit.terms', 'en_391L0497.html.fixed.merfixad.terms'). filnamn('de.392D0097.txt.deabbr.presplit.terms', 'en_392D0097.html.fixed.merfixad.terms'). filnamn('de.392L0018.txt.deabbr.presplit.terms', 'en_392L0018.html.fixed.merfixad.terms'). filnamn('de.392L0022.txt.deabbr.presplit.terms', 'en_392L0022.html.fixed.merfixad.terms'). filnamn('de.393L0038.txt.deabbr.presplit.terms', 'en_393L0038.html.fixed.merfixad.terms'). filnamn('de.393L0042.txt.deabbr.presplit.terms', 'en_393L0042.html.fixed.merfixad.terms'). filnamn('de.394L0058.txt.deabbr.presplit.terms', 'en_394L0058.html.fixed.merfixad.terms'). filnamn('de.396R2200.txt.deabbr.presplit.terms', 'en_396R2200.html.fixed.merfixad.terms'). filnamn('de.396R2406.txt.deabbr.presplit.terms', 'en_396R2406.html.fixed.merfixad.terms'). filnamn('de.397D0084.txt.deabbr.presplit.terms', 'en_397D0084.html.fixed.merfixad.terms'). filnamn('de.397D0469.txt.deabbr.presplit.terms', 'en_397D0469.html.fixed.merfixad.terms'). filnamn('de.397D0816.txt.deabbr.presplit.terms', 'en_397D0816.html.fixed.merfixad.terms'). filnamn('de.397L0023.txt.deabbr.presplit.terms', 'en_397L0023.html.fixed.merfixad.terms'). filnamn('de.397L0024.txt.deabbr.presplit.terms', 'en_397L0024.html.fixed.merfixad.terms'). filnamn('de.397Y0512(01).txt.deabbr.presplit.terms', 'en_397Y0512_01.html.fixed.merfixad.terms'). filnamn('de.398D0387.txt.deabbr.presplit.terms', 'en_398D0387.html.fixed.merfixad.terms'). filnamn('de.398D0531.txt.deabbr.presplit.terms', 'en_398D0531.html.fixed.merfixad.terms'). filnamn('de.398L0018.txt.deabbr.presplit.terms', 'en_398L0018.html.fixed.merfixad.terms'). filnamn('de.398L0037.txt.deabbr.presplit.terms', 'en_398L0037.html.fixed.merfixad.terms'). filnamn('de.398L0069.txt.deabbr.presplit.terms', 'en_398L0069.html.fixed.merfixad.terms'). filnamn('de.200A1215(01).txt.deabbr.presplit.terms', 'en_200A1215_01.html.fixed.merfixad.terms'). filnamn('de.398R1232.txt.deabbr.presplit.terms', 'en_398R1232.html.fixed.merfixad.terms'). filnamn('de.398Y0722(02).txt.deabbr.presplit.terms', 'en_398Y0722_02.html.fixed.merfixad.terms'). filnamn('de.398Y0731(01).txt.deabbr.presplit.terms', 'en_398Y0731_01.html.fixed.merfixad.terms'). filnamn('de.398Y1120(03).txt.deabbr.presplit.terms', 'en_398Y1120_03.html.fixed.merfixad.terms'). filnamn('de.398Y1207(01).txt.deabbr.presplit.terms', 'en_398Y1207_01.html.fixed.merfixad.terms'). filnamn('de.398Y1209(01).txt.deabbr.presplit.terms', 'en_398Y1209_01.html.fixed.merfixad.terms'). filnamn('de.399D0182.txt.deabbr.presplit.terms', 'en_399D0182.html.fixed.merfixad.terms'). filnamn('de.399D0287.txt.deabbr.presplit.terms', 'en_399D0287.html.fixed.merfixad.terms'). filnamn('de.399L0096.txt.deabbr.presplit.terms', 'en_399L0096.html.fixed.merfixad.terms'). filnamn('de.399R0800.txt.deabbr.presplit.terms', 'en_399R0800.html.fixed.merfixad.terms'). filnamn('de.399R1749.txt.deabbr.presplit.terms', 'en_399R1749.html.fixed.merfixad.terms'). filnamn('de.399R2204.txt.deabbr.presplit.terms', 'en_399R2204.html.fixed.merfixad.terms'). filnamn('de.399X0802(01).txt.deabbr.presplit.terms', 'en_399X0802_01.html.fixed.merfixad.terms'). filnamn('de.399Y0209(01).txt.deabbr.presplit.terms', 'en_399Y0209_01.html.fixed.merfixad.terms'). filnamn('de.499Y0130(06).txt.deabbr.presplit.terms', 'en_499Y0130_06.html.fixed.merfixad.terms'). filnamn('de.499Y0812(01).txt.deabbr.presplit.terms', 'en_499Y0812_01.html.fixed.merfixad.terms'). filnamn('de.500PC0276.txt.deabbr.presplit.terms', 'en_500PC0276.html.fixed.merfixad.terms'). filnamn('de.500PC0324(02).txt.deabbr.presplit.terms', 'en_500PC0324_02.html.fixed.merfixad.terms'). filnamn('de.500PC0438(02).txt.deabbr.presplit.terms', 'en_500PC0438_02.html.fixed.merfixad.terms'). filnamn('de.500PC0438(03).txt.deabbr.presplit.terms', 'en_500PC0438_03.html.fixed.merfixad.terms'). filnamn('de.500PC0461.txt.deabbr.presplit.terms', 'en_500PC0461.html.fixed.merfixad.terms'). filnamn('de.500PC0574.txt.deabbr.presplit.terms', 'en_500PC0574.html.fixed.merfixad.terms'). filnamn('de.500PC0716.txt.deabbr.presplit.terms', 'en_500PC0716.html.fixed.merfixad.terms'). filnamn('de.500PC0732.txt.deabbr.presplit.terms', 'en_500PC0732.html.fixed.merfixad.terms'). filnamn('de.500PC0736.txt.deabbr.presplit.terms', 'en_500PC0736.html.fixed.merfixad.terms'). filnamn('de.500PC0816.txt.deabbr.presplit.terms', 'en_500PC0816.html.fixed.merfixad.terms'). filnamn('de.500PC0847.txt.deabbr.presplit.terms', 'en_500PC0847.html.fixed.merfixad.terms'). filnamn('de.500PC0899.txt.deabbr.presplit.terms', 'en_500PC0899.html.fixed.merfixad.terms'). filnamn('de.297A0716(01).txt.deabbr.presplit.terms', 'en_297A0716_01.html.fixed.merfixad.terms'). filnamn('de.400A0922(02).txt.deabbr.presplit.terms', 'en_400A0922_02.html.fixed.merfixad.terms'). filnamn('de.498Y0716(03).txt.deabbr.presplit.terms', 'en_498Y0716_03.html.fixed.merfixad.terms'). filnamn('de.500PC0063(01).txt.deabbr.presplit.terms', 'en_500PC0063_01.html.fixed.merfixad.terms'). %-------------------------------------------- t :- findall(F-E,filnamn(F,E),L), t(de,en,L). %-------------------------------------------- t(_,_,[]). t(S,T,[F-E|As]) :- abolish(item/3), consult(F), consult(E), fixanamn(F,E,F1,E1), findall(N,item(N,S,_),Ls), sort(Ls,[I|_]), findall(O,item(O,T,_),Lt), sort(Lt,[J|_]), n(S,T,I,J,F1,E1), t(S,T,As). fixanamn(F,E,F1,E1) :- atom_concat(E0,'.terms',E), atom_concat(F0,'.terms',F), atom_concat(E0,'.fix.par',E1), atom_concat(F0,'.fix.par',F1). % ingen logg m(S,T,I,J,L,M) :- align(S,T,_K,I,J,no,L-M). % med logg n(S,T,I,J,L,M) :- atom_concat(L0,'.fix.par',L), atom_concat(L0,'.fix.log',L1), !, align(S,T,_K,I,J,L1,L-M). % med stderr o(S,T,I,J,L,M) :- !, align(S,T,_K,I,J,user_error,L-M). %==================================================================================== % io printl(_ParS,[]) :- !. printl(_ParS,null) :- !. printl(ParS,[A|As]) :- printitem(ParS,A),printl(ParS,As). printitem(_ParS,A) :- struntitem(A), !. printitem(ParS,A) :- print(ParS,A), print(ParS,' '). struntitem(','). % portray saknar möjlighet att bifoga strömvariabel. portrayjf(F1-F2,P) :- open(F1,append,ParStreamF), open(F2,append,ParStreamE), portrayj(ParStreamF-ParStreamE,P), close(ParStreamE), close(ParStreamF). portrayj(ParS,path(A,_B,_C,_D,_L)) :- !, reverse(A,Ar), portrayj(ParS,Ar). portrayj(ParS,fixpath(A,_B,_C,_D)) :- !, reverse(A,Ar), portrayj(ParS,Ar). portrayj(ParS1-ParS2,par((A,Apos),(B,Bpos),W0)) :- W is truncate(W0*100)/100, !, print(ParS1,''), printl(ParS1,A), nl(ParS1), flush_output(ParS1), print(ParS2,''), printl(ParS2,B), nl(ParS2), flush_output(ParS2). portrayj(_ParS1-_ParS2,fix((_APos0,_A,_Apos),null)) :- !. portrayj(_ParS1-_ParS2,fix(null,(_BPos0,_B,_Bpos))) :- !. portrayj(ParS1-ParS2,fix((_APos0,A,Apos),(_BPos0,B,Bpos))) :- !, print(ParS1,''), printl(ParS1,A), nl(ParS1), flush_output(ParS1), print(ParS2,''), printl(ParS2,B), nl(ParS2), flush_output(ParS2). portrayj(ParS1-ParS2,dyn((A,Apos),(B,Bpos))) :- !, print(ParS1,''), printl(ParS1,A), nl(ParS1), flush_output(ParS1), print(ParS2,''), printl(ParS2,B), nl(ParS2), flush_output(ParS2). portrayj(_ParS,[]) :- !. portrayj(ParS,[A|As]) :- !, portrayj(ParS,A), portrayj(ParS,As). portrayj(_ParS,null) :- !. portrayj(ParS,A) :- print(ParS,A). %-------------------------------------------- % om processen har kraschat vill man kunna få ut det som kommit fram ändå flushclose :- current_stream(_,_,K), close(K), fail. flushclose. %-------------------------------------------- debugoutput(no,_) :- !. debugoutput(L1,NewPaths) :- !, open(L1,append,S), write(S,NewPaths), nl(S), flush_output(S), close(S). %==================================================================================== % sök fixpunkter % find_fixpoints(+SPos,+TPos,+S,+T) % find_candidates(+Pos,+Tag,-ListOfCandidates) % find_candidate(+Start,+Pos,+Tag,-Candidate) % match_candidate(+Candidate,+Tag2,+TStart,+TPos0,-Pair) % record_fix(+SPos0,+TPos0,+Candidate,+Pair) fix_find_points(SPos,TPos,S,T,D,ParS) :- fix_find_candidates(SPos,S,SCandidates), fix_find_candidates(TPos,T,TCandidates), !, Initial = fixpath([],0,SCandidates,TCandidates), fix_explore_paths([Initial],FixPoints), portrayjf(ParS,FixPoints), fix_record(D,FixPoints). fix_find_candidates(SPos,S,[Cand|Cs]) :- fix_find_candidate(SPos,_SPos1,S,Cand), fix_find_candidates2(Cand,S,Cs). fix_find_candidates2((_,_,SPos2),S,Cs) :- \+ SPos2 == eof, !, SPos3 is SPos2 + 1, fix_find_candidates(SPos3,S,Cs). fix_find_candidates2((_N,_Token,eof),_,[]) :- !. fix_explore_paths([],[]). fix_explore_paths([Path|_],Path) :- fix_explored(Path), !. fix_explore_paths([Path|Ps],BestPath) :- fix_add_paths(Path,NewPaths), fix_insert_paths(NewPaths,Ps,NewListOfPaths), fix_explore_paths(NewListOfPaths,BestPath). fix_explored(fixpath(_Pairs,_W,[],[])). %==================================================================================== % S vid slut fix_add_paths(fixpath(STs,W,[],[T|Ts]),[fixpath([fix(null,T)|STs],W1,[],Ts)]) :- !, W1 is W + 1. % T vid slut fix_add_paths(fixpath(STs,W,[S|Ss],[]),[fixpath([fix(S,null)|STs],W1,Ss,[])]) :- !, W1 is W + 1. % mitt i fix_add_paths(fixpath(STs,W,[S|Ss],[T|Ts]),[fixpath([fix(S,T)|STs],W,Ss,Ts), fixpath([fix(S,null)|STs],W1,Ss,[T|Ts]), fixpath([fix(null,T)|STs],W1,[S|Ss],Ts)]) :- S = (_SPos0,SToken,_SPos), T = (_TPos0,TToken,_TPos), fix_pair(SToken,TToken), !, W1 is W + 1. % mitt i och S och T passar inte ihop! fix_add_paths(fixpath(STs,W,[S|Ss],[T|Ts]),[fixpath([fix(S,null)|STs],W1,Ss,[T|Ts]), fixpath([fix(null,T)|STs],W1,[S|Ss],Ts)]) :- W1 is W + 1. %======================================================================== fix_insert_paths([],P,P). fix_insert_paths([P|R],Paths,NewPaths) :- \+ fix_explored(P), !, fix_insert_path(P,Paths,Paths2), fix_insert_paths(R,Paths2,NewPaths). fix_insert_paths([P|_R],_Paths,[P]) :- fix_explored(P). fix_insert_path(NewPath,PreviousPaths,UpdatedPaths) :- NewPath = fixpath(_,W1,SL,TL), select(fixpath(_,W2,SL,TL),PreviousPaths,RinsedPaths), !, (W1 < W2 -> fix_insert_in_right_place(NewPath,RinsedPaths,UpdatedPaths), ! ; UpdatedPaths = PreviousPaths). fix_insert_path(NewPath,PrevPaths,UpdatedPaths) :- fix_insert_in_right_place(NewPath,PrevPaths,UpdatedPaths). %======================================================================== fix_insert_in_right_place(Path,[],[Path]). fix_insert_in_right_place(Path1,[Path2|Ps],[Path1,Path2|Ps]) :- Path1 = fixpath(_,P1,_,_), Path2 = fixpath(_,P2,_,_), P2 >= P1, !. fix_insert_in_right_place(Path1,[Path2|PrevPaths],[Path2|UpdatedPaths]) :- Path1 = fixpath(_,P1,_,_), Path2 = fixpath(_,P2,_,_), P2 < P1, fix_insert_in_right_place(Path1,PrevPaths,UpdatedPaths). %======================================================================== % find_candidate(+BörjaHär,+KandidatBörjarHär,+Tag,(-Kandidat,-KandidatSlutPos)) fix_find_candidate(SPos0,SPos0,S,(SPos0,SToken,SPos1)) :- read_token(S,SPos0,(SToken,SPos1)), fix_material(SToken). fix_find_candidate(N,N,S,(N,SToken,NN)) :- read_token(S,N,(SToken,NN)), NN = eof, !. fix_find_candidate(N,SPos0,S,Candidate) :- NN is N + 1, fix_find_candidate(NN,SPos0,S,Candidate). fix_material([K,L]) :- section_header(K), section_number(L). % FIX lexikon section_header(article). section_header(artikel). section_header(chapter). section_header(kapitel). section_header(chapitre). section_header(title). section_header(titre). section_header(titel). section_header(part). section_header(teil). section_header(partie). section_header(annex). section_header(anhang). section_header(annexe). section_header(section). section_header(sektion). section_header(abschnitt). section_header(document). section_header(dokument). section_number(premier). section_number(K) :- number(K), !. section_number(K) :- roman_number(K), !. %section_number(K) :- atom_length(K,1), char_code(K,B), name(KK,[B]), number(KK), !. section_number(K) :- name(K,Ks), all_numbers(Ks), !. all_numbers([]). all_numbers([A|As]) :- A > 47, A < 58, all_numbers(As). roman_number(i). roman_number(ii). roman_number(iii). roman_number(iv). roman_number(v). roman_number(vi). roman_number(vii). roman_number(viii). roman_number(ix). roman_number(x). roman_number(l). roman_number(c). roman_number(d). roman_number(m). % m+(d)(c)(m)c+(l)(x)(c)x+(i)(x)((i)v)(((i)i)i) fix_pair(null,null). fix_pair([L|Ls],[L|Lbs]) :- !, fix_pair(Ls,Lbs). fix_pair([L|Ls],[Lbis|Lbs]) :- translate(L,Lbis), ! , fix_pair(Ls,Lbs). fix_pair([],[]). fix_record(D,fixpath(Path,W,[],[])) :- length(Path,L), debugoutput(D,[L,fixpoint,alignment,with,edit,weight,W]), fix_record1(Path). fix_record1([]) :- !. fix_record1([A|As]) :- fix_one_record(A), fix_record1(As). fix_one_record(fix(_,null)) :- !. fix_one_record(fix(null,_)) :- !. fix_one_record(A) :- A = fix((SPos0,SToken,SPos1),(TPos0,TToken,TPos1)), assert(fixpoint(SPos0,TPos0,fix((SPos0,SToken,SPos1),(TPos0,TToken,TPos1)),SPos1,TPos1)). %==================================================================================== % huvudslinga explore_paths(SPos,TPos,SPos,TPos,S,T,DS,ParS) :- path_cache(SPos,TPos,Path), fixpoint(SPos,TPos,Fix,SPos0,TPos0), next_fixpoint(SPos0,TPos0,SPos00,TPos00,SPos1,TPos1,_DiffS,_DiffT,NullS,NullT,DS), !, Path = path(A,W,SPos,TPos,L), NyPath = path([Fix|A],W,SPos0,TPos0,L), NyStart = path([],0,SPos00,TPos00,NullS-NullT), retract(path_cache(SPos,TPos,Path)), rinse(NyPath), garbage_collect, assert(path_cache(SPos0,TPos0,NyStart)), portrayjf(ParS,NyPath), explore_paths(SPos00,TPos00,SPos1,TPos1,S,T,DS,ParS). explore_paths(eof,eof,_SPos1,_TPos1,_S,_T,_DS,_ParS). explore_paths(SPos,TPos,_SPos1,_TPos1,_S,_T,_DS,ParS) :- path_cache(SPos,TPos,Path), explored(Path), !, portrayjf(ParS,Path). explore_paths(SPos,TPos,SPos1,TPos1,S,T,DS,ParS) :- path_cache(SPos,TPos,Path), garbage_collect, \+ explored(Path), retract(path_cache(SPos,TPos,Path)), !, add_paths(Path,S,T,NewPaths), insert_paths(NewPaths,DS), pick_best_path(SPos0,TPos0), explore_paths(SPos0,TPos0,SPos1,TPos1,S,T,DS,ParS). %==================================================================================== bla. next_fixpoint(eof,eof,eof,eof,eof,eof,0,0,0,0,_DS) :- bla, !. next_fixpoint(SPos0,TPos0,SPos0,TPos0,SPos1,TPos1,DiffS,DiffT,NullS,NullT,DS) :- findall(S-T,(fixpoint(S,T,_F,_S1,_T1),S>SPos0),L), keysort(L,[SPos1-TPos1|_]), DiffS is SPos1 - SPos0, DiffT is TPos1 - TPos0, DiffS + DiffT =< 200, bla, !, (DiffS > DiffT -> (NullT is DiffS - DiffT + truncate(DiffT/50), NullS is truncate(DiffS/50) ) ; (NullS is DiffT - DiffS + truncate(DiffS/50), NullT is truncate(DiffT/50) ) ), debugoutput(DS,[new,segment,of,length,DiffS,and,DiffT,between, '(',SPos0,TPos0,')',and,'(',SPos1,TPos1,')']). next_fixpoint(SPos0,TPos0,SPos00,TPos00,SPos11,TPos11,DiffS,DiffT,NullS,NullT,DS) :- bla, findall(S-T,(fixpoint(S,T,_F,_S1,_T1),S>SPos0),L), keysort(L,[SPos1-TPos1|_]), DiffS1 is SPos1 - SPos0, DiffT1 is TPos1 - TPos0, DiffS1 + DiffT1 > 200, !, debugoutput(DS,['***',overlong,segment,of,length,DiffS1,'+',DiffT1,between, '(',SPos0,TPos0,')',and,'(',SPos1,TPos1,')','!!']), next_fixpoint(SPos1,TPos1,SPos00,TPos00,SPos11,TPos11,DiffS,DiffT,NullS,NullT,DS). %==================================================================================== pick_best_path(SPos,TPos) :- findall(P-(SPos0,TPos0),path_cache(SPos0,TPos0,path(_,P,_,_,_)),L), keysort(L,[_-(SPos,TPos)|_]). %==================================================================================== % De två utkommenterade raderna nedan % ( % SPos < SPos1, % resp % TPos < TPos1, %) % fungerar så att rinse/1 bara tar bort hypoteser % som korsar fixpunkten. Är de utkommenterade tar rinse/1 bort ALLA hypoteser med någondera % parhalvan längre bak än fixpunkten. Om fler hypoteser önskas: tag bort kommentartecknet. rinse(path(_A,_W,_SPos,TPos,_L)) :- \+ TPos = eof, path_cache(SPos1,TPos1,P), % SPos < SPos1, TPos1 < TPos, retract(path_cache(SPos1,TPos1,P)), fail. rinse(path(_A,_W,SPos,_TPos,_L)) :- \+ SPos = eof, path_cache(SPos1,TPos1,P), SPos1 < SPos, % TPos < TPos1, retract(path_cache(SPos1,TPos1,P)), fail. rinse(_). explored(path(_,_,eof,eof,_)). %==================================================================================== % Lägg till nya potentiella vägar att gå. Om någon av sidorna hunnit till filslut % eller till fixpunkt så går vi inte vidare med dem utan kör nollövergångar. % Annars går vi framåt på båda eller på en av sidorna. % % S vid filslut add_paths(path(A,W,eof,TPos,NullS-NullT),_S,T,[NewPathCandidate]) :- NullS > 0, !, NullS1 is NullS - 1, read_token(T,TPos,TToken), build_path((null,eof),TToken,path(A,W,eof,TPos,NullS1-NullT),_,NewPathCandidate). % T vid filslut add_paths(path(A,W,SPos,eof,NullS-NullT),S,_T,[NewPathCandidate]) :- NullT > 0, !, NullT1 is NullT - 1, read_token(S,SPos,SToken), build_path(SToken,(null,eof),path(A,W,SPos,eof,NullS-NullT1),_,NewPathCandidate). % Båda vid fixpunkt add_paths(path(_A,_W,SPos,TPos,_L),_S,_T,[]) :- fixpoint(SPos,TPos,_Fix,_SPos1,_TPos1), !. % S vid fixpunkt add_paths(path(A,W,SPos,TPos,NullS-NullT),_S,T,[NewPathCandidate]) :- fixpoint(SPos,_,_,_,_), NullS > 0, !, read_token(T,TPos,TToken), NullS1 is NullS-1, build_path((null,SPos),TToken,path(A,W,SPos,TPos,NullS1-NullT),_,NewPathCandidate). % T vid fixpunkt add_paths(path(A,W,SPos,TPos,NullS-NullT),S,_T,[NewPathCandidate]) :- fixpoint(_,TPos,_,_,_), NullT > 0, !, NullT1 is NullT - 1, read_token(S,SPos,SToken), build_path(SToken,(null,TPos),path(A,W,SPos,TPos,NullS-NullT1),_,NewPathCandidate). % vanligt ställe: generera nya sökvägar add_paths(path(A,Points,SPos0,TPos0,L),S,T,[NewPathCandidate|NewPaths]) :- \+ fixpoint(SPos0,_,_,_,_), \+ fixpoint(_,TPos0,_,_,_), \+ SPos0 == eof, \+ TPos0 == eof, !, read_token(S,SPos0,SToken), read_token(T,TPos0,TToken), build_path(SToken,TToken,path(A,Points,SPos0,TPos0,L),ThesePoints,NewPathCandidate), build_null_paths(ThesePoints,path(A,Points,SPos0,TPos0,L),SToken,TToken,NewPaths). add_paths(_,_,_,[]). %---------------------------- % om vikten är noll så ska inga nollövergångar ens testas build_null_paths(P,_,_,_,[]) :- P =:= 0, !. % alla nollövergångar förbrukade build_null_paths(_,path(_,_,_,_,NullS-NullT),_,_,[]) :- NullS =:= 0, NullT =:= 0, !. % endast T har nollor kvar build_null_paths(P,path(A1,P0,_SPos,TPos,NullS-NullT),(ST,SPos1),_,[path([par((ST,SPos1),(null,TPos),200)|A1],P1,SPos1,TPos,0-NullT1)]) :- P > 0, NullT > 0, NullS =:= 0, !, P1 is P0 + 200, NullT1 is NullT - 1. % endast S har nollor kvar build_null_paths(P,path(A1,P0,SPos,_TPos,NullS-NullT),_,(TT,TPos1),[path([par((null,SPos),(TT,TPos1),200)|A1],P1,SPos,TPos1,NullS1-0)]) :- P > 0, NullS > 0, NullT =:= 0, !, P1 is P0 + 200, NullS1 is NullS - 1. % både S och T har nollor kvar build_null_paths(ThesePoints,path(A1,P0,SPos,TPos,NullS-NullT),(ST,SPos1),(TT,TPos1),[path([par((null,SPos),(TT,TPos1),200)|A1],P1,SPos,TPos1,NullS1-NullT),path([par((ST,SPos1),(null,TPos),200)|A1],P1,SPos1,TPos,NullS-NullT1)]) :- ThesePoints > 0, NullS > 0, NullT > 0, !, P1 is P0 + 200, NullS1 is NullS - 1, NullT1 is NullT - 1. %---------------------------- build_path(SToken,TToken,Path,Points2,NewPath) :- Path = path(A1,Points,SPos0,TPos0,L), SToken = (_ST,SPos1), TToken = (_TT,TPos1), f(SToken,TToken,Points2), build_par(Points2,SToken,TToken,NyPar,SPos0,TPos0,SPos1,TPos1), NPoints is Points + Points2, NewPath = path([NyPar|A1],NPoints,SPos1,TPos1,L). %---------------------------- % den första satsen nedan är riskabel. den tillhandahåller dynamiskt allokerade % fixpunkter som ju förstås snabbar upp processen men riskerar träffa fel! %build_par(P,SToken,TToken,dyn(SToken,TToken),SPos0,TPos0,SPos1,TPos1) :- % P =:= 0, % !, % assert(fixpoint(SPos0,TPos0,dyn(SToken,TToken),SPos1,TPos1)). build_par(Points,SToken,TToken,par(SToken,TToken,Points),_SPos0,_TPos0,_SPos,_TPos). %---------------------------- % kolla att inte N är större än den största som finns - annars är det fel. read_token(Key,N,(A,NN)) :- var(N), !, item(NN,Key,A), N is NN - 1. read_token(Key,N,(A,NN)) :- nonvar(N), N \== eof, NN is N + 1, item(NN,Key,A), !. read_token(Key,N,(A,NN)) :- nonvar(N), N \== eof, findall(NN0-A0,(item(NN0,Key,A0),NN0 > N),LL), keysort(LL, [NN-A|_]), !. read_token(_Key,N,(null,eof)) :- nonvar(N), N \== eof. read_token(_,E,_) :- nonvar(E), E == eof, fail. %======================================================================== insert_paths([],_) :- rinse_path_cache(20). insert_paths([P|R],D) :- insert_path(P,D), insert_paths(R,D). insert_path(NewPath,D) :- NewPath = path(A,P1,SPos,TPos,L), path_mem(SPos,TPos,P2), !, (P1 < P2 -> retract(path_mem(SPos,TPos,P2)), retractall(path_cache(SPos,TPos,path(_,P2,_,_,_))), !, assert(path_mem(SPos,TPos,P1)), assert(path_cache(SPos,TPos,path(A,P1,SPos,TPos,L))) ; true ), rinse_short_paths(SPos,TPos,D). insert_path(path(A,P1,SPos,TPos,L),D) :- assert(path_mem(SPos,TPos,P1)), assert(path_cache(SPos,TPos,path(A,P1,SPos,TPos,L))), rinse_short_paths(SPos,TPos,D). % gamla hypoteser som inte kommer närmare än D ska inte jobbas med. rinse_short_paths(SPos,TPos,_D) :- findall(DD-S0-T0,( path_cache(_,_,path(A,P,S0,T0,L0)), SPos > S0, TPos > T0, DD is (SPos-S0)*(SPos-S0)+(TPos-T0)*(TPos-T0), DD > 40, retract(path_cache(S0,T0,path(A,P,S0,T0,L0)))), _L). % debugoutput(D,[rinse,L]). % inte fler än N alternativ ska finnas att jobba med. rinse_path_cache(N) :- findall(K-P,path_cache(_,_,path(P,K,_,_,_)),L), keysort(L,LL), length(LL,NN), reverse(LL,LLL), rinse_path_cache2(LLL,NN,N). rinse_path_cache2(_LL,NN,N) :- NN < N, !. rinse_path_cache2(_LL,NN,NN) :- !. rinse_path_cache2([K-P|LL],NN,N) :- N < NN, retract(path_cache(_,_,path(P,K,_,_,_))), NN1 is NN - 1, rinse_path_cache2(LL,NN1,N). %======================================================================== % En av de mest centrala delarna i programmet: skillnaden mellan munsbitar på % de två sidorna. % talen måste vara med decimalpunkt utskriven, annars klarar inte keysort av att sortera % rätt. f(A,B,N) :- f_cache(A,B,N), !. % Nollor är mycket olika allt annat så att de ska vara dyra att sätta in. f((A,_),(null,_),200.0) :- \+ A = null, !. f((null,_),(B,_),200.0) :- \+ B = null, !. % i sht är nollor olika varann! f((null,_),(null,_),300.0) :- !. % två identiska munsbitar är identiska, helt enkelt f((A,_),(A,_),0.0) :- \+ A = null, \+ number(A), !. % men två identiska nummer ger ofta falska fixpunkter f((A,_),(A,_),0.001) :- number(A), !. f((A,_),(A,_),0.001) :- char_code(A,B), number_codes(_,[B]), !. f(A,B,N) :- f2(A,B,N), assert(f_cache(A,B,N)). % två olika munsbitar är ganska likadana om de delar många element och är ungefär lika långa % L och I går från ett till noll; perfekt träff blir noll, totalmiss blir ett; viktas upp till % att sammanlagt bli etthundra. %f2((A,_Pa),(B,_Pb),N) :- % length_correlation(A,_AL,B,_BL,L), % N is 100 * L. f2((A,_Pa),(B,_Pb),N) :- length_correlation(A,AL,B,BL,L), item_overlap(A,AL,B,BL,I), N is 25 * L + 75 * I. % går från ett till noll. length_correlation(A,AL,B,BL,L) :- length(A,AL), length(B,BL), (AL > BL -> K is BL / AL ; K is AL / BL), L is 1 - K. % L is ((AL-BL)*(AL-BL))/((AL+BL)/2). % går från ett till noll. item_overlap(A,AL,B,BL,I) :- item_overlap2(A,B,IO), I is 1 - 2 * IO / (AL+BL). % om perfekt överlapp blir lika med längden; om totalt olika blir noll. item_overlap2([A|Abis],Bs,W+N) :- ( select(B,Bs,Bbis), item_identity(A,B,W), !, item_overlap2(Abis,Bbis,N)) ; ( W = 0, item_overlap2(Abis,Bs,N) ). item_overlap2([],_,0). % här finns det mycket att göra % merkel: proper names, tech terms useful anchor terms % här borde språkparen in item_identity(A,A,1) :- !. item_identity(A,B,1) :- translate(A,B), !. %======================================================================== % lexikon translate(protokoll, protocol). translate(colonne,column). translate(anhang,annex). translate(anhang,annex). translate(artikel,article). translate(titel,title). translate(kapitel,chapter). translate(teil,part). translate(abschnitt,section). translate('modifications','amendments'). translate('platz',position). translate('des','of'). translate('präambel',preamble). translate('eg','ec'). translate(gemeinschaft,community). translate(staaten,countries). translate(geltendes,force). translate(partie,part). translate(titel,title). translate(kapitel,chapter). translate(ziele,objectives). translate('grundsätze',principles). translate(und,and). translate(acteure,actors). translate(allgemeine,general). translate(schweden, sweden). translate(grande-bretagne, britain). %==================================================================================== % lists.pl from sicstus library member(Element, [Head|Tail]) :- member_(Tail, Head, Element). % auxiliary to avoid choicepoint for last element member_(_, Element, Element). member_([Head|Tail], _, Element) :- member_(Tail, Head, Element). % non_member(+Element, +List) % non_member is true when Element does not exist in List. non_member(Element, List) :- non_member_(List, Element). non_member_([], _). non_member_([Head|Tail], Element) :- dif(Head, Element), non_member_(Tail, Element). % select(?Element, ?List, ?List2) % is true when the result of removing an occurrence of Element in List % is List2. select(Element, [Element|Tail], Tail). select(Element, [Head|Tail1], [Head|Tail2]) :- select(Element, Tail1, Tail2). % reverse(?List, ?Reversed) % is true when Reversed is has the same element as List but in a reversed % order. List must be a proper list. reverse(List, Reversed) :- reverse(List, [], Reversed). reverse([], Reversed, Reversed). reverse([Head|Tail], SoFar, Reversed) :- reverse(Tail, [Head|SoFar], Reversed).