Commit b3cdd2e747561ae256baa05dcbb7d4c24d755dc6
1 parent
8b1fb953fc
Exists in
master
and in
1 other branch
Ajout de Orkis proc et utilisation pour tagger
Showing 3 changed files with 43 additions and 14 deletions Inline Diff
processor/Orkis.py
File was created | 1 | from BaseProcessor import baseProcessor | |
2 | import nltk | ||
3 | from LiaTools import * | ||
class Orkis(baseProcessor):
    """Processor for Orkis.

    Builds a dictionary that maps each word reported by the tagger to a
    two-element list ``[set_from_tagger, set_from_phoner]``:

    * index 0 collects the third space-separated field of every tagger
      output line whose first field is that word (presumably the lemma);
    * index 1 collects the second space-separated field of every phoner
      output line whose first field is that word (presumably the phonemes).
    """

    def __init__(self, dirtyString):
        """Create the helper tools and remember the raw input string.

        dirtyString -- the unprocessed text to analyse.
        """
        self.tagger = Tagger()
        self.phoner = Phoner()
        # word -> [set(), set()], see the class docstring.
        self.dico = {}
        self.string = dirtyString
        # Filled by insertLem(); initialised here so that insertPhon() can
        # detect that no cleaned string exists yet instead of crashing.
        self.cleanString = None
        self.tableLem = []
        self.tablephon = []

    def isReady(self):
        """Run the readiness check of both underlying tools.

        The results of the two calls are not inspected here.
        """
        self.phoner.isReady()
        self.tagger.isReady()

    def clean(self):
        """Strip stop words from the stored string (in place)."""
        stopword = StopWord()
        self.string = stopword.RemoveStopList(self.string)

    def insertLem(self):
        """Tag the cleaned string and record field 2 of each line per word.

        Lines with fewer than three space-separated fields (for example the
        single empty line produced by empty tagger output) are skipped
        rather than raising IndexError.
        """
        self.cleanString = self.tagger.clean(self.string)
        taggedString = self.tagger.tagg(self.cleanString)
        self.tableLem = taggedString.rstrip().split("\n")
        for line in self.tableLem:
            fields = line.rstrip().split(" ")
            if len(fields) < 3:
                continue
            word = fields[0]
            if word not in self.dico:
                self.dico[word] = [set(), set()]
            self.dico[word][0].add(fields[2])

    def insertPhon(self):
        """Phonetise the cleaned string and record field 1 of each line.

        Only words already present in the dictionary are updated. Lines with
        fewer than two space-separated fields are skipped.
        """
        if self.cleanString is None:
            # insertLem() has not run yet: derive the cleaned string the
            # same way it does, so this method also works on its own.
            self.cleanString = self.tagger.clean(self.string)
        phonedString = self.phoner.phon(self.cleanString)
        self.tablephon = phonedString.rstrip().split("\n")
        for line in self.tablephon:
            fields = line.rstrip().split(" ")
            if len(fields) < 2:
                continue
            word = fields[0]
            if word in self.dico:
                self.dico[word][1].add(fields[1])

    def getDico(self):
        """Run the whole pipeline and return the resulting dictionary.

        Order matters: stop words are removed first, then the tagger fills
        the dictionary keys, then the phoner completes existing entries.
        """
        self.clean()
        self.insertLem()
        self.insertPhon()
        return self.dico
38 |
static/js/application.js
1 | // Some general UI pack related JS | 1 | // Some general UI pack related JS |
// Wire up the tagging UI once the DOM is ready.
$(function () {
    tagging();
});
5 | 5 | ||
6 | 6 | ||
7 | 7 | ||
/**
 * Bind the click handler of the #go button.
 *
 * On click, the content of #data is POSTed to the "tagger" endpoint as the
 * "string" parameter; the plain-text response is HTML-escaped and replaces
 * the content of #result. A failed request shows an alert.
 */
function tagging() {
    $('#go').click(function () {
        // Declared with var: the bare assignments previously created
        // implicit globals on window.
        var payload = {"string": $('#data').val()};
        console.log(payload);
        $.ajax({
            type: "POST",
            url: "tagger",
            data: payload,
            success: function (response) {
                var escaped = _.escape(response);
                var result = $('#result');
                // Replace (not append) so each request shows only its own output.
                result.html(escaped);
                console.log(escaped);
                console.log("resultat");
            },
            error: function () {
                alert("error");
            },
            dataType: "text"
        });
    });
}
29 | 29 |
webtagger.py
1 | # -*- coding: utf-8 -*- | 1 | # -*- coding: utf-8 -*- |
2 | import subprocess | 2 | import subprocess |
3 | import os | 3 | import os |
4 | import json | 4 | import json |
5 | from flask import Flask, request, render_template | 5 | from flask import Flask, request, render_template |
6 | from processor.LiaTools import * | 6 | from processor.LiaTools import * |
7 | from processor.Orkis import Orkis | ||
7 | app = Flask(__name__) | 8 | app = Flask(__name__) |
8 | 9 | ||
@app.route("/")
def docs():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
12 | 13 | ||
@app.route("/tagger",methods=['POST'])
def cleaner():
    """Handle a POST on ``/tagger``.

    Reads the raw text from the ``string`` request parameter, runs it
    through the Orkis processor and returns the ``unicode()`` rendering of
    the dictionary produced by ``Orkis.getDico()``.
    """
    # Raw text submitted by the client.
    raw_text = request.values[u'string']
    # NOTE(review): the processor's isReady() check is not invoked here --
    # confirm whether that is intentional.
    # NOTE(review): the response is the Python text form of the dictionary,
    # not JSON -- confirm that the client is expected to consume it as is.
    return unicode(Orkis(raw_text).getDico())
33 | if __name__ == '__main__': | 25 | if __name__ == '__main__': |