From 2dca315085d911c2612883625e1e06ec589fc623 Mon Sep 17 00:00:00 2001 From: Rouvier Mickael Date: Tue, 27 Jun 2017 14:11:15 +0200 Subject: [PATCH] Move README in README.md --- README | 106 -------------------------------------------------------------- README.md | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 106 deletions(-) delete mode 100644 README create mode 100644 README.md diff --git a/README b/README deleted file mode 100644 index b2450df..0000000 --- a/README +++ /dev/null @@ -1,106 +0,0 @@ -# DEFT 2017 - Sentiment Analysis - -- Authors: Mickael Rouvier and Pierre-Michel Bousquet -- Version: 1.0 -- Date: 26/06/17 - -These scripts provide the LIA system that I used for the DEFT 2017 - Sentiment Analysis. The LIA system is a multi-view ensemble of Convolutional Neural Networks (CNN). Four different word embeddins are used to initialize the input of CNN : lexical embedding, sentiment embedding (multi-task learning), sentiment embedding (distant learning) and sentiment embedding (negative sampling). The system is a fusion at the score level of the different CNNs variants. - - -You can reproduce my results or freely adapt my code for your experiments. 
- - -Warning, before to run the system execute the makefile: - -```shell -make -``` - -This executable split the training corpus (K-Fold) and tokenize the tweets: - -```shell -sh run_corpus.sh -``` - -This executable train the different word embeddings: -```shell -sh run_word2vec.sh -``` - -This executable learn the models: - -```shell -sh run_cnn.sh -``` - -This executable run the model on dev and test: - -```shell -sh run_extract_dev.sh -sh run_extract_test.sh -``` - -At this point you can score the CNNs: -```shell -ruby bin/scoring.rb data/task1_test.tokenize results_test/cnn_task1_0_distant_size100_123.txt -ruby bin/scoring.rb data/task2_test.tokenize results_test/cnn_task2_0_distant_size100_123.txt -ruby bin/scoring.rb data/task3_test.tokenize results_test/cnn_task3_0_distant_size100_123.txt -``` - - -This executable run the fusion system: -```shell -sh run_fusion.sh -``` - - -Finally, you can score the full-system: -```shell -ruby bin/scoring.rb data/task1_test.tokenize output/equipe-8_tache1_run3.csv -ruby bin/scoring.rb data/task2_test.tokenize output/equipe-8_tache2_run1.csv -ruby bin/scoring.rb data/task3_test.tokenize output/equipe-8_tache3_run3.csv -``` - - - -# Results - - -## Baseline - -We reproduce the sentiment analysis system of Kim (based on Word embeddings and CNN): - - -| Corpus | Baseline | -| ------------ |:-------------:| -| Task1 | 59.55 | -| Task2 | 77.18 | -| Task3 | 57.59 | - - - - -## DEFT 2017 - -These results are those SENSEI-LIF system presented in SemEval 2016 Sentiment Analysis: - -| Corpus | Task1 | Task2 | Task3 | -| ----------- |:-------------:|:-------------:|:-------------:| -| Run1 | 60.23 | 78.31 | 57.83 | -| Run2 | 63.44 | 77.39 | 58.49 | -| Run3 | 65.00 | 77.43 | 59.39 | - - -# Citing - -The system is described in this paper: - - @inproceedings{rouvier2017, - author = {Mickael Rouvier and Pierre-Michel Bousquet}, - title = {LIA @ DEFT’2017 : Multi-view Ensemble of Convolutional Neural Network}, - booktitle = {DEFT 2107}, - 
year = {2017}, - address = {Orleans, France} - } - - diff --git a/README.md b/README.md new file mode 100644 index 0000000..b2450df --- /dev/null +++ b/README.md @@ -0,0 +1,106 @@ +# DEFT 2017 - Sentiment Analysis + +- Authors: Mickael Rouvier and Pierre-Michel Bousquet +- Version: 1.0 +- Date: 26/06/17 + +These scripts provide the LIA system that I used for the DEFT 2017 - Sentiment Analysis. The LIA system is a multi-view ensemble of Convolutional Neural Networks (CNN). Four different word embeddings are used to initialize the input of CNN: lexical embedding, sentiment embedding (multi-task learning), sentiment embedding (distant learning) and sentiment embedding (negative sampling). The system is a fusion at the score level of the different CNN variants. + + +You can reproduce my results or freely adapt my code for your experiments. + + +Warning, before running the system, execute the makefile: + +```shell +make +``` + +This executable splits the training corpus (K-Fold) and tokenizes the tweets: + +```shell +sh run_corpus.sh +``` + +This executable trains the different word embeddings: +```shell +sh run_word2vec.sh +``` + +This executable learns the models: + +```shell +sh run_cnn.sh +``` + +This executable runs the model on dev and test: + +```shell +sh run_extract_dev.sh +sh run_extract_test.sh +``` + +At this point you can score the CNNs: +```shell +ruby bin/scoring.rb data/task1_test.tokenize results_test/cnn_task1_0_distant_size100_123.txt +ruby bin/scoring.rb data/task2_test.tokenize results_test/cnn_task2_0_distant_size100_123.txt +ruby bin/scoring.rb data/task3_test.tokenize results_test/cnn_task3_0_distant_size100_123.txt +``` + + +This executable runs the fusion system: +```shell +sh run_fusion.sh +``` + + +Finally, you can score the full system: +```shell +ruby bin/scoring.rb data/task1_test.tokenize output/equipe-8_tache1_run3.csv +ruby bin/scoring.rb data/task2_test.tokenize output/equipe-8_tache2_run1.csv +ruby bin/scoring.rb data/task3_test.tokenize 
output/equipe-8_tache3_run3.csv +``` + + + +# Results + + +## Baseline + +We reproduce the sentiment analysis system of Kim (based on word embeddings and CNN): + + +| Corpus | Baseline | +| ------------ |:-------------:| +| Task1 | 59.55 | +| Task2 | 77.18 | +| Task3 | 57.59 | + + + + +## DEFT 2017 + +These results are those of the SENSEI-LIF system presented in SemEval 2016 Sentiment Analysis: + +| Corpus | Task1 | Task2 | Task3 | +| ----------- |:-------------:|:-------------:|:-------------:| +| Run1 | 60.23 | 78.31 | 57.83 | +| Run2 | 63.44 | 77.39 | 58.49 | +| Run3 | 65.00 | 77.43 | 59.39 | + + +# Citing + +The system is described in this paper: + + @inproceedings{rouvier2017, + author = {Mickael Rouvier and Pierre-Michel Bousquet}, + title = {LIA @ DEFT’2017 : Multi-view Ensemble of Convolutional Neural Network}, + booktitle = {DEFT 2017}, + year = {2017}, + address = {Orleans, France} + } + + -- 1.8.2.3