Commit b55f47b3852621a367b08b37a5da99fe0b489ea4

Authored by Romain Deveaud
1 parent e267264ee3
Exists in master

resolving encoding problems

Showing 1 changed file with 5 additions and 5 deletions · Side-by-side Diff

lib/mirimiri/document.rb
... ... @@ -122,7 +122,7 @@
122 122 def initialize(url,only_tags=nil)
123 123 @url = url
124 124 content = only_tags.nil? ? WebDocument.get_content(url) : WebDocument.get_content(url).extract_xmltags_values(only_tags).join("")
125   - super content.strip_javascripts.strip_stylesheets.strip_xml_tags
  125 + super content.strip_javascripts.strip_xml_tags
126 126 end
127 127 end
128 128  
129 129  
130 130  
... ... @@ -136,15 +136,15 @@
136 136 def self.search_wikipedia_titles(name)
137 137 raise ArgumentError, "Bad encoding", name unless name.isutf8
138 138  
139   - res = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=#{URI.escape name}&format=xml" ).toutf8).elements['api/query/search']
  139 + res = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=#{URI.escape name}&format=xml" ).unaccent.toutf8).elements['api/query/search']
140 140  
141   - res.collect { |e| e.attributes['title'] } unless res.nil?
  141 + res.collect { |e| e.attributes['title'] } unless res.nil?
142 142 end
143 143  
144 144 def self.get_url(name)
145 145 raise ArgumentError, "Bad encoding", name unless name.isutf8
146 146  
147   - atts = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&titles=#{URI.escape name}&inprop=url&prop=info&format=xml" ).toutf8).elements['api/query/pages/page'].attributes
  147 + atts = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&titles=#{URI.escape name}&inprop=url&prop=info&format=xml" ).unaccent.toutf8).elements['api/query/pages/page'].attributes
148 148  
149 149 atts['fullurl'] if atts['missing'].nil?
150 150 end
... ... @@ -152,7 +152,7 @@
152 152 def self.search_homepage(name)
153 153 title = WikipediaPage.search_wikipedia_titles name
154 154  
155   - WikipediaPage.new(WikipediaPage.get_url title[0]) unless title.nil? || title.empty?
  155 + WikipediaPage.get_url(title[0]) unless title.nil? || title.empty?
156 156 end
157 157  
158 158 end