Commit b55f47b3852621a367b08b37a5da99fe0b489ea4
1 parent
e267264ee3
Exists in
master
resolving encoding problems
Showing 1 changed file with 5 additions and 5 deletions (side-by-side diff)
lib/mirimiri/document.rb
... | ... | @@ -122,7 +122,7 @@ |
122 | 122 | def initialize(url,only_tags=nil) |
123 | 123 | @url = url |
124 | 124 | content = only_tags.nil? ? WebDocument.get_content(url) : WebDocument.get_content(url).extract_xmltags_values(only_tags).join("") |
125 | - super content.strip_javascripts.strip_stylesheets.strip_xml_tags | |
125 | + super content.strip_javascripts.strip_xml_tags | |
126 | 126 | end |
127 | 127 | end |
128 | 128 | |
129 | 129 | |
130 | 130 | |
... | ... | @@ -136,15 +136,15 @@ |
136 | 136 | def self.search_wikipedia_titles(name) |
137 | 137 | raise ArgumentError, "Bad encoding", name unless name.isutf8 |
138 | 138 | |
139 | - res = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=#{URI.escape name}&format=xml" ).toutf8).elements['api/query/search'] | |
139 | + res = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=#{URI.escape name}&format=xml" ).unaccent.toutf8).elements['api/query/search'] | |
140 | 140 | |
141 | - res.collect { |e| e.attributes['title'] } unless res.nil? | |
141 | + res.collect { |e| e.attributes['title'] } unless res.nil? | |
142 | 142 | end |
143 | 143 | |
144 | 144 | def self.get_url(name) |
145 | 145 | raise ArgumentError, "Bad encoding", name unless name.isutf8 |
146 | 146 | |
147 | - atts = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&titles=#{URI.escape name}&inprop=url&prop=info&format=xml" ).toutf8).elements['api/query/pages/page'].attributes | |
147 | + atts = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&titles=#{URI.escape name}&inprop=url&prop=info&format=xml" ).unaccent.toutf8).elements['api/query/pages/page'].attributes | |
148 | 148 | |
149 | 149 | atts['fullurl'] if atts['missing'].nil? |
150 | 150 | end |
... | ... | @@ -152,7 +152,7 @@ |
152 | 152 | def self.search_homepage(name) |
153 | 153 | title = WikipediaPage.search_wikipedia_titles name |
154 | 154 | |
155 | - WikipediaPage.new(WikipediaPage.get_url title[0]) unless title.nil? || title.empty? | |
155 | + WikipediaPage.get_url(title[0]) unless title.nil? || title.empty? | |
156 | 156 | end |
157 | 157 | |
158 | 158 | end |