Jean-François Rey / otmedia

Blame view

tools/scripts/ProcessSOLRQueries.py 2.35 KB

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Query SOLR web applications and dump keywords and texts of the hits.

Usage:
    ProcessSOLRQueries.py <input queries> <keywords out file> <text out file>

Each line of the input file is sent as a query to the
'solr-otmedia-document' and 'solr-otmedia-multimedia' web applications.
The entity-tag values and the text values found in the returned documents
are appended to the two output files.

NOTE: this script targets Python 2 (urllib2, byte-string concatenation of
UTF-8 encoded values).
"""
from urllib2 import *
import json
from pprint import pprint
import sys
from solrinfo import *


def _extractKeywords(jsonDoc):
    """Return the entity-tag values of one decoded document.

    Each element is the UTF-8 encoded 'value' of an entry found under
    jsonDoc['extractedData']['entityTags'], followed by a single space.
    An empty list is returned when either key is missing.
    """
    parts = []
    if 'extractedData' not in jsonDoc:
        return parts
    extracted = jsonDoc['extractedData']
    if 'entityTags' not in extracted:
        return parts
    entityTags = extracted['entityTags']
    for tagName in entityTags:
        for item in entityTags[tagName]:
            for access in item:
                parts.append(item[access]['value'].encode("utf-8") + " ")
    return parts


def _extractTexts(jsonDoc):
    """Return the UTF-8 encoded text values of one decoded document.

    Texts are read from jsonDoc['content']['body']['textBlocks']['textBlock']
    (paragraph entries holding a 'text' key) and then from
    jsonDoc['content']['text']. Missing levels are skipped.
    """
    parts = []
    if 'content' not in jsonDoc:
        return parts
    content = jsonDoc['content']
    if 'body' in content:
        body = content['body']
        # 'textBlocks' is guarded like every other level, so a body without
        # it is skipped instead of raising KeyError.
        if 'textBlocks' in body and 'textBlock' in body['textBlocks']:
            for block in body['textBlocks']['textBlock']:
                if 'parag' not in block:
                    continue
                for parag in block['parag']:
                    for textOrMedia in parag['textOrMultimediaRef']:
                        if 'text' in textOrMedia:
                            parts.append(
                                textOrMedia['text']['value'].encode("utf-8"))
    if 'text' in content:
        for txt in content['text']:
            parts.append(txt['value'].encode("utf-8"))
    return parts


def obtainSOLRInfos(webName, query):
    """Run `query` against the SOLR web application `webName`.

    Parameters:
        webName -- name of the web application placed in the URL path.
        query   -- query string appended verbatim after 'select?q='.

    Returns:
        A (keywords, txts) tuple of byte strings: the space-terminated
        entity-tag values and the concatenated text values of all returned
        documents.

    `machine` and `port` are not defined in this file; presumably they come
    from the star import of `solrinfo` (TODO confirm). Both are concatenated
    with strings, so `port` must be a string.
    """
    # NOTE(review): `query` is inserted into the URL without URL-encoding;
    # confirm that the input file already contains URL-safe queries.
    url = ('http://' + machine + ':' + port + '/' + webName +
           '/select?q=' + query + '&wt=json')
    print(url + ' timeout = 30 ')
    conn = urlopen(url, timeout=30)
    try:
        rsp = json.load(conn)
    finally:
        # Close the connection even when the response is not valid JSON.
        conn.close()

    keywordParts = []
    txtParts = []
    for doc in rsp['response']['docs']:
        # Each hit carries its payload as a JSON string in 'jsonDocument'.
        jsonDoc = json.loads(doc['jsonDocument'].encode("utf-8"))
        keywordParts.extend(_extractKeywords(jsonDoc))
        txtParts.extend(_extractTexts(jsonDoc))
    # Join once instead of growing strings inside the nested loops.
    return "".join(keywordParts), "".join(txtParts)


def main(argv):
    """Process the query file named in `argv` and write both output files.

    `argv` has the same layout as sys.argv: program name, input queries
    file, keywords output file, text output file. Exits with status -1
    after printing a usage message when the argument count is wrong.
    """
    if len(argv) != 4:
        print("BAD USAGE: <(i) input queries> <(o) keywords out file> <(o) text out file> ")
        sys.exit(-1)

    fileName = argv[1]
    keywordsFile = argv[2]
    txtsFile = argv[3]

    # Context managers close all three files even if a query fails.
    with open(fileName, "r") as inFile, \
            open(keywordsFile, "w") as outKeywords, \
            open(txtsFile, "w") as outTxts:
        for line in inFile:
            query = line.rstrip()
            for webName in ('solr-otmedia-document',
                            'solr-otmedia-multimedia'):
                keywords, txt = obtainSOLRInfos(webName, query)
                outKeywords.write(keywords)
                outTxts.write(txt)


if __name__ == "__main__":
    main(sys.argv)