-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimdb_crawler.py
More file actions
59 lines (54 loc) · 1.65 KB
/
imdb_crawler.py
File metadata and controls
59 lines (54 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import requests
from bs4 import BeautifulSoup
import json
from score import *
from label import *
# Module-level name; the functions visible in this file never declare it
# ``global``, so they do not modify this value.
title = ""
# Shared result dictionary: getuser_review() adds its fields to this object and
# returns it. The placeholder 'title' entry (lowercase) is never removed and is
# a different key from the 'Title' entry that getuser_review() stores.
dic = {'title': 'value'}
def movie_review(moviename):
    """Return the text of the first ``<cite>`` element of a Bing search.

    The search query is ``"<moviename> imdb"``. Bing shows the address of each
    hit inside a ``<cite>`` tag, so the first one is taken to be the movie's
    IMDb page (presumably -- nothing here verifies that the hit is on IMDb).

    Args:
        moviename: Movie title to search for. Converted with ``str()``.

    Returns:
        The text of the first ``<cite>`` element, or ``""`` when the result
        page contains none.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            does not answer within the timeout.
    """
    query = str(moviename) + " imdb"
    # Passing the query through ``params`` lets requests URL-encode it, so
    # titles containing '&', '#', '+' and similar characters stay intact.
    # The timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.get(
        "https://www.bing.com/search",
        params={"q": query},
        timeout=10,
    )
    soup = BeautifulSoup(response.text, "lxml")

    first_cite = soup.find("cite")
    if first_cite is None:
        # No search hit at all: report "nothing found" instead of failing on
        # an unassigned local variable.
        return ""
    return first_cite.get_text()
def getuser_review(mr):
    """Collect OMDb metadata and the first scraped review for a movie.

    The page address comes from ``movie_review(mr)``; the first
    ``<p itemprop="reviewBody">`` element on that page is used as the review.
    Metadata is read from the OMDb API by title. Everything is stored in the
    module-level ``dic``, which is also the return value.

    Args:
        mr: Movie title.

    Returns:
        The shared module-level ``dic`` with the keys ``Title``, ``Runtime``,
        ``Actors``, ``Genre``, ``Plot``, ``Metascore``, ``imdbRating``,
        ``Review``, ``SentimentScore`` and ``Nature`` set for this movie.
        ``Review`` is ``""`` when no page address or no review element was
        found.

    Raises:
        requests.exceptions.RequestException: If an HTTP request fails or
            times out.
        KeyError: If the OMDb response lacks one of the copied fields.
        ValueError: If the OMDb response body is not valid JSON.
    """
    # Kept as a module global so code that reads this name keeps working, but
    # reset on every call so a movie without a review never inherits the
    # previous movie's text (and the first call cannot hit an undefined name).
    global review
    review = ""

    page_url = str(movie_review(mr)).strip()
    # The scraped address may or may not already carry a scheme; only add one
    # when it is missing, otherwise the URL would become "http://https://...".
    if page_url and not page_url.startswith(("http://", "https://")):
        page_url = 'http://{0}'.format(page_url)

    if page_url:
        page = requests.get(page_url, timeout=10)
        review_node = BeautifulSoup(page.text, "lxml").find(
            'p', {'itemprop': 'reviewBody'}
        )
        if review_node is not None:
            review = review_node.get_text()

    # ``params`` URL-encodes the title (spaces become '+', as before).
    omdb_response = requests.get(
        'http://www.omdbapi.com/',
        params={'t': str(mr)},
        timeout=10,
    )
    movie_info = json.loads(omdb_response.text)

    for key in ('Title', 'Runtime', 'Actors', 'Genre', 'Plot',
                'Metascore', 'imdbRating'):
        dic[key] = movie_info[key]

    dic['Review'] = review
    # get_score / get_label are expected to come from the wildcard imports of
    # ``score`` and ``label``; with no review found they receive "".
    # NOTE(review): confirm both helpers accept an empty string.
    dic['SentimentScore'] = float(get_score(review))
    dic['Nature'] = get_label(review)
    return dic