Browse Source

remove duplicates

master
Sam 7 years ago
parent
commit
5074fad2cc
4 changed files with 3011 additions and 14943 deletions
  1. +0
    -609
      ice.csv
  2. +1694
    -7175
      ice.json
  3. +1314
    -7158
      index.html
  4. +3
    -1
      linked_in_scraper.py

+ 0
- 609
ice.csv
File diff suppressed because it is too large
View File


+ 1694
- 7175
ice.json
File diff suppressed because it is too large
View File


+ 1314
- 7158
index.html
File diff suppressed because it is too large
View File


+ 3
- 1
linked_in_scraper.py View File

@@ -152,6 +152,7 @@ def clean_and_parse(datafile, outname):
  '''Outputs csv, json and html from employee listings'''
  out = []
+ mids = []
  with open(datafile, 'r') as infile:
  data = json.load(infile)
@@ -173,8 +174,9 @@ def clean_and_parse(datafile, outname):
  'linkedin': 'https://linkedin.com/in/' + pid,
  }
- if mid not in out:
+ if mid not in mids:
  out.append(item)
+ mids.append(mid)
  with open(outname + '.json', 'w') as jsonfile:
  json.dump(out, jsonfile, indent=2)


Loading…
Cancel
Save