"""Benchmark for generating and de-duplicating a list of e-mail-like strings.

Run as a script, it builds a list of random addresses in which every entry
is followed by a randomly chosen repeat, reports which addresses are
duplicated, prunes the list to its unique entries (keeping first-seen
order), and prints how long each phase took.
"""
# Provenance: recovered from the patch for commit
# 73c243d83ddfd5e3a8c818bd224dbc3fbbe7d9e9 ("initial commit",
# Greg Gauthier, Wed, 21 Oct 2020 21:01:26 +0100), which created
# email_pruner.py together with a .gitignore containing:
#   .idea/
#   *.iml
#   *.pyc

from string import ascii_letters
from secrets import choice
from timeit import default_timer as timer
from datetime import timedelta


def reset_stopwatch() -> float:
    """Return the current timer reading, to be passed to get_elapsed()."""
    return timer()


def get_elapsed(starttime: float) -> timedelta:
    """Return the time elapsed since *starttime* as a timedelta.

    *starttime* is a reading previously obtained from reset_stopwatch().
    """
    return timedelta(seconds=timer() - starttime)


def randstring(strlen: int = 64) -> str:
    """Return a string of *strlen* randomly chosen ASCII letters."""
    return ''.join(choice(ascii_letters) for _ in range(strlen))


def spawn(listlen: int = 100) -> list:
    """Build a test list of 2 * *listlen* e-mail-like strings with repeats.

    *listlen* random addresses of the form ``<10>.<10>@<15>.com`` are
    generated, and each one is followed in the result by an address picked
    at random from that same set, so every odd-indexed entry repeats some
    even-indexed entry.
    """
    base_list = [
        randstring(10) + "." + randstring(10) + "@" + randstring(15) + ".com"
        for _ in range(listlen)
    ]
    dup_list = [choice(base_list) for _ in base_list]
    final_list = []
    # Interleave: original, random repeat, original, random repeat, ...
    for original, repeat in zip(base_list, dup_list):
        final_list.append(original)
        final_list.append(repeat)
    return final_list


def dups(biglist) -> set:
    """Return the set of values that occur more than once in *biglist*.

    Items must be hashable. An input without repeats (including an empty
    one) yields an empty set.
    """
    seen = set()
    duplicates = set()
    for x in biglist:
        if x in seen:
            # Second or later sighting: this value is a duplicate.
            duplicates.add(x)
        else:
            seen.add(x)
    return duplicates


def main() -> None:
    """Run the three timed phases and print their counts and durations."""
    start = reset_stopwatch()
    list_with_dups = spawn(50000)
    print(f"GENERATED COMPLETE LIST WITH DUPLICATES: (count = {len(list_with_dups)})")
    t1 = get_elapsed(start)
    print("Elapsed Time: ", t1)

    start = reset_stopwatch()
    dup_list = dups(list_with_dups)
    print(f"IDENTIFIED DUPLICATES IN COMPLETE LIST: (count = {len(dup_list)})")
    t2 = get_elapsed(start)
    print("Elapsed time: ", t2)

    start = reset_stopwatch()
    # dict.fromkeys drops repeats while keeping first-seen order.
    list_with_dups = list(dict.fromkeys(list_with_dups))
    print(f"GENERATED PRUNED LIST WITHOUT DUPLICATES: (count = {len(list_with_dups)})")
    t3 = get_elapsed(start)
    print("Elapsed Time: ", t3)
    print(f"TOTAL ELAPSED TIME: {t1+t2+t3}")
    print(f"ELAPSED TIME WITHOUT GENERATOR: {t2+t3}")


if __name__ == "__main__":
    main()