-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathGitHub_Repos_Scraping.py
More file actions
79 lines (64 loc) · 2.45 KB
/
GitHub_Repos_Scraping.py
File metadata and controls
79 lines (64 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
from urllib.parse import quote, unquote

import requests
# Configuration
# Credentials are read from the environment first so that a real token never
# has to be written into (and accidentally committed with) this file. The
# literal placeholders below are only used when the variables are not set.
GITHUB_USERNAME = os.environ.get('GITHUB_USERNAME', 'USERNAME_HERE')  # Insert here your Github username
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', 'TOKEN_HERE')  # Insert here your github token
BASE_API_URL = 'https://api.github.com'
def get_repositories(timeout=30):
    """Return the repos list of a user.

    Follows the pagination ``Link`` headers so that every repository is
    returned, not only the first page of results.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of repository dicts as decoded from the API's JSON response.
        If a request fails, an error is printed and the repositories
        collected so far are returned (an empty list when the very first
        request fails).
    """
    url = f'{BASE_API_URL}/users/{GITHUB_USERNAME}/repos'
    headers = {
        'Authorization': f'token {GITHUB_TOKEN}'
    }
    # Ask for the largest page size to keep the number of requests low.
    params = {'per_page': 100}
    repositories = []

    while url:
        try:
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
        except requests.RequestException as error:
            print(f"Error getting the repositories: {error}")
            break
        if response.status_code != 200:
            print(f"Error getting the repositories: {response.status_code}")
            break

        repositories.extend(response.json())

        # The "next" link already carries its own query string, so the
        # explicit params must not be sent again with it.
        url = response.links.get('next', {}).get('url')
        params = None

    return repositories
def get_files_in_repository(repo_name, branch='main', timeout=30):
    """Return the list of files in a repository.

    Args:
        repo_name: Name of a repository owned by ``GITHUB_USERNAME``.
        branch: Branch whose tree is listed. Defaults to ``'main'``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``tree`` entries of the API response (files and directories),
        or an empty list when the request fails.
    """
    url = f'{BASE_API_URL}/repos/{GITHUB_USERNAME}/{repo_name}/git/trees/{branch}?recursive=1'
    headers = {
        'Authorization': f'token {GITHUB_TOKEN}'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as error:
        print(f"Error getting the files for the repository: {repo_name}: {error}")
        return []
    if response.status_code != 200:
        print(f"Error getting the files for the repository: {repo_name}: {response.status_code}")
        return []

    payload = response.json()
    if payload.get('truncated'):
        # The API caps the size of a recursive listing; tell the user the
        # result is partial instead of silently dropping files.
        print(f"Warning: file list for the repository {repo_name} is truncated")
    return payload.get('tree', [])


def get_raw_links():
    """Return the raw-content URLs of every file in every repository.

    Each repository is listed on its own default branch (falling back to
    ``'main'`` when the repository data does not name one), and the same
    branch is used in the generated URL so the two always agree.
    """
    raw_links = []
    for repo in get_repositories():
        repo_name = repo['name']
        branch = repo.get('default_branch') or 'main'
        for entry in get_files_in_repository(repo_name, branch):
            if entry['type'] != 'blob':  # Only files, not directories
                continue
            # Percent-encode spaces, '#', '?', ... but keep the '/' separators.
            encoded_path = quote(entry['path'])
            raw_links.append(
                f'https://raw.githubusercontent.com/{GITHUB_USERNAME}/{repo_name}/{branch}/{encoded_path}'
            )
    return raw_links
def download_file(url, save_path, timeout=30):
    """Download a file from a URL and save it locally.

    Args:
        url: Address of the file to fetch.
        save_path: Local path the response body is written to (binary mode).
        timeout: Seconds to wait for the HTTP request before giving up.

    A failed request (network error or non-200 status) is reported on
    stdout and nothing is written, so one bad URL does not abort a batch
    of downloads.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        print(f"Failed to download {url}: {error}")
        return

    if response.status_code != 200:
        print(f"Failed to download {url}: {response.status_code}")
        return

    with open(save_path, 'wb') as file:
        file.write(response.content)
    print(f"File downloaded: {save_path}")
def local_path_for_link(link, save_directory):
    """Map a raw-content URL to a unique local path under save_directory.

    The part of the URL after the owner (``<repo>/<branch>/<path>``) is
    mirrored on disk, so files that share a basename (for example the
    ``README.md`` of every repository) no longer overwrite each other.
    """
    # 'https://raw.githubusercontent.com/<user>/<repo>/<branch>/<path>'
    # splits into ['https:', '', host, user, '<repo>/<branch>/<path>'].
    pieces = link.split('/', 4)
    relative = pieces[4] if len(pieces) > 4 else pieces[-1]
    components = [unquote(piece) for piece in relative.split('/')]
    # Drop empty and dot components so the result stays inside save_directory.
    components = [piece for piece in components if piece not in ('', '.', '..')]
    return os.path.join(save_directory, *components)


def main():
    """List every raw file link and download the files on user approval."""
    raw_links = get_raw_links()
    if not raw_links:
        # Nothing to show or download, so do not prompt the user.
        print("No files found.")
        return

    for link in raw_links:
        print(link)

    user_approval = input("Do you want to download these files? (yes/no): ").strip().lower()
    if user_approval not in ('yes', 'y'):
        print("Download aborted.")
        return

    save_directory = "downloaded_files"
    os.makedirs(save_directory, exist_ok=True)
    for link in raw_links:
        filename = local_path_for_link(link, save_directory)
        # Create the per-repository sub-directories on demand.
        os.makedirs(os.path.dirname(filename) or save_directory, exist_ok=True)
        download_file(link, filename)


# Example of usage
if __name__ == "__main__":
    main()