From anonymous, 2 Weeks ago, written in Python.
Embed
  1. import re
  2. import subprocess
  3.  
  4. import requests
  5.  
  6.  
  7. def iter_plugins():
  8.     regex = r">([\d\w-]*)</a>"
  9.     response = requests.get("https://pypi.org/simple")
  10.     for match in re.finditer(regex, response.text):
  11.         name = match.groups()[0]
  12.         if not name.startswith("pytest-"):
  13.             continue
  14.  
  15.         print()
  16.         print(name)
  17.  
  18.         response = requests.get(f"https://pypi.org/pypi/{name}/json")
  19.         if response.status_code == 404:
  20.             # Some packages, like pytest-azurepipelines42, are included in https://pypi.org/simple but
  21.             # return 404 on the JSON API. Skip.
  22.             print("Skipping due to PyPI 404")
  23.             continue
  24.         response.raise_for_status()
  25.         info = response.json()["info"]
  26.         project_urls = info['project_urls']
  27.         if project_urls is None:
  28.             project_urls = {}
  29.         #print(project_urls)
  30.  
  31.         source_url_keys = ['GitHub', 'GitHub Project', 'Source', 'Repository', 'Source code', 'Source Code', 'Code', 'Homepage']
  32.  
  33.         repo_url = None
  34.         for key in source_url_keys:
  35.             if key not in project_urls:
  36.                 continue
  37.  
  38.             hosters = ['gitlab.com', 'github.com', 'pagure.io', 'bitbucket.org', 'codeberg.org', 'edugit.org', 'framaagit.org', 'yourlabs.io/oss']
  39.             u = project_urls[key]
  40.             if not any(h in u for h in hosters):
  41.                 print(f"IGNORING {u}")
  42.                 continue
  43.  
  44.             if u is not None:
  45.                 repo_url = u
  46.                 break
  47.  
  48.         if repo_url is None:
  49.             print("No URL found, trying description")
  50.             patterns = [
  51.                 r'https://travis-ci.org/([^/]*)/([^.]*)\.svg',
  52.                 r'https://github.com/([^/]*)/([a-zA-Z0-9-]*)',
  53.                 r'http://github.com/([^/]*)/([a-zA-Z0-9-]*)',
  54.             ]
  55.             for p in patterns:
  56.                 m = re.search(p, info['description'])
  57.                 if m is not None:
  58.                     repo_url = f'https://github.com/{m.group(1)}/{m.group(2)}'
  59.  
  60.         if repo_url is None:
  61.             print("NO REPO FOUND!")
  62.             yield None
  63.             continue
  64.  
  65.         print(repo_url)
  66.         url_replacements = [
  67.             ('http://', 'https://'),
  68.             # Avoid password prompts
  69.             ('https://gitlab.com', 'https://invalid:invalid@gitlab.com'),
  70.             ('https://github.com', 'https://invalid:invalid@github.com'),
  71.             # specific issues
  72.             ('pytest-matcher,', 'pytest-matcher'),
  73.         ]
  74.         for old, new in url_replacements:
  75.             repo_url = repo_url.replace(old, new)
  76.  
  77.         yield repo_url
  78.  
  79.  
  80. def main():
  81.     for repo in iter_plugins():
  82.         if repo is None:
  83.             print("Skipping, no known repository")
  84.             continue
  85.  
  86.         try:
  87.             subprocess.run(['git', 'clone', repo], check=True, capture_output=True, encoding='utf-8')
  88.         except subprocess.CalledProcessError as e:
  89.             if "fatal: Authentication failed for" in e.stderr:
  90.                 print("Auth failed, skipping")
  91.             elif "fatal: repository " in e.stderr and "not found" in e.stderr:
  92.                 print("GitHub 404, skipping")
  93.             elif "fatal: destination path " in e.stderr and " already exists and is not an empty directory." in e.stderr:
  94.                 print("Already exists, skipping")
  95.             else:
  96.                 print(e.stderr)
  97.                 raise
  98.         else:
  99.             print("Cloned successfully")
  100.  
  101.  
# Start cloning only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()