# VERSION: 1.8
# AUTHORS: LightDestory (https://github.com/LightDestory)
import re
from helpers import retrieve_url
from novaprinter import prettyPrinter
class ilcorsaronero(object):
url = 'https://ilcorsaronero.link/'
name = 'Il Corsaro Nero'
supported_categories = {'all': '',
'movies': 'film',
'music': 'musica',
'games': 'giochi',
'anime': 'animazione',
'books': 'libri',
'software': 'software',
'tv': 'serie-tv'}
class HTMLParser:
def __init__(self, url):
self.url = url
self.noTorrents = False
def feed(self, html):
self.noTorrents = False
torrents = self.__findTorrents(html)
if len(torrents) == 0:
self.noTorrents = True
return
for torrent in range(len(torrents)):
data = {
'link': torrents[torrent][0],
'name': torrents[torrent][1],
'size': torrents[torrent][2],
'seeds': torrents[torrent][3],
'leech': torrents[torrent][4],
'engine_url': self.url,
'desc_link': torrents[torrent][5],
'pub_date': torrents[torrent][6]
}
prettyPrinter(data)
def __findTorrents(self, html):
torrents = []
# Find all TR nodes with class odd or odd2
trs = re.findall(r'(
.+?
)', html)
for tr in trs[1:]: # Skip the first TR node because it's the header
# Extract from the A node all the needed information
url_titles = re.search(
r'href=\"(.+?)\">(.+?).+?green.+?>.*?([0-9]+).*?red.*?>.*?([0-9]+).+?([0-9\.\,]+ (?:TiB|GiB|MiB|KiB|B)).+?timestamp=\"(.+?)\"',
tr)
if url_titles:
generic_url = '{0}{1}'.format(self.url[:-1], url_titles.group(1))
torrents.append([
generic_url,
url_titles.group(2),
url_titles.group(5),
url_titles.group(3),
url_titles.group(4),
generic_url,
url_titles.group(6)
])
return torrents
def download_torrent(self, info):
torrent_page = ' '.join(retrieve_url(info).split())
magnet_match = re.search(r'href=\"(magnet:.*?)\"', torrent_page)
if magnet_match and magnet_match.groups():
magnet_str = magnet_match.groups()[0]
print(magnet_str + " " + magnet_str)
else:
raise Exception('Error, please fill a bug report!')
def search(self, what, cat='all'):
what = what.replace("%20", "+")
parser = self.HTMLParser(self.url)
counter: int = 1
filter = '&cat={0}'.format(self.supported_categories[cat])
while True:
url = '{0}search?q={1}&cat={2}&page={3}'.format(self.url, what, filter, counter)
html = re.sub(r'\s+', ' ', retrieve_url(url)).strip()
parser.feed(html)
if parser.noTorrents:
break
counter += 1