about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Corentin Chary <corentin.chary@gmail.com> 2012-12-12 22:54:50 +0100
committer Corentin Chary <corentin.chary@gmail.com> 2012-12-12 22:54:50 +0100
commit e42ba7dfd05ff8326981275f5031db5b52564d68 (patch)
tree a49245e6e4017ae5f918b8542762556bac02bac0 /euscanwww
parent Merge pull request #19 from EvaSDK/master (diff)
download euscan-e42ba7dfd05ff8326981275f5031db5b52564d68.tar.gz
euscan-e42ba7dfd05ff8326981275f5031db5b52564d68.tar.bz2
euscan-e42ba7dfd05ff8326981275f5031db5b52564d68.zip
djeuscan: rework the scan process, don't use alive anymore
Signed-off-by: Corentin Chary <corentin.chary@gmail.com>
Diffstat (limited to 'euscanwww')
-rw-r--r-- euscanwww/djeuscan/admin.py | 2
-rw-r--r-- euscanwww/djeuscan/management/commands/scan_portage.py | 7
-rw-r--r-- euscanwww/djeuscan/models.py | 1
-rw-r--r-- euscanwww/djeuscan/processing/scan/scan_portage.py | 229
-rw-r--r-- euscanwww/djeuscan/processing/scan/scan_upstream.py | 27
-rw-r--r-- euscanwww/djeuscan/tasks.py | 6
-rw-r--r-- euscanwww/euscanwww/__init__.py | 1
7 files changed, 151 insertions, 122 deletions
diff --git a/euscanwww/djeuscan/admin.py b/euscanwww/djeuscan/admin.py
index 5ecc512..bdf7f6b 100644
--- a/euscanwww/djeuscan/admin.py
+++ b/euscanwww/djeuscan/admin.py
@@ -27,7 +27,7 @@ class PackageAdmin(admin.ModelAdmin):
class VersionAdmin(admin.ModelAdmin):
search_fields = ('package__name', 'package__category')
- list_filter = ('overlay', 'packaged', 'alive')
+ list_filter = ('overlay', 'packaged')
class ProblemReportAdmin(admin.ModelAdmin):
diff --git a/euscanwww/djeuscan/management/commands/scan_portage.py b/euscanwww/djeuscan/management/commands/scan_portage.py
index e70affa..f1d9485 100644
--- a/euscanwww/djeuscan/management/commands/scan_portage.py
+++ b/euscanwww/djeuscan/management/commands/scan_portage.py
@@ -44,12 +44,6 @@ class Command(BaseCommand):
dest='no-log',
default=False,
help='Don\'t store logs'),
- make_option('--prefetch',
- action='store_true',
- dest='prefetch',
- default=False,
- help=('Prefetch all versions and packages from DB to '
- 'speedup full scan process.')),
)
args = '[package package ...]'
help = 'Scans portage tree and fills database'
@@ -70,7 +64,6 @@ class Command(BaseCommand):
no_log=options["no-log"],
purge_packages=options["purge-packages"],
purge_versions=options["purge-versions"],
- prefetch=options["prefetch"],
upstream=options["upstream"],
logger=logger,
)
diff --git a/euscanwww/djeuscan/models.py b/euscanwww/djeuscan/models.py
index 5a0991c..1b75495 100644
--- a/euscanwww/djeuscan/models.py
+++ b/euscanwww/djeuscan/models.py
@@ -144,7 +144,6 @@ class Version(models.Model):
overlay = models.CharField(max_length=128, default='gentoo', db_index=True,
validators=[validate_name], blank=True)
urls = models.TextField(blank=True)
- alive = models.BooleanField(default=True, db_index=True)
vtype = models.CharField(max_length=128, blank=True)
handler = models.CharField(max_length=128, blank=True, db_index=True)
diff --git a/euscanwww/djeuscan/processing/scan/scan_portage.py b/euscanwww/djeuscan/processing/scan/scan_portage.py
index 1d6c866..79aba67 100644
--- a/euscanwww/djeuscan/processing/scan/scan_portage.py
+++ b/euscanwww/djeuscan/processing/scan/scan_portage.py
@@ -8,6 +8,7 @@ import portage
from xml.etree.ElementTree import iterparse, ParseError
from django.db.transaction import commit_on_success
+from django.db import models
from django.core.management.color import color_style
from euscan.version import get_version_type
@@ -29,42 +30,45 @@ class ScanPortage(object):
self._cache = {'packages': {}, 'versions': {}}
self._overlays = None
- self._updated_packages = set()
+ self._packages_updated = set()
+ self._versions = set()
+ self._versions_seen = set()
- def updated_packages(self):
- return list(self._updated_packages)
+ def packages_updated(self):
+ return list(self._packages_updated)
- def cache_hash_package(self, category, name):
+ def hash_package(self, category, name):
return '%s/%s' % (category, name)
def cache_store_package(self, package):
- key = self.cache_hash_package(package.category, package.name)
+ key = self.hash_package(package.category, package.name)
self._cache['packages'][key] = package
def cache_get_package(self, category, name):
return self._cache['packages'].get(
- self.cache_hash_package(category, name)
+ self.hash_package(category, name)
)
- def cache_hash_version(self, category, name, version, revision, slot,
+ def hash_version(self, category, name, version, revision,
overlay):
- key = '%s/%s-%s-r%s %s %s' % (category, name,
- version, revision,
- slot, overlay)
+ key = '%s/%s-%s-r%s %s' % (category, name,
+ version, revision,
+ overlay)
return key
- def cache_get_version(self, category, name, version, revision, slot,
+ def cache_get_version(self, category, name, version, revision,
overlay):
- key = self.cache_hash_version(category, name, version, revision, slot,
+ key = self.hash_version(category, name, version, revision,
overlay)
return self._cache['versions'].get(key)
def cache_store_version(self, version):
- key = self.cache_hash_version(
+ key = self.hash_version(
version.package.category, version.package.name, version.version,
- version.revision, version.slot, version.overlay
+ version.revision, version.overlay
)
self._cache['versions'][key] = version
+ self._versions.add(version)
def scan_gentoopm(self, query, category=None):
import gentoopm
@@ -160,60 +164,28 @@ class ScanPortage(object):
category = ""
elem.clear()
- def prepare_purge_versions(self, packages, query=None, category=None):
- if not self.purge_versions:
- return
-
- # Set all versions dead, then set found versions alive and
- # delete old versions
- if not query:
- # Optimisation for --all or --category
- self.logger.info('Killing existing versions...')
- qs = Version.objects.filter(packaged=True)
- if category:
- qs = qs.filter(package__category=category)
- qs.update(alive=False)
- self.logger.info('done')
- else:
- for package in packages:
- Version.objects.filter(package=package, packaged=True).\
- update(alive=False)
-
def scan(self, query=None, category=None):
- if not query:
- current_packages = Package.objects.all()
- elif '/' in query:
- cat, pkg = portage.catsplit(query)
- current_packages = Package.objects.filter(category=cat, name=pkg)
- else:
- current_packages = Package.objects.filter(name=query)
- if category:
- current_packages = current_packages.filter(category=category)
-
- self.prepare_purge_versions(current_packages, query, category)
-
- packages_alive = set()
-
for data in self.scan_eix_xml(query, category):
#for data in self.scan_gentoopm(query, category):
cat, pkg = data['category'], data['package']
package = self.store_package(
cat, pkg, data['homepage'], data['description']
)
- packages_alive.add("%s/%s" % (cat, pkg))
+
new_version = False
for cpv, slot, overlay, overlay_path in data['versions']:
obj, created = self.store_version(
package, cpv, slot, overlay, overlay_path
)
+ self._versions_seen.add(obj)
new_version = created or new_version
# If the package has at least one new version scan upstream for it
if new_version:
- self._updated_packages.add(package)
+ self._packages_updated.add(package)
- self.purge_old_packages(current_packages, packages_alive)
self.purge_old_versions()
+ self.purge_old_packages()
def store_package(self, cat, pkg, homepage, description):
created = False
@@ -239,7 +211,7 @@ class ScanPortage(object):
created = False
obj = self.cache_get_version(
- package.category, package.name, ver, rev, slot, overlay
+ package.category, package.name, ver, rev, overlay
)
overlay_path = overlay_path or portage.settings["PORTDIR"]
@@ -249,11 +221,12 @@ class ScanPortage(object):
if not obj:
obj, created = Version.objects.get_or_create(
- package=package, slot=slot,
- revision=rev, version=ver,
+ package=package,
+ revision=rev,
+ version=ver,
overlay=overlay,
defaults={
- "alive": True,
+ "slot": slot,
"packaged": True,
"vtype": get_version_type(ver),
"confidence": 100,
@@ -263,9 +236,10 @@ class ScanPortage(object):
}
)
if not created: # Created objects have defaults values
- obj.alive = True
- obj.packaged = True
- obj.save()
+ if obj.slot != slot or obj.package != True:
+ obj.slot = slot
+ obj.packaged = True
+ obj.save()
if created:
self.cache_store_version(obj)
@@ -298,22 +272,51 @@ class ScanPortage(object):
return obj, created
- def purge_old_packages(self, packages, alive):
+ def purge_old_packages(self):
if not self.purge_packages:
return
+ packages = (
+ Package.objects.values("id")
+ .annotate(version_count=models.Count("version"))
+ .filter(version_count=0)
+ )
+ packages = (
+ Package.objects.filter(id__in=[package['id'] for package in packages])
+ )
+
for package in packages:
- cp = "%s/%s" % (package.category, package.name)
- if cp not in alive:
- self.logger.info('- [p] %s' % (package))
- package.delete()
+ self.logger.info('- [p] %s' % (package))
+ package.delete()
+
+ def version_hack(self, version):
+ try:
+ if version.package.last_version_gentoo:
+ version.package.last_version_gentoo.pk
+ if version.package.last_version_overlay:
+ version.package.last_version_overlay.pk
+ if version.package.last_version_upstream:
+ version.package.last_version_upstream.pk
+ except Version.DoesNotExist:
+ version.package.last_version_gentoo = None
+ version.package.last_version_overlay = None
+ version.package.last_version_upstream = None
def purge_old_versions(self):
if not self.purge_versions:
return
- versions = Version.objects.filter(packaged=True, alive=False)
+ versions = self._versions.difference(self._versions_seen)
+
for version in versions:
+ self.logger.info('- [v] %s' % (version))
+
+ if version.packaged == False:
+ continue # Not our job
+
+ # Fix last_version_ stuff that is sometimes broken
+ self.version_hack(version)
+
if version.overlay == 'gentoo':
version.package.n_packaged -= 1
else:
@@ -321,8 +324,6 @@ class ScanPortage(object):
version.package.n_versions -= 1
version.package.save()
- self.logger.info('- [v] %s' % (version))
-
if self.no_log:
continue
@@ -335,20 +336,55 @@ class ScanPortage(object):
overlay=version.overlay,
vtype=version.vtype,
)
+ # remove from last version ?
+ version.delete()
+
+ def prefetch(self, packages, category):
+ self.logger.info('Prefetching current objects...')
- versions.delete()
+ ppackages = Package.objects.all()
+ pversions = Version.objects.filter(packaged=True).select_related('package').all()
+
+ if category:
+ ppackages = ppackages.filter(category=category)
+ pversions = pversions.filter(package__category=category)
+ if packages:
+ ids = [ package.id for package in packages ]
+ ppackages = ppackages.filter(pk__in=ids)
+ pversions = pversions.filter(package__pk__in=ids)
+
+ for package in ppackages:
+ self.cache_store_package(package)
+ for version in pversions:
+ self.cache_store_version(version)
+
+ self.logger.info('done')
+
+def populate_categories(logger):
+ # Populate Category and Overlay
+ # TODO: - use portage.settings.categories()
+ # - read metadata.xml to add description
+ for cat in Package.objects.values('category').distinct():
+ obj, created = Category.objects.get_or_create(name=cat["category"])
+ if created:
+ logger.info("+ [c] %s", cat["category"])
+
+def populate_overlays(logger):
+ # TODO: - get information from layman and portage (path, url)
+ for overlay in Version.objects.values('overlay').distinct():
+ if not overlay["overlay"]:
+ continue
+ obj, created = Overlay.objects.get_or_create(name=overlay["overlay"])
+ if created:
+ logger.info("+ [o] %s", overlay["overlay"])
@commit_on_success
def scan_portage(packages=None, category=None, no_log=False, upstream=False,
- purge_packages=False, purge_versions=False, prefetch=False,
- logger=None):
+ purge_packages=False, purge_versions=False, logger=None):
logger = logger or FakeLogger()
- if packages is None:
- prefetch = True
-
scan_handler = ScanPortage(
logger=logger,
no_log=no_log,
@@ -358,18 +394,28 @@ def scan_portage(packages=None, category=None, no_log=False, upstream=False,
logger.info('Scanning portage tree...')
- if prefetch:
- logger.info('Prefetching objects...')
- ppackages = Package.objects.all()
- pversions = Version.objects.select_related('package').all()
+ if not packages:
+ qs = Package.objects.all()
if category:
- ppackages = ppackages.filter(category=category)
- pversions = pversions.filter(package__category=category)
- for package in ppackages:
- scan_handler.cache_store_package(package)
- for version in pversions:
- scan_handler.cache_store_version(version)
- logger.info('done')
+ qs = qs.filter(category=category)
+ prefetch_packages = qs
+ else:
+ results = []
+ for package in packages:
+ if isinstance(package, Package):
+ results.append(package)
+ else:
+ if '/' in package:
+ cat, pkg = portage.catsplit(package)
+ qs = Package.objects.filter(category=cat, name=pkg)
+ else:
+ qs = Package.objects.filter(name=package)
+ for package in qs:
+ results.append(package)
+ prefetch_packages = results
+
+
+ scan_handler.prefetch(prefetch_packages, category)
if not packages and category:
scan_handler.scan(category=category)
@@ -382,21 +428,8 @@ def scan_portage(packages=None, category=None, no_log=False, upstream=False,
else:
scan_handler.scan(pkg)
- # Populate Category and Overlay
- # TODO: - use portage.settings.categories()
- # - read metadata.xml to add description
- for cat in Package.objects.values('category').distinct():
- obj, created = Category.objects.get_or_create(name=cat["category"])
- if created:
- logger.info("+ [c] %s", cat["category"])
-
- # TODO: - get informations from layman and portage (path, url)
- for overlay in Version.objects.values('overlay').distinct():
- if not overlay["overlay"]:
- continue
- obj, created = Overlay.objects.get_or_create(name=overlay["overlay"])
- if created:
- logger.info("+ [o] %s", overlay["overlay"])
+ populate_categories(logger)
+ populate_overlays(logger)
logger.info('Done.')
- return scan_handler.updated_packages()
+ return scan_handler.packages_updated()
diff --git a/euscanwww/djeuscan/processing/scan/scan_upstream.py b/euscanwww/djeuscan/processing/scan/scan_upstream.py
index bc9159b..da384e5 100644
--- a/euscanwww/djeuscan/processing/scan/scan_upstream.py
+++ b/euscanwww/djeuscan/processing/scan/scan_upstream.py
@@ -14,6 +14,8 @@ class ScanUpstream(object):
def __init__(self, logger=None, purge_versions=False):
self.logger = logger or FakeLogger()
self.purge_versions = purge_versions
+ self._versions = set()
+ self._versions_seen = set()
def scan(self, package):
CONFIG["format"] = "dict"
@@ -74,12 +76,11 @@ class ScanUpstream(object):
if created:
self.logger.info('+ [p] %s/%s' % (cat, pkg))
- # Set all versions dead, then set found versions alive and
- # delete old versions
- if self.purge_versions:
- Version.objects.filter(
- package=obj, packaged=False
- ).update(alive=False)
+ versions = Version.objects.filter(
+ package=obj, packaged=False
+ )
+ for version in versions:
+ self._versions.add(version)
return obj
@@ -87,20 +88,22 @@ class ScanUpstream(object):
confidence):
obj, created = Version.objects.get_or_create(
package=package,
- slot='',
revision='r0',
version=ver,
overlay='',
- defaults={"alive": True, "urls": url, "packaged": False,
+ defaults={"slot" : '', "urls": url, "packaged": False,
"vtype": version_type, "handler": handler,
"confidence": confidence}
)
+
if not created:
- obj.alive = True
+ obj.slot = ''
obj.urls = url
obj.packaged = False
obj.save()
+ self._versions_seen.add(obj)
+
# If it's not a new version, just update the object and continue
if not created:
return
@@ -124,8 +127,10 @@ class ScanUpstream(object):
if not self.purge_versions:
return
- versions = Version.objects.filter(packaged=False, alive=False)
+ versions = self._versions.difference(self._versions_seen)
for version in versions:
+ if version.packaged == True:
+ continue # Not our job
VersionLog.objects.create(
package=version.package,
action=VersionLog.VERSION_REMOVED,
@@ -141,7 +146,7 @@ class ScanUpstream(object):
self.logger.info('- [u] %s %s' % (version, version.urls))
- versions.delete()
+ version.delete()
@commit_on_success
diff --git a/euscanwww/djeuscan/tasks.py b/euscanwww/djeuscan/tasks.py
index 44e4873..f58a544 100644
--- a/euscanwww/djeuscan/tasks.py
+++ b/euscanwww/djeuscan/tasks.py
@@ -115,7 +115,7 @@ def scan_metadata(packages=[], category=None, populate=False):
@task
def scan_portage(packages=None, category=None,
no_log=False, purge_packages=False,
- purge_versions=False, prefetch=False):
+ purge_versions=False):
"""
Scans portage for the given set of packages
"""
@@ -136,7 +136,6 @@ def scan_portage(packages=None, category=None,
no_log=no_log,
purge_packages=purge_packages,
purge_versions=purge_versions,
- prefetch=prefetch,
logger=logger,
)
@@ -182,7 +181,6 @@ def update_portage(packages=None):
packages=None,
purge_packages=True,
purge_versions=True,
- prefetch=True
)
scan_metadata(packages=None, populate=True)
@@ -204,7 +202,7 @@ def update_portage(packages=None):
(
group_one(scan_portage, categories,
attr_name="category", purge_packages=True,
- purge_versions=True, prefetch=True) |
+ purge_versions=True) |
group_one(scan_metadata, categories,
attr_name="category") |
update_counters.si(fast=True)
diff --git a/euscanwww/euscanwww/__init__.py b/euscanwww/euscanwww/__init__.py
index e69de29..8b13789 100644
--- a/euscanwww/euscanwww/__init__.py
+++ b/euscanwww/euscanwww/__init__.py
@@ -0,0 +1 @@
+