-
Notifications
You must be signed in to change notification settings - Fork 0
/
pm-extract-minicpan-metas.v1
executable file
·116 lines (95 loc) · 2.83 KB
/
pm-extract-minicpan-metas.v1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/local/bin/perl
use 5.010;
use strict;
use warnings FATAL => 'all';
#use Carp;
#use diagnostics;
use Data::Dump 'pp';
#use Data::Dumper; sub pp { print Dumper @_ }
$|=1;
###############################################################################
use Archive::Extract;
use File::Find::Rule;
use File::Spec;
use File::Temp qw();
use List::Util qw(first);
use Time::HiRes qw(time);
use YAML::Any qw(LoadFile);
use constant {
CPAN_MINI => '/mirrors/cpan/',
SQLITE_DB => 'cpan-meta.sqlite~',
};
BEGIN { unlink SQLITE_DB ; say SQLITE_DB, " deleted" }
use ORLite {
package => 'DB',
file => SQLITE_DB,
create => sub {
my $dbh = shift;
$dbh->do(q(
CREATE TABLE meta (
id INTEGER PRIMARY KEY AUTOINCREMENT,
abstract TEXT,
author_id INTEGER REFERENCES authors(id),
generated_by TEXT,
license TEXT,
name TEXT,
timestamp TEXT,
version TEXT
)
));
$dbh->do(q(
CREATE TABLE authors (
id INTEGER PRIMARY KEY AUTOINCREMENT,
cpan_name TEXT UNIQUE NOT NULL,
name TEXT
)
));
},
};
run(@ARGV);
sub run {
my @argv = @_;
my $problems = 0;
my $start_time = time;
my $dest = File::Temp::tempdir(
File::Spec->catfile(File::Spec->tmpdir, "extract-$$-XXXXXXXX"),
CLEANUP => 1
);
my $base = File::Spec->catdir(CPAN_MINI, qw(authors id));
my @files = grep {
$_ !~ /CHECKSUMS$/
} File::Find::Rule->file->in($base);
@files = @files[80..199]; # XXX
for my $file (@files) {
my ($author, $archive) = $file =~ m{/([^/]+)/([^/]+)$};
my $ae = Archive::Extract->new(archive => $file);
my $ok = $ae->extract( to => $dest );
if ($ok) {
my ($meta_json, $meta_yaml);
for (@{$ae->files}) {
$meta_json = $_ if /META.json$/;
$meta_yaml = $_ if /META.yml$/;
last if $meta_json && $meta_yaml;
}
if ($meta_yaml) {
update_db_from(File::Spec->catfile($dest, $meta_yaml));
}
if ($meta_json) {
pp($meta_json, $meta_yaml);
}
}
else { $problems ++ }
}
my $elapsed = time - $start_time;
printf "Processed %d archives in %.2f seconds (%.2f s/archive) with %d problems\n",
scalar @files, $elapsed, $elapsed/@files, $problems;
}
sub update_db_from {
my $yaml_file = shift;
my $meta = LoadFile($yaml_file);
die "exit";
}
sub fetch_or_create_author {
my ($cpan_name, $name) = @_;
my @rs = DB::Author->select('where cpan_name = ?', $cpan_name);
}