Parent

Included Modules

Class Index [+]

Quicksearch

Gem::Indexer

Top level class for building the gem repository index.

Attributes

build_legacy[RW]

Build indexes for RubyGems older than 1.2.0 when true

build_modern[RW]

Build indexes for RubyGems 1.2.0 and newer when true

dest_directory[R]

Index install location

dest_specs_index[R]

Specs index install location

dest_latest_specs_index[R]

Latest specs index install location

dest_prerelease_specs_index[R]

Prerelease specs index install location

directory[R]

Index build directory

Public Class Methods

new(directory, options = {}) click to toggle source

Create an indexer that will index the gems in directory.

# File lib/rubygems/indexer.rb, line 59
  ##
  # Sets up an indexer for the gem repository rooted at +directory+.
  #
  # +options+ may carry :build_legacy and :build_modern (both default to
  # true) plus :rss_title, :rss_host and :rss_gems_host, which are stored
  # for RSS generation.  Indexes are built under a per-process directory
  # inside Dir.tmpdir; the dest_* paths point into +directory+.
  #
  # Fails when String#to_xs is not available.
  def initialize(directory, options = {})
    # A missing String#to_xs is treated as "Builder is not installed".
    if not ''.respond_to?(:to_xs) then
      fail "Gem::Indexer requires that the XML Builder library be installed:" \
           "\n\tgem install builder"
    end

    defaults = { :build_legacy => true, :build_modern => true }
    options = defaults.merge options

    @build_legacy = options[:build_legacy]
    @build_modern = options[:build_modern]

    @rss_title     = options[:rss_title]
    @rss_host      = options[:rss_host]
    @rss_gems_host = options[:rss_gems_host]

    @dest_directory = directory
    @directory      = File.join(Dir.tmpdir, "gem_generate_index_#{$$}")

    marshal_version = Gem.marshal_version
    marshal_name    = "Marshal.#{marshal_version}"

    # File names shared by the build directory and the destination.
    specs_name      = "specs.#{marshal_version}"
    latest_name     = "latest_specs.#{marshal_version}"
    prerelease_name = "prerelease_specs.#{marshal_version}"

    # Legacy indexes inside the build directory.
    @master_index      = File.join(@directory, 'yaml')
    @marshal_index     = File.join(@directory, marshal_name)
    @quick_dir         = File.join(@directory, 'quick')
    @quick_marshal_dir = File.join(@quick_dir, marshal_name)
    @quick_index       = File.join(@quick_dir, 'index')
    @latest_index      = File.join(@quick_dir, 'latest_index')

    # Modern indexes inside the build directory.
    @specs_index            = File.join(@directory, specs_name)
    @latest_specs_index     = File.join(@directory, latest_name)
    @prerelease_specs_index = File.join(@directory, prerelease_name)

    # Where the modern indexes end up once installed.
    @dest_specs_index            = File.join(@dest_directory, specs_name)
    @dest_latest_specs_index     = File.join(@dest_directory, latest_name)
    @dest_prerelease_specs_index = File.join(@dest_directory, prerelease_name)

    @rss_index = File.join(@directory, 'index.rss')

    @files = []
  end

Public Instance Methods

abbreviate(spec) click to toggle source

Abbreviate the spec for downloading. Abbreviated specs are only used for searching, downloading and related activities and do not need deployment specific information (e.g. list of files). So we abbreviate the spec, making it much smaller for quicker downloads.

# File lib/rubygems/indexer.rb, line 113
  ##
  # Empties the bulky list fields of +spec+ (files, test files, rdoc
  # options, extra rdoc files and certificate chain) so the spec is smaller
  # to download.  Mutates and returns the same +spec+ object.
  def abbreviate(spec)
    [:files, :test_files, :rdoc_options, :extra_rdoc_files, :cert_chain].each do |field|
      # A fresh array per field, so no two fields share one object.
      spec.send "#{field}=", []
    end

    spec
  end
build_indicies(index) click to toggle source

Builds the various indices.

# File lib/rubygems/indexer.rb, line 125
  ##
  # Builds every enabled index for +index+ and then compresses the results.
  # Returns whatever compress_indicies returns.
  def build_indicies(index)
    # The Marshal quick gemspecs serve both legacy and modern RubyGems, so
    # they are built regardless of the build flags.
    build_marshal_gemspecs(index)

    if @build_legacy then build_legacy_indicies(index) end
    if @build_modern then build_modern_indicies(index) end

    build_rss(index)

    compress_indicies
  end
build_legacy_indicies(index) click to toggle source

Builds indices for RubyGems older than 1.2.0.

# File lib/rubygems/indexer.rb, line 138
  ##
  # Writes the legacy index files for +index+ into the build directory:
  #
  # * one deflated YAML gemspec per released gem under @quick_dir
  # * the quick index and latest index (lists of original gem names)
  # * the Marshal master index (index.dump)
  # * the YAML master index
  #
  # Finally registers the quick directory and both master indexes (plus
  # their ".Z" names) in @files.  The ".Z" files themselves are not written
  # by this method.
  def build_legacy_indicies(index)
    # NOTE: the progress total is index.size, but the loop below only
    # visits index.released_gems.
    progress = ui.progress_reporter index.size,
                                    "Generating YAML quick index gemspecs for #{index.size} gems",
                                    "Complete"

    Gem.time 'Generated YAML quick index gemspecs' do
      index.released_gems.each do |original_name, spec|
        spec_file_name = "#{original_name}.gemspec.rz"
        yaml_name = File.join @quick_dir, spec_file_name

        yaml_zipped = Gem.deflate spec.to_yaml
        open yaml_name, 'wb' do |io| io.write yaml_zipped end

        progress.updated original_name
      end

      progress.done
    end

    say "Generating quick index"

    # One original gem name per line, for every spec in the index.
    Gem.time 'Generated quick index' do
      open @quick_index, 'wb' do |io|
        io.puts index.sort.map { |_, spec| spec.original_name }
      end
    end

    say "Generating latest index"

    # Same format as the quick index, restricted to index.latest_specs.
    Gem.time 'Generated latest index' do
      open @latest_index, 'wb' do |io|
        io.puts index.latest_specs.sort.map { |spec| spec.original_name }
      end
    end

    # Don't need prerelease legacy index

    say "Generating Marshal master index"

    Gem.time 'Generated Marshal master index' do
      open @marshal_index, 'wb' do |io|
        io.write index.dump
      end
    end

    progress = ui.progress_reporter index.size,
                                    "Generating YAML master index for #{index.size} gems (this may take a while)",
                                    "Complete"

    # The YAML master index is assembled by hand: a document header naming
    # the index class, then each gemspec's YAML nested under its original
    # name inside the "gems:" mapping.
    Gem.time 'Generated YAML master index' do
      open @master_index, 'wb' do |io|
        io.puts "--- !ruby/object:#{index.class}"
        io.puts "gems:"

        gems = index.sort_by { |name, gemspec| gemspec.sort_obj }
        gems.each do |original_name, gemspec|
          # Indent every line by four spaces, then strip the indented "---"
          # document marker so the remainder follows "  <name>:" directly.
          yaml = gemspec.to_yaml.gsub(/^/, '    ')
          yaml = yaml.sub(/\A    ---/, '') # there's a needed extra ' ' here
          io.print "  #{original_name}:"
          io.puts yaml

          progress.updated original_name
        end
      end

      progress.done
    end

    # Register everything that must be installed; the ".Z" names refer to
    # files expected to exist by install time.
    @files << @quick_dir
    @files << @master_index
    @files << "#{@master_index}.Z"
    @files << @marshal_index
    @files << "#{@marshal_index}.Z"
  end
build_marshal_gemspecs(index) click to toggle source

Builds Marshal quick index gemspecs.

# File lib/rubygems/indexer.rb, line 216
  ##
  # Writes one deflated, Marshal-dumped gemspec per gem in +index+ into
  # @quick_marshal_dir, registers that directory in @files, and returns the
  # list of files written.
  def build_marshal_gemspecs(index)
    reporter = ui.progress_reporter(index.size,
                                    "Generating Marshal quick index gemspecs for #{index.size} gems",
                                    "Complete")

    written = []

    Gem.time('Generated Marshal quick index gemspecs') do
      index.gems.each do |original_name, spec|
        target  = File.join(@quick_marshal_dir, "#{original_name}.gemspec.rz")
        payload = Gem.deflate(Marshal.dump(spec))

        open(target, 'wb') { |io| io.write payload }

        written << target
        reporter.updated original_name
      end

      reporter.done
    end

    @files << @quick_marshal_dir

    written
  end
build_modern_index(index, file, name) click to toggle source

Build a single index for RubyGems 1.2 and newer

# File lib/rubygems/indexer.rb, line 247
  ##
  # Writes a single RubyGems 1.2+ index to +file+.
  #
  # +index+ is an enumerable of specs; it may yield bare specs or
  # [name, spec] pairs.  +name+ is only used in the progress messages.
  # Each spec becomes a [name, version, platform] triple; the triples are
  # passed through compact_specs and Marshal-dumped to +file+.
  #
  # Specs whose original platform is not a String are skipped with a
  # warning and left out of the index.
  def build_modern_index(index, file, name)
    say "Generating #{name} index"

    Gem.time "Generated #{name} index" do
      open(file, 'wb') do |io|
        specs = index.map do |*spec|
          # We have to splat here because latest_specs is an array,
          # while the others are hashes. See the TODO in source_index.rb
          spec = spec.flatten.last
          platform = spec.original_platform

          # win32-api-1.0.4-x86-mswin32-60
          unless String === platform then
            alert_warning "Skipping invalid platform in gem: #{spec.full_name}"
            next
          end

          # nil never reaches this point (it is not a String), so only the
          # empty string needs mapping to the default platform.
          platform = Gem::Platform::RUBY if platform.empty?
          [spec.name, spec.version, platform]
        end

        # `next` above leaves nil in the mapped array for each skipped
        # spec; drop those so they are not dumped as [nil, nil, nil] rows.
        specs = compact_specs(specs.compact)
        Marshal.dump(specs, io)
      end
    end
  end
build_modern_indicies(index) click to toggle source

Builds indices for RubyGems 1.2 and newer. Handles the full, latest, and prerelease indices.

# File lib/rubygems/indexer.rb, line 277
  ##
  # Builds the three RubyGems 1.2+ indexes (released, latest, prerelease)
  # from +index+, then registers each index file and its ".gz" name in
  # @files.
  def build_modern_indicies(index)
    # [reader on index, output file, label used in progress messages]
    targets = [
      [:released_specs,   @specs_index,            'specs'],
      [:latest_specs,     @latest_specs_index,     'latest specs'],
      [:prerelease_specs, @prerelease_specs_index, 'prerelease specs'],
    ]

    targets.each do |reader, file, label|
      build_modern_index(index.send(reader).sort, file, label)
    end

    @files += targets.map { |_, file, _| [file, "#{file}.gz"] }.flatten
  end
build_rss(index) click to toggle source

Builds an RSS feed of the gems released in the past two days, according to each gem’s date.

# File lib/rubygems/indexer.rb, line 298
  ##
  # Writes an RSS 2.0 feed to @rss_index listing the gems in +index+ whose
  # date falls between yesterday and today (relative to
  # Gem::Specification::TODAY), newest first, and registers the feed in
  # @files.
  #
  # Does nothing (apart from an optional warning when really verbose) when
  # @rss_host or @rss_gems_host is unset.  Gems whose file cannot be
  # stat'ed are left out of the feed.
  def build_rss(index)
    if @rss_host.nil? or @rss_gems_host.nil? then
      if Gem.configuration.really_verbose then
        alert_warning "no --rss-host or --rss-gems-host, RSS generation disabled"
      end
      return
    end

    require 'cgi'
    require 'time' # Time.parse and Time#rfc2822 are used below
    require 'rubygems/text'

    extend Gem::Text

    Gem.time 'Generated rss' do
      open @rss_index, 'wb' do |io|
        rss_host = CGI.escapeHTML @rss_host
        rss_title = CGI.escapeHTML(@rss_title || 'gems')

        io.puts <<-HEADER
<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>#{rss_title}</title>
<link>http://#{rss_host}</link>
<description>Recently released gems from http://#{rss_host}</description>
<generator>RubyGems v#{Gem::RubyGemsVersion}</generator>
<docs>http://cyber.law.harvard.edu/rss/rss.html</docs>
        HEADER

        today = Gem::Specification::TODAY
        yesterday = today - 86400

        # Keep only specs dated within [yesterday, today]; specs whose date
        # is neither a Date nor a Time are dropped.
        recent = index.select do |_, spec|
          spec_date = spec.date

          released = case spec_date
                     when Date then Time.parse(spec_date.to_s)
                     when Time then spec_date
                     end

          released and released >= yesterday and released <= today
        end

        recent.sort_by { |_, spec| [-spec.date.to_i, spec] }.each do |_, spec|
          gem_path = CGI.escapeHTML "http://#{@rss_gems_host}/gems/#{spec.full_name}.gem"
          # Best effort: skip gems whose file cannot be stat'ed.
          size = File.stat(spec.loaded_from).size rescue next

          description = spec.description || spec.summary || ''
          authors = Array spec.authors
          emails = Array spec.email
          # Pair each e-mail with its author; keep the bare e-mail when no
          # author name is available for it.
          authors = emails.zip(authors).map do |email, author|
            if author and not author.empty? then
              "#{email} (#{author})"
            else
              email
            end
          end.join ', '

          description = description.split(/\n\n+/).map do |chunk|
            format_text chunk, 78
          end

          description = description.join "\n\n"

          item = <<-ITEM
<item>
<title>#{CGI.escapeHTML spec.full_name}</title>
<description>
&lt;pre&gt;#{CGI.escapeHTML description.chomp}&lt;/pre&gt;
</description>
<author>#{CGI.escapeHTML authors}</author>
<guid>#{CGI.escapeHTML spec.full_name}</guid>
<enclosure url="#{gem_path}"
length="#{size}" type="application/octet-stream" />
<pubDate>#{spec.date.rfc2822}</pubDate>
          ITEM

          item << "<link>#{CGI.escapeHTML spec.homepage}</link>\n" if spec.homepage

          item << "</item>\n"

          io.puts item
        end

        io.puts "</channel>\n</rss>\n"
      end
    end

    @files << @rss_index
  end
collect_specs(gems = gem_file_list) click to toggle source

Collect specifications from .gem files from the gem directory.

# File lib/rubygems/indexer.rb, line 413
  ##
  # Loads the gemspec out of every file in +gems+ (default: gem_file_list)
  # and returns a Gem::SourceIndex holding the abbreviated, sanitized specs
  # keyed by their original name.
  #
  # Zero-length files and files whose name does not match the spec's
  # original name are skipped with a warning.  Any other failure while
  # processing a gem is reported with alert_error and that gem is skipped;
  # signals are reported and re-raised.
  def collect_specs(gems = gem_file_list)
    index = Gem::SourceIndex.new

    progress = ui.progress_reporter gems.size,
                                    "Loading #{gems.size} gems from #{@dest_directory}",
                                    "Loaded all gems"

    Gem.time 'loaded' do
      gems.each do |gemfile|
        if File.size(gemfile.to_s) == 0 then
          alert_warning "Skipping zero-length gem: #{gemfile}"
          next
        end

        begin
          spec = Gem::Format.from_file_by_path(gemfile).spec
          spec.loaded_from = gemfile

          # The file name must contain the spec's original name directly
          # after the last "/" and end in ".gem" (case-insensitive).
          unless gemfile =~ /\/#{Regexp.escape spec.original_name}.*\.gem\z/i then
            # NOTE(review): << appends to the string returned by
            # spec.full_name; assumes full_name returns a fresh string on
            # each call -- TODO confirm.
            expected_name = spec.full_name
            expected_name << " (#{spec.original_name})" if
              spec.original_name != spec.full_name
            alert_warning "Skipping misnamed gem: #{gemfile} should be named #{expected_name}"
            next
          end

          abbreviate spec
          sanitize spec

          index.add_spec spec, spec.original_name

          progress.updated spec.original_name

        # Signals must propagate, so they are re-raised before the
        # catch-all below can swallow them.
        rescue SignalException => e
          alert_error "Received signal, exiting"
          raise
        # Deliberate catch-all: one broken gem is reported and skipped
        # rather than aborting the whole run.
        rescue Exception => e
          alert_error "Unable to process #{gemfile}\n#{e.message} (#{e.class})\n\t#{e.backtrace.join "\n\t"}"
        end
      end

      progress.done
    end

    index
  end
compact_specs(specs) click to toggle source

Compacts Marshal output for the specs index data source by using identical objects as much as possible.

# File lib/rubygems/indexer.rb, line 495
  ##
  # Returns a copy of +specs+ (an array of [name, version, platform]
  # triples) in which equal names, versions and platforms are all
  # represented by the first object seen with that value.  Sharing
  # identical objects lets Marshal emit back-references instead of
  # repeating the data.
  def compact_specs(specs)
    name_pool, version_pool, platform_pool = {}, {}, {}

    # Returns the pooled object equal to +value+, registering +value+
    # itself the first time it is seen.
    canonical = lambda do |pool, value|
      pool[value] = value unless pool.key? value
      pool[value]
    end

    specs.map do |(name, version, platform)|
      [canonical.call(name_pool, name),
       canonical.call(version_pool, version),
       canonical.call(platform_pool, platform)]
    end
  end
compress(filename, extension) click to toggle source

Compress filename with extension.

# File lib/rubygems/indexer.rb, line 512
  ##
  # Deflates the contents of +filename+ and writes the result next to it as
  # "<filename>.<extension>".  The original file is left in place.
  def compress(filename, extension)
    deflated = Gem.deflate(Gem.read_binary(filename))

    open("#{filename}.#{extension}", 'wb') { |out| out.write deflated }
  end
compress_indicies() click to toggle source

Compresses the indices on disk.

# File lib/rubygems/indexer.rb, line 465
  ##
  # Compresses the index files that were built.  Legacy indexes are
  # deflated to ".rz"/".Z" files and each result is verified with paranoid;
  # modern indexes are gzipped.
  def compress_indicies
    say "Compressing indicies"

    Gem.time 'Compressed indicies' do
      if @build_legacy then
        legacy = [[@quick_index, 'rz'], [@latest_index, 'rz'],
                  [@marshal_index, 'Z'], [@master_index, 'Z']]

        legacy.each do |path, extension|
          compress path, extension
          paranoid path, extension # verify the compressed copy round-trips
        end
      end

      if @build_modern then
        modern = [@specs_index, @latest_specs_index, @prerelease_specs_index]
        modern.map { |path| gzip path }.last
      end
    end
  end
gem_file_list() click to toggle source

List of gem file names to index.

# File lib/rubygems/indexer.rb, line 525
  ##
  # Returns the paths of all "*.gem" files directly inside the "gems"
  # subdirectory of the destination directory.
  def gem_file_list
    pattern = File.join @dest_directory, "gems", "*.gem"

    Dir.glob pattern
  end
generate_index() click to toggle source

Builds and installs indicies.

# File lib/rubygems/indexer.rb, line 532
  ##
  # Runs a full index build: creates the temporary build directories,
  # collects the specs, builds all indexes and installs them.  The build
  # directory is always removed afterwards.
  def generate_index
    begin
      make_temp_directories
      build_indicies collect_specs
      install_indicies
    rescue SignalException
      # Deliberately empty: a signal stops the build without propagating;
      # cleanup still runs below.
    ensure
      FileUtils.rm_rf @directory
    end
  end
gzip(filename) click to toggle source

Zlib::GzipWriter wrapper that gzips filename on disk.

# File lib/rubygems/indexer.rb, line 545
  ##
  # Writes a gzipped copy of +filename+ to "<filename>.gz", leaving the
  # original file in place.
  def gzip(filename)
    Zlib::GzipWriter.open("#{filename}.gz") { |gz| gz.write Gem.read_binary(filename) }
  end
install_indicies() click to toggle source

Install generated indicies into the destination directory.

# File lib/rubygems/indexer.rb, line 554
  ##
  # Moves everything registered in @files from the build directory into the
  # destination directory, replacing whatever is already there.
  #
  # When the whole quick directory is registered it is moved as one unit;
  # otherwise a registered Marshal quick directory is moved into its place
  # under the destination's quick directory on its own.
  def install_indicies
    verbose = Gem.configuration.really_verbose

    say "Moving index into production dir #{@dest_directory}" if verbose

    pending = @files.dup

    if pending.include? @quick_dir then
      # The Marshal quick directory lives inside the quick directory and
      # travels with it.
      pending.delete @quick_marshal_dir
    elsif pending.include? @quick_marshal_dir then
      pending.delete @quick_marshal_dir

      relative = @quick_marshal_dir.sub @directory, ''
      target = File.join @dest_directory, relative

      FileUtils.mkdir_p File.dirname(target), :verbose => verbose
      FileUtils.rm_rf target, :verbose => verbose
      FileUtils.mv @quick_marshal_dir, target, :verbose => verbose,
                   :force => true
    end

    # Paths relative to the build directory (each keeps its leading "/").
    relative_paths = pending.map { |path| path.sub @directory, '' }

    relative_paths.each do |relative|
      source = File.join @directory, relative
      target = File.join @dest_directory, relative

      FileUtils.rm_rf target, :verbose => verbose
      FileUtils.mv source, @dest_directory, :verbose => verbose,
                   :force => true
    end
  end
make_temp_directories() click to toggle source

Make directories for index generation

# File lib/rubygems/indexer.rb, line 592
  ##
  # Recreates the build directory from scratch: removes any existing one,
  # creates it again with mode 0700, then creates the Marshal quick
  # directory.
  def make_temp_directories
    FileUtils.rm_rf(@directory)
    FileUtils.mkdir_p(@directory, :mode => 0700)
    FileUtils.mkdir_p(@quick_marshal_dir)
  end
paranoid(path, extension) click to toggle source

Ensure path and path with extension are identical.

# File lib/rubygems/indexer.rb, line 601
  ##
  # Verifies that "<path>.<extension>" inflates to exactly the contents of
  # +path+.
  #
  # Raises RuntimeError naming both files when the contents differ.
  def paranoid(path, extension)
    # Named once so the error message below can refer to it.
    compressed_path = "#{path}.#{extension}"

    data = Gem.read_binary path
    compressed_data = Gem.read_binary compressed_path

    unless data == Gem.inflate(compressed_data) then
      raise "Compressed file #{compressed_path} does not match uncompressed file #{path}"
    end
  end
sanitize(spec) click to toggle source

Sanitize the descriptive fields in the spec. Sometimes non-ASCII characters will garble the site index. Non-ASCII characters will be replaced by their XML entity equivalent.

# File lib/rubygems/indexer.rb, line 615
  ##
  # Runs the descriptive fields of +spec+ (summary, description,
  # post-install message and every author) through sanitize_string.
  # Mutates and returns +spec+.
  def sanitize(spec)
    [:summary, :description, :post_install_message].each do |field|
      spec.send "#{field}=", sanitize_string(spec.send(field))
    end

    spec.authors = spec.authors.map { |author| sanitize_string author }

    spec
  end
sanitize_string(string) click to toggle source

Sanitize a single string.

# File lib/rubygems/indexer.rb, line 627
  ##
  # Returns +string+ converted with #to_s and then String#to_xs.  nil and
  # false are returned unchanged.
  def sanitize_string(string)
    return string unless string

    # HACK the #to_s is needed because some gemspecs carry non-String
    # values here (RSpec has an Array of Arrays of Strings for authors).
    # Bad values would be better rejected when the gemspec is generated.
    string.to_s.to_xs
  end
update_index() click to toggle source

Performs an in-place update of the repository from newly added gems. Only works for modern indices, and sets build_legacy to false when run.

# File lib/rubygems/indexer.rb, line 638
  ##
  # Updates the installed modern indexes in place from gems added since
  # the destination specs index was last written.  Sets @build_legacy to
  # false.
  #
  # A gem counts as new when its mtime is not older than the destination
  # specs index.  Every installed file is stamped with the mtime of the
  # newest gem in the repository, which makes the specs index the baseline
  # for the next run.  Calls terminate_interaction when nothing is new.
  # The build directory is always removed on the way out.
  def update_index
    @build_legacy = false

    make_temp_directories

    specs_mtime = File.stat(@dest_specs_index).mtime
    newest_mtime = Time.at 0

    updated_gems = gem_file_list.select do |gem|
      gem_mtime = File.stat(gem).mtime
      newest_mtime = gem_mtime if gem_mtime > newest_mtime
      gem_mtime >= specs_mtime
    end

    if updated_gems.empty? then
      say 'No new gems'
      terminate_interaction 0
    end

    index = collect_specs updated_gems

    files = build_marshal_gemspecs index

    Gem.time 'Updated indexes' do
      update_specs_index index.released_gems, @dest_specs_index, @specs_index
      # The latest index is fed every released gem; update_specs_index does
      # not trim it back to the minimal "latest" set.
      update_specs_index index.released_gems, @dest_latest_specs_index, @latest_specs_index
      update_specs_index(index.prerelease_gems, @dest_prerelease_specs_index,
                         @prerelease_specs_index)
    end

    compress_indicies

    verbose = Gem.configuration.really_verbose

    say "Updating production dir #{@dest_directory}" if verbose

    files << @specs_index
    files << "#{@specs_index}.gz"
    files << @latest_specs_index
    files << "#{@latest_specs_index}.gz"
    files << @prerelease_specs_index
    files << "#{@prerelease_specs_index}.gz"

    files = files.map do |path|
      path.sub @directory, ''
    end

    files.each do |file|
      src_name = File.join @directory, file
      # Target the file itself, not its directory, so the utime call below
      # stamps the installed file.
      dst_name = File.join @dest_directory, file

      FileUtils.mkdir_p File.dirname(dst_name), :verbose => verbose
      FileUtils.mv src_name, dst_name, :verbose => verbose,
                   :force => true

      File.utime newest_mtime, newest_mtime, dst_name
    end
  ensure
    # Mirrors generate_index: never leave the build directory behind, even
    # when terminate_interaction or an error cuts the update short.
    FileUtils.rm_rf @directory
  end
update_specs_index(index, source, dest) click to toggle source

Combines specs in index and source then writes out a new copy to dest. For a latest index, does not ensure the new file is minimal.

# File lib/rubygems/indexer.rb, line 700
  ##
  # Merges the specs in +index+ into the Marshal-dumped
  # [name, version, platform] list stored in +source+ and writes the
  # de-duplicated, sorted, compacted result to +dest+.  An empty or missing
  # platform is recorded as Gem::Platform::RUBY.
  def update_specs_index(index, source, dest)
    existing = Marshal.load Gem.read_binary(source)

    additions = index.map do |_, spec|
      platform = spec.original_platform
      platform = Gem::Platform::RUBY if platform.nil? or platform.empty?

      [spec.name, spec.version, platform]
    end

    merged = compact_specs((existing + additions).uniq.sort)

    open(dest, 'wb') { |io| Marshal.dump merged, io }
  end

Disabled; run with --debug to generate this.

[Validate]

Generated with the Darkfish Rdoc Generator 1.1.6.