2 use 5.008001; # sane UTF-8 support
5 package YAML::Tiny; # git description: v1.69-8-g2c1e266
6 # XXX-INGY is 5.8.1 too old/broken for utf8?
7 # XXX-XDG Lancaster consensus was that it was sufficient until
10 our $VERSION = '1.70';
12 #####################################################################
15 # These are the currently documented API functions/methods and
19 our @ISA = qw{ Exporter };
20 our @EXPORT = qw{ Load Dump };
21 our @EXPORT_OK = qw{ LoadFile DumpFile freeze thaw };
24 # Functional/Export API:
27 return YAML::Tiny->new(@_)->_dump_string;
30 # XXX-INGY Returning last document seems a bad behavior.
31 # XXX-XDG I think first would seem more natural, but I don't know
32 # that it's worth changing now
34 my $self = YAML::Tiny->_load_string(@_);
38 # To match YAML.pm, return the last document
43 # XXX-INGY Do we really need freeze and thaw?
44 # XXX-XDG I don't think so. I'd support deprecating them.
52 return YAML::Tiny->new(@_)->_dump_file($file);
57 my $self = YAML::Tiny->_load_file($file);
61 # Return only the last document to match YAML.pm,
68 # Object Oriented API:
70 # Create an empty YAML::Tiny object
71 # XXX-INGY Why do we use ARRAY object?
72 # NOTE: I get it now, but I think it's confusing and not needed.
73 # Will change it on a branch later, for review.
75 # XXX-XDG I don't support changing it yet. It's a very well-documented
76 # "API" of YAML::Tiny. I'd support deprecating it, but Adam suggested
77 # we not change it until YAML.pm's own OO API is established so that
78 # users only have one API change to digest, not two
84 # XXX-INGY It probably doesn't matter, and it's probably too late to
85 # change, but 'read/write' are the wrong names. Read and Write
86 # are actions that take data from storage to memory
87 # characters/strings. These take the data to/from storage to native
88 # Perl objects, which the terms dump and load are meant. As long as
89 # this is a legacy quirk to YAML::Tiny it's ok, but I'd prefer not
90 # to add new {read,write}_* methods to this API.
94 $self->_load_string(@_);
99 $self->_dump_string(@_);
104 $self->_load_file(@_);
109 $self->_dump_file(@_);
115 #####################################################################
118 # Printed form of the unprintable characters in the lowest range
119 # of ASCII characters, listed by ASCII ordinal position.
120 my @UNPRINTABLE = qw(
121 0 x01 x02 x03 x04 x05 x06 a
123 x10 x11 x12 x13 x14 x15 x16 x17
124 x18 x19 x1A e x1C x1D x1E x1F
127 # Printable characters for escapes
129 0 => "\x00", z => "\x00", N => "\x85",
130 a => "\x07", b => "\x08", t => "\x09",
131 n => "\x0a", v => "\x0b", f => "\x0c",
132 r => "\x0d", e => "\x1b", '\\' => '\\',
136 # I(ngy) need to decide if these values should be quoted in
137 # YAML::Tiny or not. Probably yes.
139 # These 3 values have special meaning when unquoted and using the
140 # default YAML schema. They need quotes if they are strings.
141 my %QUOTE = map { $_ => 1 } qw{
145 # The commented out form is simpler, but overloaded the Perl regex
146 # engine due to recursion and backtracking problems on strings
147 # larger than 32,000ish characters. Keep it for reference purposes.
148 # qr/\"((?:\\.|[^\"])*)\"/
149 my $re_capture_double_quoted = qr/\"([^\\"]*(?:\\.[^\\"]*)*)\"/;
150 my $re_capture_single_quoted = qr/\'([^\']*(?:\'\'[^\']*)*)\'/;
151 # unquoted re gets trailing space that needs to be stripped
152 my $re_capture_unquoted_key = qr/([^:]+(?::+\S(?:[^:]*|.*?(?=:)))*)(?=\s*\:(?:\s+|$))/;
153 my $re_trailing_comment = qr/(?:\s+\#.*)?/;
154 my $re_key_value_separator = qr/\s*:(?:\s+(?:\#.*)?|$)/;
160 #####################################################################
161 # YAML::Tiny Implementation.
163 # These are the private methods that do all the work. They may change
170 # Create an object from a file
172 my $class = ref $_[0] ? ref shift : shift;
175 my $file = shift or $class->_error( 'You did not specify a file name' );
176 $class->_error( "File '$file' does not exist" )
178 $class->_error( "'$file' is a directory, not a file" )
180 $class->_error( "Insufficient permissions to read '$file'" )
183 # Open unbuffered with strict UTF-8 decoding and no translation layers
184 open( my $fh, "<:unix:encoding(UTF-8)", $file );
186 $class->_error("Failed to open file '$file': $!");
189 # flock if available (or warn if not possible for OS-specific reasons)
190 if ( _can_flock() ) {
191 flock( $fh, Fcntl::LOCK_SH() )
192 or warn "Couldn't lock '$file' for reading: $!";
196 my $contents = eval {
197 use warnings FATAL => 'utf8';
201 if ( my $err = $@ ) {
202 $class->_error("Error reading from file '$file': $err");
205 # close the file (release the lock)
206 unless ( close $fh ) {
207 $class->_error("Failed to close file '$file': $!");
210 $class->_load_string( $contents );
213 # Create an object from a string
215 my $class = ref $_[0] ? ref shift : shift;
216 my $self = bless [], $class;
219 unless ( defined $string ) {
220 die \"Did not provide a string to load";
223 # Check if Perl has it marked as characters, but it's internally
224 # inconsistent. E.g. maybe latin1 got read on a :utf8 layer
225 if ( utf8::is_utf8($string) && ! utf8::valid($string) ) {
227 Read an invalid UTF-8 string (maybe mixed UTF-8 and 8-bit character set).
228 Did you decode with lax ":utf8" instead of strict ":encoding(UTF-8)"?
232 # Ensure Unicode character semantics, even for 0x80-0xff
233 utf8::upgrade($string);
235 # Check for and strip any leading UTF-8 BOM
236 $string =~ s/^\x{FEFF}//;
238 # Check for some special cases
239 return $self unless length $string;
241 # Split the file into lines
242 my @lines = grep { ! /^\s*(?:\#.*)?\z/ }
243 split /(?:\015{1,2}\012|\015|\012)/, $string;
245 # Strip the initial YAML header
246 @lines and $lines[0] =~ /^\%YAML[: ][\d\.]+.*\z/ and shift @lines;
251 # Do we have a document header?
252 if ( $lines[0] =~ /^---\s*(?:(.+)\s*)?\z/ ) {
253 # Handle scalar documents
255 if ( defined $1 and $1 !~ /^(?:\#.+|\%YAML[: ][\d\.]+)\z/ ) {
257 $self->_load_scalar( "$1", [ undef ], \@lines );
263 if ( ! @lines or $lines[0] =~ /^(?:---|\.\.\.)/ ) {
266 while ( @lines and $lines[0] !~ /^---/ ) {
271 # XXX The final '-+$' is to look for -- which ends up being an
273 } elsif ( ! $in_document && @$self ) {
274 # only the first document can be explicit
275 die \"YAML::Tiny failed to classify the line '$lines[0]'";
276 } elsif ( $lines[0] =~ /^\s*\-(?:\s|$|-+$)/ ) {
277 # An array at the root
279 push @$self, $document;
280 $self->_load_array( $document, [ 0 ], \@lines );
282 } elsif ( $lines[0] =~ /^(\s*)\S/ ) {
285 push @$self, $document;
286 $self->_load_hash( $document, [ length($1) ], \@lines );
289 # Shouldn't get here. @lines have whitespace-only lines
290 # stripped, and previous match is a line with any
291 # non-whitespace. So this clause should only be reachable via
292 # a perlbug where \s is not symmetric with \S
294 # uncoverable statement
295 die \"YAML::Tiny failed to classify the line '$lines[0]'";
300 if ( ref $err eq 'SCALAR' ) {
301 $self->_error(${$err});
309 sub _unquote_single {
310 my ($self, $string) = @_;
311 return '' unless length $string;
312 $string =~ s/\'\'/\'/g;
316 sub _unquote_double {
317 my ($self, $string) = @_;
318 return '' unless length $string;
319 $string =~ s/\\"/"/g;
321 s{\\([Nnever\\fartz0b]|x([0-9a-fA-F]{2}))}
322 {(length($1)>1)?pack("H2",$2):$UNESCAPES{$1}}gex;
326 # Load a YAML scalar string to the actual Perl scalar
328 my ($self, $string, $indent, $lines) = @_;
330 # Trim trailing whitespace
331 $string =~ s/\s*\z//;
333 # Explitic null/undef
334 return undef if $string eq '~';
337 if ( $string =~ /^$re_capture_single_quoted$re_trailing_comment\z/ ) {
338 return $self->_unquote_single($1);
342 if ( $string =~ /^$re_capture_double_quoted$re_trailing_comment\z/ ) {
343 return $self->_unquote_double($1);
347 if ( $string =~ /^[\'\"!&]/ ) {
348 die \"YAML::Tiny does not support a feature in line '$string'";
350 return {} if $string =~ /^{}(?:\s+\#.*)?\z/;
351 return [] if $string =~ /^\[\](?:\s+\#.*)?\z/;
353 # Regular unquoted string
354 if ( $string !~ /^[>|]/ ) {
355 die \"YAML::Tiny found illegal characters in plain scalar: '$string'"
356 if $string =~ /^(?:-(?:\s|$)|[\@\%\`])/ or
357 $string =~ /:(?:\s|$)/;
358 $string =~ s/\s+#.*\z//;
363 die \"YAML::Tiny failed to find multi-line scalar content" unless @$lines;
365 # Check the indent depth
366 $lines->[0] =~ /^(\s*)/;
367 $indent->[-1] = length("$1");
368 if ( defined $indent->[-2] and $indent->[-1] <= $indent->[-2] ) {
369 die \"YAML::Tiny found bad indenting in line '$lines->[0]'";
375 $lines->[0] =~ /^(\s*)/;
376 last unless length($1) >= $indent->[-1];
377 push @multiline, substr(shift(@$lines), length($1));
380 my $j = (substr($string, 0, 1) eq '>') ? ' ' : "\n";
381 my $t = (substr($string, 1, 1) eq '-') ? '' : "\n";
382 return join( $j, @multiline ) . $t;
387 my ($self, $array, $indent, $lines) = @_;
390 # Check for a new document
391 if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) {
392 while ( @$lines and $lines->[0] !~ /^---/ ) {
398 # Check the indent level
399 $lines->[0] =~ /^(\s*)/;
400 if ( length($1) < $indent->[-1] ) {
402 } elsif ( length($1) > $indent->[-1] ) {
403 die \"YAML::Tiny found bad indenting in line '$lines->[0]'";
406 if ( $lines->[0] =~ /^(\s*\-\s+)[^\'\"]\S*\s*:(?:\s+|$)/ ) {
408 my $indent2 = length("$1");
409 $lines->[0] =~ s/-/ /;
411 $self->_load_hash( $array->[-1], [ @$indent, $indent2 ], $lines );
413 } elsif ( $lines->[0] =~ /^\s*\-\s*\z/ ) {
419 if ( $lines->[0] =~ /^(\s*)\-/ ) {
420 my $indent2 = length("$1");
421 if ( $indent->[-1] == $indent2 ) {
428 $array->[-1], [ @$indent, $indent2 ], $lines
432 } elsif ( $lines->[0] =~ /^(\s*)\S/ ) {
435 $array->[-1], [ @$indent, length("$1") ], $lines
439 die \"YAML::Tiny failed to classify line '$lines->[0]'";
442 } elsif ( $lines->[0] =~ /^\s*\-(\s*)(.+?)\s*\z/ ) {
443 # Array entry with a value
445 push @$array, $self->_load_scalar(
446 "$2", [ @$indent, undef ], $lines
449 } elsif ( defined $indent->[-2] and $indent->[-1] == $indent->[-2] ) {
450 # This is probably a structure like the following...
456 # ... so lets return and let the hash parser handle it
460 die \"YAML::Tiny failed to classify line '$lines->[0]'";
469 my ($self, $hash, $indent, $lines) = @_;
472 # Check for a new document
473 if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) {
474 while ( @$lines and $lines->[0] !~ /^---/ ) {
480 # Check the indent level
481 $lines->[0] =~ /^(\s*)/;
482 if ( length($1) < $indent->[-1] ) {
484 } elsif ( length($1) > $indent->[-1] ) {
485 die \"YAML::Tiny found bad indenting in line '$lines->[0]'";
493 s/^\s*$re_capture_single_quoted$re_key_value_separator//
495 $key = $self->_unquote_single($1);
497 elsif ( $lines->[0] =~
498 s/^\s*$re_capture_double_quoted$re_key_value_separator//
500 $key = $self->_unquote_double($1);
502 elsif ( $lines->[0] =~
503 s/^\s*$re_capture_unquoted_key$re_key_value_separator//
508 elsif ( $lines->[0] =~ /^\s*\?/ ) {
509 die \"YAML::Tiny does not support a feature in line '$lines->[0]'";
512 die \"YAML::Tiny failed to classify line '$lines->[0]'";
515 if ( exists $hash->{$key} ) {
516 warn "YAML::Tiny found a duplicate key '$key' in line '$lines->[0]'";
519 # Do we have a value?
520 if ( length $lines->[0] ) {
522 $hash->{$key} = $self->_load_scalar(
523 shift(@$lines), [ @$indent, undef ], $lines
529 $hash->{$key} = undef;
532 if ( $lines->[0] =~ /^(\s*)-/ ) {
535 $hash->{$key}, [ @$indent, length($1) ], $lines
537 } elsif ( $lines->[0] =~ /^(\s*)./ ) {
538 my $indent2 = length("$1");
539 if ( $indent->[-1] >= $indent2 ) {
541 $hash->{$key} = undef;
545 $hash->{$key}, [ @$indent, length($1) ], $lines
559 # Save an object to a file
566 my $file = shift or $self->_error( 'You did not specify a file name' );
569 # flock if available (or warn if not possible for OS-specific reasons)
570 if ( _can_flock() ) {
571 # Open without truncation (truncate comes after lock)
572 my $flags = Fcntl::O_WRONLY()|Fcntl::O_CREAT();
573 sysopen( $fh, $file, $flags )
574 or $self->_error("Failed to open file '$file' for writing: $!");
576 # Use no translation and strict UTF-8
577 binmode( $fh, ":raw:encoding(UTF-8)");
579 flock( $fh, Fcntl::LOCK_EX() )
580 or warn "Couldn't lock '$file' for reading: $!";
582 # truncate and spew contents
587 open $fh, ">:unix:encoding(UTF-8)", $file;
590 # serialize and spew to the handle
591 print {$fh} $self->_dump_string;
593 # close the file (release the lock)
594 unless ( close $fh ) {
595 $self->_error("Failed to close file '$file': $!");
601 # Save an object to a string
604 return '' unless ref $self && @$self;
606 # Iterate over the documents
611 foreach my $cursor ( @$self ) {
615 if ( ! defined $cursor ) {
619 } elsif ( ! ref $cursor ) {
620 $lines[-1] .= ' ' . $self->_dump_scalar( $cursor );
623 } elsif ( ref $cursor eq 'ARRAY' ) {
624 unless ( @$cursor ) {
628 push @lines, $self->_dump_array( $cursor, $indent, {} );
631 } elsif ( ref $cursor eq 'HASH' ) {
632 unless ( %$cursor ) {
636 push @lines, $self->_dump_hash( $cursor, $indent, {} );
639 die \("Cannot serialize " . ref($cursor));
643 if ( ref $@ eq 'SCALAR' ) {
644 $self->_error(${$@});
649 join '', map { "$_\n" } @lines;
652 sub _has_internal_string_value {
654 my $b_obj = B::svref_2object(\$value); # for round trip problem
655 return $b_obj->FLAGS & B::SVf_POK();
661 # Check this before checking length or it winds up looking like a string!
662 my $has_string_flag = _has_internal_string_value($string);
663 return '~' unless defined $string;
664 return "''" unless length $string;
665 if (Scalar::Util::looks_like_number($string)) {
666 # keys and values that have been used as strings get quoted
667 if ( $is_key || $has_string_flag ) {
668 return qq['$string'];
674 if ( $string =~ /[\x00-\x09\x0b-\x0d\x0e-\x1f\x7f-\x9f\'\n]/ ) {
675 $string =~ s/\\/\\\\/g;
676 $string =~ s/"/\\"/g;
677 $string =~ s/\n/\\n/g;
678 $string =~ s/[\x85]/\\N/g;
679 $string =~ s/([\x00-\x1f])/\\$UNPRINTABLE[ord($1)]/g;
680 $string =~ s/([\x7f-\x9f])/'\x' . sprintf("%X",ord($1))/ge;
681 return qq|"$string"|;
683 if ( $string =~ /(?:^[~!@#%&*|>?:,'"`{}\[\]]|^-+$|\s|:\z)/ or
692 my ($self, $array, $indent, $seen) = @_;
693 if ( $seen->{refaddr($array)}++ ) {
694 die \"YAML::Tiny does not support circular references";
697 foreach my $el ( @$array ) {
698 my $line = (' ' x $indent) . '-';
701 $line .= ' ' . $self->_dump_scalar( $el );
704 } elsif ( $type eq 'ARRAY' ) {
707 push @lines, $self->_dump_array( $el, $indent + 1, $seen );
713 } elsif ( $type eq 'HASH' ) {
716 push @lines, $self->_dump_hash( $el, $indent + 1, $seen );
723 die \"YAML::Tiny does not support $type references";
731 my ($self, $hash, $indent, $seen) = @_;
732 if ( $seen->{refaddr($hash)}++ ) {
733 die \"YAML::Tiny does not support circular references";
736 foreach my $name ( sort keys %$hash ) {
737 my $el = $hash->{$name};
738 my $line = (' ' x $indent) . $self->_dump_scalar($name, 1) . ":";
741 $line .= ' ' . $self->_dump_scalar( $el );
744 } elsif ( $type eq 'ARRAY' ) {
747 push @lines, $self->_dump_array( $el, $indent + 1, $seen );
753 } elsif ( $type eq 'HASH' ) {
756 push @lines, $self->_dump_hash( $el, $indent + 1, $seen );
763 die \"YAML::Tiny does not support $type references";
772 #####################################################################
773 # DEPRECATED API methods:
775 # Error storage (DEPRECATED as of 1.57)
782 $errstr =~ s/ at \S+ line \d+.*//;
783 Carp::croak( $errstr );
790 Carp::carp( "YAML::Tiny->errstr and \$YAML::Tiny::errstr is deprecated" )
791 unless $errstr_warned++;
798 #####################################################################
799 # Helper functions. Possibly not needed.
802 # Use to detect nv or iv
805 # XXX-INGY Is flock YAML::Tiny's responsibility?
806 # Some platforms can't flock :-(
807 # XXX-XDG I think it is. When reading and writing files, we ought
808 # to be locking whenever possible. People (foolishly) use YAML
809 # files for things like session storage, which has race issues.
812 if ( defined $HAS_FLOCK ) {
817 my $c = \%Config::Config;
818 $HAS_FLOCK = grep { $c->{$_} } qw/d_flock d_fcntl_can_lock d_lockf/;
819 require Fcntl if $HAS_FLOCK;
825 # XXX-INGY Is this core in 5.8.1? Can we remove this?
826 # XXX-XDG Scalar::Util 1.18 didn't land until 5.8.8, so we need this
827 #####################################################################
828 # Use Scalar::Util if possible, otherwise emulate it
833 if ( eval { Scalar::Util->VERSION(1.18); } ) {
834 *refaddr = *Scalar::Util::refaddr;
838 # Scalar::Util failed to load or too old
840 my $pkg = ref($_[0]) or return undef;
841 if ( !! UNIVERSAL::can($_[0], 'can') ) {
842 bless $_[0], 'Scalar::Util::Fake';
846 "$_[0]" =~ /0x(\w+)/;
847 my $i = do { no warnings 'portable'; hex $1 };
848 bless $_[0], $pkg if defined $pkg;
855 delete $YAML::Tiny::{refaddr};
859 # XXX-INGY Doc notes I'm putting up here. Changing the doc when it's wrong
860 # but leaving grey area stuff up here.
862 # I would like to change Read/Write to Load/Dump below without
863 # changing the actual API names.
865 # It might be better to put Load/Dump API in the SYNOPSIS instead of the
868 # null and bool explanations may be outdated.