function [justified_txt, jtc] = txt_justify( txt , max_length , options )
% Copyright (C) 2005,2006,2007,2008,2009 Daniele de Rigo
%
% This file is part of Mastrave.
%
% Mastrave is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Mastrave is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Mastrave.  If not, see <http://www.gnu.org/licenses/>.
%
% ---------------------------------------------------------------------------
%
% [justified_txt, jtc] = txt_justify( txt , max_length , options )
%
% Justify the string <txt> ensuring that the maximum line-length will be
% <max_length> and performing a customized transformation that can be set by
% configuring <options>.
% Returns the justified string <justified_txt> as first output argument.
% The optional second output argument is the cell-array <jtc> each cell of
% it is a row of the justified text.
%
% If <txt> is empty or contains only white-space characters it is returned
% unchanged ( <jtc> is then { txt } ) and <options> is not inspected.
% Otherwise the last cell of <jtc> is always an empty row, so that
% <justified_txt> always ends with a line terminator.
%
% Input arguments:
%
% <txt>        ::string::
%              character array
%
% <max_length> ::scalar_natural_nonzero::
%              maximum number of characters per line
%
% <options>    ::cellstring::
%              one or more of the following strings:
%              [ 'left' | 'center' | 'right' | 'justified' ]
%                 returns a left-aligned|middle-aligned|right-aligned|justified
%                 version of <txt>
%              'un_wordwrap'
%                 erases all spaces inside empty lines (lines filled with
%                 spaces only) and removes all single line terminators leaving
%                 untouched all consecutively repeated line terminators
%                 (line terminators which directly follow one of the
%                 characters '.', '?' or '!' are preserved too)
%              'unique_spaced'
%                 removes all duplicated spaces, and all starting and ending
%                 spaces leaving untouched the other "blank" characters
%                 ( \t, \n, \r, ...)
%              if omitted, default value is 'justified'
%              if <options> needs to invoke multiple option-strings, they must
%              be passed as elements of a cell-array of strings (an empty cell
%              array is interpreted as if it were { 'justified' } ).
%
%
% Example of usage:
%
%    s = [  'Lorem ipsum dolor sit amet, consectetur adipisici elit, sed '  ...
%           'eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut ' ...
%           'enim ad minim veniam, quis nostrud exercitation ullamco '      ...
%           'laboris nisi ut aliquid ex ea commodi consequat. Quis aute '   ...
%           'iure reprehenderit in voluptate velit esse cillum dolore eu '  ...
%           'fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non ' ...
%           'proident, sunt in culpa qui officia deserunt mollit anim id '  ...
%           'est laborum.'  ];
%
%    [js c] = txt_justify( s, 50)
%    [js c] = txt_justify( s, 50,  'justified'  )
%    [js c] = txt_justify( s, 50,  'right'      )
%    [js c] = txt_justify( s, 50,  'center'     )
%    [js c] = txt_justify( s, 50,  'left'       )
%
%    wordwrapped = sprintf([  'Lorem ipsum dolor sit amet, consectetur\n'   ...
%                             'adipisici elit, sed eiusmod tempor\n'        ...
%                             'incidunt ut labore et dolore magna\n'        ...
%                             'aliqua.   Ut enim ad minim veniam, quis\n'   ...
%                             'nostrud exercitation ullamco laboris\n'      ...
%                             'nisi ut aliquid ex ea commodi consequat.\n'  ...
%                             '\n'                                          ...
%                             '   Quis aute iure reprehenderit in\n'        ...
%                             '   voluptate velit esse cillum dolore eu\n'  ...
%                             '   fugiat nulla pariatur.\n'                 ...
%                             '     \n'                                     ...
%                             'Excepteur       sint\n'                      ...
%                             'obcaecat cupiditat non proident, sunt in\n'  ...
%                             'culpa qui officia deserunt mollit anim\n'    ...
%                             'id est laborum.'                           ])
%
%     [js c] = txt_justify( wordwrapped, 50,  'left'  )
%     [js c] = txt_justify( wordwrapped, 50,  'right'  )
%     [js c] = txt_justify( wordwrapped, 50,  'center'  )
%     [js c] = txt_justify( wordwrapped, 50,  'justified'  )
%     [js c] = txt_justify( wordwrapped, 50,  'un_wordwrap'  )
%     [js c] = txt_justify( wordwrapped, 50,  {  'un_wordwrap'  'justified'  } )
%     [js c] = txt_justify( js, 40,  {  'un_wordwrap'  'unique_spaced'  } )
%     [js c] = txt_justify( js, 40, ...
%                           {  'un_wordwrap'  'unique_spaced'  'justified'  } )
%
%
% version: 0.5.8


where     = sprintf(  '(in function %s)'  , mfilename );
% default outputs, returned as they are when <txt> holds nothing to justify
justified_txt =  txt;
jtc           = {txt};
usage_msg = sprintf(                                       ...
   [                                                       ...
      'usage: txt_justify( txt , max_length )\n'           ...
      '       txt_justify( txt , max_length , options )\n' ...
   ]                                                       ...
);

opt_list = {  'left'         ;  'right'        ;  'center'          ; ...
              'justified'    ;  'un_wordwrap'  ;  'unique_spaced'  };
just_id  = 1:4;  % mutually exclusive justification-options

% mastrave-kernel: this function is used by check_is, therefore all the
% precondition checks are performed without invoking check_is or check_nargin
% Every failure prints its diagnostic on stderr and then raises an error
% whose message is a single blank.

if nargin < 2
   fprintf( 2 ,  '%s'  , usage_msg );
   fprintf( 2 ,  'error: not enough input arguments.\n'  );
   error(  ' '  );
end
if nargin > 3
   fprintf( 2 ,  '%s'  , usage_msg );
   fprintf( 2 ,  'error: too many input arguments.\n'  );
   error(  ' '  );
end


% check_is( max_length ,  'scalar_natural_nonzero'  , ... )
if(                                     ...
   ~isnumeric( max_length )          || ...
   numel( max_length ) ~= 1          || ...
          max_length    < 1          || ...
   round( max_length ) ~= max_length    ...
)
   fprintf(                                                    ...
      2 ,                                                      ...
      [                                                        ...
         'error %s: the 2nd argument <max_length> must be a '  ...
         'positive scalar integer.\n'                          ...
      ] ,                                                      ...
      where                                                    ...
   );
   error(  ' '  );
end

% check_is( txt ,  'string'  , ... )
if( ~ischar( txt ) )
   fprintf(                                                      ...
      2 ,                                                        ...
      'error %s: the first argument <txt> must be a string.\n' , ...
      where                                                      ...
   );
   error(  ' '  );
end

% nothing to justify: return <txt> untouched (this also covers an empty <txt>)
if all( isspace( txt ) ) return; end

% check_is( options ,  'cellstring'  , ... )
if nargin < 3
   options = {  'justified'  };
else
   if isempty(options)
      options = {  'justified'  };
   end
   if ischar( options )
      options = { options };
   end
   if iscell( options )
      is_str = cellfun(  'isclass'  , options ,  'char'  );
      % (:) makes the test independent from the shape of the cell-array
      if ~all( is_str(:) )
         % <options> may hold several non-string elements: report only the
         % first one, so that class() and fprintf() receive a single value
         p = find( ~is_str(:) );
         p = p(1);
         fprintf(                                                    ...
            2 ,                                                      ...
            [                                                        ...
               'error %s: the 3rd argument <options> must be a '     ...
               'string or a cell-array of strings.\n'                ...
            ] ,                                                      ...
            where                                                    ...
         );
         fprintf(                                                    ...
            2 ,                                                      ...
            [                                                        ...
               'error: <options> at position %d is of type "%s" '    ...
               'instead of "char".\n'                                ...
            ] ,                                                      ...
            p ,                                                      ...
            class( options{p} )                                      ...
         );
         error(  ' '  );
      end
      options = { options{:} }; % ensure to have a one-row cell-array
      % <row> is the position in <opt_list> of each recognised option and
      % <col> is its position in <options>
      [row,col] = find( strcmp( ...
                     repmat( options  , numel(opt_list), 1              ), ...
                     repmat( opt_list , 1              , numel(options) )  ...
                  ) );
      if numel( col ) < numel( options )
         % at least one option has not been recognised: report the first one
         id        = ones( 1, numel(options) );
         id( col ) = 0;
         id        = find( id );
         fprintf(                                                    ...
            2 ,                                                      ...
            [                                                        ...
               'error %s: in 3rd argument <options>.\n'              ...
               'Invalid option: "%s"\n'                              ...
            ] ,                                                      ...
            where ,                                                  ...
            options{ id(1) }                                         ...
         );
         error(  ' '  );
      end
      % here every option has been recognised, so the k-th match refers to
      % the k-th element of <options>
      id = find( any( repmat( just_id, numel(row), 1              ) == ...
                      repmat( row    , 1         , numel(just_id) )    , 2 ) );
      if length(id)>1
         fprintf(                                                    ...
            2 ,                                                      ...
            [                                                        ...
               'error %s: in 3rd argument <options>.\nThe options '  ...
               '"%s" and "%s" are mutually exclusive\n'              ...
            ] ,                                                      ...
            where ,                                                  ...
            options{ id(1) } ,                                       ...
            options{ id(2) }                                         ...
         );
         error(  ' '  );
      end
   else
      fprintf(                                                       ...
         2 ,                                                         ...
         [                                                           ...
            'error %s: the 3rd argument <options> must be a string ' ...
            'or a cell-array of strings.\n'                          ...
         ] ,                                                         ...
         where                                                       ...
      );
      fprintf(                                                       ...
         2 ,                                                         ...
         [                                                           ...
            'error: <options> is of type "%s" instead of "char" '    ...
            'or "cell" .\n'                                          ...
         ] ,                                                         ...
         class( options )                                            ...
      );
      error(  ' '  );
   end
end

% text clean-up requested by the options (applied before the row splitting)
if any( strcmp( options ,  'un_wordwrap'  ) )
   txt = un_wordwrap( txt );
end

if any( strcmp( options ,  'unique_spaced'  ) )
   txt = unique_spaced( txt );
end

% first elements must be the end-line characters
% separator final text (sft) indicates what to do with the separator
% during the splitting of <txt> in the cell-array of rows <jtc>:
%   1    erase the separator
%   0    move the separator at the begin of the next row
%  -1    leave the separator at the end of the current row
% please notice that:
%   (sft>=0)   is the number of characters to remove from the current row
%   (sft==0)   is the number of characters to add to the next row
sep_list = { sprintf('\n\r') , sprintf('\t\v \f') , '-/' };
sft_list = { 1               , 1                  , -1   };
s        = txt;

n        = max_length;

% justified text cell-array
jtc      = {};

% the loop stops when <id> is returned empty or zero, that is when the
% string <s> has been completely parsed
[id,sft] = next( s , n , sep_list , sft_list );
while id
   jtc{ end+1 } = s(1:id-(sft>=0));
   s            = s(id+(sft~=0):end);
   [ id , sft ] = next( s , n , sep_list , sft_list );
end
jtc{ end+1 }    = s;

% look for an alignment option ( <row> is not used )
[ row , col ]   = find(                                             ...
   strcmp(                                                          ...
      repmat(            options             , 3, 1              ), ...
      repmat( { 'left' ; 'right' ; 'center' }, 1, numel(options) )  ...
   )                                                                ...
);

if ~isempty( col )
   jtc = cellstr( cellstr_just( jtc, options{col(1)} ) );
elseif any( strcmp( options,  'justified'  ) )
   jtc = justify( jtc , n );
end

% rows are joined with \n: no terminator is appended after the last row
justified_txt = [  sprintf(  '%s\n'  , jtc{1:end-1} )  jtc{end}  ];



function jstr = cellstr_just( cellstring , opt )
   % Align the rows of <cellstring> by means of strjust and return them as a
   % character matrix (one row per cell).
   %
   % <cellstring>  cell-array of strings, converted with char()
   % <opt>         alignment mode forwarded to strjust
   padded = char( cellstring );
   if strcmp( opt ,  'left'  )
      % a non-blank guard column is prepended before calling strjust and
      % dropped afterwards
      guard    = repmat( char(1) , numel(cellstring), 1 );
      shielded = strjust( [ guard padded ] , opt );
      jstr     = shielded( : , 2:end );
   else
      jstr     = strjust( padded , opt );
   end



function [id,sft] = next( s , n , sep_list , sft_list )
   % Locate the position where the next row has to be cut from <s>.
   %
   % <s>         text still to be split
   % <n>         maximum row length
   % <sep_list>  cell-array of separator sets (each one a string), sorted by
   %             priority; the first set must hold the end-line characters
   % <sft_list>  cell-array with the action associated to each set:
   %             1 erase the separator, -1 keep it at the end of the row
   %
   % <id>   position of the cut ( 0 when <s> is empty )
   % <sft>  action to apply at <id> ( 0: plain cut, no separator was found )
   if numel( s ) <= n % it is the last chunk: it fits in a single row
      % an end-line character must still break the row, exactly as it does
      % in the longer chunks handled below
      brk = find( ismember( s , sep_list{1} ) );
      if numel( brk )
         id  = brk(1);
         sft = sft_list{1};
      else
         id  = numel( s );
         sft = -1;
      end
      return
   end
   for i=1:numel(sep_list)
      % (sft_list{i}>=0) is the number of characters to remove from
      % the current row: an erased separator may sit just after the row
      maxlen = min( numel( s ), n+(sft_list{i}>=0) );
      % ismember is used instead of a regexp character class, so that the
      % separators are always taken literally
      sid    = find( ismember( s(1:maxlen) , sep_list{i} ) );
      if numel( sid )
         sft = sft_list{i};
         if i==1  % special: end-line characters break at their first match
            id = sid(1);
         else     % the other separators break as late as possible
            id = sid(end);
         end
         return
      end
   end
   % no separator available: cut the row at <n> characters
   % ( here numel( s ) > n, therefore n+1 is a valid position )
   sft = 0;
   id  = n+1;



function justified = justify( cell_txt , n )
   % Stretch each row of <cell_txt> to exactly <n> characters by inserting
   % extra spaces before randomly chosen inner spaces.
   %
   % <cell_txt>  cell-array of strings (the rows of the text)
   % <n>         target row length
   %
   % The following rows are returned unchanged:
   %  - rows without spaces after their leading indentation;
   %  - rows already <n> characters long, or longer;
   %  - rows whose missing part exceeds one third of <n>.
   % The positions which receive the extra spaces are selected with
   % randperm, hence the result depends on the state of the random generator.
   justified = cell_txt;
   for i=1:length(cell_txt)
      ci      = cell_txt{i};
      lc      = length( ci );
      missing = n-lc;               % number of spaces to insert
      id      = find( ci ==  ' '  );

      % skip the customized beginning spacing: only the spaces which follow
      % the first non-space character can be stretched
      first   = find( diff([0 id])>1 );
      if isempty( first )
         continue;
      end
      id      = id( first(1):end );
      lid     = length( id );

      % skip justification for full, over-long or too short rows
      if missing <= 0 || missing*3 > n
         continue;
      end

      % deal the missing spaces among the stretchable ones: each of them
      % receives floor(missing/lid) spaces and the remaining ones go to
      % the first elements of a random permutation
      p       = randperm( lid );
      extra   = repmat( floor( missing/lid ) , 1 , lid );
      lucky   = p( 1:rem( missing , lid ) );
      extra( lucky ) = extra( lucky )+1;

      % <o> is the position of each character of <ci> in the stretched row
      o       = zeros( 1, lc );
      o(id)   = extra;
      o       = cumsum( o+1 );
      tt      = repmat(  ' '  , 1 , n );
      tt(o)   = ci;
      justified{i} = tt;
   end



% Remove the spaces of every line made of spaces only, keeping its line
% terminator. Lines are delimited by \n; the text is returned as a row vector.
function txt = erase_empty_lines( txt )
   ret        = sprintf(  '\n'  );  % end-line character
   txt        = txt(:)';            % always work on a row vector
   total      = numel( txt );

   % each line ends at a \n; the last one ends at a virtual terminator
   % placed just after the text
   line_end   = [ find( txt == ret )  total+1 ];
   line_begin = [ 1  line_end(1:end-1)+1 ];

   keep       = true( 1, total );   % characters which survive the erasing
   for k = 1:numel( line_end )
      span = line_begin(k):line_end(k)-1;  % the line without its terminator
      if ~isempty( span ) && all( txt(span) ==  ' '  )
         keep(span) = false;
      end
   end

   txt        = txt( keep );



% Undo a previous word-wrapping of <txt>.
% The spaces of the lines filled with spaces only are erased first (see
% erase_empty_lines), then every isolated \n is replaced by a space.
% The following \n are preserved:
%   - a \n immediately followed by another \n, or ending the text;
%   - a \n immediately preceded by one of the characters '.', '?', '!';
%   - a \n immediately preceded by a preserved \n.
% The character char(1) is used as a temporary mark for the \n to preserve:
% any char(1) of the text is therefore returned as a \n.
function txt = un_wordwrap( txt )
   txt = erase_empty_lines( txt );
   ret = sprintf(  '\n'  );
   idx = find( txt == ret );                   % positions of all the \n
   % a \n is selected when the next \n is adjacent to it; the appended
   % numel(txt)+1 selects also a \n which is the last character of the text
   id  = idx( diff([idx numel(txt)+1]) == 1 ); % preserve only duplicated \n
   txt( id ) = char( 1 );                      % and sign them

   % preserve sentences
   id_end = idx( idx > 1 )-1;   % characters placed just before each \n
   % each column of the comparison tests one of those characters against
   % the three sentence terminators
   pos    = find(                                      ...
      any(                                             ...
         repmat( txt(id_end)  , 3 , 1             ) == ...
         repmat( [  '.?!'  ]' , 1 , numel(id_end) )  , ...
         1                                             ...
      )                                                ...
   );
   id_end = id_end(pos);
   txt( id_end+1 ) = char(1);   % sign the \n which follows a terminator

   % avoid first-letters to be spaces, preserving paragraphs
   % ( a \n which follows a signed one is signed too, otherwise it would
   %   become a space at the begin of the next line )
   txt = strrep( txt, [char(1) ret], char([1 1]) );

   txt( txt == ret     ) =  ' '  ; % replace non-duplicated \n with spaces
   txt( txt == char(1) ) =  ret  ; % restore the signed \n



% Remove all duplicated spaces, and all starting and ending spaces of <txt>,
% leaving untouched the other "blank" characters ( \t, \n, \r, ...).
% An empty <txt>, or a <txt> made of spaces only, yields an empty string.
function txt = unique_spaced( txt )
   id  = find( txt ==  ' '  );
   % erase every space which follows another space, or which starts the text
   % ( the leading 0 makes a space at position 1 look like a duplicate )
   txt( id( diff([0 id])==1 ) ) =  ''  ;
   % at most one ending space is left; the text may be empty at this point,
   % so it is checked before reading its last character
   if ~isempty( txt ) && txt(end) ==  ' '
      txt = txt(1:end-1);
   end



% Local Variables:
% mode:mastrave
% End:

