File: upperclass_spans.php

Recommend this page to a friend!
upperclass_spans.php
File:	`upperclass_spans.php`
Role:	Auxiliary script
Content type:	`text/plain`
Description:	Auxiliary script
Class:	Sweeper Clean HTML to remove unwanted tags and attributes
Author:	By Jill Lingoff
Last change:
Date:	6 years ago
Size:	`3,071 bytes`
Download

<?php



// Rewrites the text inside every <span style="text-transform:uppercase"> element
// of the HTML file named in $file from upper case to title case, prints a
// before/after table of each span, and saves the result back to the same file.
//
// Should be good enough for names in French bibliographies, although it does
// not handle stop words.
// ex. AUSTRALIAN GOVERNMENT DEPARTMENT OF HEALTH AND AGEING => Australian Government Department Of Health And Ageing

/**
 * Converts a fragment of (typically all-caps) text to title case.
 *
 * Rules, applied byte by byte:
 *  - A "word" is a run of ASCII letters; its first letter is upper-cased and
 *    the remaining letters are lower-cased.
 *  - A word starting with "Mc" keeps the letter after the "c" upper-cased
 *    (MCDONALD => McDonald).
 *  - A well-formed character reference (&name; / &#123; / &#x1F;) is
 *    lower-cased as a whole and does not end the current word, so
 *    SANT&Eacute; becomes Sant&eacute;.
 *  - Every other byte is copied unchanged and ends the current word.
 *
 * NOTE(review): non-ASCII bytes (e.g. literal UTF-8 accented letters) are
 * treated as non-letters; the input is presumably expected to carry accents
 * as character references - confirm against the files being processed.
 *
 * @param string $text Text taken from inside an uppercase span.
 * @return string The title-cased text.
 */
function upperclass_spans_title_case($text)
{
    $result = '';
    $in_word = false;   // currently inside a run of ASCII letters
    $after_m = false;   // previous character was a word-initial "M"
    $after_mc = false;  // previous two characters were a word-initial "Mc"
    $length = strlen($text);
    $i = 0;

    while ($i < $length) {
        $char = $text[$i];

        // Only treat "&" as the start of a character reference when a complete
        // reference follows; a bare ampersand ("HEALTH & AGEING") must not
        // swallow the text after it.
        if ($char === '&' && preg_match('/&#?[A-Za-z0-9]+;/A', $text, $entity, 0, $i) === 1) {
            // Lower-casing the reference is intentional: it turns e.g.
            // &Eacute; into &eacute; so the referenced letter is lower case too.
            $result .= strtolower($entity[0]);
            $i += strlen($entity[0]);
            continue;
        }

        $is_letter = preg_match('/[A-Za-z]/', $char) === 1;

        if (!$is_letter) {
            // Any non-letter ends the word and cancels a pending "M"/"Mc".
            $in_word = false;
            $after_m = false;
            $after_mc = false;
            $result .= $char;
        } elseif (!$in_word) {
            // First letter of a word.
            $in_word = true;
            $after_m = ($char === 'M' || $char === 'm');
            $result .= strtoupper($char);
        } elseif ($after_m) {
            // Second letter of a word that started with "M".
            $after_m = false;
            $after_mc = ($char === 'C' || $char === 'c');
            $result .= strtolower($char);
        } elseif ($after_mc) {
            // Letter right after "Mc" stays upper case.
            $after_mc = false;
            $result .= strtoupper($char);
        } else {
            $result .= strtolower($char);
        }

        $i++;
    }

    return $result;
}

$file = 'C:\wamp\www\sweeper\not-swept\im-014-fr.html';

// Guard the read: writing back after a failed read would truncate the file.
$contents = is_readable($file) ? file_get_contents($file) : false;

if ($contents === false) {
    print('Could not read ' . $file);
} else {
    $match_count = preg_match_all('/<span style="text-transform:uppercase">(.*?)<\/span>/is', $contents, $upperclass_span_matches, PREG_OFFSET_CAPTURE);

    if ($match_count === false) {
        print('Could not search ' . $file . ' for uppercase spans');
    } else {
        print('<table>');

        // Walk the matches from last to first so that replacing one span does
        // not invalidate the byte offsets of the spans still to be processed.
        for ($counter = $match_count - 1; $counter > -1; $counter--) {
            $whole_span = $upperclass_span_matches[0][$counter][0];
            $span_offset = $upperclass_span_matches[0][$counter][1];
            $span_content = $upperclass_span_matches[1][$counter][0];

            $new_span_content = upperclass_spans_title_case($span_content);

            print('<tr>

<th align="left">' . $span_content . '</th>

<td>');

            print($new_span_content . '</td>

</tr>

');

            $contents = substr_replace(
                $contents,
                '<span style="text-transform:uppercase">' . $new_span_content . '</span>',
                $span_offset,
                strlen($whole_span)
            );
        }

        print('</table>');

        // Nothing to save when no span was found.
        if ($match_count > 0 && file_put_contents($file, $contents) === false) {
            print('Could not write ' . $file);
        }
    }
}



?>
About us
Advertise on this site
For more information send a message to info at phpclasses dot org.
File: upperclass_spans.php

Contents