User:Kate/SQL

From Wikipedia, the free encyclopedia

useful things...

find all pages linking to X but not Y or Z:

SELECT l1.l_from, cur_title, cur_namespace, cur_id 
  FROM links AS l1, cur 
  LEFT JOIN links AS l2 ON
        l1.l_from = l2.l_from 
    AND l2.l_to in (Y, Z)
WHERE l2.l_from IS NULL
  AND l1.l_to = X
  AND cur_id = l1.l_from 
  AND cur_namespace = 0 
  AND cur_is_redirect = 0;

find pages linking to both X and Y:

SELECT cur_namespace, cur_title, cur_id, l1.l_from, l1.l_to, l2.l_from, l2.l_to 
  FROM cur, links l1, links l2 
WHERE l1.l_from = cur_id 
  AND l2.l_from = cur_id 
  AND l1.l_to = X 
  AND l2.l_to = Y 
  AND cur_namespace = 0;

find unreverted edits from an IP range or user:

SELECT cur_title 
  FROM cur, recentchanges 
  WHERE rc_cur_id = cur_id 
    AND rc_namespace = 0 
    AND rc_timestamp > '20040911200000' 
    AND rc_user_text LIKE '205.188.%' 
    AND cur_user_text = rc_user_text;


Hemanshu's query[edit]

mysql> select cur_title from cur where cur_namespace = 4;
+----------------------------------------------------------------------------------+
| cur_title                                                                        |
+----------------------------------------------------------------------------------+
|                                                                                  |
| !अनाथ_लेख                                                          |
| All_pages_by_title                                                               |
| All_system_messages                                                              |
| Alphabetical_index                                                               |
| Bureaucrat_log                                                                   |
| Community_Portal                                                                 |
| Copyrights                                                                       |
| Deletion_log                                                                     |
| How_does_one_edit_a_page                                                         |
| IRC_चॅनल                                                                 |
| IRC_चेनल                                                                 |
| Long_articles                                                                    |
| Multilingual_coordination                                                        |
| Recent_changes                                                                   |
| Recentchanges                                                                    |
| Request_for_adminship                                                            |
| Requests_for_Adminship                                                           |
| Requests_for_adminship                                                           |
| Setting_up_your_browser_for_Indic_scripts                                        |
| Sources                                                                          |
| TODO_List                                                                        |
| Upload_log                                                                       |
| अक्सर_पूछे_जाने_वाले_सवाल              |
| अबाउट                                                                  |
| क्या_होता_है_विकिपीडिया_का_लेख |
| गाँव_का_पम्प                                                 |
| घोशना                                                                  |
| घोशनाएँ                                                            |
| चुनाव                                                                  |
| धूल_का_डब्बा                                                 |
| नया_लेख_कैसे_बनायें                              |
| नीती_वाचनालय                                               |
| प्रबन्धक                                                         |
| प्रयोगस्थल                                                   |
| प्रयोगस्‍थल                                                |
| रेफ़रन्स_डेस्क                                            |
| लेख_कैसे_बदलें                                           |
| विकिपिडियन                                                   |
| विकिपीडिया_के_दोस्त                            |
| विकिपीडिया_के_बारे_में                     |
| विभागों_की_सूची                                        |
| सहायता                                                               |
| स्वागत,_नये_आनेवालों                           |
| हटाने_के_मत                                                    |
| हटाने_के_लिये_मत                                       |
| हमें_सम्पर्क_करें                                  |
+----------------------------------------------------------------------------------+
47 rows in set (0.01 sec)

Mark's query[edit]

mysql> select old_user_text,old_namespace,old_title,count(*) as number from old where old_user_text like '202.67.%' group by old_namespace,old_title;
+----------------+---------------+-------------------------------------------+--------+
| old_user_text  | old_namespace | old_title                                 | number |
+----------------+---------------+-------------------------------------------+--------+
| 202.67.71.49   |             0 | 1904_in_science                           |      1 |
| 202.67.238.250 |             0 | 29_(number)                               |      1 |
| 202.67.238.250 |             0 | 666_(number)                              |      1 |
| 202.67.71.49   |             0 | 70_(number)                               |      1 |
| 202.67.71.28   |             0 | A._S._Byatt                               |      8 |
| 202.67.65.166  |             0 | April_19                                  |      2 |
| 202.67.101.89  |             0 | Arthur_Evans                              |      1 |
| 202.67.64.154  |             0 | Australia                                 |      2 |
| 202.67.64.139  |             0 | Australia/Foreign_relations               |      2 |
| 202.67.121.206 |             0 | Australian_Rules_Football                 |      1 |
| 202.67.238.250 |             0 | Caesar_Augustus                           |      1 |
| 202.67.82.116  |             0 | Canon                                     |      1 |
| 202.67.64.154  |             0 | Carl_Auer_von_Welsbach                    |      1 |
| 202.67.181.225 |             0 | Central_and_Western_district_of_Hong_Kong |      1 |
| 202.67.71.49   |             0 | Chaperon                                  |      1 |
| 202.67.121.67  |             0 | Christianity                              |      1 |
| 202.67.64.154  |             0 | Communications_in_Australia               |      1 |
| 202.67.118.247 |             0 | Debbie_Reynolds                           |      1 |
| 202.67.64.155  |             0 | Deconstructionism                         |      1 |
| 202.67.238.251 |             0 | Dog_Latin                                 |      1 |
| 202.67.181.225 |             0 | Eastern_District                          |      1 |
| 202.67.64.154  |             0 | Erotica                                   |      1 |
| 202.67.238.250 |             0 | F-16_Fighting_Falcon                      |      1 |
| 202.67.65.166  |             0 | Football_(soccer)                         |      4 |
| 202.67.69.28   |             0 | Ford_Telstar                              |      1 |
| 202.67.71.49   |             0 | GNU                                       |      2 |
| 202.67.92.20   |             0 | Geography_of_Brunei                       |      1 |
| 202.67.64.155  |             0 | Glass                                     |      1 |
| 202.67.64.155  |             0 | Governor-General_of_Australia             |      1 |
| 202.67.238.250 |             0 | HKCEE                                     |      2 |
| 202.67.238.251 |             0 | Heathrow_Express                          |      1 |
| 202.67.238.251 |             0 | IPX                                       |      2 |
| 202.67.238.251 |             0 | Internet_Protocol                         |      1 |
| 202.67.64.156  |             0 | Iroquois                                  |      1 |
| 202.67.64.154  |             0 | Jack_Lang_(Australia)                     |      1 |
| 202.67.238.251 |             0 | Japan_Airlines_Flight_123                 |      1 |
| 202.67.64.154  |             0 | John_Howard                               |      1 |
| 202.67.64.154  |             0 | Kalamunda_National_Park                   |      1 |
| 202.67.92.xxx  |             0 | Kathryn_Janeway                           |      2 |
| 202.67.118.247 |             0 | Kiss_Me,_Kate                             |      1 |
| 202.67.238.250 |             0 | Larry_Wall                                |      1 |
| 202.67.68.xxx  |             0 | Law                                       |      2 |
| 202.67.121.206 |             0 | List_of_Australians                       |      1 |
| 202.67.238.250 |             0 | List_of_programming_languages             |      1 |
| 202.67.238.250 |             0 | List_of_space_disasters                   |      1 |
| 202.67.64.154  |             0 | Louis_XVI_of_France                       |      2 |
| 202.67.91.18   |             0 | Marcus_Vipsanius_Agrippa                  |      1 |
| 202.67.64.143  |             0 | Matthias_Jakob_Schleiden                  |      1 |
| 202.67.238.250 |             0 | Menuet                                    |      1 |
| 202.67.64.155  |             0 | Mickey_Mouse                              |      2 |
| 202.67.64.156  |             0 | Microcomputer                             |      1 |
| 202.67.121.67  |             0 | Mormonism_and_Christianity                |      5 |
| 202.67.64.155  |             0 | Nimrod_Theatre_Company                    |      1 |
| 202.67.64.154  |             0 | Nitroglycerin                             |      1 |
| 202.67.97.43   |             0 | November_19                               |      1 |
| 202.67.64.155  |             0 | Nylon                                     |      1 |
| 202.67.238.250 |             0 | PS2                                       |      1 |
| 202.67.82.116  |             0 | Parallel_port                             |      1 |
| 202.67.238.250 |             0 | Perl                                      |      1 |
| 202.67.64.154  |             0 | Perth                                     |      1 |
| 202.67.111.176 |             0 | Perth,_Australia                          |      5 |
| 202.67.64.141  |             0 | Perth_WA                                  |      1 |
| 202.67.238.251 |             0 | Politics_of_Taiwan                        |      1 |
| 202.67.64.156  |             0 | Praseodymium                              |      1 |
| 202.67.71.247  |             0 | Protectorate_of_Bohemia_and_Moravia       |      1 |
| 202.67.82.116  |             0 | Qantas                                    |      1 |
| 202.67.64.154  |             0 | Racial_policy_of_Nazi_Germany             |      1 |
| 202.67.101.73  |             0 | Rainbow                                   |      1 |
| 202.67.238.250 |             0 | Reflexology                               |      1 |
| 202.67.71.49   |             0 | Seventy,_Preisthood_Office                |      1 |
| 202.67.238.250 |             0 | Square_number                             |      1 |
| 202.67.118.247 |             0 | Talkie                                    |      1 |
| 202.67.71.49   |             0 | Tamworth,_New_South_Wales                 |      6 |
| 202.67.84.235  |             0 | Terra_Australis                           |      1 |
| 202.67.103.230 |             0 | Terrorism                                 |      1 |
| 202.67.181.225 |             0 | The_Good_News_Bible                       |      2 |
| 202.67.64.155  |             0 | Timeline_of_communication_technology      |      2 |
| 202.67.64.155  |             0 | Timeline_of_computing_500_BC-1949         |      1 |
| 202.67.238.251 |             0 | Transportation_in_Hong_Kong               |      1 |
| 202.67.64.155  |             0 | Triad                                     |      1 |
| 202.67.80.185  |             0 | United_States                             |      1 |
| 202.67.238.251 |             0 | University_of_Hong_Kong                   |      2 |
| 202.67.122.36  |             0 | Uru:_Ages_Beyond_Myst                     |      2 |
| 202.67.181.225 |             0 | Wan_Chai_District                         |      1 |
| 202.67.64.155  |             0 | Western_Australia                         |      2 |
| 202.67.64.147  |             0 | White_Australia_policy                    |      1 |
| 202.67.99.xxx  |             0 | Wikipedia_chat                            |      1 |
| 202.67.80.185  |             1 | Fag                                       |      1 |
| 202.67.65.166  |             1 | Go_(board_game)                           |      5 |
| 202.67.64.141  |             1 | Human_sexual_behavior                     |      1 |
| 202.67.68.137  |             1 | Microsoft                                 |      1 |
| 202.67.65.165  |             1 | Narcissism                                |      1 |
| 202.67.64.141  |             1 | Nicole_Kidman                             |      1 |
| 202.67.68.87   |             1 | Pi                                        |      1 |
| 202.67.97.43   |             1 | Prime_Minister_of_Australia               |      1 |
| 202.67.111.247 |             1 | Western_Australia                         |      1 |
| 202.67.199.249 |             1 | Yuen_Long                                 |      4 |
| 202.67.238.250 |             2 | Kelvin                                    |      1 |
| 202.67.80.146  |             2 | Mark                                      |      1 |
| 202.67.64.141  |             3 | Runegirl                                  |      1 |
| 202.67.238.251 |             4 | Create_a_new_language_in_Wikipedia        |      3 |
| 202.67.64.155  |             4 | Votes_for_deletion_archive_May_2004       |      1 |
+----------------+---------------+-------------------------------------------+--------+
102 rows in set (23.76 sec)

Joy's query[edit]

mysql> select cur_namespace,cur_title,cur_id,l1.l_from,l1.l_to,l2.l_from,l2.l_to from cur, links l1, links l2 where l1.l_from=cur_Id and l2.l_from=cur_id and l1.l_to=392667 and l2.l_to = 390292 and cur_namespace=0;
+---------------+--------------------------------------------------------+--------+--------+--------+--------+--------+
| cur_namespace | cur_title                                              | cur_id | l_from | l_to   | l_from | l_to   |
+---------------+--------------------------------------------------------+--------+--------+--------+--------+--------+
|             0 | Cannibalism                                            |   5658 |   5658 | 392667 |   5658 | 390292 |
|             0 | Exurb                                                  | 158557 | 158557 | 392667 | 158557 | 390292 |
|             0 | Rocketdyne_Santa_Susana_Field_Laboratory_Contamination | 321219 | 321219 | 392667 | 321219 | 390292 |
|             0 | List_of_Arab_localities_in_Palestine_1948              | 337611 | 337611 | 392667 | 337611 | 390292 |
|             0 | Christians_in_Iran                                     | 400285 | 400285 | 392667 | 400285 | 390292 |
|             0 | Military_action_in_Lebanon                             | 447962 | 447962 | 392667 | 447962 | 390292 |
|             0 | Israel_and_the_United_Nations                          | 479389 | 479389 | 392667 | 479389 | 390292 |
|             0 | World_War_II_evacuation_and_expulsion                  | 501554 | 501554 | 392667 | 501554 | 390292 |
|             0 | Centre_Against_Expulsions                              | 583450 | 583450 | 392667 | 583450 | 390292 |
|             0 | Katerina_Mavromatis                                    | 917264 | 917264 | 392667 | 917264 | 390292 |
+---------------+--------------------------------------------------------+--------+--------+--------+--------+--------+
10 rows in set (0.00 sec)
 

Links list with titles, ns0 only[edit]

You should delete the three tables on Bacon when you are finished with them. Queries weren't run in a transaction so there may be a few nulls due to records being added or moved in cur between the first and last queries.

create table james_kate_cur (
  jc_id int(8) unsigned not null,
  jc_title varchar(255) binary not null default '',
  unique key id (jc_id)
) type=InnoDB;

insert into james_kate_cur (jc_id, jc_title) select cur_id, cur_title from cur;
-- Query OK, 965202 rows affected (58.83 sec)
-- Records: 965202  Duplicates: 0  Warnings: 0

analyze table james_kate_cur;

create table james_kate_ns (
  jn_id int(8) unsigned not null,
  jn_ns tinyint(2) unsigned NOT NULL,
  unique key id (jn_id)
) type=InnoDB;

insert into james_kate_ns (jn_id, jn_ns) select cur_id, cur_namespace from cur;
-- Query OK, 965210 rows affected (13.51 sec)
-- Records: 965210  Duplicates: 0  Warnings: 0

analyze table james_kate_ns;

create table james_kate_links (
  jl_from int(8) unsigned not null,
  jl_to int(8) unsigned not null,
  jl_from_title varchar(255) binary not null default '',
  jl_to_title varchar(255) binary not null default '',
  unique key from_to (jl_from, jl_to)
) type=InnoDB;

insert into james_kate_links (jl_from, jl_to, jl_from_title, jl_to_title)
select l_from, l_to, curfrom.jc_title, curto.jc_title
from james_kate_ns as nsfrom, james_kate_ns as nsto,
     links,
     james_kate_cur as curfrom, james_kate_cur as curto
where 
     nsfrom.jn_id = l_from
and  nsfrom.jn_ns = 0
and  nsto.jn_id = l_to
and  nsto.jn_ns = 0
and  curfrom.jc_id = l_from
and  curto.jc_id = l_to
;
-- Query OK, 8066058 rows affected (5 min 35.85 sec)
-- Records: 8066058  Duplicates: 0  Warnings: 0